[ros-dev] [ros-diffs] [tkreuzer] 42353: asm version of DIB_32BPP_ColorFill: - Add frame pointer - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned - Optimize the loop - Add comments

Jose Catena jc1 at diwaves.com
Tue Aug 4 18:06:08 CEST 2009


Below is my C code, based on the C code previously shown here, and the
assembly generated by VC.
This function, like most, does not benefit much from asm coding, although
some cycles can be saved, most notably inside the loop (a cmp and an
additional branch in the VC-generated code).
Some algorithms can benefit a lot from asm, though: for example, the Fletcher
checksum, or incrementing/decrementing variables larger than the register
size, where using the carry flag can save many cycles. A function whose
execution time is very critical may also deserve asm coding, but I think in
this case it is not worth it, as the saving in percentage terms is tiny (any
compiler I know will use rep stosd for the inner loop, which has the largest
weight in the total time).


/*
 * Fill a rectangle of a 32 bpp surface with a single colour value.
 *
 * pso    - target surface; pvScan0 is used as the address of scanline 0 and
 *          lDelta as the byte distance between consecutive scanlines.
 * prcl   - rectangle to fill; columns left..right-1 of rows top..bottom-1
 *          are written. Nothing is written when right <= left or
 *          bottom <= top.
 * iColor - 32-bit value stored into every pixel of the rectangle.
 *
 * Always returns TRUE. The rectangle is not clipped against the surface
 * here.
 */
BOOLEAN DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor)
{
    LONG stride = pso->lDelta;

    /* Address of the first pixel to write: row 'top', column 'left'
       (4 bytes per pixel, hence the shift by 2). */
    char *row = (char *)((char *)pso->pvScan0 + prcl->top * stride +
                         (prcl->left << 2));

    LONG width = prcl->right - prcl->left;
    if (width <= 0)
    {
        return TRUE;
    }

    LONG height = prcl->bottom - prcl->top;
    if (height <= 0)
    {
        return TRUE;
    }

    /* Both extents are strictly positive from here on. */
    for (LONG y = 0; y < height; ++y)
    {
        ULONG *pixel = (ULONG *)row;
        ULONG *rowEnd = pixel + width;

        while (pixel != rowEnd)
        {
            *pixel++ = iColor;
        }

        row += stride;
    }

    return TRUE;
}


; MSVC-generated x86 listing (compile flags /Ogtpy) for DIB_32BPP_ColorFill.
;
; The '@' characters of the decorated name and of the compiler's local labels
; had been rewritten to " at " by the mail archive, and the trailing
; "; DIB_32BPP_ColorFill" comments had been wrapped onto their own lines;
; both are restored here. The instructions themselves are unchanged.
;
; NOTE(review): every operand that should come from pso/prcl is read from an
; absolute address (ds:4, ds:8, ds:12, ds:32, ds:36, [eax] with eax = 0) and
; the stored value is produced by "xor eax, eax". This looks like a copy of
; the function specialised for constant-zero arguments rather than the
; general routine -- confirm against the build that produced it.
;
; Register roles inside the loop (by correspondence with the C source):
;   esi = start of the current scanline (pulLine)
;   ebp = byte stride between scanlines (lDelta)
;   edx = pixels per row (cx)
;   ebx = rows remaining (cy)
PUBLIC	?DIB_32BPP_ColorFill@@YAEPAU_SURFOBJ@@PAU_RECTL@@K@Z ; DIB_32BPP_ColorFill
; Function compile flags: /Ogtpy
_TEXT	SEGMENT
?DIB_32BPP_ColorFill@@YAEPAU_SURFOBJ@@PAU_RECTL@@K@Z PROC ; DIB_32BPP_ColorFill
; Line 52
	mov	ecx, DWORD PTR ds:4		; ecx = top (presumably prcl->top)
; Line 54
	mov	edx, DWORD PTR ds:8		; edx = right (presumably prcl->right)
	push	ebp				; ebp is used as a plain register, not a frame pointer
	mov	ebp, DWORD PTR ds:36		; ebp = lDelta (presumably pso->lDelta)
	imul	ecx, ebp			; ecx = top * lDelta
	xor	eax, eax
	mov	eax, DWORD PTR [eax]		; eax = dword at address 0 (presumably prcl->left)
	push	esi
	lea	esi, DWORD PTR [ecx+eax*4]	; esi = top * lDelta + left * 4
	add	esi, DWORD PTR ds:32		; esi += pvScan0 (presumably) -> pulLine
	sub	edx, eax			; edx = cx = right - left
; Line 55
	test	edx, edx
; Line 56
	jle	SHORT $LN22@DIB_32BPP_		; cx <= 0: nothing to draw
	push	ebx
; Line 58
	mov	ebx, DWORD PTR ds:12		; ebx = bottom (presumably prcl->bottom)
	sub	ebx, DWORD PTR ds:4		; ebx = cy = bottom - top
; Line 59
	test	ebx, ebx
; Line 60
	jle	SHORT $LN21@DIB_32BPP_		; cy <= 0: nothing to draw
	push	edi
	npad	4				; pad so the loop head below is aligned
$LL18@DIB_32BPP_:
; Line 64
	dec	ebx				; one row fewer to go
; Line 66
	test	edx, edx			; redundant: cx > 0 was established above
	je	SHORT $LN2@DIB_32BPP_
	mov	ecx, edx			; ecx = dword count for rep stosd
	xor	eax, eax			; value stored is 0 (see NOTE(review) above)
	mov	edi, esi			; edi = destination = start of this row
	rep stosd				; store eax into ecx consecutive dwords at [edi]
$LN2@DIB_32BPP_:
	add	esi, ebp			; advance to the next scanline
	test	ebx, ebx
	jne	SHORT $LL18@DIB_32BPP_		; loop while rows remain
	pop	edi
$LN21@DIB_32BPP_:
	pop	ebx
$LN22@DIB_32BPP_:
	pop	esi
; Line 72
	mov	al, 1				; return value 1 (TRUE) in al
	pop	ebp
; Line 73
	ret	0				; no argument bytes popped by the callee
?DIB_32BPP_ColorFill@@YAEPAU_SURFOBJ@@PAU_RECTL@@K@Z ENDP ; DIB_32BPP_ColorFill
_TEXT	ENDS


In asm I would write the loop as:
	; Fill cy rows of _cx dwords each with iColor, starting at pulLine and
	; stepping lDelta bytes between rows.
	; Requires cy > 0 and _cx > 0 (the C code returns early otherwise) and
	; the direction flag clear so that stosd advances edi upwards.
	; NOTE(review): ebx and edi are callee-saved in the 32-bit conventions;
	; if this is not an MSVC __asm block (which is expected to preserve them
	; automatically), save and restore them around the loop.
	mov eax, iColor		; eax = fill value, constant for the whole fill
	mov ebx, pulLine	; ebx = start of the current scanline
	mov edx, cy		; edx = rows remaining
L1:
	mov edi, ebx		; full 32-bit destination pointer for this row
	mov ecx, _cx		; full 32-bit dword count (rep stosd consumes ecx)
	rep stosd		; store eax into ecx dwords at [edi]
	add ebx, lDelta		; step the line pointer, not the row counter
	dec edx
	jnz L1			; repeat until every row is filled



Jose Catena
DIGIWAVES S.L.






More information about the Ros-dev mailing list