[ros-dev] [ros-diffs] [tkreuzer] 42353: asm version of DIB_32BPP_ColorFill: - Add frame pointer - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned - Optimize the loop - Add comments
Timo Kreuzer
timo.kreuzer at web.de
Mon Aug 3 14:35:10 CEST 2009
That would be a few lines, wouldn't it?
Ok, let me do the work for you.
And now compile and show me how the loop would be optimized anywhere
near the asm code.
Or can you do better?
/*
 * Fills the rectangle *prcl of the 32bpp surface *pso with the pixel value
 * iColor, one scanline at a time.
 *
 * pso    - surface; pvScan0 is the address of the first scanline and lDelta
 *          is the distance in bytes from one scanline to the next.
 * prcl   - rectangle to fill; right and bottom are exclusive.
 * iColor - 32-bit pixel value stored into every pixel of the rectangle.
 *
 * Always returns TRUE. An empty or inverted rectangle is a no-op.
 */
BOOLEAN
DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor)
{
    LONG lDelta, cx, cy, x;
    PCHAR pjLine;
    PULONG pulLine;

    /* Extents are signed on purpose: with unsigned variables an inverted
       rectangle would wrap to a huge positive count instead of being
       rejected by the <= 0 checks. */
    cx = prcl->right - prcl->left;
    if (cx <= 0)
        return TRUE;

    cy = prcl->bottom - prcl->top;
    if (cy <= 0)
        return TRUE;

    /* The stride is kept signed and applied in bytes, so a negative stride
       (if the surface uses one) steps backwards correctly. */
    lDelta = pso->lDelta;
    pjLine = (PCHAR)pso->pvScan0 + prcl->top * lDelta + prcl->left * 4;

    do
    {
        /* memset() cannot be used here: it stores a single byte value and
           takes a byte count, while each pixel is a full 32-bit value. */
        pulLine = (PULONG)pjLine;
        for (x = 0; x < cx; x++)
            pulLine[x] = iColor;

        pjLine += lDelta;
        cy--;
    } while (cy > 0);

    return TRUE;
}
Aleksey Bragin schrieb:
> "in a few lines" - and what about using the same algorithm you used
> in this assembly, but without pretending to be a compiler?
>
>
> WBR,
> Aleksey.
>
> On Aug 3, 2009, at 7:31 AM, Timo Kreuzer wrote:
>
>> I hereby challenge you to provide portable C code, that - compiled
>> with gcc - is faster than this assembly code.
>> Should be done in a few lines.
>>
>> I bet my ass on it: You will fail! No matter what optimization you
>> choose.
>> You would also fail with msvc or Intel compiler.
>>
>> Regards,
>> Timo
>>
>> Alex Ionescu wrote:
>>> The version that GCC 4.4 and CL 15 will generate would be way more
>>> optimized
>>> than this unportable/slower assembly code.
>>> This isn't 1994 anymore. You can't beat the compiler anymore.
>>>
>>> Best regards,
>>> Alex Ionescu
>>>
>>>
>>> On Sun, Aug 2, 2009 at 3:31 PM, <tkreuzer at svn.reactos.org> wrote:
>>>
>>>
>>>> Author: tkreuzer
>>>> Date: Mon Aug 3 00:31:29 2009
>>>> New Revision: 42353
>>>>
>>>> URL: http://svn.reactos.org/svn/reactos?rev=42353&view=rev
>>>> Log:
>>>> asm version of DIB_32BPP_ColorFill:
>>>> - Add frame pointer
>>>> - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned
>>>> - Optimize the loop
>>>> - Add comments
>>>>
>>>> Modified:
>>>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>>>
>>>> Modified:
>>>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>>> URL:
>>>> http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s?rev=42353&r1=42352&r2=42353&view=diff
>>>>
>>>>
>>>> ==============================================================================
>>>>
>>>> ---
>>>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>>> [iso-8859-1] (original)
>>>> +++
>>>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>>> [iso-8859-1] Mon Aug 3 00:31:29 2009
>>>> @@ -4,78 +4,62 @@
>>>> * FILE:
>>>> subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.c
>>>> * PURPOSE: ASM optimised 32bpp ColorFill
>>>> * PROGRAMMERS: Magnus Olsen
>>>> + * Timo Kreuzer (timo.kreuzer at rectos.org)
>>>> */
>>>>
>>>> - .globl _DIB_32BPP_ColorFill
>>>> - .intel_syntax noprefix
>>>> +.intel_syntax noprefix
>>>>
>>>> - .def _DIB_32BPP_ColorFill;
>>>> - .scl 2;
>>>> - .type 32;
>>>> - .endef
>>>> -
>>>> - _DIB_32BPP_ColorFill:
>>>> - sub esp, 24
>>>> - mov ecx, [esp+32]
>>>> - mov [esp+8], ebx
>>>> - mov ebx, [esp+28]
>>>> - mov [esp+20], ebp
>>>> - mov ebp, [esp+36]
>>>> - mov [esp+12], esi
>>>> - mov [esp+16], edi
>>>> - mov edi, [ecx]
>>>> - mov esi, [ecx+8]
>>>> - mov edx, [ebx+36]
>>>> - sub esi, edi
>>>> - mov edi, [ecx+4]
>>>> - mov eax, edi
>>>> - imul eax, edx
>>>> - add eax, [ebx+32]
>>>> - mov ebx, [ecx]
>>>> - lea eax, [eax+ebx*4]
>>>> - mov [esp+4], eax
>>>> - mov eax, [ecx+12]
>>>> - cmp eax, edi
>>>> - jbe end
>>>> - sub eax, edi
>>>> - mov [esp], eax
>>>> - lea esi, [esi+0]
>>>> +/*
>>>> + * BOOLEAN
>>>> + * _cdecl
>>>> + * DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor);
>>>> +*/
>>>>
>>>> - for_loop:
>>>> - mov eax, ebp
>>>> - cld
>>>> - mov ebx, esi
>>>> - mov edi, [esp+4]
>>>> - test edi, 3
>>>> - jnz algin_draw
>>>> - mov ecx, esi
>>>> - rep stosd
>>>> - add [esp+4], edx
>>>> - dec dword ptr [esp]
>>>> - jnz for_loop
>>>> - end:
>>>> - mov ebx, [esp+8]
>>>> - mov eax, 1
>>>> - mov esi, [esp+12]
>>>> - mov edi, [esp+16]
>>>> - mov ebp, [esp+20]
>>>> - add esp, 24
>>>> - ret
>>>> +.globl _DIB_32BPP_ColorFill
>>>> +_DIB_32BPP_ColorFill:
>>>> + push ebp
>>>> + mov ebp, esp
>>>> + push ebx
>>>> + push esi
>>>> + push edi
>>>> + sub esp, 4 /* Space for lDelta */
>>>>
>>>> - algin_draw:
>>>> - stosd
>>>> - dec ebx
>>>> - mov ecx, ebx
>>>> - rol eax, 16
>>>> - stosd
>>>> - add [esp+4], edx
>>>> - dec dword ptr [esp]
>>>> - jnz for_loop
>>>> + mov edx, [ebp+12] /* edx = prcl */
>>>> + mov ecx, [ebp+8] /* ecx = pso */
>>>>
>>>> - mov ebx, [esp+8]
>>>> - mov eax, 1
>>>> - mov esi, [esp+12]
>>>> - mov edi, [esp+16]
>>>> - mov ebp, [esp+20]
>>>> - add esp, 24
>>>> - ret
>>>> + mov ebx, [ecx+0x24] /* ebx = pso->lDelta; */
>>>> + mov [esp], ebx /* lDelta = pso->lDelta; */
>>>> + mov edi, [edx+4] /* edi = prcl->top; */
>>>> + mov eax, edi /* eax = prcl->top; */
>>>> + imul eax, ebx /* eax = prcl->top *
>>>> pso->lDelta; */
>>>> + add eax, [ecx+0x20] /* eax += pso->pvScan0; */
>>>> + mov ebx, [edx] /* ebx = prcl->left; */
>>>> + lea esi, [eax+ebx*4] /* esi = pvLine0 = eax + 4 *
>>>> prcl->left;
>>>> */
>>>> +
>>>> + mov ebx, [edx+8] /* ebx = prcl->right; */
>>>> + sub ebx, [edx] /* ebx = prcl->right -
>>>> prcl->left; */
>>>> + jbe end /* if (ebx <= 0) goto end; */
>>>> +
>>>> + mov edx, [edx+12] /* edx = prcl->bottom; */
>>>> + sub edx, edi /* edx -= prcl->top; */
>>>> + jbe end /* if (edx <= 0) goto end; */
>>>> +
>>>> + mov eax, [ebp+16] /* eax = iColor; */
>>>> + cld
>>>> +
>>>> +for_loop: /* do { */
>>>> + mov edi, esi /* edi = pvLine0; */
>>>> + mov ecx, ebx /* ecx = cx; */
>>>> + rep stosd /* memset(pvLine0, iColor,
>>>> cx); */
>>>> + add esi, [esp] /* pvLine0 += lDelta; */
>>>> + dec edx /* cy--; */
>>>> + jnz for_loop /* } while (cy > 0); */
>>>> +
>>>> +end:
>>>> + mov eax, 1
>>>> + add esp, 4
>>>> + pop edi
>>>> + pop esi
>>>> + pop ebx
>>>> + pop ebp
>>>> + ret
>>>>
>>>>
>>>>
>>>>
>>>
>>> _______________________________________________
>>> Ros-dev mailing list
>>> Ros-dev at reactos.org
>>> http://www.reactos.org/mailman/listinfo/ros-dev
>>
>> _______________________________________________
>> Ros-dev mailing list
>> Ros-dev at reactos.org
>> http://www.reactos.org/mailman/listinfo/ros-dev
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.reactos.org/pipermail/ros-dev/attachments/20090803/26e38f0a/attachment.htm
More information about the Ros-dev
mailing list