[ros-dev] [ros-diffs] [tkreuzer] 42353: asm version of DIB_32BPP_ColorFill: - Add frame pointer - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned - Optimize the loop - Add comments

Aleksey Bragin aleksey at reactos.org
Mon Aug 3 10:11:28 CEST 2009


"in a few lines" - and what if about using the same algorithm you  
used in this assembly, but without pretending to be compiler?


WBR,
Aleksey.

On Aug 3, 2009, at 7:31 AM, Timo Kreuzer wrote:

> I hereby challenge you to provide portable C code, that - compiled  
> with gcc - is faster than this assembly code.
> Should be done in a few lines.
>
> I bet my ass on it: You will fail! No matter what optimization you  
> choose.
> You would also fail with msvc or Intel compiler.
>
> Regards,
> Timo
>
> Alex Ionescu wrote:
>> The version that GCC 4.4 and CL 15 will generate would be way more  
>> optimized
>> than this unportable/slower assembly code.
>> This isn't 1994 anymore. You can't beat the compiler anymore.
>>
>> Best regards,
>> Alex Ionescu
>>
>>
>> On Sun, Aug 2, 2009 at 3:31 PM, <tkreuzer at svn.reactos.org> wrote:
>>
>>
>>> Author: tkreuzer
>>> Date: Mon Aug  3 00:31:29 2009
>>> New Revision: 42353
>>>
>>> URL: http://svn.reactos.org/svn/reactos?rev=42353&view=rev
>>> Log:
>>> asm version of DIB_32BPP_ColorFill:
>>> - Add frame pointer
>>> - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned
>>> - Optimize the loop
>>> - Add comments
>>>
>>> Modified:
>>>    trunk/reactos/subsystems/win32/win32k/dib/i386/ 
>>> dib32bpp_colorfill.s
>>>
>>> Modified:
>>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>> URL:
>>> http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/win32/ 
>>> win32k/dib/i386/dib32bpp_colorfill.s? 
>>> rev=42353&r1=42352&r2=42353&view=diff
>>>
>>> ==================================================================== 
>>> ==========
>>> --- trunk/reactos/subsystems/win32/win32k/dib/i386/ 
>>> dib32bpp_colorfill.s
>>> [iso-8859-1] (original)
>>> +++ trunk/reactos/subsystems/win32/win32k/dib/i386/ 
>>> dib32bpp_colorfill.s
>>> [iso-8859-1] Mon Aug  3 00:31:29 2009
>>> @@ -4,78 +4,62 @@
>>>  * FILE:            subsystems/win32/win32k/dib/i386/ 
>>> dib32bpp_colorfill.c
>>>  * PURPOSE:         ASM optimised 32bpp ColorFill
>>>  * PROGRAMMERS:     Magnus Olsen
>>> + *                  Timo Kreuzer (timo.kreuzer at reactos.org)
>>>  */
>>>
>>> -  .globl _DIB_32BPP_ColorFill
>>> -  .intel_syntax noprefix
>>> +.intel_syntax noprefix
>>>
>>> -  .def   _DIB_32BPP_ColorFill;
>>> -  .scl 2;
>>> -  .type        32;
>>> -  .endef
>>> -
>>> -  _DIB_32BPP_ColorFill:
>>> -                        sub     esp, 24
>>> -                        mov     ecx, [esp+32]
>>> -                        mov     [esp+8], ebx
>>> -                        mov     ebx, [esp+28]
>>> -                        mov     [esp+20], ebp
>>> -                        mov     ebp, [esp+36]
>>> -                        mov     [esp+12], esi
>>> -                        mov     [esp+16], edi
>>> -                        mov     edi, [ecx]
>>> -                        mov     esi, [ecx+8]
>>> -                        mov     edx, [ebx+36]
>>> -                        sub     esi, edi
>>> -                        mov     edi, [ecx+4]
>>> -                        mov     eax, edi
>>> -                        imul    eax, edx
>>> -                        add     eax, [ebx+32]
>>> -                        mov     ebx, [ecx]
>>> -                        lea     eax, [eax+ebx*4]
>>> -                        mov     [esp+4], eax
>>> -                        mov     eax, [ecx+12]
>>> -                        cmp     eax, edi
>>> -                        jbe     end
>>> -                        sub     eax, edi
>>> -                        mov     [esp], eax
>>> -                        lea     esi, [esi+0]
>>> +/*
>>> + * BOOLEAN
>>> + * _cdecl
>>> + * DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor);
>>> +*/
>>>
>>> -               for_loop:
>>> -                        mov     eax, ebp
>>> -                        cld
>>> -                        mov     ebx, esi
>>> -                        mov     edi, [esp+4]
>>> -                        test    edi, 3
>>> -                        jnz     algin_draw
>>> -                        mov     ecx, esi
>>> -                        rep stosd
>>> -                        add     [esp+4], edx
>>> -                        dec     dword ptr [esp]
>>> -                        jnz     for_loop
>>> -               end:
>>> -                        mov     ebx, [esp+8]
>>> -                        mov     eax, 1
>>> -                        mov     esi, [esp+12]
>>> -                        mov     edi, [esp+16]
>>> -                        mov     ebp, [esp+20]
>>> -                        add     esp, 24
>>> -                        ret
>>> +.globl _DIB_32BPP_ColorFill
>>> +_DIB_32BPP_ColorFill:
>>> +        push    ebp
>>> +        mov     ebp, esp
>>> +        push    ebx
>>> +        push    esi
>>> +        push    edi
>>> +        sub     esp, 4            /* Space for lDelta */
>>>
>>> -               algin_draw:
>>> -                        stosd
>>> -                        dec     ebx
>>> -                        mov     ecx, ebx
>>> -                        rol     eax, 16
>>> -                        stosd
>>> -                        add     [esp+4], edx
>>> -                        dec     dword ptr [esp]
>>> -                        jnz     for_loop
>>> +        mov     edx, [ebp+12]     /* edx = prcl */
>>> +        mov     ecx, [ebp+8]      /* ecx = pso */
>>>
>>> -                        mov     ebx, [esp+8]
>>> -                        mov     eax, 1
>>> -                        mov     esi, [esp+12]
>>> -                        mov     edi, [esp+16]
>>> -                        mov     ebp, [esp+20]
>>> -                        add     esp, 24
>>> -                        ret
>>> +        mov     ebx, [ecx+0x24]   /* ebx = pso->lDelta; */
>>> +        mov     [esp], ebx        /* lDelta = pso->lDelta; */
>>> +        mov     edi, [edx+4]      /* edi = prcl->top; */
>>> +        mov     eax, edi          /* eax = prcl->top; */
>>> +        imul    eax, ebx          /* eax = prcl->top * pso- 
>>> >lDelta; */
>>> +        add     eax, [ecx+0x20]   /* eax += pso->pvScan0; */
>>> +        mov     ebx, [edx]        /* ebx = prcl->left; */
>>> +        lea     esi, [eax+ebx*4]  /* esi = pvLine0 = eax + 4 *  
>>> prcl->left;
>>> */
>>> +
>>> +        mov     ebx, [edx+8]      /* ebx = prcl->right; */
>>> +        sub     ebx, [edx]        /* ebx = prcl->right - prcl- 
>>> >left; */
>>> +        jbe     end               /* if (ebx <= 0) goto end; */
>>> +
>>> +        mov     edx, [edx+12]     /* edx = prcl->bottom; */
>>> +        sub     edx, edi          /* edx -= prcl->top; */
>>> +        jbe     end               /* if (edx <= 0) goto end; */
>>> +
>>> +        mov     eax, [ebp+16]     /* eax = iColor; */
>>> +        cld
>>> +
>>> +for_loop:                         /* do { */
>>> +        mov     edi, esi          /*   edi = pvLine0; */
>>> +        mov     ecx, ebx          /*   ecx = cx; */
>>> +        rep stosd                 /*   memset(pvLine0, iColor,  
>>> cx); */
>>> +        add     esi, [esp]        /*   pvLine0 += lDelta; */
>>> +        dec     edx               /*   cy--; */
>>> +        jnz     for_loop          /* } while (cy > 0); */
>>> +
>>> +end:
>>> +        mov     eax, 1
>>> +        add     esp, 4
>>> +        pop     edi
>>> +        pop     esi
>>> +        pop     ebx
>>> +        pop     ebp
>>> +        ret
>>>
>>>
>>>
>>>
>>
>> _______________________________________________
>> Ros-dev mailing list
>> Ros-dev at reactos.org
>> http://www.reactos.org/mailman/listinfo/ros-dev
>
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev

-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.reactos.org/pipermail/ros-dev/attachments/20090803/f5d95244/attachment-0001.htm 


More information about the Ros-dev mailing list