[ros-dev] [ros-diffs] [tkreuzer] 42353: asm version of DIB_32BPP_ColorFill: - Add frame pointer - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned - Optimize the loop - Add comments

Alexander Potashev aspotashev at gmail.com
Tue Aug 4 05:37:24 CEST 2009


Hey, relax, guys!


Btw, 'memset' fills memory byte by byte with a single byte value, thus it
can't fill an array of DWORDs with an arbitrary 32-bit color (it only works
when all four bytes of the color are identical).

2009/8/3 Timo Kreuzer <timo.kreuzer at web.de>:
> That would be a few lines, wouldn't it?
> Ok, let me do the work for you.
> And now compile and show me how the loop would be optimized anywhere near
> the asm code.
> Or can you do better?
>
> BOOLEAN
> DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor);
> {
>     ULONG lDelta, cx, cy;
>     ULONG pulLine;
>
>     lDelta = pso->lDelta;
>     pulLine= (PULONG)((PCHAR)pso->pvScan0 + prcl->top * lDelta + prcl->left
> * 4);
>
>     cx = prcl->right - prcl->left;
>     if (cx <= 0)
>         return TRUE;
>
>     cy = prcl->bottom - prcl->top;
>     if (cy <= 0)
>         return TRUE;
>
>     do
>     {
>         memset(pulLine, iColor, cx);
>         pulLine += lDelta / 4;
>         cy--;
>     } while (cy > 0);
>
>     return TRUE;
> }
>
>
> Aleksey Bragin schrieb:
>
> "in a few lines" - and what about using the same algorithm you used in
> this assembly, but without pretending to be a compiler?
>
>
> WBR,
> Aleksey.
>
> On Aug 3, 2009, at 7:31 AM, Timo Kreuzer wrote:
>
> I hereby challenge you to provide portable C code, that - compiled with gcc
> - is faster than this assembly code.
> Should be done in a few lines.
>
> I bet my ass on it: You will fail! No matter what optimization you choose.
> You would also fail with msvc or Intel compiler.
>
> Regards,
> Timo
>
> Alex Ionescu wrote:
>
> The version that GCC 4.4 and CL 15 will generate would be way more optimized
> than this unportable/slower assembly code.
> This isn't 1994 anymore. You can't beat the compiler anymore.
>
> Best regards,
> Alex Ionescu
>
>
> On Sun, Aug 2, 2009 at 3:31 PM, <tkreuzer at svn.reactos.org> wrote:
>
>
> Author: tkreuzer
> Date: Mon Aug  3 00:31:29 2009
> New Revision: 42353
>
> URL: http://svn.reactos.org/svn/reactos?rev=42353&view=rev
> Log:
> asm version of DIB_32BPP_ColorFill:
> - Add frame pointer
> - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned
> - Optimize the loop
> - Add comments
>
> Modified:
>    trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>
> Modified:
> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
> URL:
> http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s?rev=42353&r1=42352&r2=42353&view=diff
>
> ==============================================================================
> --- trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
> [iso-8859-1] (original)
> +++ trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
> [iso-8859-1] Mon Aug  3 00:31:29 2009
> @@ -4,78 +4,62 @@
>  * FILE:            subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.c
>  * PURPOSE:         ASM optimised 32bpp ColorFill
>  * PROGRAMMERS:     Magnus Olsen
> + *                  Timo Kreuzer (timo.kreuzer at rectos.org)
>  */
>
> -  .globl _DIB_32BPP_ColorFill
> -  .intel_syntax noprefix
> +.intel_syntax noprefix
>
> -  .def   _DIB_32BPP_ColorFill;
> -  .scl 2;
> -  .type        32;
> -  .endef
> -
> -  _DIB_32BPP_ColorFill:
> -                        sub     esp, 24
> -                        mov     ecx, [esp+32]
> -                        mov     [esp+8], ebx
> -                        mov     ebx, [esp+28]
> -                        mov     [esp+20], ebp
> -                        mov     ebp, [esp+36]
> -                        mov     [esp+12], esi
> -                        mov     [esp+16], edi
> -                        mov     edi, [ecx]
> -                        mov     esi, [ecx+8]
> -                        mov     edx, [ebx+36]
> -                        sub     esi, edi
> -                        mov     edi, [ecx+4]
> -                        mov     eax, edi
> -                        imul    eax, edx
> -                        add     eax, [ebx+32]
> -                        mov     ebx, [ecx]
> -                        lea     eax, [eax+ebx*4]
> -                        mov     [esp+4], eax
> -                        mov     eax, [ecx+12]
> -                        cmp     eax, edi
> -                        jbe     end
> -                        sub     eax, edi
> -                        mov     [esp], eax
> -                        lea     esi, [esi+0]
> +/*
> + * BOOLEAN
> + * _cdecl
> + * DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor);
> +*/
>
> -               for_loop:
> -                        mov     eax, ebp
> -                        cld
> -                        mov     ebx, esi
> -                        mov     edi, [esp+4]
> -                        test    edi, 3
> -                        jnz     algin_draw
> -                        mov     ecx, esi
> -                        rep stosd
> -                        add     [esp+4], edx
> -                        dec     dword ptr [esp]
> -                        jnz     for_loop
> -               end:
> -                        mov     ebx, [esp+8]
> -                        mov     eax, 1
> -                        mov     esi, [esp+12]
> -                        mov     edi, [esp+16]
> -                        mov     ebp, [esp+20]
> -                        add     esp, 24
> -                        ret
> +.globl _DIB_32BPP_ColorFill
> +_DIB_32BPP_ColorFill:
> +        push    ebp
> +        mov     ebp, esp
> +        push    ebx
> +        push    esi
> +        push    edi
> +        sub     esp, 4            /* Space for lDelta */
>
> -               algin_draw:
> -                        stosd
> -                        dec     ebx
> -                        mov     ecx, ebx
> -                        rol     eax, 16
> -                        stosd
> -                        add     [esp+4], edx
> -                        dec     dword ptr [esp]
> -                        jnz     for_loop
> +        mov     edx, [ebp+12]     /* edx = prcl */
> +        mov     ecx, [ebp+8]      /* ecx = pso */
>
> -                        mov     ebx, [esp+8]
> -                        mov     eax, 1
> -                        mov     esi, [esp+12]
> -                        mov     edi, [esp+16]
> -                        mov     ebp, [esp+20]
> -                        add     esp, 24
> -                        ret
> +        mov     ebx, [ecx+0x24]   /* ebx = pso->lDelta; */
> +        mov     [esp], ebx        /* lDelta = pso->lDelta; */
> +        mov     edi, [edx+4]      /* edi = prcl->top; */
> +        mov     eax, edi          /* eax = prcl->top; */
> +        imul    eax, ebx          /* eax = prcl->top * pso->lDelta; */
> +        add     eax, [ecx+0x20]   /* eax += pso->pvScan0; */
> +        mov     ebx, [edx]        /* ebx = prcl->left; */
> +        lea     esi, [eax+ebx*4]  /* esi = pvLine0 = eax + 4 * prcl->left;
> */
> +
> +        mov     ebx, [edx+8]      /* ebx = prcl->right; */
> +        sub     ebx, [edx]        /* ebx = prcl->right - prcl->left; */
> +        jbe     end               /* if (ebx <= 0) goto end; */
> +
> +        mov     edx, [edx+12]     /* edx = prcl->bottom; */
> +        sub     edx, edi          /* edx -= prcl->top; */
> +        jbe     end               /* if (edx <= 0) goto end; */
> +
> +        mov     eax, [ebp+16]     /* eax = iColor; */
> +        cld
> +
> +for_loop:                         /* do { */
> +        mov     edi, esi          /*   edi = pvLine0; */
> +        mov     ecx, ebx          /*   ecx = cx; */
> +        rep stosd                 /*   memset(pvLine0, iColor, cx); */
> +        add     esi, [esp]        /*   pvLine0 += lDelta; */
> +        dec     edx               /*   cy--; */
> +        jnz     for_loop          /* } while (cy > 0); */
> +
> +end:
> +        mov     eax, 1
> +        add     esp, 4
> +        pop     edi
> +        pop     esi
> +        pop     ebx
> +        pop     ebp
> +        ret
>
>
>
>
>
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev
>
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev
>
>
> ________________________________
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev
>
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev
>


More information about the Ros-dev mailing list