[ros-dev] [ros-diffs] [tkreuzer] 42353: asm version of DIB_32BPP_ColorFill: - Add frame pointer - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned - Optimize the loop - Add comments
Alexander Potashev
aspotashev at gmail.com
Tue Aug 4 05:37:24 CEST 2009
Hey, relax, guys!
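Btw, 'memset' can only fill a chunk of memory with identical bytes
(its fill value is converted to unsigned char, so only the low byte of
iColor would be used), thus it can't fill an array of DWORDs with an
arbitrary 32-bit color.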
2009/8/3 Timo Kreuzer <timo.kreuzer at web.de>:
> That would be a few lines, wouldn't it?
> Ok, let me do the work for you.
> And now compile and show me how the loop would be optimized anywhere near
> the asm code.
> Or can you do better?
>
> BOOLEAN
> DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor);
> {
> ULONG lDelta, cx, cy;
> ULONG pulLine;
>
> lDelta = pso->lDelta;
> pulLine= (PULONG)((PCHAR)pso->pvScan0 + prcl->top * lDelta + prcl->left
> * 4);
>
> cx = prcl->right - prcl->left;
> if (cx <= 0)
> return TRUE;
>
> cy = prcl->bottom - prcl->top;
> if (cy <= 0)
> return TRUE;
>
> do
> {
> memset(pulLine, iColor, cx);
> pulLine += lDelta / 4;
> cy--;
> } while (cy > 0);
>
> return TRUE;
> }
>
>
> Aleksey Bragin schrieb:
>
> "in a few lines" - and what about using the same algorithm you used in
> this assembly, but without pretending to be a compiler?
>
>
> WBR,
> Aleksey.
>
> On Aug 3, 2009, at 7:31 AM, Timo Kreuzer wrote:
>
> I hereby challenge you to provide portable C code, that - compiled with gcc
> - is faster than this assembly code.
> Should be done in a few lines.
>
> I bet my ass on it: You will fail! No matter what optimization you choose.
> You would also fail with msvc or Intel compiler.
>
> Regards,
> Timo
>
> Alex Ionescu wrote:
>
> The version that GCC 4.4 and CL 15 will generate would be way more optimized
> than this unportable/slower assembly code.
> This isn't 1994 anymore. You can't beat the compiler anymore.
>
> Best regards,
> Alex Ionescu
>
>
> On Sun, Aug 2, 2009 at 3:31 PM, <tkreuzer at svn.reactos.org> wrote:
>
>
> Author: tkreuzer
> Date: Mon Aug 3 00:31:29 2009
> New Revision: 42353
>
> URL: http://svn.reactos.org/svn/reactos?rev=42353&view=rev
> Log:
> asm version of DIB_32BPP_ColorFill:
> - Add frame pointer
> - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned
> - Optimize the loop
> - Add comments
>
> Modified:
> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>
> Modified:
> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
> URL:
> http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s?rev=42353&r1=42352&r2=42353&view=diff
>
> ==============================================================================
> --- trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
> [iso-8859-1] (original)
> +++ trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
> [iso-8859-1] Mon Aug 3 00:31:29 2009
> @@ -4,78 +4,62 @@
> * FILE: subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.c
> * PURPOSE: ASM optimised 32bpp ColorFill
> * PROGRAMMERS: Magnus Olsen
> + * Timo Kreuzer (timo.kreuzer at rectos.org)
> */
>
> - .globl _DIB_32BPP_ColorFill
> - .intel_syntax noprefix
> +.intel_syntax noprefix
>
> - .def _DIB_32BPP_ColorFill;
> - .scl 2;
> - .type 32;
> - .endef
> -
> - _DIB_32BPP_ColorFill:
> - sub esp, 24
> - mov ecx, [esp+32]
> - mov [esp+8], ebx
> - mov ebx, [esp+28]
> - mov [esp+20], ebp
> - mov ebp, [esp+36]
> - mov [esp+12], esi
> - mov [esp+16], edi
> - mov edi, [ecx]
> - mov esi, [ecx+8]
> - mov edx, [ebx+36]
> - sub esi, edi
> - mov edi, [ecx+4]
> - mov eax, edi
> - imul eax, edx
> - add eax, [ebx+32]
> - mov ebx, [ecx]
> - lea eax, [eax+ebx*4]
> - mov [esp+4], eax
> - mov eax, [ecx+12]
> - cmp eax, edi
> - jbe end
> - sub eax, edi
> - mov [esp], eax
> - lea esi, [esi+0]
> +/*
> + * BOOLEAN
> + * _cdecl
> + * DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor);
> +*/
>
> - for_loop:
> - mov eax, ebp
> - cld
> - mov ebx, esi
> - mov edi, [esp+4]
> - test edi, 3
> - jnz algin_draw
> - mov ecx, esi
> - rep stosd
> - add [esp+4], edx
> - dec dword ptr [esp]
> - jnz for_loop
> - end:
> - mov ebx, [esp+8]
> - mov eax, 1
> - mov esi, [esp+12]
> - mov edi, [esp+16]
> - mov ebp, [esp+20]
> - add esp, 24
> - ret
> +.globl _DIB_32BPP_ColorFill
> +_DIB_32BPP_ColorFill:
> + push ebp
> + mov ebp, esp
> + push ebx
> + push esi
> + push edi
> + sub esp, 4 /* Space for lDelta */
>
> - algin_draw:
> - stosd
> - dec ebx
> - mov ecx, ebx
> - rol eax, 16
> - stosd
> - add [esp+4], edx
> - dec dword ptr [esp]
> - jnz for_loop
> + mov edx, [ebp+12] /* edx = prcl */
> + mov ecx, [ebp+8] /* ecx = pso */
>
> - mov ebx, [esp+8]
> - mov eax, 1
> - mov esi, [esp+12]
> - mov edi, [esp+16]
> - mov ebp, [esp+20]
> - add esp, 24
> - ret
> + mov ebx, [ecx+0x24] /* ebx = pso->lDelta; */
> + mov [esp], ebx /* lDelta = pso->lDelta; */
> + mov edi, [edx+4] /* edi = prcl->top; */
> + mov eax, edi /* eax = prcl->top; */
> + imul eax, ebx /* eax = prcl->top * pso->lDelta; */
> + add eax, [ecx+0x20] /* eax += pso->pvScan0; */
> + mov ebx, [edx] /* ebx = prcl->left; */
> + lea esi, [eax+ebx*4] /* esi = pvLine0 = eax + 4 * prcl->left;
> */
> +
> + mov ebx, [edx+8] /* ebx = prcl->right; */
> + sub ebx, [edx] /* ebx = prcl->right - prcl->left; */
> + jbe end /* if (ebx <= 0) goto end; */
> +
> + mov edx, [edx+12] /* edx = prcl->bottom; */
> + sub edx, edi /* edx -= prcl->top; */
> + jbe end /* if (edx <= 0) goto end; */
> +
> + mov eax, [ebp+16] /* eax = iColor; */
> + cld
> +
> +for_loop: /* do { */
> + mov edi, esi /* edi = pvLine0; */
> + mov ecx, ebx /* ecx = cx; */
> + rep stosd /* memset(pvLine0, iColor, cx); */
> + add esi, [esp] /* pvLine0 += lDelta; */
> + dec edx /* cy--; */
> + jnz for_loop /* } while (cy > 0); */
> +
> +end:
> + mov eax, 1
> + add esp, 4
> + pop edi
> + pop esi
> + pop ebx
> + pop ebp
> + ret
>
>
>
>
>
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev
>
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev
>
>
> ________________________________
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev
>
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev
>
More information about the Ros-dev
mailing list