[ros-dev] [ros-diffs] [tkreuzer] 42353: asm version of DIB_32BPP_ColorFill: - Add frame pointer - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned - Optimize the loop - Add comments
Aleksey Bragin
aleksey at reactos.org
Mon Aug 3 10:11:28 CEST 2009
"in a few lines" - and what about using the same algorithm you
used in this assembly, but without pretending to be a compiler?
WBR,
Aleksey.
On Aug 3, 2009, at 7:31 AM, Timo Kreuzer wrote:
> I hereby challenge you to provide portable C code, that - compiled
> with gcc - is faster than this assembly code.
> Should be done in a few lines.
>
> I bet my ass on it: You will fail! No matter what optimization you
> choose.
> You would also fail with msvc or Intel compiler.
>
> Regards,
> Timo
>
> Alex Ionescu wrote:
>> The version that GCC 4.4 and CL 15 will generate would be way more
>> optimized
>> than this unportable/slower assembly code.
>> This isn't 1994 anymore. You can't beat the compiler anymore.
>>
>> Best regards,
>> Alex Ionescu
>>
>>
>> On Sun, Aug 2, 2009 at 3:31 PM, <tkreuzer at svn.reactos.org> wrote:
>>
>>
>>> Author: tkreuzer
>>> Date: Mon Aug 3 00:31:29 2009
>>> New Revision: 42353
>>>
>>> URL: http://svn.reactos.org/svn/reactos?rev=42353&view=rev
>>> Log:
>>> asm version of DIB_32BPP_ColorFill:
>>> - Add frame pointer
>>> - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned
>>> - Optimize the loop
>>> - Add comments
>>>
>>> Modified:
>>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>>
>>> Modified:
>>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>> URL:
>>> http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s?rev=42353&r1=42352&r2=42353&view=diff
>>>
>>> ====================================================================
>>> ==========
>>> --- trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s [iso-8859-1] (original)
>>> +++ trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s [iso-8859-1] Mon Aug 3 00:31:29 2009
>>> @@ -4,78 +4,62 @@
>>> * FILE: subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.c
>>> * PURPOSE: ASM optimised 32bpp ColorFill
>>> * PROGRAMMERS: Magnus Olsen
>>> + * Timo Kreuzer (timo.kreuzer at rectos.org)
>>> */
>>>
>>> - .globl _DIB_32BPP_ColorFill
>>> - .intel_syntax noprefix
>>> +.intel_syntax noprefix
>>>
>>> - .def _DIB_32BPP_ColorFill;
>>> - .scl 2;
>>> - .type 32;
>>> - .endef
>>> -
>>> - _DIB_32BPP_ColorFill:
>>> - sub esp, 24
>>> - mov ecx, [esp+32]
>>> - mov [esp+8], ebx
>>> - mov ebx, [esp+28]
>>> - mov [esp+20], ebp
>>> - mov ebp, [esp+36]
>>> - mov [esp+12], esi
>>> - mov [esp+16], edi
>>> - mov edi, [ecx]
>>> - mov esi, [ecx+8]
>>> - mov edx, [ebx+36]
>>> - sub esi, edi
>>> - mov edi, [ecx+4]
>>> - mov eax, edi
>>> - imul eax, edx
>>> - add eax, [ebx+32]
>>> - mov ebx, [ecx]
>>> - lea eax, [eax+ebx*4]
>>> - mov [esp+4], eax
>>> - mov eax, [ecx+12]
>>> - cmp eax, edi
>>> - jbe end
>>> - sub eax, edi
>>> - mov [esp], eax
>>> - lea esi, [esi+0]
>>> +/*
>>> + * BOOLEAN
>>> + * _cdecl
>>> + * DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor);
>>> +*/
>>>
>>> - for_loop:
>>> - mov eax, ebp
>>> - cld
>>> - mov ebx, esi
>>> - mov edi, [esp+4]
>>> - test edi, 3
>>> - jnz algin_draw
>>> - mov ecx, esi
>>> - rep stosd
>>> - add [esp+4], edx
>>> - dec dword ptr [esp]
>>> - jnz for_loop
>>> - end:
>>> - mov ebx, [esp+8]
>>> - mov eax, 1
>>> - mov esi, [esp+12]
>>> - mov edi, [esp+16]
>>> - mov ebp, [esp+20]
>>> - add esp, 24
>>> - ret
>>> +.globl _DIB_32BPP_ColorFill
>>> +_DIB_32BPP_ColorFill:
>>> + push ebp
>>> + mov ebp, esp
>>> + push ebx
>>> + push esi
>>> + push edi
>>> + sub esp, 4 /* Space for lDelta */
>>>
>>> - algin_draw:
>>> - stosd
>>> - dec ebx
>>> - mov ecx, ebx
>>> - rol eax, 16
>>> - stosd
>>> - add [esp+4], edx
>>> - dec dword ptr [esp]
>>> - jnz for_loop
>>> + mov edx, [ebp+12] /* edx = prcl */
>>> + mov ecx, [ebp+8] /* ecx = pso */
>>>
>>> - mov ebx, [esp+8]
>>> - mov eax, 1
>>> - mov esi, [esp+12]
>>> - mov edi, [esp+16]
>>> - mov ebp, [esp+20]
>>> - add esp, 24
>>> - ret
>>> + mov ebx, [ecx+0x24] /* ebx = pso->lDelta; */
>>> + mov [esp], ebx /* lDelta = pso->lDelta; */
>>> + mov edi, [edx+4] /* edi = prcl->top; */
>>> + mov eax, edi /* eax = prcl->top; */
>>> + imul eax, ebx /* eax = prcl->top * pso->lDelta; */
>>> + add eax, [ecx+0x20] /* eax += pso->pvScan0; */
>>> + mov ebx, [edx] /* ebx = prcl->left; */
>>> + lea esi, [eax+ebx*4] /* esi = pvLine0 = eax + 4 * prcl->left; */
>>> +
>>> + mov ebx, [edx+8] /* ebx = prcl->right; */
>>> + sub ebx, [edx] /* ebx = prcl->right - prcl->left; */
>>> + jbe end /* if (ebx <= 0) goto end; */
>>> +
>>> + mov edx, [edx+12] /* edx = prcl->bottom; */
>>> + sub edx, edi /* edx -= prcl->top; */
>>> + jbe end /* if (eax <= 0) goto end; */
>>> +
>>> + mov eax, [ebp+16] /* eax = iColor; */
>>> + cld
>>> +
>>> +for_loop: /* do { */
>>> + mov edi, esi /* edi = pvLine0; */
>>> + mov ecx, ebx /* ecx = cx; */
>>> + rep stosd /* memset(pvLine0, iColor, cx); */
>>> + add esi, [esp] /* pvLine0 += lDelta; */
>>> + dec edx /* cy--; */
>>> + jnz for_loop /* } while (cy > 0); */
>>> +
>>> +end:
>>> + mov eax, 1
>>> + add esp, 4
>>> + pop edi
>>> + pop esi
>>> + pop ebx
>>> + pop ebp
>>> + ret
>>>
>>>
>>>
>>>
>>
>> _______________________________________________
>> Ros-dev mailing list
>> Ros-dev at reactos.org
>> http://www.reactos.org/mailman/listinfo/ros-dev
>
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.reactos.org/pipermail/ros-dev/attachments/20090803/f5d95244/attachment-0001.htm
More information about the Ros-dev
mailing list