--- trunk/xvidcore/src/image/x86_asm/postprocessing_sse2.asm	2008/11/26 01:04:34	1795
+++ trunk/xvidcore/src/image/x86_asm/postprocessing_sse2.asm	2008/12/04 14:41:50	1844
@@ -37,7 +37,7 @@
 ; Code
 ;=============================================================================
 
-SECTION .rotext align=SECTION_ALIGN
+TEXT
 
 cglobal image_brightness_sse2
 
@@ -66,8 +66,11 @@
 
 ALIGN SECTION_ALIGN
 image_brightness_sse2:
-
-  mov eax, prm5d              ; brightness offset value
+%ifdef ARCH_IS_X86_64
+  XVID_MOVSX _EAX, prm5d
+%else
+  mov eax, prm5               ; brightness offset value
+%endif
 
   mov TMP1, prm1              ; Dst
   mov TMP0, prm2              ; stride
@@ -75,14 +78,14 @@
   push _EDI                   ; 8 bytes offset for push
   sub _ESP, 32                ; 32 bytes for local data (16bytes will be used, 16bytes more to align correctly mod 16)
 
-  movdqa xmm6, [xmm_0x80]
+  movdqa xmm2, [xmm_0x80]
 
   ; Create a offset...offset vector
-  mov TMP1, _ESP              ; TMP1 will be esp aligned mod 16
-  add TMP1, 15                ; TMP1 = esp + 15
-  and TMP1, ~15               ; TMP1 = (esp + 15)&(~15)
-  CREATE_OFFSET_VECTOR TMP1, al
-  movdqa xmm7, [TMP1]
+  mov _ESI, _ESP              ; _ESI will be esp aligned mod 16
+  add _ESI, 15                ; _ESI = esp + 15
+  and _ESI, ~15               ; _ESI = (esp + 15)&(~15)
+  CREATE_OFFSET_VECTOR _ESI, al
+  movdqa xmm3, [_ESI]
 
 %ifdef ARCH_IS_X86_64
   mov _ESI, prm3
@@ -99,12 +102,12 @@
   movdqa xmm0, [TMP1 + _EAX]
   movdqa xmm1, [TMP1 + _EAX + 16] ; xmm0 = [dst]
 
-  paddb xmm0, xmm6            ; unsigned -> signed domain
-  paddb xmm1, xmm6
-  paddsb xmm0, xmm7
-  paddsb xmm1, xmm7           ; xmm0 += offset
-  psubb xmm0, xmm6
-  psubb xmm1, xmm6            ; signed -> unsigned domain
+  paddb xmm0, xmm2            ; unsigned -> signed domain
+  paddb xmm1, xmm2
+  paddsb xmm0, xmm3
+  paddsb xmm1, xmm3           ; xmm0 += offset
+  psubb xmm0, xmm2
+  psubb xmm1, xmm2            ; signed -> unsigned domain
 
   movdqa [TMP1 + _EAX], xmm0
   movdqa [TMP1 + _EAX + 16], xmm1 ; [dst] = xmm0
 
@@ -114,7 +117,7 @@
   jl .xloop
 
   add TMP1, TMP0              ; dst += stride
-  sub _EDI, 1
+  dec _EDI
   jg .yloop
 
   add _ESP, 32
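
Editor's note on the inner loop: the paddb/paddsb/psubb sequence is the standard trick for adding a signed brightness offset to unsigned pixels with clamping. SSE2 has no saturating add of a signed byte to an unsigned byte, so each pixel is first biased by 0x80 into the signed domain, added with the saturating paddsb, then biased back. The register renames (xmm6/xmm7 -> xmm2/xmm3) are presumably motivated by the Win64 calling convention, where xmm6-xmm15 are callee-saved while xmm0-xmm5 are volatile. Below is a minimal C sketch of the same trick using SSE2 intrinsics, assuming a 16-byte-aligned row whose width is a multiple of 16; the function name is illustrative and not part of xvidcore.

#include <emmintrin.h>  /* SSE2 intrinsics */
#include <stdint.h>

/* Add a brightness offset to one pixel row, clamping to [0, 255].
 * Mirrors the paddb/paddsb/psubb sequence in the asm loop above.
 * Assumes `row` is 16-byte aligned and `width` is a multiple of 16. */
static void brightness_row_sse2(uint8_t *row, int width, int8_t offset)
{
    const __m128i bias = _mm_set1_epi8((char)0x80);  /* xmm2 in the asm */
    const __m128i off  = _mm_set1_epi8(offset);      /* xmm3 in the asm */

    for (int x = 0; x < width; x += 16) {            /* 16 pixels per step */
        __m128i p = _mm_load_si128((const __m128i *)(row + x));
        p = _mm_add_epi8(p, bias);   /* paddb:  unsigned -> signed domain */
        p = _mm_adds_epi8(p, off);   /* paddsb: saturating add of offset  */
        p = _mm_sub_epi8(p, bias);   /* psubb:  signed -> unsigned domain */
        _mm_store_si128((__m128i *)(row + x), p);
    }
}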