--- trunk/xvidcore/src/image/x86_asm/postprocessing_sse2.asm 2004/07/15 10:09:30 1493
+++ trunk/xvidcore/src/image/x86_asm/postprocessing_sse2.asm 2008/11/26 01:04:34 1795
@@ -22,114 +22,129 @@
 ; *
 ; *************************************************************************/
 
-BITS 32
-
-%macro cglobal 1
-	%ifdef PREFIX
-		global _%1
-		%define %1 _%1
-	%else
-		global %1
-	%endif
-%endmacro
-
-%macro FILLBYTES 2
-
-  mov [%1], %2
-  mov [%1 + 1], %2
-  mov [%1 + 2], %2
-  mov [%1 + 3], %2
-  mov [%1 + 4], %2
-  mov [%1 + 5], %2
-  mov [%1 + 6], %2
-  mov [%1 + 7], %2
-  mov [%1 + 8], %2
-  mov [%1 + 9], %2
-  mov [%1 + 10], %2
-  mov [%1 + 11], %2
-  mov [%1 + 12], %2
-  mov [%1 + 13], %2
-  mov [%1 + 14], %2
-  mov [%1 + 15], %2
-
-%endmacro
-
+%include "nasm.inc"
 
 ;===========================================================================
 ; read only data
 ;===========================================================================
 
-%ifdef FORMAT_COFF
-SECTION .rodata data
-%else
-SECTION .rodata data align=16
-%endif
+DATA
 
 xmm_0x80:
 	times 16 db 0x80
 
-offset_xmm:
-	times 16 db 0x00
-
 ;=============================================================================
 ; Code
 ;=============================================================================
 
-SECTION .text
+SECTION .rotext align=SECTION_ALIGN
 
 cglobal image_brightness_sse2
 
-
 ;//////////////////////////////////////////////////////////////////////
 ;// image_brightness_sse2
+;//
+;// Adds a signed 8-bit brightness offset to every byte of an image plane,
+;// saturating each result to the unsigned range 0..255.
+;//
+;// prm1 = Dst     plane pointer; rows are accessed with movdqa, so Dst and
+;//                stride must keep every row 16-byte aligned
+;// prm2 = stride  bytes from one row to the next
+;// prm3 = width   bytes per row; handled in 32-byte chunks, at least one
+;// prm4 = height  number of rows; at least one row is always processed
+;// prm5 = offset  brightness delta; only the low byte is used
+;//
+;// NOTE(review): xmm6/xmm7 are overwritten and not restored. They are
+;// callee-saved in the Microsoft x64 ABI -- confirm whether 64-bit
+;// Windows builds need to save them.
+;//
 ;//////////////////////////////////////////////////////////////////////
 
-align 16
-image_brightness_sse2:
-
-	push esi
-	push edi
-
-	movdqa xmm6, [xmm_0x80]
-
-	mov eax, [esp+8+20] ; offset
-
-	FILLBYTES offset_xmm, al
-
-	movdqa xmm7, [offset_xmm]
-
-	mov edx, [esp+8+4] ; Dst
-	mov ecx, [esp+8+8] ; stride
-	mov esi, [esp+8+12] ; width
-	mov edi, [esp+8+16] ; height
-
-.yloop
-	xor	eax, eax
+; CREATE_OFFSET_VECTOR base, reg8
+; Stores the byte register %2 into the 16 consecutive bytes at [%1].
+%macro CREATE_OFFSET_VECTOR 2
+  mov [%1 +  0], %2
+  mov [%1 +  1], %2
+  mov [%1 +  2], %2
+  mov [%1 +  3], %2
+  mov [%1 +  4], %2
+  mov [%1 +  5], %2
+  mov [%1 +  6], %2
+  mov [%1 +  7], %2
+  mov [%1 +  8], %2
+  mov [%1 +  9], %2
+  mov [%1 + 10], %2
+  mov [%1 + 11], %2
+  mov [%1 + 12], %2
+  mov [%1 + 13], %2
+  mov [%1 + 14], %2
+  mov [%1 + 15], %2
+%endmacro
 
-.xloop
-	movdqa xmm0, [edx + eax]
-	movdqa xmm1, [edx + eax + 16] ; xmm0 = [dst]
+ALIGN SECTION_ALIGN
+image_brightness_sse2:
 
-	paddb xmm0, xmm6              ; unsigned -> signed domain
-	paddb xmm1, xmm6
-	paddsb xmm0, xmm7
-	paddsb xmm1, xmm7             ; xmm0 += offset
-	psubb xmm0, xmm6
-	psubb xmm1, xmm6              ; signed -> unsigned domain
+  mov eax, prm5d  ; brightness offset value (read before esp moves)
+  mov TMP1, prm1  ; Dst -- must stay intact until the pixel loop is done
+  mov TMP0, prm2  ; stride
+
+  push _ESI
+  push _EDI    ; 8 bytes offset for push
+  sub _ESP, 32 ; 32 bytes of locals: 16 are used, 16 more allow aligning mod 16
+
+  movdqa xmm6, [xmm_0x80]  ; xmm6 = 16 x 0x80, bias between unsigned and signed
+
+  ; Build the 16-byte offset vector in an aligned stack scratch slot.
+  ; _ESI is used as the scratch pointer (it is saved above and only loaded
+  ; with width afterwards) so that TMP1 keeps pointing at Dst for the loop.
+  mov _ESI, _ESP          ; _ESI will be esp rounded up to a multiple of 16
+  add _ESI, 15            ; _ESI = esp + 15
+  and _ESI, ~15           ; _ESI = (esp + 15)&(~15)
+  CREATE_OFFSET_VECTOR _ESI, al
+  movdqa xmm7, [_ESI]     ; xmm7 = 16 copies of the offset byte
+
+%ifdef ARCH_IS_X86_64
+  mov _ESI, prm3
+  mov _EDI, prm4
+%else
+  mov _ESI, [_ESP+8+32+12] ; width  (8 = two pushes, 32 = locals)
+  mov _EDI, [_ESP+8+32+16] ; height
+%endif
 
-	movdqa [edx + eax], xmm0
-	movdqa [edx + eax + 16], xmm1 ; [dst] = xmm0
+.yloop:
+  xor _EAX, _EAX           ; x = 0, byte index within the current row
 
-	add eax,32
-	cmp eax,esi
-	jl .xloop
+.xloop:
+  movdqa xmm0, [TMP1 + _EAX]
+  movdqa xmm1, [TMP1 + _EAX + 16]  ; xmm0 = [dst]
+
+  paddb xmm0, xmm6              ; unsigned -> signed domain
+  paddb xmm1, xmm6
+  paddsb xmm0, xmm7
+  paddsb xmm1, xmm7             ; xmm0 += offset (signed saturation)
+  psubb xmm0, xmm6
+  psubb xmm1, xmm6              ; signed -> unsigned domain
+
+  movdqa [TMP1 + _EAX], xmm0
+  movdqa [TMP1 + _EAX + 16], xmm1 ; [dst] = xmm0
+
+  add _EAX,32
+  cmp _EAX,_ESI
+  jl .xloop
+
+  add TMP1, TMP0                ; dst += stride
+  sub _EDI, 1
+  jg .yloop
+
+  add _ESP, 32                  ; release the scratch area
+  pop _EDI
+  pop _ESI
 
-	add edx, ecx                  ; dst += stride
-	sub edi, 1
-	jg .yloop
+  ret
+ENDFUNC
+;//////////////////////////////////////////////////////////////////////
 
-	pop edi
-	pop esi
+%ifidn __OUTPUT_FORMAT__,elf
+section ".note.GNU-stack" noalloc noexec nowrite progbits
+%endif
 
-	ret
-;//////////////////////////////////////////////////////////////////////