; Code
;=============================================================================

40 |
SECTION .rotext align=SECTION_ALIGN |
TEXT |
41 |
|
|
42 |
cglobal image_brightness_sse2 |
cglobal image_brightness_sse2 |
43 |
|
|
66 |
|
|
67 |
ALIGN SECTION_ALIGN |
ALIGN SECTION_ALIGN |
68 |
image_brightness_sse2: |
image_brightness_sse2: |
|
|
|
69 |
%ifdef ARCH_IS_X86_64 |
%ifdef ARCH_IS_X86_64 |
70 |
movsx _EAX, prm5d |
XVID_MOVSX _EAX, prm5d |
71 |
%else |
%else |
72 |
mov eax, prm5 ; brightness offset value |
mov eax, prm5 ; brightness offset value |
73 |
%endif |
%endif |
78 |
push _EDI ; 8 bytes offset for push |
push _EDI ; 8 bytes offset for push |
79 |
sub _ESP, 32 ; 32 bytes for local data (16bytes will be used, 16bytes more to align correctly mod 16) |
sub _ESP, 32 ; 32 bytes for local data (16bytes will be used, 16bytes more to align correctly mod 16) |
80 |
|
|
81 |
movdqa xmm6, [xmm_0x80] |
movdqa xmm2, [xmm_0x80] |
82 |
|
|
83 |
; Create a offset...offset vector |
; Create a offset...offset vector |
84 |
mov _ESI, _ESP ; TMP1 will be esp aligned mod 16 |
mov _ESI, _ESP ; TMP1 will be esp aligned mod 16 |
85 |
add _ESI, 15 ; TMP1 = esp + 15 |
add _ESI, 15 ; TMP1 = esp + 15 |
86 |
and _ESI, ~15 ; TMP1 = (esp + 15)&(~15) |
and _ESI, ~15 ; TMP1 = (esp + 15)&(~15) |
87 |
CREATE_OFFSET_VECTOR _ESI, al |
CREATE_OFFSET_VECTOR _ESI, al |
88 |
movdqa xmm7, [_ESI] |
movdqa xmm3, [_ESI] |
89 |
|
|
90 |
%ifdef ARCH_IS_X86_64 |
%ifdef ARCH_IS_X86_64 |
91 |
mov _ESI, prm3 |
mov _ESI, prm3 |
102 |
movdqa xmm0, [TMP1 + _EAX] |
movdqa xmm0, [TMP1 + _EAX] |
103 |
movdqa xmm1, [TMP1 + _EAX + 16] ; xmm0 = [dst] |
movdqa xmm1, [TMP1 + _EAX + 16] ; xmm0 = [dst] |
104 |
|
|
105 |
paddb xmm0, xmm6 ; unsigned -> signed domain |
paddb xmm0, xmm2 ; unsigned -> signed domain |
106 |
paddb xmm1, xmm6 |
paddb xmm1, xmm2 |
107 |
paddsb xmm0, xmm7 |
paddsb xmm0, xmm3 |
108 |
paddsb xmm1, xmm7 ; xmm0 += offset |
paddsb xmm1, xmm3 ; xmm0 += offset |
109 |
psubb xmm0, xmm6 |
psubb xmm0, xmm2 |
110 |
psubb xmm1, xmm6 ; signed -> unsigned domain |
psubb xmm1, xmm2 ; signed -> unsigned domain |
111 |
|
|
112 |
movdqa [TMP1 + _EAX], xmm0 |
movdqa [TMP1 + _EAX], xmm0 |
113 |
movdqa [TMP1 + _EAX + 16], xmm1 ; [dst] = xmm0 |
movdqa [TMP1 + _EAX + 16], xmm1 ; [dst] = xmm0 |
117 |
jl .xloop |
jl .xloop |
118 |
|
|
119 |
add TMP1, TMP0 ; dst += stride |
add TMP1, TMP0 ; dst += stride |
120 |
sub _EDI, 1 |
dec _EDI |
121 |
jg .yloop |
jg .yloop |
122 |
|
|
123 |
add _ESP, 32 |
add _ESP, 32 |