--- colorspace_yuv_mmx.asm  1.9  2008-11-26
+++ colorspace_yuv_mmx.asm  1.14  2008-12-04
@@ 19 @@
 ; * along with this program; if not, write to the Free Software
 ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 ; *
-; * $Id: colorspace_yuv_mmx.asm,v 1.9 2008-11-26 01:04:34 Isibaar Exp $
+; * $Id: colorspace_yuv_mmx.asm,v 1.14 2008-12-04 18:30:36 Isibaar Exp $
 ; *
 ; ***************************************************************************/
 
@@ 29 @@
 ; Helper macros
 ;=============================================================================
 
+%macro _MOVQ 3
+%if %1 == 1
+  movntq %2, %3 ; xmm
+%else
+  movq %2, %3 ; plain mmx
+%endif
+%endmacro
 
 ;------------------------------------------------------------------------------
 ; PLANE_COPY ( DST, DST_STRIDE, SRC, SRC_STRIDE, WIDTH, HEIGHT, OPT )
 ; DST dst buffer
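
The new _MOVQ helper folds the OPT dispatch, previously repeated at every
store site, into a single macro: when its first argument is 1 it emits
movntq (a non-temporal SSE store), otherwise a plain mmx movq. Given the
_MOVQ definition above, a minimal sketch of how invocations expand (the
%assign lines and the [edi] operand are illustrative assumptions, not
code from this file):

    %assign OPT 1
    _MOVQ OPT, [edi], mm0   ; expands to: movntq [edi], mm0
    %assign OPT 0
    _MOVQ OPT, [edi], mm0   ; expands to: movq [edi], mm0
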
@@ 89 @@
   movq mm7, [SRC + 48]
   movq mm0, [SRC + 56]
 
-%if OPT == 0 ; plain mmx
-  movq [DST ], mm1 ; write to y_out
-  movq [DST + 8], mm2
-  movq [DST + 16], mm3
-  movq [DST + 24], mm4
-  movq [DST + 32], mm5
-  movq [DST + 40], mm6
-  movq [DST + 48], mm7
-  movq [DST + 56], mm0
-%else
-  movntq [DST ], mm1 ; write to y_out
-  movntq [DST + 8], mm2
-  movntq [DST + 16], mm3
-  movntq [DST + 24], mm4
-  movntq [DST + 32], mm5
-  movntq [DST + 40], mm6
-  movntq [DST + 48], mm7
-  movntq [DST + 56], mm0
-%endif
+  _MOVQ OPT, [DST ], mm1 ; write to y_out
+  _MOVQ OPT, [DST + 8], mm2
+  _MOVQ OPT, [DST + 16], mm3
+  _MOVQ OPT, [DST + 24], mm4
+  _MOVQ OPT, [DST + 32], mm5
+  _MOVQ OPT, [DST + 40], mm6
+  _MOVQ OPT, [DST + 48], mm7
+  _MOVQ OPT, [DST + 56], mm0
 
   add SRC, 64
   add DST, 64
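
With OPT enabled, the whole 64-byte burst above becomes non-temporal
stores, which stream to memory without displacing cache lines; that suits
large plane copies whose destination is not read back soon. As a general
SSE caveat (not a line from this diff), movntq stores are weakly ordered
and should be fenced before other code consumes the data:

    movntq [edi], mm0   ; streaming store, bypasses the cache hierarchy
    sfence              ; make the non-temporal stores globally visible
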
@@ 110 @@
 %%loop16_pc:
   movq mm1, [SRC]
   movq mm2, [SRC + 8]
-%if OPT == 0 ; plain mmx
-  movq [DST], mm1
-  movq [DST + 8], mm2
-%else
-  movntq [DST], mm1
-  movntq [DST + 8], mm2
-%endif
+  _MOVQ OPT, [DST], mm1
+  _MOVQ OPT, [DST + 8], mm2
 
   add SRC, 16
   add DST, 16
@@ 126 @@
   pop DST
 
 %ifdef ARCH_IS_X86_64
-  movsx _ECX, SRC_STRIDE
+  XVID_MOVSXD _ECX, SRC_STRIDE
   add SRC, _ECX
   mov ecx, DST_STRIDE
   add DST, _ECX
 %else
   add SRC, SRC_STRIDE
   add DST, DST_STRIDE
 %endif
 
   dec HEIGHT
   jg near %%loop64_start_pc
 
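
The movsx to XVID_MOVSXD change matters on x86-64: strides arrive as
32-bit integers and can be negative (e.g. for vertical flips), so they
must be sign-extended into the full 64-bit register before the pointer
add. XVID_MOVSXD is presumably a portability wrapper from the shared
include that spells this per architecture; a sketch of such a wrapper
(assumed, not copied from the include):

    %ifdef ARCH_IS_X86_64
    %define XVID_MOVSXD movsxd   ; e.g. movsxd rcx, ecx: sign-extend 32->64
    %else
    %define XVID_MOVSXD mov      ; 32-bit build: a plain move suffices
    %endif
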
@@ 183 @@
 
 %%loop64_pf:
 
-%if OPT == 0 ; plain mmx
-  movq [DST ], mm0 ; write to y_out
-  movq [DST + 8], mm0
-  movq [DST + 16], mm0
-  movq [DST + 24], mm0
-  movq [DST + 32], mm0
-  movq [DST + 40], mm0
-  movq [DST + 48], mm0
-  movq [DST + 56], mm0
-%else
-  movntq [DST ], mm0 ; write to y_out
-  movntq [DST + 8], mm0
-  movntq [DST + 16], mm0
-  movntq [DST + 24], mm0
-  movntq [DST + 32], mm0
-  movntq [DST + 40], mm0
-  movntq [DST + 48], mm0
-  movntq [DST + 56], mm0
-%endif
+  _MOVQ OPT, [DST ], mm0 ; write to y_out
+  _MOVQ OPT, [DST + 8], mm0
+  _MOVQ OPT, [DST + 16], mm0
+  _MOVQ OPT, [DST + 24], mm0
+  _MOVQ OPT, [DST + 32], mm0
+  _MOVQ OPT, [DST + 40], mm0
+  _MOVQ OPT, [DST + 48], mm0
+  _MOVQ OPT, [DST + 56], mm0
 
   add DST, 64
   loop %%loop64_pf
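
%%loop64_pf appears to be the fill counterpart of the copy loop: the same
mm0 pattern is stored across all 64 bytes each iteration. The loop
instruction decrements _ECX and branches while it stays nonzero, so _ECX
must hold the number of 64-byte chunks on entry. A roughly equivalent
explicit form (a sketch; note that unlike dec, loop leaves EFLAGS
untouched):

    dec _ECX
    jnz %%loop64_pf
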
@@ 201 @@
   jz %%loop1_start_pf
 
 %%loop16_pf:
-%if OPT == 0 ; plain mmx
-  movq [DST ], mm0
-  movq [DST + 8], mm0
-%else
-  movntq [DST ], mm0
-  movntq [DST + 8], mm0
-%endif
+  _MOVQ OPT, [DST ], mm0
+  _MOVQ OPT, [DST + 8], mm0
 
   add DST, 16
   loop %%loop16_pf
@@ 259 @@
 %define shadow 0
 %else
 %define pushsize 4*PTR_SIZE
-%define shadow 32 + 16
+%define shadow 32 + 2*PTR_SIZE
 %endif
 
 %define prm_vflip dword [_ESP + localsize + pushsize + shadow + 7*PTR_SIZE]
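
The shadow rewrite is numerically a no-op in this branch (presumably the
Win64 path, where PTR_SIZE is 8): it just spells the extra 16 bytes as
two pointer-sized slots instead of a magic constant. The 32 is presumably
the Win64 home ("shadow") space the caller reserves for the four register
arguments:

    ; with PTR_SIZE = 8 (x86-64), the two spellings agree:
    ;   32 + 2*PTR_SIZE = 32 + 16 = 48
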
@@ 287 @@
   mov _EDI, prm1
   mov TMP1, prm2
 
-  mov _ESI, [_ESP + localsize + pushsize + shadow - 1*PTR_SIZE]
-  mov TMP0d, dword [_ESP + localsize + pushsize + shadow - 2*PTR_SIZE]
+  mov _ESI, [_ESP + localsize + pushsize + shadow + 0*PTR_SIZE]
+  mov TMP0d, dword [_ESP + localsize + pushsize + shadow - 1*PTR_SIZE]
 
 %else
   push _EBP ; _ESP + localsize + 0*PTR_SIZE
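
Note the companion fix here: both stack-argument loads move up by one
PTR_SIZE (shadow - 1 becomes shadow + 0, shadow - 2 becomes shadow - 1).
Since shadow itself kept the same value, rev 1.14 evidently corrects an
off-by-one slot in the stack-argument addressing rather than reshuffling
the frame layout.
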
@@ 445 @@
 ; Code
 ;=============================================================================
 
-SECTION .rotext align=SECTION_ALIGN
+TEXT
 
 MAKE_YV12_TO_YV12 yv12_to_yv12_mmx, 0
 
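
TEXT replaces the open-coded section directive, presumably a macro from
the shared include that centralizes the code-section setup so per-format
differences live in one place. A sketch of such a macro under that
assumption (not copied from the include):

    %macro TEXT 0
      SECTION .rotext align=SECTION_ALIGN
    %endmacro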