21 |
; * along with this program ; if not, write to the Free Software |
; * along with this program ; if not, write to the Free Software |
22 |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 |
; * |
; * |
24 |
; * $Id: quantize_h263_mmx.asm,v 1.10 2008-11-26 01:04:34 Isibaar Exp $ |
; * $Id: quantize_h263_mmx.asm,v 1.11.2.4 2009-09-16 17:11:39 Isibaar Exp $ |
25 |
; * |
; * |
26 |
; ****************************************************************************/ |
; ****************************************************************************/ |
27 |
|
|
90 |
; Code |
; Code |
91 |
;============================================================================= |
;============================================================================= |
92 |
|
|
93 |
SECTION .rotext align=SECTION_ALIGN |
TEXT |
94 |
|
|
95 |
cglobal quant_h263_intra_mmx |
cglobal quant_h263_intra_mmx |
96 |
cglobal quant_h263_intra_sse2 |
cglobal quant_h263_intra_sse2 |
128 |
add _EAX,TMP0 ; + (dcscalar/2)*sgn(data[0]) |
add _EAX,TMP0 ; + (dcscalar/2)*sgn(data[0]) |
129 |
|
|
130 |
mov TMP0, prm3 ; quant |
mov TMP0, prm3 ; quant |
|
cdq |
|
|
idiv prm4d ; dcscalar |
|
131 |
lea TMP1, [mmx_div] |
lea TMP1, [mmx_div] |
132 |
movq mm7, [TMP1+TMP0 * 8 - 8] |
movq mm7, [TMP1+TMP0 * 8 - 8] |
133 |
|
%ifdef ARCH_IS_X86_64 |
134 |
|
%ifdef WINDOWS |
135 |
|
mov TMP1, prm2 |
136 |
|
%endif |
137 |
|
%endif |
138 |
|
cdq |
139 |
|
idiv prm4d ; dcscalar |
140 |
|
%ifdef ARCH_IS_X86_64 |
141 |
|
%ifdef WINDOWS |
142 |
|
mov prm2, TMP1 |
143 |
|
%endif |
144 |
|
%endif |
145 |
cmp TMP0, 1 |
cmp TMP0, 1 |
146 |
mov TMP1, prm1 ; coeff |
mov TMP1, prm1 ; coeff |
147 |
je .low |
je .low |
247 |
|
|
248 |
ALIGN SECTION_ALIGN |
ALIGN SECTION_ALIGN |
249 |
quant_h263_intra_sse2: |
quant_h263_intra_sse2: |
250 |
|
PUSH_XMM6_XMM7 |
251 |
mov _EAX, prm2 ; data |
mov _EAX, prm2 ; data |
252 |
|
|
253 |
movsx _EAX, word [_EAX] ; data[0] |
movsx _EAX, word [_EAX] ; data[0] |
259 |
sub TMP1,TMP0 |
sub TMP1,TMP0 |
260 |
cmovl _EAX,TMP1 ; +/- dcscalar/2 |
cmovl _EAX,TMP1 ; +/- dcscalar/2 |
261 |
mov TMP0, prm3 ; quant |
mov TMP0, prm3 ; quant |
262 |
|
lea TMP1, [mmx_div] |
263 |
|
movq xmm7, [TMP1+TMP0 * 8 - 8] |
264 |
|
|
265 |
|
%ifdef ARCH_IS_X86_64 |
266 |
|
%ifdef WINDOWS |
267 |
|
mov TMP1, prm2 |
268 |
|
%endif |
269 |
|
%endif |
270 |
cdq |
cdq |
271 |
idiv prm4d ; dcscalar |
idiv prm4d ; dcscalar |
272 |
|
%ifdef ARCH_IS_X86_64 |
273 |
|
%ifdef WINDOWS |
274 |
|
mov prm2, TMP1 |
275 |
|
%endif |
276 |
|
%endif |
277 |
cmp TMP0, 1 |
cmp TMP0, 1 |
|
lea TMP1, [mmx_div] |
|
|
movq xmm7, [TMP1+TMP0 * 8 - 8] |
|
278 |
mov TMP1, prm1 ; coeff |
mov TMP1, prm1 ; coeff |
279 |
je near .low |
je near .low |
280 |
|
|
363 |
mov TMP1, prm1 ; coeff |
mov TMP1, prm1 ; coeff |
364 |
mov [TMP1],ax |
mov [TMP1],ax |
365 |
xor _EAX,_EAX ; return 0 |
xor _EAX,_EAX ; return 0 |
366 |
|
POP_XMM6_XMM7 |
367 |
ret |
ret |
368 |
ENDFUNC |
ENDFUNC |
369 |
|
|
483 |
|
|
484 |
ALIGN SECTION_ALIGN |
ALIGN SECTION_ALIGN |
485 |
quant_h263_inter_sse2: |
quant_h263_inter_sse2: |
486 |
|
PUSH_XMM6_XMM7 |
487 |
|
|
488 |
mov TMP1, prm1 ; coeff |
mov TMP1, prm1 ; coeff |
489 |
mov _EAX, prm3 ; quant |
mov _EAX, prm3 ; quant |
550 |
|
|
551 |
movd eax, mm0 ; return sum |
movd eax, mm0 ; return sum |
552 |
|
|
553 |
|
POP_XMM6_XMM7 |
554 |
ret |
ret |
555 |
|
|
556 |
.qes2_q1_routine: |
.qes2_q1_routine: |
773 |
|
|
774 |
ALIGN SECTION_ALIGN |
ALIGN SECTION_ALIGN |
775 |
dequant_h263_intra_sse2: |
dequant_h263_intra_sse2: |
776 |
|
PUSH_XMM6_XMM7 |
777 |
|
|
778 |
mov TMP0, prm3 ; quant |
mov TMP0, prm3 ; quant |
779 |
mov _EAX, prm2 ; coeff |
mov _EAX, prm2 ; coeff |
847 |
mov [TMP1], ax |
mov [TMP1], ax |
848 |
|
|
849 |
xor _EAX, _EAX ; return 0 |
xor _EAX, _EAX ; return 0 |
850 |
|
|
851 |
|
POP_XMM6_XMM7 |
852 |
ret |
ret |
853 |
ENDFUNC |
ENDFUNC |
854 |
|
|
1006 |
|
|
1007 |
ALIGN SECTION_ALIGN |
ALIGN SECTION_ALIGN |
1008 |
dequant_h263_inter_sse2: |
dequant_h263_inter_sse2: |
1009 |
|
PUSH_XMM6_XMM7 |
1010 |
|
|
1011 |
mov TMP0, prm3 ; quant |
mov TMP0, prm3 ; quant |
1012 |
mov _EAX, prm2 ; coeff |
mov _EAX, prm2 ; coeff |
1065 |
jne .loop |
jne .loop |
1066 |
|
|
1067 |
xor _EAX, _EAX ; return 0 |
xor _EAX, _EAX ; return 0 |
1068 |
|
|
1069 |
|
POP_XMM6_XMM7 |
1070 |
ret |
ret |
1071 |
ENDFUNC |
ENDFUNC |
1072 |
|
|
1073 |
|
NON_EXEC_STACK |
|
%ifidn __OUTPUT_FORMAT__,elf |
|
|
section ".note.GNU-stack" noalloc noexec nowrite progbits |
|
|
%endif |
|
|
|
|