21 |
; * along with this program ; if not, write to the Free Software |
; * along with this program ; if not, write to the Free Software |
22 |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 |
; * |
; * |
24 |
; * $Id: quantize_mpeg_xmm.asm,v 1.2 2008-08-19 09:06:48 Isibaar Exp $ |
; * $Id: quantize_mpeg_xmm.asm,v 1.3 2008-11-11 20:46:24 Isibaar Exp $ |
25 |
; * |
; * |
26 |
; ***************************************************************************/ |
; ***************************************************************************/ |
27 |
|
|
37 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
38 |
global _%1:function %1.endfunc-%1 |
global _%1:function %1.endfunc-%1 |
39 |
%define %1 _%1:function %1.endfunc-%1 |
%define %1 _%1:function %1.endfunc-%1 |
40 |
|
%define ENDFUNC .endfunc |
41 |
%else |
%else |
42 |
global _%1 |
global _%1 |
43 |
%define %1 _%1 |
%define %1 _%1 |
44 |
|
%define ENDFUNC |
45 |
%endif |
%endif |
46 |
%else |
%else |
47 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
48 |
global %1:function %1.endfunc-%1 |
global %1:function %1.endfunc-%1 |
49 |
|
%define ENDFUNC .endfunc |
50 |
%else |
%else |
51 |
global %1 |
global %1 |
52 |
|
%define ENDFUNC |
53 |
%endif |
%endif |
54 |
%endif |
%endif |
55 |
%endmacro |
%endmacro |
196 |
jg near .lloop |
jg near .lloop |
197 |
|
|
198 |
ALIGN 16 |
ALIGN 16 |
199 |
.loop |
.loop: |
200 |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
201 |
psubw mm0, mm1 ;-mm1 |
psubw mm0, mm1 ;-mm1 |
202 |
movq mm4, [rax + 8*rsi + 120] ; |
movq mm4, [rax + 8*rsi + 120] ; |
246 |
add rsi, byte 2 |
add rsi, byte 2 |
247 |
jng near .loop |
jng near .loop |
248 |
|
|
249 |
.done |
.done: |
250 |
; calculate data[0] // (int32_t)dcscalar |
; calculate data[0] // (int32_t)dcscalar |
251 |
; mov esi, [esp + 12 + 16] ; dcscalar |
; mov esi, [esp + 12 + 16] ; dcscalar |
252 |
mov rsi, r9 ; dcscalar |
mov rsi, r9 ; dcscalar |
282 |
ret |
ret |
283 |
|
|
284 |
ALIGN 16 |
ALIGN 16 |
285 |
.q1loop |
.q1loop: |
286 |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
287 |
psubw mm0, mm1 ;-mm1 |
psubw mm0, mm1 ;-mm1 |
288 |
movq mm4, [rax + 8*rsi+120] ; |
movq mm4, [rax + 8*rsi+120] ; |
333 |
jmp near .done |
jmp near .done |
334 |
|
|
335 |
ALIGN 8 |
ALIGN 8 |
336 |
.lloop |
.lloop: |
337 |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
338 |
psubw mm0, mm1 ;-mm1 |
psubw mm0, mm1 ;-mm1 |
339 |
movq mm4, [rax + 8*rsi+120] |
movq mm4, [rax + 8*rsi+120] |
386 |
add rsi,byte 2 |
add rsi,byte 2 |
387 |
jng near .lloop |
jng near .lloop |
388 |
jmp near .done |
jmp near .done |
389 |
.endfunc |
ENDFUNC |
390 |
|
|
391 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
392 |
; |
; |
422 |
jg near .lloop |
jg near .lloop |
423 |
|
|
424 |
ALIGN 16 |
ALIGN 16 |
425 |
.loop |
.loop: |
426 |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
427 |
psubw mm0, mm1 ;-mm1 |
psubw mm0, mm1 ;-mm1 |
428 |
movq mm4, [rax + 8*rsi + 120] ; |
movq mm4, [rax + 8*rsi + 120] ; |
486 |
ret |
ret |
487 |
|
|
488 |
ALIGN 16 |
ALIGN 16 |
489 |
.q1loop |
.q1loop: |
490 |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
491 |
psubw mm0, mm1 ;-mm1 |
psubw mm0, mm1 ;-mm1 |
492 |
movq mm4, [rax + 8*rsi+120] |
movq mm4, [rax + 8*rsi+120] |
535 |
jmp near .done |
jmp near .done |
536 |
|
|
537 |
ALIGN 8 |
ALIGN 8 |
538 |
.lloop |
.lloop: |
539 |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
movq mm1, [rax + 8*rsi+112] ; mm0 = [1st] |
540 |
psubw mm0,mm1 ;-mm1 |
psubw mm0,mm1 ;-mm1 |
541 |
movq mm4, [rax + 8*rsi+120] |
movq mm4, [rax + 8*rsi+120] |
587 |
movq [rdx + 8*rsi +120-16], mm7 |
movq [rdx + 8*rsi +120-16], mm7 |
588 |
jng near .lloop |
jng near .lloop |
589 |
jmp near .done |
jmp near .done |
590 |
.endfunc |
ENDFUNC |
591 |
|
|
592 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
593 |
; |
; |
702 |
|
|
703 |
xor rax, rax |
xor rax, rax |
704 |
ret |
ret |
705 |
.endfunc |
ENDFUNC |
706 |
|
|
707 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
708 |
; |
; |
734 |
pxor mm3, mm3 |
pxor mm3, mm3 |
735 |
|
|
736 |
ALIGN 16 |
ALIGN 16 |
737 |
.loop |
.loop: |
738 |
movq mm0, [rcx+8*rax + 7*16 ] ; mm0 = coeff[i] |
movq mm0, [rcx+8*rax + 7*16 ] ; mm0 = coeff[i] |
739 |
pcmpgtw mm1, mm0 ; mm1 = sgn(c) (preserved) |
pcmpgtw mm1, mm0 ; mm1 = sgn(c) (preserved) |
740 |
movq mm2, [rcx+8*rax + 7*16 +8] ; mm2 = coeff[i+1] |
movq mm2, [rcx+8*rax + 7*16 +8] ; mm2 = coeff[i+1] |
811 |
|
|
812 |
xor rax, rax |
xor rax, rax |
813 |
ret |
ret |
814 |
.endfunc |
ENDFUNC |
815 |
|
|
816 |
%ifidn __OUTPUT_FORMAT__,elf |
%ifidn __OUTPUT_FORMAT__,elf |
817 |
section ".note.GNU-stack" noalloc noexec nowrite progbits |
section ".note.GNU-stack" noalloc noexec nowrite progbits |