20 |
; * along with this program; if not, write to the Free Software |
; * along with this program; if not, write to the Free Software |
21 |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 |
; * |
; * |
23 |
; * $Id: sad_mmx.asm,v 1.15 2004-08-22 11:46:10 edgomez Exp $ |
; * $Id: sad_mmx.asm,v 1.19 2008-11-11 20:46:24 Isibaar Exp $ |
24 |
; * |
; * |
25 |
; ***************************************************************************/ |
; ***************************************************************************/ |
26 |
|
|
29 |
%macro cglobal 1 |
%macro cglobal 1 |
30 |
%ifdef PREFIX |
%ifdef PREFIX |
31 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
32 |
global _%1:function |
global _%1:function %1.endfunc-%1 |
33 |
%define %1 _%1:function |
%define %1 _%1:function %1.endfunc-%1 |
34 |
|
%define ENDFUNC .endfunc |
35 |
%else |
%else |
36 |
global _%1 |
global _%1 |
37 |
%define %1 _%1 |
%define %1 _%1 |
38 |
|
%define ENDFUNC |
39 |
%endif |
%endif |
40 |
%else |
%else |
41 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
42 |
global %1:function |
global %1:function %1.endfunc-%1 |
43 |
|
%define ENDFUNC .endfunc |
44 |
%else |
%else |
45 |
global %1 |
global %1 |
46 |
|
%define ENDFUNC |
47 |
%endif |
%endif |
48 |
%endif |
%endif |
49 |
%endmacro |
%endmacro |
78 |
lea eax, [eax+ecx] |
lea eax, [eax+ecx] |
79 |
movq mm5, mm2 |
movq mm5, mm2 |
80 |
psubusb mm2, mm3 |
psubusb mm2, mm3 |
|
lea edx, [edx+ecx] |
|
81 |
|
|
82 |
psubusb mm1, mm4 |
psubusb mm1, mm4 |
|
por mm0, mm1 |
|
83 |
psubusb mm3, mm5 |
psubusb mm3, mm5 |
84 |
|
por mm0, mm1 |
85 |
por mm2, mm3 |
por mm2, mm3 |
86 |
|
|
87 |
movq mm1, mm0 |
movq mm1, mm0 |
|
movq mm3, mm2 |
|
|
|
|
88 |
punpcklbw mm0,mm7 |
punpcklbw mm0,mm7 |
89 |
|
movq mm3, mm2 |
90 |
punpckhbw mm1,mm7 |
punpckhbw mm1,mm7 |
91 |
|
lea edx, [edx+ecx] |
92 |
punpcklbw mm2,mm7 |
punpcklbw mm2,mm7 |
|
punpckhbw mm3,mm7 |
|
|
|
|
93 |
paddusw mm0, mm1 |
paddusw mm0, mm1 |
94 |
|
punpckhbw mm3,mm7 |
95 |
paddusw mm6, mm0 |
paddusw mm6, mm0 |
96 |
paddusw mm2, mm3 |
paddusw mm2, mm3 |
97 |
paddusw mm6, mm2 |
paddusw mm6, mm2 |
98 |
|
|
99 |
%endmacro |
%endmacro |
100 |
|
|
101 |
%macro SAD_8x8_MMX 0 |
%macro SAD_8x8_MMX 0 |
114 |
psubusb mm2, mm3 |
psubusb mm2, mm3 |
115 |
|
|
116 |
psubusb mm1, mm4 |
psubusb mm1, mm4 |
|
por mm0, mm1 |
|
117 |
psubusb mm3, mm5 |
psubusb mm3, mm5 |
118 |
|
por mm0, mm1 |
119 |
por mm2, mm3 |
por mm2, mm3 |
120 |
|
|
121 |
movq mm1,mm0 |
movq mm1,mm0 |
|
movq mm3,mm2 |
|
|
|
|
122 |
punpcklbw mm0,mm7 |
punpcklbw mm0,mm7 |
123 |
|
movq mm3,mm2 |
124 |
punpckhbw mm1,mm7 |
punpckhbw mm1,mm7 |
125 |
punpcklbw mm2,mm7 |
punpcklbw mm2,mm7 |
|
punpckhbw mm3,mm7 |
|
|
|
|
126 |
paddusw mm0,mm1 |
paddusw mm0,mm1 |
127 |
|
punpckhbw mm3,mm7 |
128 |
paddusw mm6,mm0 |
paddusw mm6,mm0 |
129 |
paddusw mm2,mm3 |
paddusw mm2,mm3 |
130 |
paddusw mm6,mm2 |
paddusw mm6,mm2 |
131 |
%endmacro |
%endmacro |
132 |
|
|
133 |
|
|
134 |
%macro SADV_16x16_MMX 0 |
%macro SADV_16x16_MMX 0 |
135 |
movq mm0, [eax] |
movq mm0, [eax] |
136 |
movq mm1, [edx] |
movq mm1, [edx] |
137 |
|
|
138 |
movq mm2, [eax+8] |
movq mm2, [eax+8] |
|
movq mm3, [edx+8] |
|
|
|
|
139 |
movq mm4, mm0 |
movq mm4, mm0 |
140 |
|
movq mm3, [edx+8] |
141 |
psubusb mm0, mm1 |
psubusb mm0, mm1 |
142 |
|
|
143 |
psubusb mm1, mm4 |
psubusb mm1, mm4 |
|
por mm0, mm1 |
|
144 |
lea eax,[eax+ecx] |
lea eax,[eax+ecx] |
145 |
|
por mm0, mm1 |
146 |
|
|
147 |
movq mm4, mm2 |
movq mm4, mm2 |
148 |
psubusb mm2, mm3 |
psubusb mm2, mm3 |
149 |
|
|
150 |
psubusb mm3, mm4 |
psubusb mm3, mm4 |
151 |
por mm2, mm3 |
por mm2, mm3 |
|
lea edx,[edx+ecx] |
|
152 |
|
|
153 |
movq mm1,mm0 |
movq mm1,mm0 |
|
movq mm3,mm2 |
|
|
|
|
154 |
punpcklbw mm0,mm7 |
punpcklbw mm0,mm7 |
155 |
|
movq mm3,mm2 |
156 |
punpckhbw mm1,mm7 |
punpckhbw mm1,mm7 |
157 |
punpcklbw mm2,mm7 |
punpcklbw mm2,mm7 |
|
punpckhbw mm3,mm7 |
|
|
|
|
158 |
paddusw mm0,mm1 |
paddusw mm0,mm1 |
159 |
paddusw mm2,mm3 |
punpckhbw mm3,mm7 |
|
|
|
160 |
paddusw mm5, mm0 |
paddusw mm5, mm0 |
161 |
|
paddusw mm2,mm3 |
162 |
|
lea edx,[edx+ecx] |
163 |
paddusw mm6, mm2 |
paddusw mm6, mm2 |
164 |
%endmacro |
%endmacro |
165 |
|
|
218 |
movq mm2, [eax+8] |
movq mm2, [eax+8] |
219 |
lea eax, [eax+ecx] |
lea eax, [eax+ecx] |
220 |
movq mm1, mm0 |
movq mm1, mm0 |
|
movq mm3, mm2 |
|
221 |
punpcklbw mm0, mm7 |
punpcklbw mm0, mm7 |
222 |
punpcklbw mm2, mm7 |
movq mm3, mm2 |
223 |
punpckhbw mm1, mm7 |
punpckhbw mm1, mm7 |
|
punpckhbw mm3, mm7 |
|
224 |
paddw mm5, mm0 |
paddw mm5, mm0 |
225 |
|
punpcklbw mm2, mm7 |
226 |
paddw mm6, mm1 |
paddw mm6, mm1 |
227 |
|
punpckhbw mm3, mm7 |
228 |
paddw mm5, mm2 |
paddw mm5, mm2 |
229 |
paddw mm6, mm3 |
paddw mm6, mm3 |
230 |
%endmacro |
%endmacro |
326 |
movd eax, mm6 |
movd eax, mm6 |
327 |
|
|
328 |
ret |
ret |
329 |
|
ENDFUNC |
330 |
|
|
331 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
332 |
; |
; |
359 |
movd eax, mm6 |
movd eax, mm6 |
360 |
|
|
361 |
ret |
ret |
362 |
|
ENDFUNC |
363 |
|
|
364 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
365 |
; |
; |
450 |
pop ebx |
pop ebx |
451 |
|
|
452 |
ret |
ret |
453 |
|
ENDFUNC |
454 |
|
|
455 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
456 |
; |
; |
471 |
|
|
472 |
pxor mm6, mm6 ; accum2 |
pxor mm6, mm6 ; accum2 |
473 |
pxor mm7, mm7 |
pxor mm7, mm7 |
474 |
.Loop |
.Loop: |
475 |
SADBI_16x16_MMX 0, 0 |
SADBI_16x16_MMX 0, 0 |
476 |
SADBI_16x16_MMX 8, 1 |
SADBI_16x16_MMX 8, 1 |
477 |
SADBI_16x16_MMX 0, 0 |
SADBI_16x16_MMX 0, 0 |
515 |
pop ebx |
pop ebx |
516 |
|
|
517 |
ret |
ret |
518 |
|
ENDFUNC |
519 |
|
|
520 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
521 |
; |
; |
536 |
|
|
537 |
pxor mm6, mm6 ; accum2 |
pxor mm6, mm6 ; accum2 |
538 |
pxor mm7, mm7 |
pxor mm7, mm7 |
539 |
.Loop |
.Loop: |
540 |
SADBI_16x16_MMX 0, 1 |
SADBI_16x16_MMX 0, 1 |
541 |
SADBI_16x16_MMX 0, 1 |
SADBI_16x16_MMX 0, 1 |
542 |
SADBI_16x16_MMX 0, 1 |
SADBI_16x16_MMX 0, 1 |
554 |
movd eax, mm6 |
movd eax, mm6 |
555 |
pop ebx |
pop ebx |
556 |
ret |
ret |
557 |
|
ENDFUNC |
558 |
|
|
559 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
560 |
; |
; |
634 |
movd eax, mm6 |
movd eax, mm6 |
635 |
|
|
636 |
ret |
ret |
637 |
|
ENDFUNC |
638 |
|
|
639 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
640 |
; |
; |
684 |
pop edi |
pop edi |
685 |
pop esi |
pop esi |
686 |
ret |
ret |
687 |
|
ENDFUNC |
688 |
|
|
689 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
690 |
; |
; |
749 |
pop edi |
pop edi |
750 |
pop esi |
pop esi |
751 |
ret |
ret |
752 |
|
ENDFUNC |
753 |
|
|
754 |
|
|
755 |
|
%ifidn __OUTPUT_FORMAT__,elf |
756 |
|
section ".note.GNU-stack" noalloc noexec nowrite progbits |
757 |
|
%endif |
758 |
|
|