20 |
; * along with this program; if not, write to the Free Software |
; * along with this program; if not, write to the Free Software |
21 |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 |
; * |
; * |
23 |
; * $Id: gmc_mmx.asm,v 1.1 2006-06-14 21:44:07 Skal Exp $ |
; * $Id: gmc_mmx.asm,v 1.5 2008-11-14 15:43:27 Isibaar Exp $ |
24 |
; * |
; * |
25 |
; *************************************************************************/ |
; *************************************************************************/ |
26 |
|
|
39 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
40 |
global _%1:function %1.endfunc-%1 |
global _%1:function %1.endfunc-%1 |
41 |
%define %1 _%1:function %1.endfunc-%1 |
%define %1 _%1:function %1.endfunc-%1 |
42 |
|
%define ENDFUNC .endfunc |
43 |
%else |
%else |
44 |
global _%1 |
global _%1 |
45 |
%define %1 _%1 |
%define %1 _%1 |
46 |
|
%define ENDFUNC |
47 |
%endif |
%endif |
48 |
%else |
%else |
49 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
50 |
global %1:function %1.endfunc-%1 |
global %1:function %1.endfunc-%1 |
51 |
|
%define ENDFUNC .endfunc |
52 |
%else |
%else |
53 |
global %1 |
global %1 |
54 |
|
%define ENDFUNC |
55 |
%endif |
%endif |
56 |
%endif |
%endif |
57 |
%endmacro |
%endmacro |
60 |
|
|
61 |
; Exported entry points, each declared through the cglobal macro.
; NOTE(review): this listing interleaves two revisions of the file, so the
; mmx and sse2 declarations appear twice; the sse41 declaration appears only
; once, i.e. it is present in just one of the two revisions.
cglobal xvid_GMC_Core_Lin_8_mmx |
cglobal xvid_GMC_Core_Lin_8_mmx |
62 |
cglobal xvid_GMC_Core_Lin_8_sse2 |
cglobal xvid_GMC_Core_Lin_8_sse2 |
63 |
|
cglobal xvid_GMC_Core_Lin_8_sse41 |
64 |
|
|
65 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
66 |
|
|
144 |
movq [eax], mm5 |
movq [eax], mm5 |
145 |
|
|
146 |
ret |
ret |
147 |
.endfunc |
ENDFUNC |
148 |
|
|
149 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
150 |
;// SSE2 version |
;// SSE2 version |
151 |
|
|
152 |
; GMC_8_SSE2 <use_sse41>
;   Shared body of the 8-pixel linear GMC core for the XMM entry points.
;   %1 == 0 : load source bytes with movq, then widen with punpcklbw against
;             xmm7 (the sse2 caller clears xmm7 with pxor before expanding).
;   %1 != 0 : load and zero-extend in one step with pmovzxbw (SSE4.1).
;   Register roles, as annotated at the sse41 call site: eax = Offsets,
;   ecx = Src0, edx = BpS.
; NOTE(review): two %macro headers appear because this listing interleaves
; two revisions; only the 1-parameter form is compatible with the %1 tests
; in the body. Parts of the macro, including its %endmacro, are not shown.
%macro GMC_8_SSE2 0 |
%macro GMC_8_SSE2 1 |
153 |
|
|
154 |
; all bits of xmm0 set (every word = 0xFFFF)
pcmpeqw xmm0, xmm0 |
pcmpeqw xmm0, xmm0 |
155 |
; movdqa is an aligned load: [eax] must be 16-byte aligned
movdqa xmm1, [eax ] ; u... |
movdqa xmm1, [eax ] ; u... |
168 |
pmullw xmm0, xmm4 ; (16-u).(16-v) |
pmullw xmm0, xmm4 ; (16-u).(16-v) |
169 |
pmullw xmm1, xmm4 ; u .(16-v) |
pmullw xmm1, xmm4 ; u .(16-v) |
170 |
|
|
171 |
|
; Second source row (Src0 + BpS): fetch 8 pixels at x and x+1 as words.
%if (%1!=0) ; SSE41 |
172 |
|
pmovzxbw xmm4, [ecx+edx ] ; src2 |
173 |
|
pmovzxbw xmm5, [ecx+edx+1] ; src3 |
174 |
|
%else |
175 |
movq xmm4, [ecx+edx ] ; src2 |
movq xmm4, [ecx+edx ] ; src2 |
176 |
movq xmm5, [ecx+edx+1] ; src3 |
movq xmm5, [ecx+edx+1] ; src3 |
177 |
; interleave with xmm7 to widen bytes to words (relies on xmm7 == 0)
punpcklbw xmm4, xmm7 |
punpcklbw xmm4, xmm7 |
178 |
punpcklbw xmm5, xmm7 |
punpcklbw xmm5, xmm7 |
179 |
|
%endif |
180 |
; scale src2/src3 by the weights held in xmm2/xmm3 (set up in lines not shown)
pmullw xmm2, xmm4 |
pmullw xmm2, xmm4 |
181 |
pmullw xmm3, xmm5 |
pmullw xmm3, xmm5 |
182 |
|
|
183 |
|
; First source row (Src0): same two fetches, same widening choice.
%if (%1!=0) ; SSE41 |
184 |
|
pmovzxbw xmm4, [ecx ] ; src0 |
185 |
|
pmovzxbw xmm5, [ecx +1] ; src1 |
186 |
|
%else |
187 |
movq xmm4, [ecx ] ; src0 |
movq xmm4, [ecx ] ; src0 |
188 |
movq xmm5, [ecx +1] ; src1 |
movq xmm5, [ecx +1] ; src1 |
189 |
punpcklbw xmm4, xmm7 |
punpcklbw xmm4, xmm7 |
190 |
punpcklbw xmm5, xmm7 |
punpcklbw xmm5, xmm7 |
191 |
|
%endif |
192 |
; scale src0/src1 by the (16-u).(16-v) and u.(16-v) weights computed above
pmullw xmm4, xmm0 |
pmullw xmm4, xmm0 |
193 |
pmullw xmm5, xmm1 |
pmullw xmm5, xmm1 |
194 |
|
|
206 |
|
|
207 |
pxor xmm7, xmm7 |
pxor xmm7, xmm7 |
208 |
|
|
209 |
GMC_8_SSE2 |
GMC_8_SSE2 0 |
210 |
|
|
211 |
|
movd xmm4, [esp +20] |
212 |
|
pshuflw xmm4, xmm4, 01010101b ; Rounder (bits [16..31]) |
213 |
|
punpckldq xmm4, xmm4 |
214 |
|
mov eax, [esp + 4] ; Dst |
215 |
|
|
216 |
|
paddw xmm5, xmm4 |
217 |
|
psrlw xmm5, 8 |
218 |
|
packuswb xmm5, xmm5 |
219 |
|
movq [eax], xmm5 |
220 |
|
|
221 |
|
ret |
222 |
|
ENDFUNC |
223 |
|
|
224 |
pshuflw xmm4, [esp +20], 01010101b ; Rounder (bits [16..31]) |
; xvid_GMC_Core_Lin_8_sse41
;   SSE4.1 entry point of the 8-pixel linear GMC core.
;   Arguments are read from the stack relative to esp:
;     [esp+ 4] Dst, [esp+ 8] Offsets, [esp+12] Src0, [esp+16] BpS,
;     [esp+20] dword whose bits 16..31 hold the rounder.
;   Expands GMC_8_SSE2 with argument 1 (the pmovzxbw load path), then stores
;   the low 8 bytes of xmm5 to Dst.
; NOTE(review): this listing interleaves two revisions, so some rows below
; appear twice and the function ends with both `.endfunc` and `ENDFUNC`.
align 16 |
225 |
|
xvid_GMC_Core_Lin_8_sse41: |
226 |
|
mov eax, [esp + 8] ; Offsets |
227 |
|
mov ecx, [esp +12] ; Src0 |
228 |
|
mov edx, [esp +16] ; BpS |
229 |
|
|
230 |
|
GMC_8_SSE2 1 |
231 |
|
|
232 |
|
; Load the rounder dword into a register first, so pshuflw takes a register
; operand rather than a 128-bit memory operand at [esp+20].
movd xmm4, [esp +20] |
233 |
|
; imm 01010101b selects word 1 for each of the four low words
pshuflw xmm4, xmm4, 01010101b ; Rounder (bits [16..31]) |
234 |
; duplicate the low dwords so all eight words of xmm4 hold the rounder
punpckldq xmm4, xmm4 |
punpckldq xmm4, xmm4 |
235 |
mov eax, [esp + 4] ; Dst |
mov eax, [esp + 4] ; Dst |
236 |
|
|
; NOTE(review): rows 237-239 are not shown in this listing. The sse2 variant
; at the same point does paddw xmm5,xmm4 / psrlw xmm5,8 / packuswb xmm5,xmm5;
; presumably the same sequence sits here - confirm against the full file.
240 |
; write 8 result bytes to Dst
movq [eax], xmm5 |
movq [eax], xmm5 |
241 |
|
|
242 |
ret |
ret |
243 |
; ENDFUNC is %defined by the cglobal macro as `.endfunc` or as nothing
.endfunc |
ENDFUNC |
244 |
|
|
245 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
246 |
|
|
247 |
|
; Only when the output format is ELF: emit an empty .note.GNU-stack section
; (noalloc, noexec, nowrite). By toolchain convention this marks the object
; as not requiring an executable stack.
%ifidn __OUTPUT_FORMAT__,elf |
248 |
|
section ".note.GNU-stack" noalloc noexec nowrite progbits |
249 |
|
%endif |