20 |
; * along with this program; if not, write to the Free Software |
; * along with this program; if not, write to the Free Software |
21 |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 |
; * |
; * |
23 |
; * $Id: gmc_mmx.asm,v 1.4 2008-11-11 20:46:24 Isibaar Exp $ |
; * $Id: gmc_mmx.asm,v 1.5 2008-11-14 15:43:27 Isibaar Exp $ |
24 |
; * |
; * |
25 |
; *************************************************************************/ |
; *************************************************************************/ |
26 |
|
|
60 |
|
|
61 |
cglobal xvid_GMC_Core_Lin_8_mmx |
cglobal xvid_GMC_Core_Lin_8_mmx |
62 |
cglobal xvid_GMC_Core_Lin_8_sse2 |
cglobal xvid_GMC_Core_Lin_8_sse2 |
63 |
|
cglobal xvid_GMC_Core_Lin_8_sse41 |
64 |
|
|
65 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
66 |
|
|
149 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
150 |
;// SSE2 version |
;// SSE2 version |
151 |
|
|
152 |
%macro GMC_8_SSE2 0 |
%macro GMC_8_SSE2 1 |
153 |
|
|
154 |
pcmpeqw xmm0, xmm0 |
pcmpeqw xmm0, xmm0 |
155 |
movdqa xmm1, [eax ] ; u... |
movdqa xmm1, [eax ] ; u... |
168 |
pmullw xmm0, xmm4 ; (16-u).(16-v) |
pmullw xmm0, xmm4 ; (16-u).(16-v) |
169 |
pmullw xmm1, xmm4 ; u .(16-v) |
pmullw xmm1, xmm4 ; u .(16-v) |
170 |
|
|
171 |
|
%if (%1!=0) ; SSE41 |
172 |
|
pmovzxbw xmm4, [ecx+edx ] ; src2 |
173 |
|
pmovzxbw xmm5, [ecx+edx+1] ; src3 |
174 |
|
%else |
175 |
movq xmm4, [ecx+edx ] ; src2 |
movq xmm4, [ecx+edx ] ; src2 |
176 |
movq xmm5, [ecx+edx+1] ; src3 |
movq xmm5, [ecx+edx+1] ; src3 |
177 |
punpcklbw xmm4, xmm7 |
punpcklbw xmm4, xmm7 |
178 |
punpcklbw xmm5, xmm7 |
punpcklbw xmm5, xmm7 |
179 |
|
%endif |
180 |
pmullw xmm2, xmm4 |
pmullw xmm2, xmm4 |
181 |
pmullw xmm3, xmm5 |
pmullw xmm3, xmm5 |
182 |
|
|
183 |
|
%if (%1!=0) ; SSE41 |
184 |
|
pmovzxbw xmm4, [ecx ] ; src0 |
185 |
|
pmovzxbw xmm5, [ecx +1] ; src1 |
186 |
|
%else |
187 |
movq xmm4, [ecx ] ; src0 |
movq xmm4, [ecx ] ; src0 |
188 |
movq xmm5, [ecx +1] ; src1 |
movq xmm5, [ecx +1] ; src1 |
189 |
punpcklbw xmm4, xmm7 |
punpcklbw xmm4, xmm7 |
190 |
punpcklbw xmm5, xmm7 |
punpcklbw xmm5, xmm7 |
191 |
|
%endif |
192 |
pmullw xmm4, xmm0 |
pmullw xmm4, xmm0 |
193 |
pmullw xmm5, xmm1 |
pmullw xmm5, xmm1 |
194 |
|
|
206 |
|
|
207 |
pxor xmm7, xmm7 |
pxor xmm7, xmm7 |
208 |
|
|
209 |
GMC_8_SSE2 |
GMC_8_SSE2 0 |
210 |
|
|
211 |
|
movd xmm4, [esp +20] |
212 |
|
pshuflw xmm4, xmm4, 01010101b ; Rounder (bits [16..31]) |
213 |
|
punpckldq xmm4, xmm4 |
214 |
|
mov eax, [esp + 4] ; Dst |
215 |
|
|
216 |
|
paddw xmm5, xmm4 |
217 |
|
psrlw xmm5, 8 |
218 |
|
packuswb xmm5, xmm5 |
219 |
|
movq [eax], xmm5 |
220 |
|
|
221 |
|
ret |
222 |
|
ENDFUNC |
223 |
|
|
224 |
|
align 16 |
225 |
|
xvid_GMC_Core_Lin_8_sse41: |
226 |
|
mov eax, [esp + 8] ; Offsets |
227 |
|
mov ecx, [esp +12] ; Src0 |
228 |
|
mov edx, [esp +16] ; BpS |
229 |
|
|
230 |
|
GMC_8_SSE2 1 |
231 |
|
|
232 |
movd xmm4, [esp +20] |
movd xmm4, [esp +20] |
233 |
pshuflw xmm4, xmm4, 01010101b ; Rounder (bits [16..31]) |
pshuflw xmm4, xmm4, 01010101b ; Rounder (bits [16..31]) |