Parent Directory | Revision Log
Revision 1803 -
(view)
(download)
Original Path: trunk/xvidcore/src/image/x86_asm/gmc_mmx.asm
1 : | Skal | 1709 | ;/***************************************************************************** |
2 : | ; * | ||
3 : | ; * XVID MPEG-4 VIDEO CODEC | ||
4 : | ; * - GMC core functions - | ||
5 : | ; * Copyright(C) 2006 Pascal Massimino <skal@planet-d.net> | ||
6 : | ; * | ||
7 : | ; * This file is part of XviD, a free MPEG-4 video encoder/decoder | ||
8 : | ; * | ||
9 : | ; * XviD is free software; you can redistribute it and/or modify it | ||
10 : | ; * under the terms of the GNU General Public License as published by | ||
11 : | ; * the Free Software Foundation; either version 2 of the License, or | ||
12 : | ; * (at your option) any later version. | ||
13 : | ; * | ||
14 : | ; * This program is distributed in the hope that it will be useful, | ||
15 : | ; * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 : | ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 : | ; * GNU General Public License for more details. | ||
18 : | ; * | ||
19 : | ; * You should have received a copy of the GNU General Public License | ||
20 : | ; * along with this program; if not, write to the Free Software | ||
21 : | ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 : | ; * | ||
23 : | Isibaar | 1803 | ; * $Id: gmc_mmx.asm,v 1.7 2008-11-26 23:35:50 Isibaar Exp $ |
24 : | Skal | 1709 | ; * |
25 : | ; *************************************************************************/ | ||
26 : | |||
27 : | ;/************************************************************************** | ||
28 : | ; * | ||
29 : | ; * History: | ||
30 : | ; * | ||
31 : | ; * Jun 14 2006: initial version (during Germany/Poland match;) | ||
32 : | ; * | ||
33 : | ; *************************************************************************/ | ||
34 : | |||
%include "nasm.inc"

;//////////////////////////////////////////////////////////////////////
;// Entry points declared by this file (cglobal comes from nasm.inc)

cglobal xvid_GMC_Core_Lin_8_mmx
cglobal xvid_GMC_Core_Lin_8_sse2
cglobal xvid_GMC_Core_Lin_8_sse41

;//////////////////////////////////////////////////////////////////////
;// Constants

DATA

align SECTION_ALIGN
Cst16:                                  ; eight 16-bit words, each equal to 16;
  dw 16, 16, 16, 16, 16, 16, 16, 16     ; read with movq (4 words) and movdqa (8 words)

SECTION .rotext align=SECTION_ALIGN
52 : | Skal | 1709 | |
53 : | ;////////////////////////////////////////////////////////////////////// | ||
54 : | ;// mmx version | ||
55 : | |||
;----------------------------------------------------------------------
; GMC_4_SSE i, out
;
;   %1 (i)   : index of the first of the four pixels handled
;   %2 (out) : MMX register receiving the four 16-bit results
;              (the callers in this file pass mm5 and mm6)
;
; For each of the four pixels k = i .. i+3:
;     u   = word [_EAX + 2*k]        & 15
;     v   = word [_EAX + 2*k + 2*16] & 15
;     out = (16-u)*(16-v) * src[k]       + u*(16-v) * src[k+1]
;         + (16-u)*v      * src[k+BpS]   + u*v      * src[k+BpS+1]
; with src = TMP0 and BpS = TMP1. The sum is kept as a 16-bit word;
; rounding and the final shift are left to the caller.
;
; Expects : _EAX = offsets table, TMP0 = source, TMP1 = stride, mm7 = 0
; Clobbers: mm0, mm1, mm2, mm3, mm4 and %2
;
; Despite the "SSE" in its name, only MMX instructions are used.
;----------------------------------------------------------------------
%macro GMC_4_SSE 2  ; %1: i   %2: out reg (mm5 or mm6)

    ; --- fetch the fractional offsets and keep their low 4 bits ---
  movq      mm1, [_EAX+2*(%1)     ]    ; u0 | u1 | u2 | u3
  movq      mm2, [_EAX+2*(%1)+2*16]    ; v0 | v1 | v2 | v3
  pcmpeqw   mm0, mm0                   ; all ones ...
  psrlw     mm0, 12                    ; ... -> 0x000f in every word
  pand      mm1, mm0                   ; u
  pand      mm2, mm0                   ; v

    ; --- build the four bilinear weights ---
  movq      mm0, [Cst16]
  movq      mm4, mm0
  psubw     mm0, mm1                   ; 16-u
  psubw     mm4, mm2                   ; 16-v
  movq      mm3, mm1
  pmullw    mm3, mm2                   ; u.v
  pmullw    mm2, mm0                   ; (16-u).v
  pmullw    mm0, mm4                   ; (16-u).(16-v)
  pmullw    mm1, mm4                   ; u.(16-v)

    ; --- lower row (one stride below): src2, src3 ---
  movd      mm4, [TMP0+TMP1  +%1]      ; src2
  punpcklbw mm4, mm7                   ; bytes -> words
  pmullw    mm2, mm4                   ; (16-u).v * src2
  movd      %2,  [TMP0+TMP1+1+%1]      ; src3
  punpcklbw %2,  mm7
  pmullw    mm3, %2                    ; u.v * src3
  paddw     mm2, mm3                   ; lower-row contribution

    ; --- upper row: src0, src1 ---
  movd      mm4, [TMP0       +%1]      ; src0
  punpcklbw mm4, mm7
  pmullw    mm4, mm0                   ; (16-u).(16-v) * src0
  movd      %2,  [TMP0     +1+%1]      ; src1
  punpcklbw %2,  mm7
  pmullw    %2,  mm1                   ; u.(16-v) * src1
  paddw     %2,  mm4                   ; upper-row contribution

  paddw     %2,  mm2                   ; total of the four taps
%endmacro
95 : | |||
;----------------------------------------------------------------------
; xvid_GMC_Core_Lin_8_mmx
;
;   prm1 : Dst      - receives 8 output bytes
;   prm2 : Offsets  - table of 16-bit u/v offsets (v starts 16 words after u)
;   prm3 : Src0     - top-left source pixel
;   prm4 : BpS      - source stride in bytes
;   prm5 : Rounder  - the rounding term is taken from bits [16..31]
;
; Writes Dst[k] = sat8( (bilinear_sum(k) + rounder) >> 8 ) for k = 0..7,
; where bilinear_sum is what GMC_4_SSE produces.
;
; Clobbers: _EAX, TMP0, TMP1, mm0-mm7.
; NOTE(review): no emms is issued here - presumably the caller is
; responsible for it; confirm against the call sites.
;----------------------------------------------------------------------
align SECTION_ALIGN
xvid_GMC_Core_Lin_8_mmx:
  pxor      mm7, mm7          ; zero register used for byte->word unpacking

  mov       TMP1, prm4        ; BpS
  mov       TMP0, prm3        ; Src0
  mov       _EAX, prm2        ; Offsets

  GMC_4_SSE 0, mm5            ; pixels 0..3 -> mm5
  GMC_4_SSE 4, mm6            ; pixels 4..7 -> mm6

    ; Broadcast the high word of Rounder to all four words of mm4.
    ; (pshufw would do this in one step, but it is not a plain-MMX instruction.)
  movd      mm4, prm5d        ; Rounder (bits [16..31])
  punpcklwd mm4, mm4          ; lo | lo | hi | hi
  punpckhdq mm4, mm4          ; hi | hi | hi | hi

  mov       _EAX, prm1        ; Dst

  paddw     mm5, mm4
  psrlw     mm5, 8
  paddw     mm6, mm4
  psrlw     mm6, 8
  packuswb  mm5, mm6          ; 8 words -> 8 unsigned bytes
  movq      [_EAX], mm5

  ret
ENDFUNC
122 : | Skal | 1709 | |
123 : | ;////////////////////////////////////////////////////////////////////// | ||
124 : | ;// SSE2 version | ||
125 : | |||
;----------------------------------------------------------------------
; GMC_8_SSE2 use_sse41
;
;   %1 : 0     -> SSE2-only code path
;        non-0 -> SSE4.1 path (pmovzxbw loads)
;
; For each of the eight pixels k = 0 .. 7:
;     u    = word [_EAX + 2*k]        & 15
;     v    = word [_EAX + 2*k + 2*16] & 15
;     xmm5 = (16-u)*(16-v) * src[k]      + u*(16-v) * src[k+1]
;          + (16-u)*v      * src[k+BpS]  + u*v      * src[k+BpS+1]
; with src = TMP0 and BpS = TMP1. Sums are kept as 16-bit words;
; rounding and the final shift are left to the caller.
;
; Expects : _EAX = offsets table (read with movdqa, so it must be
;           16-byte aligned), TMP0 = source, TMP1 = stride
; Result  : xmm5
; Clobbers: xmm0 - xmm5 only.
;
; The SSE2 path widens bytes to words with "punpcklbw x,x / psrlw x,8"
; rather than unpacking against a zeroed xmm7. This keeps the code out
; of xmm6/xmm7, which are callee-saved under the Microsoft x64 calling
; convention; the previous version overwrote xmm7 without restoring it.
;----------------------------------------------------------------------
%macro GMC_8_SSE2 1

  pcmpeqw   xmm0, xmm0
  movdqa    xmm1, [_EAX     ]  ; u...
  psrlw     xmm0, 12           ; mask = 0x000f
  movdqa    xmm2, [_EAX+2*16]  ; v...
  pand      xmm1, xmm0
  pand      xmm2, xmm0

  movdqa    xmm0, [Cst16]
  movdqa    xmm3, xmm1    ; u     | ...
  movdqa    xmm4, xmm0
  pmullw    xmm3, xmm2    ; u.v
  psubw     xmm0, xmm1    ; 16-u
  psubw     xmm4, xmm2    ; 16-v
  pmullw    xmm2, xmm0    ; (16-u).v
  pmullw    xmm0, xmm4    ; (16-u).(16-v)
  pmullw    xmm1, xmm4    ;     u .(16-v)

%if (%1!=0) ; SSE41
  pmovzxbw  xmm4, [TMP0+TMP1  ]  ; src2
  pmovzxbw  xmm5, [TMP0+TMP1+1]  ; src3
%else
  movq      xmm4, [TMP0+TMP1  ]  ; src2
  movq      xmm5, [TMP0+TMP1+1]  ; src3
  punpcklbw xmm4, xmm4           ; every word = (b << 8) | b
  punpcklbw xmm5, xmm5
  psrlw     xmm4, 8              ; -> zero-extended byte
  psrlw     xmm5, 8
%endif
  pmullw    xmm2, xmm4
  pmullw    xmm3, xmm5

%if (%1!=0) ; SSE41
  pmovzxbw  xmm4, [TMP0       ]  ; src0
  pmovzxbw  xmm5, [TMP0     +1]  ; src1
%else
  movq      xmm4, [TMP0       ]  ; src0
  movq      xmm5, [TMP0     +1]  ; src1
  punpcklbw xmm4, xmm4           ; every word = (b << 8) | b
  punpcklbw xmm5, xmm5
  psrlw     xmm4, 8              ; -> zero-extended byte
  psrlw     xmm5, 8
%endif
  pmullw    xmm4, xmm0
  pmullw    xmm5, xmm1

  paddw     xmm2, xmm3
  paddw     xmm5, xmm4

  paddw     xmm5, xmm2
%endmacro

;----------------------------------------------------------------------
; xvid_GMC_Core_Lin_8_sse2
;
;   prm1 : Dst      - receives 8 output bytes
;   prm2 : Offsets  - table of 16-bit u/v offsets (v starts 16 words after u)
;   prm3 : Src0     - top-left source pixel
;   prm4 : BpS      - source stride in bytes
;   prm5 : Rounder  - the rounding term is taken from bits [16..31]
;
; Writes Dst[k] = sat8( (bilinear_sum(k) + rounder) >> 8 ) for k = 0..7.
; Clobbers: _EAX, TMP0, TMP1, xmm0-xmm5.
;----------------------------------------------------------------------
align SECTION_ALIGN
xvid_GMC_Core_Lin_8_sse2:
  mov  _EAX, prm2  ; Offsets
  mov  TMP0, prm3  ; Src0
  mov  TMP1, prm4  ; BpS

  GMC_8_SSE2 0

  movd      xmm4, prm5d
  pshuflw   xmm4, xmm4, 01010101b  ; Rounder (bits [16..31]) in the 4 low words
  punpckldq xmm4, xmm4             ; ... and now in all 8 words
  mov  _EAX, prm1  ; Dst

  paddw    xmm5, xmm4
  psrlw    xmm5, 8
  packuswb xmm5, xmm5              ; 8 words -> 8 unsigned bytes (low qword)
  movq [_EAX], xmm5

  ret
ENDFUNC

;----------------------------------------------------------------------
; xvid_GMC_Core_Lin_8_sse41
;
; Same contract and parameters as xvid_GMC_Core_Lin_8_sse2; the source
; bytes are widened with pmovzxbw (SSE4.1) instead.
; Clobbers: _EAX, TMP0, TMP1, xmm0-xmm5.
;----------------------------------------------------------------------
align SECTION_ALIGN
xvid_GMC_Core_Lin_8_sse41:
  mov  _EAX, prm2  ; Offsets
  mov  TMP0, prm3  ; Src0
  mov  TMP1, prm4  ; BpS

  GMC_8_SSE2 1

  movd      xmm4, prm5d
  pshuflw   xmm4, xmm4, 01010101b  ; Rounder (bits [16..31]) in the 4 low words
  punpckldq xmm4, xmm4             ; ... and now in all 8 words
  mov  _EAX, prm1  ; Dst

  paddw    xmm5, xmm4
  psrlw    xmm5, 8
  packuswb xmm5, xmm5              ; 8 words -> 8 unsigned bytes (low qword)
  movq [_EAX], xmm5

  ret
ENDFUNC
218 : | |||
219 : | Skal | 1709 | ;////////////////////////////////////////////////////////////////////// |
220 : | Isibaar | 1790 | |
; Mark the object as not needing an executable stack on ELF targets.
; __OUTPUT_FORMAT__ carries the output format name, so the 32- and
; 64-bit ELF names have to be matched explicitly in addition to the
; plain "elf" spelling; otherwise those builds would omit the note.
%ifidn __OUTPUT_FORMAT__,elf
section ".note.GNU-stack" noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__,elf32
section ".note.GNU-stack" noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__,elf64
section ".note.GNU-stack" noalloc noexec nowrite progbits
%endif
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |