Annotation of /trunk/xvidcore/src/image/x86_asm/interpolate8x8_mmx.asm

Revision 434 - (view) (download)

1 :	chl	434	;/*****************************************************************************
2 :	Isibaar	262	; *
3 :	chl	434	; * XVID MPEG-4 VIDEO CODEC
4 :			; * mmx 8x8 block-based halfpel interpolation
5 :	Isibaar	262	; *
6 :	chl	434	; * Copyright(C) 2002 Peter Ross <pross@xvid.org>
7 :			; * Copyright(C) 2002 Michael Militzer <michael@xvid.org>
8 :	Isibaar	262	; *
9 :	chl	434	; * This program is an implementation of a part of one or more MPEG-4
10 :			; * Video tools as specified in ISO/IEC 14496-2 standard. Those intending
11 :			; * to use this software module in hardware or software products are
12 :			; * advised that its use may infringe existing patents or copyrights, and
13 :			; * any such use would be at such party's own risk. The original
14 :			; * developer of this software module and his/her company, and subsequent
15 :			; * editors and their companies, will have no liability for use of this
16 :			; * software or modifications or derivatives thereof.
17 :	Isibaar	262	; *
18 :	chl	434	; * This program is free software; you can redistribute it and/or modify
19 :			; * it under the terms of the GNU General Public License as published by
20 :			; * the Free Software Foundation; either version 2 of the License, or
21 :			; * (at your option) any later version.
22 :	Isibaar	262	; *
23 :	chl	434	; * This program is distributed in the hope that it will be useful,
24 :			; * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 :			; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 :			; * GNU General Public License for more details.
27 :	Isibaar	262	; *
28 :	chl	434	; * You should have received a copy of the GNU General Public License
29 :			; * along with this program; if not, write to the Free Software
30 :			; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 :	Isibaar	262	; *
32 :	chl	434	; ****************************************************************************/
33 :	Isibaar	262
34 :			bits 32 ; assemble the whole file as 32-bit code
35 :			; cglobal NAME: declare NAME global; when PREFIX is defined, export _NAME and alias NAME to _NAME.
36 :			%macro cglobal 1
37 :			%ifdef PREFIX
38 :			global _%1 ; exported symbol carries a leading underscore
39 :			%define %1 _%1
40 :			%else
41 :			global %1 ; exported symbol is the plain name
42 :			%endif
43 :			%endmacro
44 :
45 :			section .data
46 :
47 :			align 16
48 :			; Each rounding table is two 8-byte rows of four words; the code selects a row with [table + rounding * 8].
49 :			;===========================================================================
50 :			; (1 - r) rounding table
51 :			;===========================================================================
52 :
53 :			rounding1_mmx:
54 :			times 4 dw 1 ; row 0 (rounding = 0): add 1 to every word
55 :			times 4 dw 0 ; row 1 (rounding = 1): add 0 to every word
56 :
57 :			;===========================================================================
58 :			; (2 - r) rounding table
59 :			;===========================================================================
60 :
61 :			rounding2_mmx:
62 :			times 4 dw 2 ; row 0 (rounding = 0): add 2 to every word
63 :			times 4 dw 1 ; row 1 (rounding = 1): add 1 to every word
64 :
65 :			mmx_one:
66 :			times 8 db 1 ; eight bytes of 1; not referenced by any code visible in this listing
67 :
68 :			section .text
69 :			; CALC_AVG a_lo, a_hi, b_lo, b_hi, rnd, zero: a = (a + widen(b) + rnd) >> 1 on words; a_lo/a_hi must already hold words.
70 :			%macro CALC_AVG 6
71 :			punpcklbw %3, %6 ; arg3 = its low 4 bytes interleaved with arg6 (the zero register at every call site), i.e. widened to words
72 :			punpckhbw %4, %6 ; arg4 = its high 4 bytes widened the same way
73 :
74 :			paddusw %1, %3 ; arg1/arg2 += arg3/arg4 (word-wise, unsigned saturating add)
75 :			paddusw %2, %4
76 :			paddusw %1, %5 ; arg1/arg2 += rounding words (arg5)
77 :			paddusw %2, %5
78 :
79 :			psrlw %1, 1 ; arg1/arg2 >>= 1 (logical shift of each word)
80 :			psrlw %2, 1
81 :
82 :			%endmacro
83 :
84 :
85 :			;===========================================================================
86 :			;
87 :			; void interpolate8x8_halfpel_h_mmx(uint8_t * const dst,
88 :			; const uint8_t * const src,
89 :			; const uint32_t stride,
90 :			; const uint32_t rounding);
91 :			;
92 :			;===========================================================================
93 :			; COPY_H_MMX: one row of 8 pixels, averaging [esi] with [esi + 1]; expects mm6 = 0, mm7 = rounding words, edx = stride.
94 :			%macro COPY_H_MMX 0
95 :			movq mm0, [esi] ; 8 source bytes
96 :			movq mm2, [esi + 1] ; the 8 bytes one pixel to the right (so 9 source bytes are read per row)
97 :			movq mm1, mm0 ; copies kept for the high halves
98 :			movq mm3, mm2
99 :
100 :			punpcklbw mm0, mm6 ; mm0 = low 4 bytes of [src] as words
101 :			punpckhbw mm1, mm6 ; mm1 = high 4 bytes of [src] as words (mm2/mm3 are widened inside CALC_AVG)
102 :
103 :			CALC_AVG mm0, mm1, mm2, mm3, mm7, mm6 ; mm0/mm1 = (src + src[+1] + rounding words) >> 1
104 :
105 :			packuswb mm0, mm1 ; words back to 8 bytes
106 :			movq [edi], mm0 ; [dst] = mm01
107 :
108 :			add esi, edx ; src += stride
109 :			add edi, edx ; dst += stride
110 :			%endmacro
111 :
112 :			align 16
113 :			cglobal interpolate8x8_halfpel_h_mmx
114 :			interpolate8x8_halfpel_h_mmx:
115 :			; Writes 8 rows of 8 bytes: dst[x] = (src[x] + src[x + 1] + 1 - rounding) >> 1 for rounding in {0, 1}; no emms is issued here.
116 :			push esi ; esi/edi are saved here and restored before ret
117 :			push edi
118 :
119 :			mov eax, [esp + 8 + 16] ; rounding (the + 8 skips the two registers pushed above)
120 :
121 :			interpolate8x8_halfpel_h_mmx.start:
122 :			movq mm7, [rounding1_mmx + eax * 8] ; mm7 = four words of (1 - rounding); eax selects the 8-byte table row
123 :
124 :			mov edi, [esp + 8 + 4] ; dst
125 :			mov esi, [esp + 8 + 8] ; src
126 :			mov edx, [esp + 8 + 12] ; stride
127 :
128 :			pxor mm6, mm6 ; zero
129 :			; eight unrolled rows; each COPY_H_MMX advances esi and edi by the stride in edx
130 :			COPY_H_MMX
131 :			COPY_H_MMX
132 :			COPY_H_MMX
133 :			COPY_H_MMX
134 :			COPY_H_MMX
135 :			COPY_H_MMX
136 :			COPY_H_MMX
137 :			COPY_H_MMX
138 :
139 :			pop edi
140 :			pop esi
141 :
142 :			ret
143 :
144 :
145 :			;===========================================================================
146 :			;
147 :			; void interpolate8x8_halfpel_v_mmx(uint8_t * const dst,
148 :			; const uint8_t * const src,
149 :			; const uint32_t stride,
150 :			; const uint32_t rounding);
151 :			;
152 :			;===========================================================================
153 :			; COPY_V_MMX: one row of 8 pixels, averaging [esi] with [esi + edx]; expects mm6 = 0, mm7 = rounding words, edx = stride.
154 :			%macro COPY_V_MMX 0
155 :			movq mm0, [esi] ; 8 bytes of the current row
156 :			movq mm2, [esi + edx] ; the 8 bytes directly below (src + stride)
157 :			movq mm1, mm0 ; copies kept for the high halves
158 :			movq mm3, mm2
159 :
160 :			punpcklbw mm0, mm6 ; mm0 = low 4 bytes of [src] as words
161 :			punpckhbw mm1, mm6 ; mm1 = high 4 bytes of [src] as words (mm2/mm3 = [src + stride] are widened inside CALC_AVG)
162 :
163 :			CALC_AVG mm0, mm1, mm2, mm3, mm7, mm6 ; mm0/mm1 = (src + src[+stride] + rounding words) >> 1
164 :
165 :			packuswb mm0, mm1 ; words back to 8 bytes
166 :			movq [edi], mm0 ; [dst] = mm01
167 :
168 :			add esi, edx ; src += stride
169 :			add edi, edx ; dst += stride
170 :			%endmacro
171 :
172 :			align 16
173 :			cglobal interpolate8x8_halfpel_v_mmx
174 :			interpolate8x8_halfpel_v_mmx:
175 :			; Writes 8 rows of 8 bytes: dst[x] = (src[x] + src[x + stride] + 1 - rounding) >> 1 for rounding in {0, 1}; no emms is issued here.
176 :			push esi ; esi/edi are saved here and restored before ret
177 :			push edi
178 :
179 :			mov eax, [esp + 8 + 16] ; rounding (the + 8 skips the two registers pushed above)
180 :
181 :			interpolate8x8_halfpel_v_mmx.start:
182 :			movq mm7, [rounding1_mmx + eax * 8] ; mm7 = four words of (1 - rounding); eax selects the 8-byte table row
183 :
184 :			mov edi, [esp + 8 + 4] ; dst
185 :			mov esi, [esp + 8 + 8] ; src
186 :			mov edx, [esp + 8 + 12] ; stride
187 :
188 :			pxor mm6, mm6 ; zero
189 :
190 :			; eight unrolled rows; the last one reads the source row at src + 8 * stride
191 :			COPY_V_MMX
192 :			COPY_V_MMX
193 :			COPY_V_MMX
194 :			COPY_V_MMX
195 :			COPY_V_MMX
196 :			COPY_V_MMX
197 :			COPY_V_MMX
198 :			COPY_V_MMX
199 :
200 :			pop edi
201 :			pop esi
202 :
203 :			ret
204 :
205 :
206 :			;===========================================================================
207 :			;
208 :			; void interpolate8x8_halfpel_hv_mmx(uint8_t * const dst,
209 :			; const uint8_t * const src,
210 :			; const uint32_t stride,
211 :			; const uint32_t rounding);
212 :			;
213 :			;
214 :			;===========================================================================
215 :			; COPY_HV_MMX: one row of 8 pixels from a 2x2 neighbourhood sum; expects mm6 = 0, mm7 = rounding words, edx = stride.
216 :			%macro COPY_HV_MMX 0
217 :			; current row
218 :
219 :			movq mm0, [esi]
220 :			movq mm2, [esi + 1]
221 :
222 :			movq mm1, mm0 ; copies kept for the high halves
223 :			movq mm3, mm2
224 :
225 :			punpcklbw mm0, mm6 ; mm01 = [src]
226 :			punpcklbw mm2, mm6 ; mm23 = [src + 1]
227 :			punpckhbw mm1, mm6
228 :			punpckhbw mm3, mm6
229 :
230 :			paddusw mm0, mm2 ; mm01 += mm23
231 :			paddusw mm1, mm3
232 :
233 :			; next row
234 :
235 :			movq mm4, [esi + edx]
236 :			movq mm2, [esi + edx + 1] ; mm2/mm3 are reused; their current-row values are already summed into mm0/mm1
237 :
238 :			movq mm5, mm4
239 :			movq mm3, mm2
240 :
241 :			punpcklbw mm4, mm6 ; mm45 = [src + stride]
242 :			punpcklbw mm2, mm6 ; mm23 = [src + stride + 1]
243 :			punpckhbw mm5, mm6
244 :			punpckhbw mm3, mm6
245 :
246 :			paddusw mm4, mm2 ; mm45 += mm23
247 :			paddusw mm5, mm3
248 :
249 :			; add current + next row
250 :
251 :			paddusw mm0, mm4 ; mm01 += mm45
252 :			paddusw mm1, mm5
253 :			paddusw mm0, mm7 ; mm01 += rounding2
254 :			paddusw mm1, mm7
255 :
256 :			psrlw mm0, 2 ; mm01 >>= 2
257 :			psrlw mm1, 2
258 :
259 :			packuswb mm0, mm1 ; words back to 8 bytes
260 :			movq [edi], mm0 ; [dst] = mm01
261 :
262 :			add esi, edx ; src += stride
263 :			add edi, edx ; dst += stride
264 :			%endmacro
265 :
266 :			align 16
267 :			cglobal interpolate8x8_halfpel_hv_mmx
268 :			interpolate8x8_halfpel_hv_mmx:
269 :			; Writes 8 rows of 8 bytes: dst[x] = (src[x] + src[x+1] + src[x+stride] + src[x+stride+1] + 2 - rounding) >> 2 for rounding in {0, 1}; no emms is issued here.
270 :			push esi ; esi/edi are saved here and restored before ret
271 :			push edi
272 :
273 :			mov eax, [esp + 8 + 16] ; rounding (the + 8 skips the two registers pushed above)
274 :			interpolate8x8_halfpel_hv_mmx.start:
275 :
276 :			movq mm7, [rounding2_mmx + eax * 8] ; mm7 = four words of (2 - rounding); eax selects the 8-byte table row
277 :
278 :			mov edi, [esp + 8 + 4] ; dst
279 :			mov esi, [esp + 8 + 8] ; src
280 :
283 :			pxor mm6, mm6 ; zero
284 :
285 :			mov edx, [esp + 8 + 12] ; stride
286 :			; eight unrolled rows; each COPY_HV_MMX advances esi and edi by the stride in edx
287 :			COPY_HV_MMX
288 :			COPY_HV_MMX
289 :			COPY_HV_MMX
290 :			COPY_HV_MMX
291 :			COPY_HV_MMX
292 :			COPY_HV_MMX
293 :			COPY_HV_MMX
294 :			COPY_HV_MMX
295 :
296 :			pop edi
297 :			pop esi
298 :
299 :	chl	434	ret

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4