Parent Directory | Revision Log
Revision 651 -
(view)
(download)
Original Path: trunk/xvidcore/src/image/x86_asm/interpolate8x8_mmx.asm
;/*****************************************************************************
; *
; *  XVID MPEG-4 VIDEO CODEC
; *  mmx 8x8 block-based halfpel interpolation
; *
; *  Copyright(C) 2002 Peter Ross <pross@xvid.org>
; *
; *  This file is part of XviD, a free MPEG-4 video encoder/decoder
; *
; *  XviD is free software; you can redistribute it and/or modify it
; *  under the terms of the GNU General Public License as published by
; *  the Free Software Foundation; either version 2 of the License, or
; *  (at your option) any later version.
; *
; *  This program is distributed in the hope that it will be useful,
; *  but WITHOUT ANY WARRANTY; without even the implied warranty of
; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; *  GNU General Public License for more details.
; *
; *  You should have received a copy of the GNU General Public License
; *  along with this program; if not, write to the Free Software
; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
; *
; *  Under section 8 of the GNU General Public License, the copyright
; *  holders of XVID explicitly forbid distribution in the following
; *  countries:
; *
; *    - Japan
; *    - United States of America
; *
; *  Linking XviD statically or dynamically with other modules is making a
; *  combined work based on XviD.  Thus, the terms and conditions of the
; *  GNU General Public License cover the whole combination.
; *
; *  As a special exception, the copyright holders of XviD give you
; *  permission to link XviD with independent modules that communicate with
; *  XviD solely through the VFW1.1 and DShow interfaces, regardless of the
; *  license terms of these independent modules, and to copy and distribute
; *  the resulting combined work under terms of your choice, provided that
; *  every copy of the combined work is accompanied by a complete copy of
; *  the source code of XviD (the version of XviD used to produce the
; *  combined work), being distributed under the terms of the GNU General
; *  Public License plus this exception.  An independent module is a module
; *  which is not derived from or based on XviD.
; *
; *  Note that people who make modified versions of XviD are not obligated
; *  to grant this special exception for their modified versions; it is
; *  their choice whether to do so.  The GNU General Public License gives
; *  permission to release a modified version without this exception; this
; *  exception also makes it possible to release a modified version which
; *  carries forward this exception.
; *
; * $Id: interpolate8x8_mmx.asm,v 1.11 2002-11-17 00:20:30 edgomez Exp $
; *
; ****************************************************************************/
56 : | Isibaar | 262 | |
57 : | bits 32 | ||
58 : | |||
59 : | %macro cglobal 1 | ||
60 : | %ifdef PREFIX | ||
61 : | global _%1 | ||
62 : | %define %1 _%1 | ||
63 : | %else | ||
64 : | global %1 | ||
65 : | %endif | ||
66 : | %endmacro | ||
67 : | |||
68 : | section .data | ||
69 : | |||
70 : | align 16 | ||
71 : | |||
72 : | ;=========================================================================== | ||
73 : | ; (1 - r) rounding table | ||
74 : | ;=========================================================================== | ||
75 : | |||
76 : | rounding1_mmx | ||
77 : | times 4 dw 1 | ||
78 : | times 4 dw 0 | ||
79 : | |||
80 : | ;=========================================================================== | ||
81 : | ; (2 - r) rounding table | ||
82 : | ;=========================================================================== | ||
83 : | |||
84 : | rounding2_mmx | ||
85 : | times 4 dw 2 | ||
86 : | times 4 dw 1 | ||
87 : | |||
88 : | mmx_one | ||
89 : | times 8 db 1 | ||
90 : | |||
91 : | section .text | ||
92 : | |||
93 : | %macro CALC_AVG 6 | ||
94 : | punpcklbw %3, %6 | ||
95 : | punpckhbw %4, %6 | ||
96 : | |||
97 : | paddusw %1, %3 ; mm01 += mm23 | ||
98 : | paddusw %2, %4 | ||
99 : | paddusw %1, %5 ; mm01 += rounding | ||
100 : | paddusw %2, %5 | ||
101 : | |||
102 : | psrlw %1, 1 ; mm01 >>= 1 | ||
103 : | psrlw %2, 1 | ||
104 : | |||
105 : | %endmacro | ||
106 : | |||
107 : | |||
108 : | ;=========================================================================== | ||
109 : | ; | ||
110 : | ; void interpolate8x8_halfpel_h_mmx(uint8_t * const dst, | ||
111 : | ; const uint8_t * const src, | ||
112 : | ; const uint32_t stride, | ||
113 : | ; const uint32_t rounding); | ||
114 : | ; | ||
115 : | ;=========================================================================== | ||
116 : | |||
117 : | %macro COPY_H_MMX 0 | ||
118 : | movq mm0, [esi] | ||
119 : | movq mm2, [esi + 1] | ||
120 : | movq mm1, mm0 | ||
121 : | movq mm3, mm2 | ||
122 : | |||
123 : | punpcklbw mm0, mm6 ; mm01 = [src] | ||
124 : | punpckhbw mm1, mm6 ; mm23 = [src + 1] | ||
125 : | |||
126 : | CALC_AVG mm0, mm1, mm2, mm3, mm7, mm6 | ||
127 : | |||
128 : | packuswb mm0, mm1 | ||
129 : | movq [edi], mm0 ; [dst] = mm01 | ||
130 : | |||
131 : | add esi, edx ; src += stride | ||
132 : | add edi, edx ; dst += stride | ||
133 : | %endmacro | ||
134 : | |||
135 : | align 16 | ||
136 : | cglobal interpolate8x8_halfpel_h_mmx | ||
137 : | interpolate8x8_halfpel_h_mmx | ||
138 : | |||
139 : | push esi | ||
140 : | push edi | ||
141 : | |||
142 : | mov eax, [esp + 8 + 16] ; rounding | ||
143 : | |||
144 : | interpolate8x8_halfpel_h_mmx.start | ||
145 : | movq mm7, [rounding1_mmx + eax * 8] | ||
146 : | |||
147 : | mov edi, [esp + 8 + 4] ; dst | ||
148 : | mov esi, [esp + 8 + 8] ; src | ||
149 : | mov edx, [esp + 8 + 12] ; stride | ||
150 : | |||
151 : | pxor mm6, mm6 ; zero | ||
152 : | |||
153 : | COPY_H_MMX | ||
154 : | COPY_H_MMX | ||
155 : | COPY_H_MMX | ||
156 : | COPY_H_MMX | ||
157 : | COPY_H_MMX | ||
158 : | COPY_H_MMX | ||
159 : | COPY_H_MMX | ||
160 : | COPY_H_MMX | ||
161 : | |||
162 : | pop edi | ||
163 : | pop esi | ||
164 : | |||
165 : | ret | ||
166 : | |||
167 : | |||
168 : | ;=========================================================================== | ||
169 : | ; | ||
170 : | ; void interpolate8x8_halfpel_v_mmx(uint8_t * const dst, | ||
171 : | ; const uint8_t * const src, | ||
172 : | ; const uint32_t stride, | ||
173 : | ; const uint32_t rounding); | ||
174 : | ; | ||
175 : | ;=========================================================================== | ||
176 : | |||
177 : | %macro COPY_V_MMX 0 | ||
178 : | movq mm0, [esi] | ||
179 : | movq mm2, [esi + edx] | ||
180 : | movq mm1, mm0 | ||
181 : | movq mm3, mm2 | ||
182 : | |||
183 : | punpcklbw mm0, mm6 ; mm01 = [src] | ||
184 : | punpckhbw mm1, mm6 ; mm23 = [src + 1] | ||
185 : | |||
186 : | CALC_AVG mm0, mm1, mm2, mm3, mm7, mm6 | ||
187 : | |||
188 : | packuswb mm0, mm1 | ||
189 : | movq [edi], mm0 ; [dst] = mm01 | ||
190 : | |||
191 : | add esi, edx ; src += stride | ||
192 : | add edi, edx ; dst += stride | ||
193 : | %endmacro | ||
194 : | |||
195 : | align 16 | ||
196 : | cglobal interpolate8x8_halfpel_v_mmx | ||
197 : | interpolate8x8_halfpel_v_mmx | ||
198 : | |||
199 : | push esi | ||
200 : | push edi | ||
201 : | |||
202 : | mov eax, [esp + 8 + 16] ; rounding | ||
203 : | |||
204 : | interpolate8x8_halfpel_v_mmx.start | ||
205 : | movq mm7, [rounding1_mmx + eax * 8] | ||
206 : | |||
207 : | mov edi, [esp + 8 + 4] ; dst | ||
208 : | mov esi, [esp + 8 + 8] ; src | ||
209 : | mov edx, [esp + 8 + 12] ; stride | ||
210 : | |||
211 : | pxor mm6, mm6 ; zero | ||
212 : | |||
213 : | |||
214 : | COPY_V_MMX | ||
215 : | COPY_V_MMX | ||
216 : | COPY_V_MMX | ||
217 : | COPY_V_MMX | ||
218 : | COPY_V_MMX | ||
219 : | COPY_V_MMX | ||
220 : | COPY_V_MMX | ||
221 : | COPY_V_MMX | ||
222 : | |||
223 : | pop edi | ||
224 : | pop esi | ||
225 : | |||
226 : | ret | ||
227 : | |||
228 : | |||
229 : | ;=========================================================================== | ||
230 : | ; | ||
231 : | ; void interpolate8x8_halfpel_hv_mmx(uint8_t * const dst, | ||
232 : | ; const uint8_t * const src, | ||
233 : | ; const uint32_t stride, | ||
234 : | ; const uint32_t rounding); | ||
235 : | ; | ||
236 : | ; | ||
237 : | ;=========================================================================== | ||
238 : | |||
239 : | %macro COPY_HV_MMX 0 | ||
240 : | ; current row | ||
241 : | |||
242 : | movq mm0, [esi] | ||
243 : | movq mm2, [esi + 1] | ||
244 : | |||
245 : | movq mm1, mm0 | ||
246 : | movq mm3, mm2 | ||
247 : | |||
248 : | punpcklbw mm0, mm6 ; mm01 = [src] | ||
249 : | punpcklbw mm2, mm6 ; mm23 = [src + 1] | ||
250 : | punpckhbw mm1, mm6 | ||
251 : | punpckhbw mm3, mm6 | ||
252 : | |||
253 : | paddusw mm0, mm2 ; mm01 += mm23 | ||
254 : | paddusw mm1, mm3 | ||
255 : | |||
256 : | ; next row | ||
257 : | |||
258 : | movq mm4, [esi + edx] | ||
259 : | movq mm2, [esi + edx + 1] | ||
260 : | |||
261 : | movq mm5, mm4 | ||
262 : | movq mm3, mm2 | ||
263 : | |||
264 : | punpcklbw mm4, mm6 ; mm45 = [src + stride] | ||
265 : | punpcklbw mm2, mm6 ; mm23 = [src + stride + 1] | ||
266 : | punpckhbw mm5, mm6 | ||
267 : | punpckhbw mm3, mm6 | ||
268 : | |||
269 : | paddusw mm4, mm2 ; mm45 += mm23 | ||
270 : | paddusw mm5, mm3 | ||
271 : | |||
272 : | ; add current + next row | ||
273 : | |||
274 : | paddusw mm0, mm4 ; mm01 += mm45 | ||
275 : | paddusw mm1, mm5 | ||
276 : | paddusw mm0, mm7 ; mm01 += rounding2 | ||
277 : | paddusw mm1, mm7 | ||
278 : | |||
279 : | psrlw mm0, 2 ; mm01 >>= 2 | ||
280 : | psrlw mm1, 2 | ||
281 : | |||
282 : | packuswb mm0, mm1 | ||
283 : | movq [edi], mm0 ; [dst] = mm01 | ||
284 : | |||
285 : | add esi, edx ; src += stride | ||
286 : | add edi, edx ; dst += stride | ||
287 : | %endmacro | ||
288 : | |||
align 16
cglobal interpolate8x8_halfpel_hv_mmx
;-----------------------------------------------------------------------------
; interpolate8x8_halfpel_hv_mmx
; cdecl; stack layout after the two pushes:
;   [esp + 8 +  4] dst, [esp + 8 +  8] src,
;   [esp + 8 + 12] stride, [esp + 8 + 16] rounding (0 or 1)
; The .start label is an alternate entry with eax = rounding already set.
;-----------------------------------------------------------------------------
interpolate8x8_halfpel_hv_mmx

	push esi
	push edi

	mov eax, [esp + 8 + 16]               ; rounding (0 or 1)
interpolate8x8_halfpel_hv_mmx.start

	movq mm7, [rounding2_mmx + eax * 8]   ; (2 - rounding) word constants

	mov edi, [esp + 8 + 4]                ; dst
	mov esi, [esp + 8 + 8]                ; src

	pxor mm6, mm6                         ; zero, for byte->word unpacking

	mov edx, [esp + 8 + 12]               ; stride

	; 8 rows, fully unrolled.  The stray "mov eax, 8" loop counter from a
	; rolled-loop version was removed: eax was never read again (including
	; via the .start alternate entry), so the instruction was dead code.
	COPY_HV_MMX
	COPY_HV_MMX
	COPY_HV_MMX
	COPY_HV_MMX
	COPY_HV_MMX
	COPY_HV_MMX
	COPY_HV_MMX
	COPY_HV_MMX

	pop edi
	pop esi

	ret
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |