Annotation of /trunk/xvidcore/src/motion/gmc.c

Revision 1756 - (view) (download)

1 :	edgomez	1382	/*****************************************************************************
2 :			*
3 :			* XVID MPEG-4 VIDEO CODEC
4 :			* - GMC interpolation module -
5 :			*
6 :			* Copyright(C) 2002-2003 Pascal Massimino <skal@planet-d.net>
7 :			*
8 :			* This program is free software ; you can redistribute it and/or modify
9 :			* it under the terms of the GNU General Public License as published by
10 :			* the Free Software Foundation ; either version 2 of the License, or
11 :			* (at your option) any later version.
12 :			*
13 :			* This program is distributed in the hope that it will be useful,
14 :			* but WITHOUT ANY WARRANTY ; without even the implied warranty of
15 :			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 :			* GNU General Public License for more details.
17 :			*
18 :			* You should have received a copy of the GNU General Public License
19 :			* along with this program ; if not, write to the Free Software
20 :			* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 :			*
22 :	Skal	1756	* $Id: gmc.c,v 1.7 2006-11-07 19:59:03 Skal Exp $
23 :	edgomez	1382	*
24 :			****************************************************************************/
25 :
26 :			#include "../portab.h"
27 :			#include "../global.h"
28 :			#include "../encoder.h"
29 :			#include "gmc.h"
30 :	Skal	1756	#include "../utils/emms.h"
31 :	edgomez	1382
32 :			#include <stdio.h>
33 :
34 :	Skal	1709	/* initialized by init_GMC(), for 3points */
35 :			static
36 :			void (Predict_16x16_func)(const NEW_GMC_DATA const This,
37 :			uint8_t dst, const uint8_t src,
38 :			int dststride, int srcstride, int x, int y, int rounding) = 0;
39 :			static
40 :			void (Predict_8x8_func)(const NEW_GMC_DATA const This,
41 :			uint8_t uDst, const uint8_t uSrc,
42 :			uint8_t vDst, const uint8_t vSrc,
43 :			int dststride, int srcstride, int x, int y, int rounding) = 0;
44 :
45 :			/****************************************************************************/
46 :			/* this is borrowed from bitstream.c until we find a common solution */
/*
 * Returns the number of significant bits of 'value', i.e. the position of
 * the highest set bit counted from 1 (0 -> 0, 1 -> 1, 255 -> 8, 256 -> 9).
 *
 * The former MSVC-only inline-assembly path (bsr/inc) was dropped: BSR
 * leaves its destination undefined for a zero operand, and MSVC inline
 * assembly is not available on non-x86 targets. This helper is only used
 * while setting up warp parameters, so the portable loop is sufficient.
 */
static uint32_t __inline
log2bin(uint32_t value)
{
	uint32_t n = 0;

	while (value) {
		value >>= 1;
		n++;
	}
	return n;
}
66 :
67 :			/* 16*sizeof(int) -> 1 or 2 cachelines */
68 :			/* table lookup might be faster! (still to be benchmarked) */
69 :
70 :			/*
71 :			static int log2bin_table[16] =
72 :			{ 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4};
73 :			*/
74 :			/* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 */
75 :
/* Division of a by b, rounded to nearest (half away from zero); b is expected positive. */
#define RDIV(a,b) (((a) + ((a)>0 ? ((b)>>1) : -((b)>>1))) / (b))
/* Right shift of a by b bits with rounding; positive values round half up,
 * zero and negative values use a bias that is one smaller. */
#define RSHIFT(a,b) ( ((a) + (1<<((b)-1)) - ((a)>0 ? 0 : 1)) >> (b) )

/*
 * Bilinear weight pairs for the 16 possible sub-pixel fractions:
 * entry i holds (16-i) in its upper 16 bits and i in its lower 16 bits.
 */
#define GMC_WEIGHT_PAIR(frac) (((16-(frac))<<16) + (frac))
static const uint32_t MTab[16] = {
	GMC_WEIGHT_PAIR( 0), GMC_WEIGHT_PAIR( 1), GMC_WEIGHT_PAIR( 2), GMC_WEIGHT_PAIR( 3),
	GMC_WEIGHT_PAIR( 4), GMC_WEIGHT_PAIR( 5), GMC_WEIGHT_PAIR( 6), GMC_WEIGHT_PAIR( 7),
	GMC_WEIGHT_PAIR( 8), GMC_WEIGHT_PAIR( 9), GMC_WEIGHT_PAIR(10), GMC_WEIGHT_PAIR(11),
	GMC_WEIGHT_PAIR(12), GMC_WEIGHT_PAIR(13), GMC_WEIGHT_PAIR(14), GMC_WEIGHT_PAIR(15)
};
#undef GMC_WEIGHT_PAIR
85 :
86 :	edgomez	1382	/* ************************************************************
87 :			* Pts = 2 or 3
88 :			*
89 :			* Warning! *src is the global frame pointer (that is: adress
90 :			* of pixel 0,0), not the macroblock one.
91 :			* Conversely, *dst is the macroblock top-left adress.
92 :			*/
93 :
94 :	Skal	1709	static
95 :	edgomez	1382	void Predict_16x16_C(const NEW_GMC_DATA * const This,
96 :	Skal	1709	uint8_t dst, const uint8_t src,
97 :			int dststride, int srcstride, int x, int y, int rounding)
98 :	edgomez	1382	{
99 :			const int W = This->sW;
100 :			const int H = This->sH;
101 :			const int rho = 3 - This->accuracy;
102 :			const int Rounder = ( (1<<7) - (rounding<<(2*rho)) ) << 16;
103 :
104 :			const int dUx = This->dU[0];
105 :			const int dVx = This->dV[0];
106 :			const int dUy = This->dU[1];
107 :			const int dVy = This->dV[1];
108 :
109 :			int Uo = This->Uo + 16(dUyy + dUx*x);
110 :			int Vo = This->Vo + 16(dVyy + dVx*x);
111 :
112 :			int i, j;
113 :
114 :			dst += 16;
115 :			for (j=16; j>0; --j) {
116 :			int U = Uo, V = Vo;
117 :			Uo += dUy; Vo += dVy;
118 :			for (i=-16; i<0; ++i) {
119 :			unsigned int f0, f1, ri = 16, rj = 16;
120 :			int Offset;
121 :			int u = ( U >> 16 ) << rho;
122 :			int v = ( V >> 16 ) << rho;
123 :
124 :			U += dUx; V += dVx;
125 :
126 :			if (u > 0 && u <= W) { ri = MTab[u&15]; Offset = u>>4; }
127 :			else {
128 :			if (u > W) Offset = W>>4;
129 :			else Offset = 0;
130 :			ri = MTab[0];
131 :			}
132 :
133 :			if (v > 0 && v <= H) { rj = MTab[v&15]; Offset += (v>>4)*srcstride; }
134 :			else {
135 :			if (v > H) Offset += (H>>4)*srcstride;
136 :			rj = MTab[0];
137 :			}
138 :
139 :			f0 = src[Offset + 0];
140 :			f0 \|= src[Offset + 1] << 16;
141 :			f1 = src[Offset + srcstride + 0];
142 :			f1 \|= src[Offset + srcstride + 1] << 16;
143 :			f0 = (ri*f0)>>16;
144 :			f1 = (ri*f1) & 0x0fff0000;
145 :			f0 \|= f1;
146 :			f0 = (rj*f0 + Rounder) >> 24;
147 :
148 :			dst[i] = (uint8_t)f0;
149 :			}
150 :			dst += dststride;
151 :			}
152 :			}
153 :
154 :	Skal	1709	static
155 :	edgomez	1382	void Predict_8x8_C(const NEW_GMC_DATA * const This,
156 :	Skal	1709	uint8_t uDst, const uint8_t uSrc,
157 :			uint8_t vDst, const uint8_t vSrc,
158 :			int dststride, int srcstride, int x, int y, int rounding)
159 :	edgomez	1382	{
160 :			const int W = This->sW >> 1;
161 :			const int H = This->sH >> 1;
162 :			const int rho = 3-This->accuracy;
163 :			const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
164 :
165 :			const int32_t dUx = This->dU[0];
166 :			const int32_t dVx = This->dV[0];
167 :			const int32_t dUy = This->dU[1];
168 :			const int32_t dVy = This->dV[1];
169 :
170 :			int32_t Uo = This->Uco + 8(dUyy + dUx*x);
171 :			int32_t Vo = This->Vco + 8(dVyy + dVx*x);
172 :
173 :			int i, j;
174 :
175 :			uDst += 8;
176 :			vDst += 8;
177 :			for (j=8; j>0; --j) {
178 :			int32_t U = Uo, V = Vo;
179 :			Uo += dUy; Vo += dVy;
180 :
181 :			for (i=-8; i<0; ++i) {
182 :			int Offset;
183 :			uint32_t f0, f1, ri, rj;
184 :			int32_t u, v;
185 :
186 :			u = ( U >> 16 ) << rho;
187 :			v = ( V >> 16 ) << rho;
188 :			U += dUx; V += dVx;
189 :
190 :			if (u > 0 && u <= W) {
191 :			ri = MTab[u&15];
192 :			Offset = u>>4;
193 :			} else {
194 :			if (u>W) Offset = W>>4;
195 :			else Offset = 0;
196 :			ri = MTab[0];
197 :			}
198 :
199 :			if (v > 0 && v <= H) {
200 :			rj = MTab[v&15];
201 :			Offset += (v>>4)*srcstride;
202 :			} else {
203 :			if (v>H) Offset += (H>>4)*srcstride;
204 :			rj = MTab[0];
205 :			}
206 :
207 :			f0 = uSrc[Offset + 0];
208 :			f0 \|= uSrc[Offset + 1] << 16;
209 :			f1 = uSrc[Offset + srcstride + 0];
210 :			f1 \|= uSrc[Offset + srcstride + 1] << 16;
211 :			f0 = (ri*f0)>>16;
212 :			f1 = (ri*f1) & 0x0fff0000;
213 :			f0 \|= f1;
214 :			f0 = (rj*f0 + Rounder) >> 24;
215 :
216 :			uDst[i] = (uint8_t)f0;
217 :
218 :			f0 = vSrc[Offset + 0];
219 :			f0 \|= vSrc[Offset + 1] << 16;
220 :			f1 = vSrc[Offset + srcstride + 0];
221 :			f1 \|= vSrc[Offset + srcstride + 1] << 16;
222 :			f0 = (ri*f0)>>16;
223 :			f1 = (ri*f1) & 0x0fff0000;
224 :			f0 \|= f1;
225 :			f0 = (rj*f0 + Rounder) >> 24;
226 :
227 :			vDst[i] = (uint8_t)f0;
228 :			}
229 :			uDst += dststride;
230 :			vDst += dststride;
231 :			}
232 :			}
233 :
234 :	Skal	1709	static
235 :	edgomez	1382	void get_average_mv_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv,
236 :	Skal	1709	int x, int y, int qpel)
237 :	edgomez	1382	{
238 :			int i, j;
239 :			int vx = 0, vy = 0;
240 :			int32_t uo = Dsp->Uo + 16(Dsp->dU[1]y + Dsp->dU[0]*x);
241 :			int32_t vo = Dsp->Vo + 16(Dsp->dV[1]y + Dsp->dV[0]*x);
242 :			for (j=16; j>0; --j)
243 :			{
244 :			int32_t U, V;
245 :			U = uo; uo += Dsp->dU[1];
246 :			V = vo; vo += Dsp->dV[1];
247 :			for (i=16; i>0; --i)
248 :			{
249 :			int32_t u,v;
250 :			u = U >> 16; U += Dsp->dU[0]; vx += u;
251 :			v = V >> 16; V += Dsp->dV[0]; vy += v;
252 :			}
253 :			}
254 :			vx -= (256x+120) << (5+Dsp->accuracy); / 120 = 1516/2 /
255 :			vy -= (256*y+120) << (5+Dsp->accuracy);
256 :
257 :			mv->x = RSHIFT( vx, 8+Dsp->accuracy - qpel );
258 :			mv->y = RSHIFT( vy, 8+Dsp->accuracy - qpel );
259 :			}
260 :
261 :			/* ************************************************************
262 :			* simplified version for 1 warp point
263 :			*/
264 :
265 :	Skal	1709	static
266 :	edgomez	1382	void Predict_1pt_16x16_C(const NEW_GMC_DATA * const This,
267 :	Skal	1709	uint8_t Dst, const uint8_t Src,
268 :			int dststride, int srcstride, int x, int y, int rounding)
269 :	edgomez	1382	{
270 :			const int W = This->sW;
271 :			const int H = This->sH;
272 :			const int rho = 3-This->accuracy;
273 :			const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
274 :
275 :
276 :			int32_t uo = This->Uo + (x<<8); /* ((16x)<<4) /
277 :			int32_t vo = This->Vo + (y<<8);
278 :			uint32_t ri = MTab[uo & 15];
279 :			uint32_t rj = MTab[vo & 15];
280 :			int i, j;
281 :
282 :			int32_t Offset;
283 :	Skal	1708	if (vo>=(-16<<4) && vo<=H) Offset = (vo>>4)*srcstride;
284 :	edgomez	1382	else {
285 :			if (vo>H) Offset = ( H>>4)*srcstride;
286 :			else Offset =-16*srcstride;
287 :			rj = MTab[0];
288 :			}
289 :	Skal	1708	if (uo>=(-16<<4) && uo<=W) Offset += (uo>>4);
290 :	edgomez	1382	else {
291 :			if (uo>W) Offset += (W>>4);
292 :			else Offset -= 16;
293 :			ri = MTab[0];
294 :			}
295 :
296 :			Dst += 16;
297 :
298 :			for(j=16; j>0; --j, Offset+=srcstride-16)
299 :			{
300 :			for(i=-16; i<0; ++i, ++Offset)
301 :			{
302 :			uint32_t f0, f1;
303 :			f0 = Src[ Offset +0 ];
304 :			f0 \|= Src[ Offset +1 ] << 16;
305 :			f1 = Src[ Offset+srcstride +0 ];
306 :			f1 \|= Src[ Offset+srcstride +1 ] << 16;
307 :			f0 = (ri*f0)>>16;
308 :			f1 = (ri*f1) & 0x0fff0000;
309 :			f0 \|= f1;
310 :			f0 = ( rj*f0 + Rounder ) >> 24;
311 :			Dst[i] = (uint8_t)f0;
312 :			}
313 :			Dst += dststride;
314 :			}
315 :			}
316 :
317 :	Skal	1709	static
318 :	edgomez	1382	void Predict_1pt_8x8_C(const NEW_GMC_DATA * const This,
319 :	Skal	1709	uint8_t uDst, const uint8_t uSrc,
320 :			uint8_t vDst, const uint8_t vSrc,
321 :			int dststride, int srcstride, int x, int y, int rounding)
322 :	edgomez	1382	{
323 :			const int W = This->sW >> 1;
324 :			const int H = This->sH >> 1;
325 :			const int rho = 3-This->accuracy;
326 :			const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
327 :
328 :			int32_t uo = This->Uco + (x<<7);
329 :			int32_t vo = This->Vco + (y<<7);
330 :			uint32_t rri = MTab[uo & 15];
331 :			uint32_t rrj = MTab[vo & 15];
332 :			int i, j;
333 :
334 :			int32_t Offset;
335 :	Skal	1708	if (vo>=(-8<<4) && vo<=H) Offset = (vo>>4)*srcstride;
336 :	edgomez	1382	else {
337 :			if (vo>H) Offset = ( H>>4)*srcstride;
338 :			else Offset =-8*srcstride;
339 :			rrj = MTab[0];
340 :			}
341 :	Skal	1708	if (uo>=(-8<<4) && uo<=W) Offset += (uo>>4);
342 :	edgomez	1382	else {
343 :			if (uo>W) Offset += ( W>>4);
344 :			else Offset -= 8;
345 :			rri = MTab[0];
346 :			}
347 :
348 :			uDst += 8;
349 :			vDst += 8;
350 :			for(j=8; j>0; --j, Offset+=srcstride-8)
351 :			{
352 :			for(i=-8; i<0; ++i, Offset++)
353 :			{
354 :			uint32_t f0, f1;
355 :			f0 = uSrc[ Offset + 0 ];
356 :			f0 \|= uSrc[ Offset + 1 ] << 16;
357 :			f1 = uSrc[ Offset + srcstride + 0 ];
358 :			f1 \|= uSrc[ Offset + srcstride + 1 ] << 16;
359 :			f0 = (rri*f0)>>16;
360 :			f1 = (rri*f1) & 0x0fff0000;
361 :			f0 \|= f1;
362 :			f0 = ( rrj*f0 + Rounder ) >> 24;
363 :			uDst[i] = (uint8_t)f0;
364 :
365 :			f0 = vSrc[ Offset + 0 ];
366 :			f0 \|= vSrc[ Offset + 1 ] << 16;
367 :			f1 = vSrc[ Offset + srcstride + 0 ];
368 :			f1 \|= vSrc[ Offset + srcstride + 1 ] << 16;
369 :			f0 = (rri*f0)>>16;
370 :			f1 = (rri*f1) & 0x0fff0000;
371 :			f0 \|= f1;
372 :			f0 = ( rrj*f0 + Rounder ) >> 24;
373 :			vDst[i] = (uint8_t)f0;
374 :			}
375 :			uDst += dststride;
376 :			vDst += dststride;
377 :			}
378 :			}
379 :
380 :	Skal	1709	static
381 :	edgomez	1382	void get_average_mv_1pt_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv,
382 :			int x, int y, int qpel)
383 :			{
384 :			mv->x = RSHIFT(Dsp->Uo<<qpel, 3);
385 :			mv->y = RSHIFT(Dsp->Vo<<qpel, 3);
386 :			}
387 :
388 :	Skal	1709	#if defined(ARCH_IS_IA32)
389 :	edgomez	1382	/* *************************************************************
390 :	Skal	1709	* MMX core function
391 :			*/
392 :
/*
 * Pointer to the assembly routine that processes a run of 8 output pixels
 * at once. Null until init_GMC() selects the MMX or SSE2 implementation.
 */
static
void (*GMC_Core_Lin_8)(uint8_t *Dst, const uint16_t * Offsets,
					   const uint8_t * const Src0, const int BpS, const int Rounder) = 0;

/* implemented outside this file */
extern void xvid_GMC_Core_Lin_8_mmx(uint8_t *Dst, const uint16_t * Offsets,
									const uint8_t * const Src0, const int BpS, const int Rounder);

extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets,
									 const uint8_t * const Src0, const int BpS, const int Rounder);
402 :
403 :			/* *************************************************************/
404 :
405 :			static void GMC_Core_Non_Lin_8(uint8_t *Dst,
406 :			const uint16_t * Offsets,
407 :			const uint8_t * const Src0, const int srcstride,
408 :			const int Rounder)
409 :			{
410 :			int i;
411 :			for(i=0; i<8; ++i)
412 :			{
413 :			uint32_t u = Offsets[i ];
414 :			uint32_t v = Offsets[i+16];
415 :			const uint32_t ri = MTab[u&0x0f];
416 :			const uint32_t rj = MTab[v&0x0f];
417 :			uint32_t f0, f1;
418 :			const uint8_t * const Src = Src0 + (u>>4) + (v>>4)*srcstride;
419 :			f0 = Src[0];
420 :			f0 \|= Src[1] << 16;
421 :			f1 = Src[srcstride +0];
422 :			f1 \|= Src[srcstride +1] << 16;
423 :			f0 = (ri*f0)>>16;
424 :			f1 = (ri*f1) & 0x0fff0000;
425 :			f0 \|= f1;
426 :			f0 = ( rj*f0 + Rounder ) >> 24;
427 :			Dst[i] = (uint8_t)f0;
428 :			}
429 :			}
430 :
431 :			//////////////////////////////////////////////////////////
432 :
433 :			static
434 :			void Predict_16x16_mmx(const NEW_GMC_DATA * const This,
435 :			uint8_t dst, const uint8_t src,
436 :			int dststride, int srcstride, int x, int y, int rounding)
437 :			{
438 :			const int W = This->sW;
439 :			const int H = This->sH;
440 :			const int rho = 3 - This->accuracy;
441 :			const int Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
442 :			const uint32_t W2 = W<<(16-rho);
443 :			const uint32_t H2 = H<<(16-rho);
444 :
445 :			const int dUx = This->dU[0];
446 :			const int dVx = This->dV[0];
447 :			const int dUy = This->dU[1];
448 :			const int dVy = This->dV[1];
449 :
450 :			int Uo = This->Uo + 16(dUyy + dUx*x);
451 :			int Vo = This->Vo + 16(dVyy + dVx*x);
452 :
453 :			int i, j;
454 :
455 :			DECLARE_ALIGNED_MATRIX(Offsets, 2,16, uint16_t, CACHE_LINE);
456 :			for(j=16; j>0; --j)
457 :			{
458 :			int32_t U = Uo, V = Vo;
459 :			Uo += dUy; Vo += dVy;
460 :			if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) &&
461 :			H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) )
462 :			{
463 :	Skal	1756	uint32_t UV1, UV2;
464 :	Skal	1709	for(i=0; i<16; ++i)
465 :			{
466 :			uint32_t u = ( U >> 16 ) << rho;
467 :			uint32_t v = ( V >> 16 ) << rho;
468 :			U += dUx; V += dVx;
469 :			Offsets[ i] = u;
470 :			Offsets[16+i] = v;
471 :			}
472 :			// batch 8 input pixels when linearity says it's ok
473 :	Skal	1756
474 :			UV1 = (Offsets[0] \| (Offsets[16]<<16)) & 0xfff0fff0U;
475 :			UV2 = (Offsets[7] \| (Offsets[23]<<16)) & 0xfff0fff0U;
476 :			if (UV1+7*16==UV2)
477 :			GMC_Core_Lin_8(dst, Offsets, src + (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride, srcstride, Rounder);
478 :			else
479 :			GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder);
480 :			UV1 = (Offsets[ 8] \| (Offsets[24]<<16)) & 0xfff0fff0U;
481 :			UV2 = (Offsets[15] \| (Offsets[31]<<16)) & 0xfff0fff0U;
482 :			if (UV1+7*16==UV2)
483 :			GMC_Core_Lin_8(dst+8, Offsets+8, src + (Offsets[8]>>4) + (Offsets[24]>>4)*srcstride, srcstride, Rounder);
484 :			else
485 :			GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder);
486 :			}
487 :	Skal	1709	else
488 :			{
489 :			for(i=0; i<16; ++i)
490 :			{
491 :			int u = ( U >> 16 ) << rho;
492 :			int v = ( V >> 16 ) << rho;
493 :			U += dUx; V += dVx;
494 :
495 :			Offsets[ i] = (u<0) ? 0 : (u>=W) ? W : u;
496 :			Offsets[16+i] = (v<0) ? 0 : (v>=H) ? H : v;
497 :			}
498 :			// due to boundary clipping, we cannot infer the 8-pixels batchability
499 :			// simply by using the linearity. Oh well, not a big deal...
500 :			GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder);
501 :			GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder);
502 :			}
503 :			dst += dststride;
504 :			}
505 :			}
506 :
507 :			static
508 :			void Predict_8x8_mmx(const NEW_GMC_DATA * const This,
509 :			uint8_t uDst, const uint8_t uSrc,
510 :			uint8_t vDst, const uint8_t vSrc,
511 :			int dststride, int srcstride, int x, int y, int rounding)
512 :			{
513 :			const int W = This->sW >> 1;
514 :			const int H = This->sH >> 1;
515 :			const int rho = 3-This->accuracy;
516 :			const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
517 :			const uint32_t W2 = W<<(16-rho);
518 :			const uint32_t H2 = H<<(16-rho);
519 :
520 :			const int dUx = This->dU[0];
521 :			const int dVx = This->dV[0];
522 :			const int dUy = This->dU[1];
523 :			const int dVy = This->dV[1];
524 :
525 :			int Uo = This->Uco + 8(dUyy + dUx*x);
526 :			int Vo = This->Vco + 8(dVyy + dVx*x);
527 :
528 :			DECLARE_ALIGNED_MATRIX(Offsets, 2,16, uint16_t, CACHE_LINE);
529 :			int i, j;
530 :			for(j=8; j>0; --j)
531 :			{
532 :			int32_t U = Uo, V = Vo;
533 :			Uo += dUy; Vo += dVy;
534 :			if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) &&
535 :			H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) )
536 :			{
537 :	Skal	1756	uint32_t UV1, UV2;
538 :	Skal	1709	for(i=0; i<8; ++i)
539 :			{
540 :			int32_t u = ( U >> 16 ) << rho;
541 :			int32_t v = ( V >> 16 ) << rho;
542 :			U += dUx; V += dVx;
543 :			Offsets[ i] = u;
544 :			Offsets[16+i] = v;
545 :			}
546 :	Isibaar	1711
547 :			// batch 8 input pixels when linearity says it's ok
548 :	Skal	1756	UV1 = (Offsets[ 0] \| (Offsets[16]<<16)) & 0xfff0fff0U;
549 :			UV2 = (Offsets[ 7] \| (Offsets[23]<<16)) & 0xfff0fff0U;
550 :			if (UV1+7*16==UV2)
551 :			{
552 :			const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride;
553 :			GMC_Core_Lin_8(uDst, Offsets, uSrc+Off, srcstride, Rounder);
554 :			GMC_Core_Lin_8(vDst, Offsets, vSrc+Off, srcstride, Rounder);
555 :			}
556 :			else {
557 :			GMC_Core_Non_Lin_8(uDst, Offsets, uSrc, srcstride, Rounder);
558 :			GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder);
559 :			}
560 :	Skal	1709	}
561 :			else
562 :			{
563 :			for(i=0; i<8; ++i)
564 :			{
565 :			int u = ( U >> 16 ) << rho;
566 :			int v = ( V >> 16 ) << rho;
567 :			U += dUx; V += dVx;
568 :			Offsets[ i] = (u<0) ? 0 : (u>=W) ? W : u;
569 :			Offsets[16+i] = (v<0) ? 0 : (v>=H) ? H : v;
570 :			}
571 :			GMC_Core_Non_Lin_8(uDst, Offsets, uSrc, srcstride, Rounder);
572 :			GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder);
573 :			}
574 :			uDst += dststride;
575 :			vDst += dststride;
576 :			}
577 :			}
578 :
579 :			#endif /* ARCH_IS_IA32 */
580 :
581 :			/* *************************************************************
582 :			* will initialize internal pointers
583 :			*/
584 :
585 :			void init_GMC(const unsigned int cpu_flags)
586 :			{
587 :			Predict_16x16_func = Predict_16x16_C;
588 :			Predict_8x8_func = Predict_8x8_C;
589 :
590 :	Isibaar	1711	#if defined(ARCH_IS_IA32)
591 :	Skal	1709	if ((cpu_flags & XVID_CPU_MMX) \|\| (cpu_flags & XVID_CPU_MMXEXT) \|\|
592 :			(cpu_flags & XVID_CPU_3DNOW) \|\| (cpu_flags & XVID_CPU_3DNOWEXT) \|\|
593 :			(cpu_flags & XVID_CPU_SSE) \|\| (cpu_flags & XVID_CPU_SSE2))
594 :			{
595 :			Predict_16x16_func = Predict_16x16_mmx;
596 :			Predict_8x8_func = Predict_8x8_mmx;
597 :			if (cpu_flags & XVID_CPU_SSE2)
598 :			GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2;
599 :			else
600 :			GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_mmx;
601 :			}
602 :			#endif
603 :			}
604 :
605 :			/* *************************************************************
606 :	edgomez	1382	* Warning! It's Accuracy being passed, not 'resolution'!
607 :			*/
608 :
609 :			void generate_GMCparameters( int nb_pts, const int accuracy,
610 :			const WARPPOINTS *const pts,
611 :			const int width, const int height,
612 :			NEW_GMC_DATA *const gmc)
613 :			{
614 :			gmc->sW = width << 4;
615 :			gmc->sH = height << 4;
616 :			gmc->accuracy = accuracy;
617 :			gmc->num_wp = nb_pts;
618 :
619 :			/* reduce the number of points, if possible */
620 :	edgomez	1398	if (nb_pts<2 \|\| (pts->duv[2].x==0 && pts->duv[2].y==0 && pts->duv[1].x==0 && pts->duv[1].y==0 )) {
621 :			if (nb_pts<2 \|\| (pts->duv[1].x==0 && pts->duv[1].y==0)) {
622 :			if (nb_pts<1 \|\| (pts->duv[0].x==0 && pts->duv[0].y==0)) {
623 :			nb_pts = 0;
624 :			}
625 :			else nb_pts = 1;
626 :			}
627 :			else nb_pts = 2;
628 :			}
629 :	edgomez	1382
630 :			/* now, nb_pts stores the actual number of points required for interpolation */
631 :
632 :			if (nb_pts<=1)
633 :			{
634 :			if (nb_pts==1) {
635 :			/* store as 4b fixed point */
636 :			gmc->Uo = pts->duv[0].x << accuracy;
637 :			gmc->Vo = pts->duv[0].y << accuracy;
638 :			gmc->Uco = ((pts->duv[0].x>>1) \| (pts->duv[0].x&1)) << accuracy; /* DIV2RND() */
639 :			gmc->Vco = ((pts->duv[0].y>>1) \| (pts->duv[0].y&1)) << accuracy; /* DIV2RND() */
640 :			}
641 :			else { /* zero points?! */
642 :			gmc->Uo = gmc->Vo = 0;
643 :			gmc->Uco = gmc->Vco = 0;
644 :			}
645 :
646 :			gmc->predict_16x16 = Predict_1pt_16x16_C;
647 :			gmc->predict_8x8 = Predict_1pt_8x8_C;
648 :			gmc->get_average_mv = get_average_mv_1pt_C;
649 :			}
650 :			else { /* 2 or 3 points */
651 :			const int rho = 3 - accuracy; /* = {3,2,1,0} for Acc={0,1,2,3} */
652 :			int Alpha = log2bin(width-1);
653 :			int Ws = 1 << Alpha;
654 :
655 :			gmc->dU[0] = 16Ws + RDIV( 8Wspts->duv[1].x, width ); / dU/dx */
656 :			gmc->dV[0] = RDIV( 8Wspts->duv[1].y, width ); /* dV/dx */
657 :
658 :			if (nb_pts==2) {
659 :			gmc->dU[1] = -gmc->dV[0]; /* -Sin */
660 :			gmc->dV[1] = gmc->dU[0] ; /* Cos */
661 :			}
662 :			else
663 :			{
664 :			const int Beta = log2bin(height-1);
665 :			const int Hs = 1<<Beta;
666 :			gmc->dU[1] = RDIV( 8Hspts->duv[2].x, height ); /* dU/dy */
667 :			gmc->dV[1] = 16Hs + RDIV( 8Hspts->duv[2].y, height ); / dV/dy */
668 :			if (Beta>Alpha) {
669 :			gmc->dU[0] <<= (Beta-Alpha);
670 :			gmc->dV[0] <<= (Beta-Alpha);
671 :			Alpha = Beta;
672 :			Ws = Hs;
673 :			}
674 :			else {
675 :			gmc->dU[1] <<= Alpha - Beta;
676 :			gmc->dV[1] <<= Alpha - Beta;
677 :			}
678 :			}
679 :			/* upscale to 16b fixed-point */
680 :			gmc->dU[0] <<= (16-Alpha - rho);
681 :			gmc->dU[1] <<= (16-Alpha - rho);
682 :			gmc->dV[0] <<= (16-Alpha - rho);
683 :			gmc->dV[1] <<= (16-Alpha - rho);
684 :
685 :			gmc->Uo = ( pts->duv[0].x <<(16+ accuracy)) + (1<<15);
686 :			gmc->Vo = ( pts->duv[0].y <<(16+ accuracy)) + (1<<15);
687 :			gmc->Uco = ((pts->duv[0].x-1)<<(17+ accuracy)) + (1<<17);
688 :			gmc->Vco = ((pts->duv[0].y-1)<<(17+ accuracy)) + (1<<17);
689 :			gmc->Uco = (gmc->Uco + gmc->dU[0] + gmc->dU[1])>>2;
690 :			gmc->Vco = (gmc->Vco + gmc->dV[0] + gmc->dV[1])>>2;
691 :
692 :	Skal	1709	gmc->predict_16x16 = Predict_16x16_func;
693 :			gmc->predict_8x8 = Predict_8x8_func;
694 :	edgomez	1382	gmc->get_average_mv = get_average_mv_C;
695 :			}
696 :			}
697 :
698 :			/* *******************************************************************
699 :			* quick and dirty routine to generate the full warped image
700 :			* (pGMC != NULL) or just all average Motion Vectors (pGMC == NULL) */
701 :
702 :			void
703 :			generate_GMCimage( const NEW_GMC_DATA const gmc_data, / [input] precalculated data */
704 :			const IMAGE const pRef, / [input] */
705 :			const int mb_width,
706 :			const int mb_height,
707 :			const int stride,
708 :			const int stride2,
709 :			const int fcode, /* [input] some parameters... */
710 :			const int32_t quarterpel, /* [input] for rounding avgMV */
711 :			const int reduced_resolution, /* [input] ignored */
712 :			const int32_t rounding, /* [input] for rounding image data */
713 :			MACROBLOCK const pMBs, / [output] average motion vectors */
714 :			IMAGE const pGMC) / [output] full warped image */
715 :			{
716 :
717 :			unsigned int mj,mi;
718 :			VECTOR avgMV;
719 :
720 :			for (mj = 0; mj < (unsigned int)mb_height; mj++)
721 :			for (mi = 0; mi < (unsigned int)mb_width; mi++) {
722 :			const int mbnum = mj*mb_width+mi;
723 :			if (pGMC)
724 :			{
725 :			gmc_data->predict_16x16(gmc_data,
726 :			pGMC->y + mj16stride + mi*16, pRef->y,
727 :			stride, stride, mi, mj, rounding);
728 :
729 :			gmc_data->predict_8x8(gmc_data,
730 :			pGMC->u + mj8stride2 + mi*8, pRef->u,
731 :			pGMC->v + mj8stride2 + mi*8, pRef->v,
732 :			stride2, stride2, mi, mj, rounding);
733 :			}
734 :			gmc_data->get_average_mv(gmc_data, &avgMV, mi, mj, quarterpel);
735 :
736 :			pMBs[mbnum].amv.x = gmc_sanitize(avgMV.x, quarterpel, fcode);
737 :			pMBs[mbnum].amv.y = gmc_sanitize(avgMV.y, quarterpel, fcode);
738 :
739 :			pMBs[mbnum].mcsel = 0; /* until mode decision */
740 :			}
741 :	Skal	1756	emms();
742 :	edgomez	1382	}

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4