1 |
// 30.10.2002 corrected qpel chroma rounding |
/***************************************************************************** |
2 |
// 04.10.2002 added qpel support to MBMotionCompensation |
* |
3 |
// 01.05.2002 updated MBMotionCompensationBVOP |
* XVID MPEG-4 VIDEO CODEC |
4 |
// 14.04.2002 bframe compensation |
* - Motion Compensation related code - |
5 |
|
* |
6 |
|
* Copyright(C) 2002 Peter Ross <pross@xvid.org> |
7 |
|
* 2003 Christoph Lampert <gruel@web.de> |
8 |
|
* |
9 |
|
* This program is free software ; you can redistribute it and/or modify |
10 |
|
* it under the terms of the GNU General Public License as published by |
11 |
|
* the Free Software Foundation ; either version 2 of the License, or |
12 |
|
* (at your option) any later version. |
13 |
|
* |
14 |
|
* This program is distributed in the hope that it will be useful, |
15 |
|
* but WITHOUT ANY WARRANTY ; without even the implied warranty of |
16 |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 |
|
* GNU General Public License for more details. |
18 |
|
* |
19 |
|
* You should have received a copy of the GNU General Public License |
20 |
|
* along with this program ; if not, write to the Free Software |
21 |
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 |
|
* |
23 |
|
* $Id: motion_comp.c,v 1.18.2.6 2003-06-09 13:54:31 edgomez Exp $ |
24 |
|
* |
25 |
|
****************************************************************************/ |
26 |
|
|
27 |
#include <stdio.h> |
#include <stdio.h> |
28 |
|
|
33 |
#include "../utils/timer.h" |
#include "../utils/timer.h" |
34 |
#include "motion.h" |
#include "motion.h" |
35 |
|
|
|
#ifndef ABS |
|
|
#define ABS(X) (((X)>0)?(X):-(X)) |
|
|
#endif |
|
|
#ifndef SIGN |
|
|
#define SIGN(X) (((X)>0)?1:-1) |
|
|
#endif |
|
|
|
|
36 |
#ifndef RSHIFT |
#ifndef RSHIFT |
37 |
#define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b)) |
#define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b)) |
38 |
#endif |
#endif |
48 |
{ |
{ |
49 |
int length = 1 << (fcode+4); |
int length = 1 << (fcode+4); |
50 |
|
|
51 |
// if (quarterpel) value *= 2; |
#if 0 |
52 |
|
if (quarterpel) value *= 2; |
53 |
|
#endif |
54 |
|
|
55 |
if (value < -length) |
if (value < -length) |
56 |
return -length; |
return -length; |
109 |
(uint8_t *) ref, tmp + 32, |
(uint8_t *) ref, tmp + 32, |
110 |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
111 |
ptr = tmp; |
ptr = tmp; |
112 |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; // fullpixel position |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; /* fullpixel position */ |
113 |
|
|
114 |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
115 |
|
|
122 |
transfer_8to16sub(dct_codes+192, cur + y * stride + x + 8*stride+8, |
transfer_8to16sub(dct_codes+192, cur + y * stride + x + 8*stride+8, |
123 |
ptr + 8*stride + 8, stride); |
ptr + 8*stride + 8, stride); |
124 |
|
|
125 |
} else { //reduced_resolution |
} else { /* reduced_resolution */ |
126 |
|
|
127 |
x *= 2; y *= 2; |
x *= 2; y *= 2; |
128 |
|
|
171 |
(uint8_t *) ref, tmp + 32, |
(uint8_t *) ref, tmp + 32, |
172 |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
tmp + 64, tmp + 96, x, y, dx, dy, stride, rounding); |
173 |
ptr = tmp; |
ptr = tmp; |
174 |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; // fullpixel position |
} else ptr = ref + (y + dy/4)*stride + x + dx/4; /* fullpixel position */ |
175 |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride); |
176 |
|
|
177 |
transfer_8to16sub(dct_codes, cur + y * stride + x, ptr, stride); |
transfer_8to16sub(dct_codes, cur + y * stride + x, ptr, stride); |
178 |
|
|
179 |
} else { //reduced_resolution |
} else { /* reduced_resolution */ |
180 |
|
|
181 |
x *= 2; y *= 2; |
x *= 2; y *= 2; |
182 |
|
|
189 |
} |
} |
190 |
} |
} |
191 |
|
|
|
|
|
|
/*
 * Compute the 16x16 prediction residue for a macroblock without
 * touching the reference data (read-only variant): locate the motion-
 * compensated source block for motion vector (dx,dy) and subtract it
 * from the current image, writing four 8x8 difference blocks into
 * dct_codes (256 int16 values).
 *
 * dct_codes   output residue, 4 consecutive 8x8 blocks
 * cur         current (encoded) image plane
 * ref..refhv  reference plane and its h/v/hv half-pel interpolations
 * tmp         scratch buffer used for quarter-pel interpolation
 * x, y        top-left corner of the macroblock, in pixels
 * dx, dy      motion vector (quarter- or half-pel units, see quarterpel)
 * stride      line stride of all image planes
 * quarterpel  non-zero when (dx,dy) is in quarter-pel units
 */
static __inline void
compensate16x16_interpolate_ro(int16_t * const dct_codes,
				   const uint8_t * const cur,
				   const uint8_t * const ref,
				   const uint8_t * const refh,
				   const uint8_t * const refv,
				   const uint8_t * const refhv,
				   uint8_t * const tmp,
				   const uint32_t x, const uint32_t y,
				   const int32_t dx, const int32_t dy,
				   const int32_t stride,
				   const int quarterpel)
{
	const uint8_t *src;

	if (!quarterpel) {
		/* half-pel mode: select the matching pre-interpolated plane */
		src = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride);
	} else if ((dx & 3) == 0 && (dy & 3) == 0) {
		/* quarter-pel MV that lands on a full pixel: read the
		 * reference plane directly */
		src = ref + (y + dy / 4) * stride + x + dx / 4;
	} else {
		/* true quarter-pel position: synthesize the block into tmp */
		interpolate16x16_quarterpel(tmp - y * stride - x,
					    (uint8_t *) ref, tmp + 32,
					    tmp + 64, tmp + 96,
					    x, y, dx, dy, stride, 0);
		src = tmp;
	}

	/* residue of the four 8x8 sub-blocks (read-only transfer) */
	transfer_8to16subro(dct_codes, cur + y * stride + x,
			    src, stride);
	transfer_8to16subro(dct_codes + 64, cur + y * stride + x + 8,
			    src + 8, stride);
	transfer_8to16subro(dct_codes + 128, cur + y * stride + x + 8 * stride,
			    src + 8 * stride, stride);
	transfer_8to16subro(dct_codes + 192, cur + y * stride + x + 8 * stride + 8,
			    src + 8 * stride + 8, stride);
}
|
|
|
|
|
|
|
192 |
/* XXX: slow, inelegant... */ |
/* XXX: slow, inelegant... */ |
193 |
static void |
static void |
194 |
interpolate18x18_switch(uint8_t * const cur, |
interpolate18x18_switch(uint8_t * const cur, |
273 |
int32_t dx; |
int32_t dx; |
274 |
int32_t dy; |
int32_t dy; |
275 |
|
|
|
|
|
276 |
uint8_t * const tmp = refv->u; |
uint8_t * const tmp = refv->u; |
277 |
|
|
278 |
if ( (!reduced_resolution) && (mb->mode == MODE_NOT_CODED) ) { /* quick copy for early SKIP */ |
if ( (!reduced_resolution) && (mb->mode == MODE_NOT_CODED) ) { /* quick copy for early SKIP */ |
334 |
refv->y, refhv->y, tmp, 16 * i, 16 * j, dx, dy, |
refv->y, refhv->y, tmp, 16 * i, 16 * j, dx, dy, |
335 |
edged_width, quarterpel, reduced_resolution, rounding); |
edged_width, quarterpel, reduced_resolution, rounding); |
336 |
|
|
337 |
dx /= (int)(1 + quarterpel); |
if (quarterpel) { dx /= 2; dy /= 2; } |
|
dy /= (int)(1 + quarterpel); |
|
338 |
|
|
339 |
dx = (dx >> 1) + roundtab_79[dx & 0x3]; |
dx = (dx >> 1) + roundtab_79[dx & 0x3]; |
340 |
dy = (dy >> 1) + roundtab_79[dy & 0x3]; |
dy = (dy >> 1) + roundtab_79[dy & 0x3]; |
341 |
|
|
342 |
} else { // mode == MODE_INTER4V |
} else { /* mode == MODE_INTER4V */ |
343 |
int k, sumx = 0, sumy = 0; |
int k, sumx = 0, sumy = 0; |
344 |
const VECTOR * const mvs = (quarterpel ? mb->qmvs : mb->mvs); |
const VECTOR * const mvs = (quarterpel ? mb->qmvs : mb->mvs); |
345 |
|
|
346 |
for (k = 0; k < 4; k++) { |
for (k = 0; k < 4; k++) { |
347 |
dx = mvs[k].x; |
dx = mvs[k].x; |
348 |
dy = mvs[k].y; |
dy = mvs[k].y; |
349 |
sumx += dx / (1 + quarterpel); |
sumx += quarterpel ? dx/2 : dx; |
350 |
sumy += dy / (1 + quarterpel); |
sumy += quarterpel ? dy/2 : dy; |
351 |
|
|
352 |
if (reduced_resolution){ |
if (reduced_resolution){ |
353 |
dx = RRV_MV_SCALEUP(dx); |
dx = RRV_MV_SCALEUP(dx); |
386 |
const uint32_t edged_width = pParam->edged_width; |
const uint32_t edged_width = pParam->edged_width; |
387 |
int32_t dx, dy, b_dx, b_dy, sumx, sumy, b_sumx, b_sumy; |
int32_t dx, dy, b_dx, b_dy, sumx, sumy, b_sumx, b_sumy; |
388 |
int k; |
int k; |
389 |
const int quarterpel = pParam->m_quarterpel; |
const int quarterpel = pParam->vol_flags & XVID_VOL_QUARTERPEL; |
390 |
const uint8_t * ptr1, * ptr2; |
const uint8_t * ptr1, * ptr2; |
391 |
uint8_t * const tmp = f_refv->u; |
uint8_t * const tmp = f_refv->u; |
392 |
const VECTOR * const fmvs = (quarterpel ? mb->qmvs : mb->mvs); |
const VECTOR * const fmvs = (quarterpel ? mb->qmvs : mb->mvs); |
412 |
case MODE_BACKWARD: |
case MODE_BACKWARD: |
413 |
b_dx = bmvs->x; b_dy = bmvs->y; |
b_dx = bmvs->x; b_dy = bmvs->y; |
414 |
|
|
415 |
compensate16x16_interpolate_ro(&dct_codes[0 * 64], cur->y, b_ref->y, b_refh->y, |
compensate16x16_interpolate(&dct_codes[0 * 64], cur->y, b_ref->y, b_refh->y, |
416 |
b_refv->y, b_refhv->y, tmp, 16 * i, 16 * j, b_dx, |
b_refv->y, b_refhv->y, tmp, 16 * i, 16 * j, b_dx, |
417 |
b_dy, edged_width, quarterpel); |
b_dy, edged_width, quarterpel, 0, 0); |
418 |
|
|
419 |
if (quarterpel) { b_dx /= 2; b_dy /= 2; } |
if (quarterpel) { b_dx /= 2; b_dy /= 2; } |
420 |
|
|
437 |
(uint8_t *) f_ref->y, tmp + 32, |
(uint8_t *) f_ref->y, tmp + 32, |
438 |
tmp + 64, tmp + 96, 16*i, 16*j, dx, dy, edged_width, 0); |
tmp + 64, tmp + 96, 16*i, 16*j, dx, dy, edged_width, 0); |
439 |
ptr1 = tmp; |
ptr1 = tmp; |
440 |
} else ptr1 = f_ref->y + (16*j + dy/4)*edged_width + 16*i + dx/4; // fullpixel position |
} else ptr1 = f_ref->y + (16*j + dy/4)*edged_width + 16*i + dx/4; /* fullpixel position */ |
441 |
|
|
442 |
if ((b_dx&3) | (b_dy&3)) { |
if ((b_dx&3) | (b_dy&3)) { |
443 |
interpolate16x16_quarterpel(tmp - i * 16 - j * 16 * edged_width + 16, |
interpolate16x16_quarterpel(tmp - i * 16 - j * 16 * edged_width + 16, |
444 |
(uint8_t *) b_ref->y, tmp + 32, |
(uint8_t *) b_ref->y, tmp + 32, |
445 |
tmp + 64, tmp + 96, 16*i, 16*j, b_dx, b_dy, edged_width, 0); |
tmp + 64, tmp + 96, 16*i, 16*j, b_dx, b_dy, edged_width, 0); |
446 |
ptr2 = tmp + 16; |
ptr2 = tmp + 16; |
447 |
} else ptr2 = b_ref->y + (16*j + b_dy/4)*edged_width + 16*i + b_dx/4; // fullpixel position |
} else ptr2 = b_ref->y + (16*j + b_dy/4)*edged_width + 16*i + b_dx/4; /* fullpixel position */ |
448 |
|
|
449 |
b_dx /= 2; |
b_dx /= 2; |
450 |
b_dy /= 2; |
b_dy /= 2; |
473 |
|
|
474 |
break; |
break; |
475 |
|
|
476 |
default: // MODE_DIRECT |
default: /* MODE_DIRECT (or MODE_DIRECT_NONE_MV in case of bframes decoding) */ |
477 |
sumx = sumy = b_sumx = b_sumy = 0; |
sumx = sumy = b_sumx = b_sumy = 0; |
478 |
|
|
479 |
for (k = 0; k < 4; k++) { |
for (k = 0; k < 4; k++) { |
523 |
break; |
break; |
524 |
} |
} |
525 |
|
|
526 |
// uv block-based chroma interpolation for direct and interpolate modes |
/* v block-based chroma interpolation for direct and interpolate modes */ |
527 |
transfer_8to16sub2(&dct_codes[4 * 64], |
transfer_8to16sub2(&dct_codes[4 * 64], |
528 |
cur->u + (j * 8) * edged_width / 2 + (i * 8), |
cur->u + (j * 8) * edged_width / 2 + (i * 8), |
529 |
interpolate8x8_switch2(tmp, b_ref->u, 8 * i, 8 * j, |
interpolate8x8_switch2(tmp, b_ref->u, 8 * i, 8 * j, |
558 |
gmc->W = width; |
gmc->W = width; |
559 |
gmc->H = height; |
gmc->H = height; |
560 |
|
|
561 |
gmc->rho = 4 - log2bin(res-1); // = {3,2,1,0} for res={2,4,8,16} |
gmc->rho = 4 - log2bin(res-1); /* = {3,2,1,0} for res={2,4,8,16} */ |
562 |
|
|
563 |
gmc->alpha = log2bin(gmc->W-1); |
gmc->alpha = log2bin(gmc->W-1); |
564 |
gmc->Ws = (1 << gmc->alpha); |
gmc->Ws = (1 << gmc->alpha); |
571 |
if (num_wp==2) { |
if (num_wp==2) { |
572 |
gmc->dyF = -gmc->dxG; |
gmc->dyF = -gmc->dxG; |
573 |
gmc->dyG = gmc->dxF; |
gmc->dyG = gmc->dxF; |
574 |
} |
} else if (num_wp==3) { |
|
else if (num_wp==3) { |
|
575 |
gmc->beta = log2bin(gmc->H-1); |
gmc->beta = log2bin(gmc->H-1); |
576 |
gmc->Hs = (1 << gmc->beta); |
gmc->Hs = (1 << gmc->beta); |
577 |
gmc->dyF = RDIV( 8*gmc->Hs*du2, gmc->H ); |
gmc->dyF = RDIV( 8*gmc->Hs*du2, gmc->H ); |
581 |
gmc->dxG <<= (gmc->beta - gmc->alpha); |
gmc->dxG <<= (gmc->beta - gmc->alpha); |
582 |
gmc->alpha = gmc->beta; |
gmc->alpha = gmc->beta; |
583 |
gmc->Ws = 1<< gmc->beta; |
gmc->Ws = 1<< gmc->beta; |
584 |
} |
} else { |
|
else { |
|
585 |
gmc->dyF <<= gmc->alpha - gmc->beta; |
gmc->dyF <<= gmc->alpha - gmc->beta; |
586 |
gmc->dyG <<= gmc->alpha - gmc->beta; |
gmc->dyG <<= gmc->alpha - gmc->beta; |
587 |
} |
} |
595 |
} |
} |
596 |
|
|
597 |
void |
void |
598 |
generate_GMCimage( const GMC_DATA *const gmc_data, // [input] precalculated data |
generate_GMCimage( const GMC_DATA *const gmc_data, /* [input] precalculated data */ |
599 |
const IMAGE *const pRef, // [input] |
const IMAGE *const pRef, /* [input] */ |
600 |
const int mb_width, |
const int mb_width, |
601 |
const int mb_height, |
const int mb_height, |
602 |
const int stride, |
const int stride, |
603 |
const int stride2, |
const int stride2, |
604 |
const int fcode, // [input] some parameters... |
const int fcode, /* [input] some parameters... */ |
605 |
const int32_t quarterpel, // [input] for rounding avgMV |
const int32_t quarterpel, /* [input] for rounding avgMV */ |
606 |
const int reduced_resolution, // [input] ignored |
const int reduced_resolution, /* [input] ignored */ |
607 |
const int32_t rounding, // [input] for rounding image data |
const int32_t rounding, /* [input] for rounding image data */ |
608 |
MACROBLOCK *const pMBs, // [output] average motion vectors |
MACROBLOCK *const pMBs, /* [output] average motion vectors */ |
609 |
IMAGE *const pGMC) // [output] full warped image |
IMAGE *const pGMC) /* [output] full warped image */ |
610 |
{ |
{ |
611 |
|
|
612 |
unsigned int mj,mi; |
unsigned int mj,mi; |
613 |
VECTOR avgMV; |
VECTOR avgMV; |
614 |
|
|
615 |
for (mj=0;mj<mb_height;mj++) |
for (mj = 0; mj < (unsigned int)mb_height; mj++) |
616 |
for (mi=0;mi<mb_width; mi++) |
for (mi = 0; mi < (unsigned int)mb_width; mi++) { |
617 |
{ |
|
618 |
avgMV = generate_GMCimageMB(gmc_data, pRef, mi, mj, |
avgMV = generate_GMCimageMB(gmc_data, pRef, mi, mj, |
619 |
stride, stride2, quarterpel, rounding, pGMC); |
stride, stride2, quarterpel, rounding, pGMC); |
620 |
|
|
666 |
|
|
667 |
Fj = gmc_data->Fo + dyF*mj*16 + dxF*mi*16; |
Fj = gmc_data->Fo + dyF*mj*16 + dxF*mi*16; |
668 |
Gj = gmc_data->Go + dyG*mj*16 + dxG*mi*16; |
Gj = gmc_data->Go + dyG*mj*16 + dxG*mi*16; |
669 |
for (J=16; J>0; --J) |
|
670 |
{ |
for (J = 16; J > 0; --J) { |
671 |
int32_t Fi, Gi; |
int32_t Fi, Gi; |
672 |
|
|
673 |
Fi = Fj; Fj += dyF; |
Fi = Fj; Fj += dyF; |
674 |
Gi = Gj; Gj += dyG; |
Gi = Gj; Gj += dyG; |
675 |
for (I=-16; I<0; ++I) |
for (I = -16; I < 0; ++I) { |
|
{ |
|
676 |
int32_t F, G; |
int32_t F, G; |
677 |
uint32_t ri, rj; |
uint32_t ri, rj; |
678 |
|
|
693 |
if (G< -1) G=-1; |
if (G< -1) G=-1; |
694 |
else if (G>H) G=H; |
else if (G>H) G=H; |
695 |
|
|
696 |
{ // MMX-like bilinear... |
{ /* MMX-like bilinear... */ |
697 |
const int offset = G*stride + F; |
const int offset = G*stride + F; |
698 |
uint32_t f0, f1; |
uint32_t f0, f1; |
699 |
f0 = pRef->y[ offset +0 ]; |
f0 = pRef->y[ offset +0 ]; |
708 |
dstY[I] = (uint8_t)f0; |
dstY[I] = (uint8_t)f0; |
709 |
} |
} |
710 |
} |
} |
711 |
|
|
712 |
dstY += stride; |
dstY += stride; |
713 |
} |
} |
714 |
|
|
717 |
|
|
718 |
Fj = gmc_data->cFo + dyF*4 *mj*8 + dxF*4 *mi*8; |
Fj = gmc_data->cFo + dyF*4 *mj*8 + dxF*4 *mi*8; |
719 |
Gj = gmc_data->cGo + dyG*4 *mj*8 + dxG*4 *mi*8; |
Gj = gmc_data->cGo + dyG*4 *mj*8 + dxG*4 *mi*8; |
720 |
for (J=8; J>0; --J) |
|
721 |
{ |
for (J = 8; J > 0; --J) { |
722 |
int32_t Fi, Gi; |
int32_t Fi, Gi; |
723 |
Fi = Fj; Fj += 4*dyF; |
Fi = Fj; Fj += 4*dyF; |
724 |
Gi = Gj; Gj += 4*dyG; |
Gi = Gj; Gj += 4*dyG; |
725 |
|
|
726 |
for (I=-8; I<0; ++I) |
for (I = -8; I < 0; ++I) { |
|
{ |
|
727 |
int32_t F, G; |
int32_t F, G; |
728 |
uint32_t ri, rj; |
uint32_t ri, rj; |
729 |
|
|
774 |
} |
} |
775 |
|
|
776 |
|
|
777 |
avgMV.x -= 16*((256*mi+120)<<4); // 120 = 15*16/2 |
avgMV.x -= 16*((256*mi+120)<<4); /* 120 = 15*16/2 */ |
778 |
avgMV.y -= 16*((256*mj+120)<<4); |
avgMV.y -= 16*((256*mj+120)<<4); |
779 |
|
|
780 |
avgMV.x = RSHIFT( avgMV.x, (4+7-quarterpel) ); |
avgMV.x = RSHIFT( avgMV.x, (4+7-quarterpel) ); |
787 |
|
|
788 |
#ifdef OLD_GRUEL_GMC |
#ifdef OLD_GRUEL_GMC |
789 |
void |
void |
790 |
generate_GMCparameters( const int num_wp, // [input]: number of warppoints |
generate_GMCparameters( const int num_wp, /* [input]: number of warppoints */ |
791 |
const int res, // [input]: resolution |
const int res, /* [input]: resolution */ |
792 |
const WARPPOINTS *const warp, // [input]: warp points |
const WARPPOINTS *const warp, /* [input]: warp points */ |
793 |
const int width, const int height, |
const int width, const int height, |
794 |
GMC_DATA *const gmc) // [output] precalculated parameters |
GMC_DATA *const gmc) /* [output] precalculated parameters */ |
795 |
{ |
{ |
796 |
|
|
797 |
/* We follow mainly two sources: The original standard, which is ugly, and the |
/* We follow mainly two sources: The original standard, which is ugly, and the |
851 |
int dv0 = warp->duv[0].y; |
int dv0 = warp->duv[0].y; |
852 |
int du1 = warp->duv[1].x; |
int du1 = warp->duv[1].x; |
853 |
int dv1 = warp->duv[1].y; |
int dv1 = warp->duv[1].y; |
854 |
// int du2 = warp->duv[2].x; |
#if 0 |
855 |
// int dv2 = warp->duv[2].y; |
int du2 = warp->duv[2].x; |
856 |
|
int dv2 = warp->duv[2].y; |
857 |
|
#endif |
858 |
|
|
859 |
gmc->num_wp = num_wp; |
gmc->num_wp = num_wp; |
860 |
|
|
869 |
gmc->alpha = log2bin(gmc->W-1); |
gmc->alpha = log2bin(gmc->W-1); |
870 |
gmc->Ws= 1<<gmc->alpha; |
gmc->Ws= 1<<gmc->alpha; |
871 |
|
|
872 |
// gmc->beta = log2bin(gmc->H-1); |
#if 0 |
873 |
// gmc->Hs= 1<<gmc->beta; |
gmc->beta = log2bin(gmc->H-1); |
874 |
|
gmc->Hs= 1<<gmc->beta; |
875 |
|
#endif |
876 |
|
|
877 |
// printf("du0=%d dv0=%d du1=%d dv1=%d s=%d sigma=%d W=%d alpha=%d, Ws=%d, rho=%d\n",du0,dv0,du1,dv1,gmc->s,gmc->sigma,gmc->W,gmc->alpha,gmc->Ws,gmc->rho); |
#if 0 |
878 |
|
printf("du0=%d dv0=%d du1=%d dv1=%d s=%d sigma=%d W=%d alpha=%d, Ws=%d, rho=%d\n",du0,dv0,du1,dv1,gmc->s,gmc->sigma,gmc->W,gmc->alpha,gmc->Ws,gmc->rho); |
879 |
|
#endif |
880 |
|
|
881 |
/* i2s is only needed for num_wp >= 3, etc. */ |
/* |
882 |
/* the 's' values are in 1/s pel resolution */ |
* i2s is only needed for num_wp >= 3, etc. |
883 |
|
* the 's' values are in 1/s pel resolution |
884 |
|
*/ |
885 |
gmc->i0s = res/2 * ( du0 ); |
gmc->i0s = res/2 * ( du0 ); |
886 |
gmc->j0s = res/2 * ( dv0 ); |
gmc->j0s = res/2 * ( dv0 ); |
887 |
gmc->i1s = res/2 * (2*width + du1 + du0 ); |
gmc->i1s = res/2 * (2*width + du1 + du0 ); |
888 |
gmc->j1s = res/2 * ( dv1 + dv0 ); |
gmc->j1s = res/2 * ( dv1 + dv0 ); |
889 |
// gmc->i2s = res/2 * ( du2 + du0 ); |
#if 0 |
890 |
// gmc->j2s = res/2 * (2*height + dv2 + dv0 ); |
gmc->i2s = res/2 * ( du2 + du0 ); |
891 |
|
gmc->j2s = res/2 * (2*height + dv2 + dv0 ); |
892 |
|
#endif |
893 |
|
|
894 |
/* i2s and i2ss are only needed for num_wp == 3, etc. */ |
/* i2s and i2ss are only needed for num_wp == 3, etc. */ |
895 |
|
|
897 |
gmc->i1ss = 16*gmc->Ws + ROUNDED_DIV(((gmc->W-gmc->Ws)*(gmc->r*gmc->i0s) + gmc->Ws*(gmc->r*gmc->i1s - 16*gmc->W)),gmc->W); |
gmc->i1ss = 16*gmc->Ws + ROUNDED_DIV(((gmc->W-gmc->Ws)*(gmc->r*gmc->i0s) + gmc->Ws*(gmc->r*gmc->i1s - 16*gmc->W)),gmc->W); |
898 |
gmc->j1ss = ROUNDED_DIV( ((gmc->W - gmc->Ws)*(gmc->r*gmc->j0s) + gmc->Ws*gmc->r*gmc->j1s) ,gmc->W ); |
gmc->j1ss = ROUNDED_DIV( ((gmc->W - gmc->Ws)*(gmc->r*gmc->j0s) + gmc->Ws*gmc->r*gmc->j1s) ,gmc->W ); |
899 |
|
|
900 |
// gmc->i2ss = ROUNDED_DIV( ((gmc->H - gmc->Hs)*(gmc->r*gmc->i0s) + gmc->Hs*(gmc->r*gmc->i2s)), gmc->H); |
#if 0 |
901 |
// gmc->j2ss = 16*gmc->Hs + ROUNDED_DIV( ((gmc->H-gmc->Hs)*(gmc->r*gmc->j0s) + gmc->Ws*(gmc->r*gmc->j2s - 16*gmc->H)), gmc->H); |
gmc->i2ss = ROUNDED_DIV( ((gmc->H - gmc->Hs)*(gmc->r*gmc->i0s) + gmc->Hs*(gmc->r*gmc->i2s)), gmc->H); |
902 |
|
gmc->j2ss = 16*gmc->Hs + ROUNDED_DIV( ((gmc->H-gmc->Hs)*(gmc->r*gmc->j0s) + gmc->Ws*(gmc->r*gmc->j2s - 16*gmc->H)), gmc->H); |
903 |
|
#endif |
904 |
|
|
905 |
return; |
return; |
906 |
} |
} |
907 |
|
|
|
|
|
|
|
|
908 |
void |
void |
909 |
generate_GMCimage( const GMC_DATA *const gmc_data, // [input] precalculated data |
generate_GMCimage( const GMC_DATA *const gmc_data, /* [input] precalculated data */ |
910 |
const IMAGE *const pRef, // [input] |
const IMAGE *const pRef, /* [input] */ |
911 |
const int mb_width, |
const int mb_width, |
912 |
const int mb_height, |
const int mb_height, |
913 |
const int stride, |
const int stride, |
914 |
const int stride2, |
const int stride2, |
915 |
const int fcode, // [input] some parameters... |
const int fcode, /* [input] some parameters... */ |
916 |
const int32_t quarterpel, // [input] for rounding avgMV |
const int32_t quarterpel, /* [input] for rounding avgMV */ |
917 |
const int reduced_resolution, // [input] ignored |
const int reduced_resolution, /* [input] ignored */ |
918 |
const int32_t rounding, // [input] for rounding image data |
const int32_t rounding, /* [input] for rounding image data */ |
919 |
MACROBLOCK *const pMBs, // [output] average motion vectors |
MACROBLOCK *const pMBs, /* [output] average motion vectors */ |
920 |
IMAGE *const pGMC) // [output] full warped image |
IMAGE *const pGMC) /* [output] full warped image */ |
921 |
{ |
{ |
922 |
|
|
923 |
unsigned int mj,mi; |
unsigned int mj,mi; |
924 |
VECTOR avgMV; |
VECTOR avgMV; |
925 |
|
|
926 |
for (mj=0;mj<mb_height;mj++) |
for (mj=0;mj<mb_height;mj++) |
927 |
for (mi=0;mi<mb_width; mi++) |
for (mi = 0;mi < mb_width; mi++) { |
928 |
{ |
|
929 |
avgMV = generate_GMCimageMB(gmc_data, pRef, mi, mj, |
avgMV = generate_GMCimageMB(gmc_data, pRef, mi, mj, |
930 |
stride, stride2, quarterpel, rounding, pGMC); |
stride, stride2, quarterpel, rounding, pGMC); |
931 |
|
|
979 |
|
|
980 |
const int i1ss = gmc_data->i1ss; |
const int i1ss = gmc_data->i1ss; |
981 |
const int j1ss = gmc_data->j1ss; |
const int j1ss = gmc_data->j1ss; |
982 |
// const int i2ss = gmc_data->i2ss; |
#if 0 |
983 |
// const int j2ss = gmc_data->j2ss; |
const int i2ss = gmc_data->i2ss; |
984 |
|
const int j2ss = gmc_data->j2ss; |
985 |
|
#endif |
986 |
|
|
987 |
const int alpha = gmc_data->alpha; |
const int alpha = gmc_data->alpha; |
988 |
const int Ws = gmc_data->Ws; |
const int Ws = gmc_data->Ws; |
989 |
|
|
990 |
// const int beta = gmc_data->beta; |
#if 0 |
991 |
// const int Hs = gmc_data->Hs; |
const int beta = gmc_data->beta; |
992 |
|
const int Hs = gmc_data->Hs; |
993 |
|
#endif |
994 |
|
|
995 |
int I,J; |
int I,J; |
996 |
VECTOR avgMV = {0,0}; |
VECTOR avgMV = {0,0}; |
1004 |
/* this naive implementation (with lots of multiplications) isn't slower (rather faster) than |
/* this naive implementation (with lots of multiplications) isn't slower (rather faster) than |
1005 |
working incremental. Don't ask me why... maybe the whole this is memory bound? */ |
working incremental. Don't ask me why... maybe the whole this is memory bound? */ |
1006 |
|
|
1007 |
const int ri= F & (s-1); // fractional part of pelwise MV X |
const int ri= F & (s-1); /* fractional part of pelwise MV X */ |
1008 |
const int rj= G & (s-1); // fractional part of pelwise MV Y |
const int rj= G & (s-1); /* fractional part of pelwise MV Y */ |
1009 |
|
|
1010 |
int Y00,Y01,Y10,Y11; |
int Y00,Y01,Y10,Y11; |
1011 |
|
|
1028 |
else if (G>H) |
else if (G>H) |
1029 |
G=H; /* dito */ |
G=H; /* dito */ |
1030 |
|
|
1031 |
Y00 = pRef->y[ G*stride + F ]; // Lumi values |
Y00 = pRef->y[ G*stride + F ]; /* Lumi values */ |
1032 |
Y01 = pRef->y[ G*stride + F+1 ]; |
Y01 = pRef->y[ G*stride + F+1 ]; |
1033 |
Y10 = pRef->y[ G*stride + F+stride ]; |
Y10 = pRef->y[ G*stride + F+stride ]; |
1034 |
Y11 = pRef->y[ G*stride + F+stride+1 ]; |
Y11 = pRef->y[ G*stride + F+stride+1 ]; |
1055 |
int Gc=((-r*j0s+j1ss)*(4*I+1) +(-r*i0s+i1ss)*(4*J+1) +2*Ws*r*j0s |
int Gc=((-r*j0s+j1ss)*(4*I+1) +(-r*i0s+i1ss)*(4*J+1) +2*Ws*r*j0s |
1056 |
-16*Ws +(1<<(alpha+rho+1))) >>(alpha+rho+2); |
-16*Ws +(1<<(alpha+rho+1))) >>(alpha+rho+2); |
1057 |
|
|
1058 |
const int ri= Fc & (s-1); // fractional part of pelwise MV X |
const int ri= Fc & (s-1); /* fractional part of pelwise MV X */ |
1059 |
const int rj= Gc & (s-1); // fractional part of pelwise MV Y |
const int rj= Gc & (s-1); /* fractional part of pelwise MV Y */ |
1060 |
|
|
1061 |
int C00,C01,C10,C11; |
int C00,C01,C10,C11; |
1062 |
|
|
1073 |
Gc=H/2; /* dito */ |
Gc=H/2; /* dito */ |
1074 |
|
|
1075 |
/* now calculate U data */ |
/* now calculate U data */ |
1076 |
C00 = pRef->u[ Gc*stride2 + Fc ]; // chroma-value Cb |
C00 = pRef->u[ Gc*stride2 + Fc ]; /* chroma-value Cb */ |
1077 |
C01 = pRef->u[ Gc*stride2 + Fc+1 ]; |
C01 = pRef->u[ Gc*stride2 + Fc+1 ]; |
1078 |
C10 = pRef->u[ (Gc+1)*stride2 + Fc ]; |
C10 = pRef->u[ (Gc+1)*stride2 + Fc ]; |
1079 |
C11 = pRef->u[ (Gc+1)*stride2 + Fc+1 ]; |
C11 = pRef->u[ (Gc+1)*stride2 + Fc+1 ]; |
1086 |
pGMC->u[J*stride2+I] = (uint8_t)C00; /* output 1 U-pixel */ |
pGMC->u[J*stride2+I] = (uint8_t)C00; /* output 1 U-pixel */ |
1087 |
|
|
1088 |
/* now calculate V data */ |
/* now calculate V data */ |
1089 |
C00 = pRef->v[ Gc*stride2 + Fc ]; // chroma-value Cr |
C00 = pRef->v[ Gc*stride2 + Fc ]; /* chroma-value Cr */ |
1090 |
C01 = pRef->v[ Gc*stride2 + Fc+1 ]; |
C01 = pRef->v[ Gc*stride2 + Fc+1 ]; |
1091 |
C10 = pRef->v[ (Gc+1)*stride2 + Fc ]; |
C10 = pRef->v[ (Gc+1)*stride2 + Fc ]; |
1092 |
C11 = pRef->v[ (Gc+1)*stride2 + Fc+1 ]; |
C11 = pRef->v[ (Gc+1)*stride2 + Fc+1 ]; |