--- trunk/xvidcore/src/motion/motion_est.c 2002/03/08 02:46:11 3 +++ trunk/xvidcore/src/motion/motion_est.c 2002/04/25 06:55:00 136 @@ -2,26 +2,31 @@ * * Modifications: * + * 25.04.2002 partial prevMB conversion + * 22.04.2002 remove some compile warning by chenm001 + * 14.04.2002 added MotionEstimationBVOP() + * 02.04.2002 add EPZS(^2) as ME algorithm, use PMV_USESQUARES to choose between + * EPZS and EPZS^2 * 08.02.2002 split up PMVfast into three routines: PMVFast, PMVFast_MainLoop * PMVFast_Refine to support multiple searches with different start points - * 07.01.2002 uv-block-based interpolation + * 07.01.2002 uv-block-based interpolation * 06.01.2002 INTER/INTRA-decision is now done before any SEARCH8 (speedup) - * changed INTER_BIAS to 150 (as suggested by suxen_drol) - * removed halfpel refinement step in PMVfastSearch8 + quality=5 - * added new quality mode = 6 which performs halfpel refinement - * filesize difference between quality 5 and 6 is smaller than 1% + * changed INTER_BIAS to 150 (as suggested by suxen_drol) + * removed halfpel refinement step in PMVfastSearch8 + quality=5 + * added new quality mode = 6 which performs halfpel refinement + * filesize difference between quality 5 and 6 is smaller than 1% * (Isibaar) * 31.12.2001 PMVfastSearch16 and PMVfastSearch8 (gruel) - * 30.12.2001 get_range/MotionSearchX simplified; blue/green bug fix - * 22.12.2001 commented best_point==99 check - * 19.12.2001 modified get_range (purple bug fix) + * 30.12.2001 get_range/MotionSearchX simplified; blue/green bug fix + * 22.12.2001 commented best_point==99 check + * 19.12.2001 modified get_range (purple bug fix) * 15.12.2001 moved pmv displacement from mbprediction * 02.12.2001 motion estimation/compensation split (Isibaar) - * 16.11.2001 rewrote/tweaked search algorithms; pross@cs.rmit.edu.au + * 16.11.2001 rewrote/tweaked search algorithms; pross@cs.rmit.edu.au * 10.11.2001 support for sad16/sad8 functions * 28.08.2001 reactivated MODE_INTER4V for EXT_MODE * 24.08.2001 removed MODE_INTER4V_Q, disabled MODE_INTER4V for EXT_MODE - * 22.08.2001 added MODE_INTER4V_Q + * 22.08.2001 added MODE_INTER4V_Q * 20.08.2001 added pragma to get rid of internal compiler error with VC6 * idea by Cyril. Thanks. * @@ -31,12 +36,14 @@ #include #include +#include #include "../encoder.h" #include "../utils/mbfunctions.h" #include "../prediction/mbprediction.h" #include "../global.h" #include "../utils/timer.h" +#include "motion.h" #include "sad.h" // very large value @@ -65,64 +72,116 @@ #define EVEN(A) (((A)<0?(A)+1:(A)) & ~1) -#define MIN(X, Y) ((X)<(Y)?(X):(Y)) -#define MAX(X, Y) ((X)>(Y)?(X):(Y)) -#define ABS(X) (((X)>0)?(X):-(X)) -#define SIGN(X) (((X)>0)?1:-1) - - -int32_t PMVfastSearch8( +int32_t PMVfastSearch16( const uint8_t * const pRef, const uint8_t * const pRefH, const uint8_t * const pRefV, const uint8_t * const pRefHV, const IMAGE * const pCur, const int x, const int y, - const int start_x, int start_y, - const uint32_t iQuality, - MBParam * const pParam, - MACROBLOCK * const pMBs, + const uint32_t MotionFlags, + const uint32_t iQuant, + const uint32_t iFcode, + const MBParam * const pParam, + const MACROBLOCK * const pMBs, + const MACROBLOCK * const prevMBs, VECTOR * const currMV, VECTOR * const currPMV); -int32_t PMVfastSearch16( +int32_t EPZSSearch16( const uint8_t * const pRef, const uint8_t * const pRefH, const uint8_t * const pRefV, const uint8_t * const pRefHV, const IMAGE * const pCur, const int x, const int y, - const uint32_t iQuality, - MBParam * const pParam, - MACROBLOCK * const pMBs, + const uint32_t MotionFlags, + const uint32_t iQuant, + const uint32_t iFcode, + const MBParam * const pParam, + const MACROBLOCK * const pMBs, + const MACROBLOCK * const prevMBs, VECTOR * const currMV, VECTOR * const currPMV); +int32_t PMVfastSearch8( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const IMAGE * const pCur, + const int x, const int y, + const int start_x, const int start_y, + const uint32_t MotionFlags, + const uint32_t iQuant, + const uint32_t iFcode, + const MBParam * const pParam, + const MACROBLOCK * const pMBs, + const MACROBLOCK * const prevMBs, + VECTOR * const currMV, + VECTOR * const currPMV); -/* diamond search stuff - keep the the sequence in circular order (so optimization works) -*/ - -typedef struct -{ - int32_t dx; - int32_t dy; -} -DPOINT; - - -static const DPOINT diamond_small[4] = -{ - {0, 1}, {1, 0}, {0, -1}, {-1, 0} -}; +int32_t EPZSSearch8( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const IMAGE * const pCur, + const int x, const int y, + const int start_x, const int start_y, + const uint32_t MotionFlags, + const uint32_t iQuant, + const uint32_t iFcode, + const MBParam * const pParam, + const MACROBLOCK * const pMBs, + const MACROBLOCK * const prevMBs, + VECTOR * const currMV, + VECTOR * const currPMV); -static const DPOINT diamond_large[8] = -{ - {0, 2}, {1, 1}, {2, 0}, {1, -1}, {0, -2}, {-1, -1}, {-2, 0}, {-1, 1} -}; +typedef int32_t (MainSearch16Func)( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const uint8_t * const cur, + const int x, const int y, + int32_t startx, int32_t starty, + int32_t iMinSAD, + VECTOR * const currMV, + const VECTOR * const pmv, + const int32_t min_dx, const int32_t max_dx, + const int32_t min_dy, const int32_t max_dy, + const int32_t iEdgedWidth, + const int32_t iDiamondSize, + const int32_t iFcode, + const int32_t iQuant, + int iFound); + +typedef MainSearch16Func* MainSearch16FuncPtr; + + +typedef int32_t (MainSearch8Func)( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const uint8_t * const cur, + const int x, const int y, + int32_t startx, int32_t starty, + int32_t iMinSAD, + VECTOR * const currMV, + const VECTOR * const pmv, + const int32_t min_dx, const int32_t max_dx, + const int32_t min_dy, const int32_t max_dy, + const int32_t iEdgedWidth, + const int32_t iDiamondSize, + const int32_t iFcode, + const int32_t iQuant, + int iFound); +typedef MainSearch8Func* MainSearch8FuncPtr; // mv.length table static const uint32_t mvtab[33] = { @@ -173,176 +232,163 @@ -/* calculate the min/max range (in halfpixels) - relative to the _MACROBLOCK_ position -*/ - -static void __inline get_range( - int32_t * const min_dx, int32_t * const max_dx, - int32_t * const min_dy, int32_t * const max_dy, - const uint32_t x, const uint32_t y, - const uint32_t block_sz, // block dimension, 8 or 16 - const uint32_t width, const uint32_t height, - const uint32_t fcode) -{ - const int search_range = 32 << (fcode - 1); - const int high = search_range - 1; - const int low = -search_range; - - // convert full-pixel measurements to half pixel - const int hp_width = 2 * width; - const int hp_height = 2 * height; - const int hp_edge = 2 * block_sz; - const int hp_x = 2 * (x) * block_sz; // we need _right end_ of block, not x-coordinate - const int hp_y = 2 * (y) * block_sz; // same for _bottom end_ - - *max_dx = MIN(high, hp_width - hp_x); - *max_dy = MIN(high, hp_height - hp_y); - *min_dx = MAX(low, -(hp_edge + hp_x)); - *min_dy = MAX(low, -(hp_edge + hp_y)); -} - - -/* getref: calculate reference image pointer -the decision to use interpolation h/v/hv or the normal image is -based on dx & dy. -*/ - -static __inline const uint8_t * get_ref( - const uint8_t * const refn, - const uint8_t * const refh, - const uint8_t * const refv, - const uint8_t * const refhv, - const uint32_t x, const uint32_t y, - const uint32_t block, // block dimension, 8 or 16 - const int32_t dx, const int32_t dy, - const uint32_t stride) -{ - switch ( ((dx&1)<<1) + (dy&1) ) // ((dx%2)?2:0)+((dy%2)?1:0) - { - case 0 : return refn + (x*block+dx/2) + (y*block+dy/2)*stride; - case 1 : return refv + (x*block+dx/2) + (y*block+(dy-1)/2)*stride; - case 2 : return refh + (x*block+(dx-1)/2) + (y*block+dy/2)*stride; - default : - case 3 : return refhv + (x*block+(dx-1)/2) + (y*block+(dy-1)/2)*stride; - } -} - - -/* This is somehow a copy of get_ref, but with MV instead of X,Y */ - -static __inline const uint8_t * get_ref_mv( - const uint8_t * const refn, - const uint8_t * const refh, - const uint8_t * const refv, - const uint8_t * const refhv, - const uint32_t x, const uint32_t y, - const uint32_t block, // block dimension, 8 or 16 - const VECTOR* mv, // measured in half-pel! - const uint32_t stride) -{ - switch ( (((mv->x)&1)<<1) + ((mv->y)&1) ) - { - case 0 : return refn + (x*block+(mv->x)/2) + (y*block+(mv->y)/2)*stride; - case 1 : return refv + (x*block+(mv->x)/2) + (y*block+((mv->y)-1)/2)*stride; - case 2 : return refh + (x*block+((mv->x)-1)/2) + (y*block+(mv->y)/2)*stride; - default : - case 3 : return refhv + (x*block+((mv->x)-1)/2) + (y*block+((mv->y)-1)/2)*stride; - } -} #ifndef SEARCH16 #define SEARCH16 PMVfastSearch16 +//#define SEARCH16 FullSearch16 +//#define SEARCH16 EPZSSearch16 #endif #ifndef SEARCH8 #define SEARCH8 PMVfastSearch8 +//#define SEARCH8 EPZSSearch8 #endif bool MotionEstimation( - MACROBLOCK * const pMBs, - MBParam * const pParam, - const IMAGE * const pRef, - const IMAGE * const pRefH, - const IMAGE * const pRefV, - const IMAGE * const pRefHV, - IMAGE * const pCurrent, - const uint32_t iLimit) + MBParam * const pParam, + FRAMEINFO * const current, + FRAMEINFO * const reference, + const IMAGE * const pRefH, + const IMAGE * const pRefV, + const IMAGE * const pRefHV, + const uint32_t iLimit) { - const uint32_t iWcount = pParam->mb_width; - const uint32_t iHcount = pParam->mb_height; + const uint32_t iWcount = pParam->mb_width; + const uint32_t iHcount = pParam->mb_height; + MACROBLOCK * pMBs = current->mbs; + IMAGE * pCurrent = ¤t->image; + + MACROBLOCK * prevMBs = reference->mbs; // previous frame + IMAGE * pRef = &reference->image; + uint32_t i, j, iIntra = 0; - VECTOR mv16; - VECTOR pmv16; + VECTOR mv16; + VECTOR pmv16; + + int32_t sad8 = 0; + int32_t sad16; + int32_t deviation; + + if (sadInit) + (*sadInit)(); + - int32_t sad8 = 0; - int32_t sad16; - int32_t deviation; + /* eventhough we have a seperate prevMBs, + pmvfast/epsz does something "funny" with the previous frames data */ + for (i = 0; i < iHcount; i++) + for (j = 0; j < iWcount; j++) + { + pMBs[j + i * iWcount].mvs[0] = prevMBs[j + i * iWcount].mvs[0]; + pMBs[j + i * iWcount].mvs[1] = prevMBs[j + i * iWcount].mvs[1]; + pMBs[j + i * iWcount].mvs[2] = prevMBs[j + i * iWcount].mvs[2]; + pMBs[j + i * iWcount].mvs[3] = prevMBs[j + i * iWcount].mvs[3]; + } + + /*dprintf("*** BEFORE ***"); + for (i = 0; i < iHcount; i++) + for (j = 0; j < iWcount; j++) + { + dprintf(" [%i,%i] mode=%i dquant=%i mvs=(%i %i %i %i) sad8=(%i %i %i %i) sad16=(%i)", j,i, + pMBs[j + i * iWcount].mode, + pMBs[j + i * iWcount].dquant, + pMBs[j + i * iWcount].mvs[0], + pMBs[j + i * iWcount].mvs[1], + pMBs[j + i * iWcount].mvs[2], + pMBs[j + i * iWcount].mvs[3], + prevMBs[j + i * iWcount].sad8[0], + prevMBs[j + i * iWcount].sad8[1], + prevMBs[j + i * iWcount].sad8[2], + prevMBs[j + i * iWcount].sad8[3], + prevMBs[j + i * iWcount].sad16); + } + */ + // note: i==horizontal, j==vertical - for (i = 0; i < iHcount; i++) + for (i = 0; i < iHcount; i++) for (j = 0; j < iWcount; j++) { MACROBLOCK *pMB = &pMBs[j + i * iWcount]; + MACROBLOCK *prevMB = &prevMBs[j + i * iWcount]; sad16 = SEARCH16(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, - j, i, pParam->motion_flags, - pParam, pMBs, &mv16, &pmv16); + j, i, current->motion_flags, current->quant, current->fcode, + pParam, pMBs, prevMBs, &mv16, &pmv16); pMB->sad16=sad16; - /* decide: MODE_INTER or MODE_INTRA - if (dev_intra < sad_inter - 2 * nb) use_intra - */ + /* decide: MODE_INTER or MODE_INTRA + if (dev_intra < sad_inter - 2 * nb) use_intra + */ - deviation = dev16(pCurrent->y + j*16 + i*16*pParam->edged_width, pParam->edged_width); + deviation = dev16(pCurrent->y + j*16 + i*16*pParam->edged_width, pParam->edged_width); - if (deviation < (sad16 - INTER_BIAS)) - { - pMB->mode = MODE_INTRA; - pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = 0; - pMB->mvs[0].y = pMB->mvs[1].y = pMB->mvs[2].y = pMB->mvs[3].y = 0; + if (deviation < (sad16 - INTER_BIAS)) + { + pMB->mode = MODE_INTRA; + pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = 0; + pMB->mvs[0].y = pMB->mvs[1].y = pMB->mvs[2].y = pMB->mvs[3].y = 0; - iIntra++; - if(iIntra >= iLimit) - return 1; + pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = 0; - continue; - } + iIntra++; + if(iIntra >= iLimit) + return 1; - if (pParam->global_flags & XVID_INTER4V) - { - pMB->sad8[0] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, - 2 * j, 2 * i, mv16.x, mv16.y, pParam->motion_flags, - pParam, pMBs, &pMB->mvs[0], &pMB->pmvs[0]); - - pMB->sad8[1] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, - 2 * j + 1, 2 * i, mv16.x, mv16.y, pParam->motion_flags, - pParam, pMBs, &pMB->mvs[1], &pMB->pmvs[1]); - - pMB->sad8[2] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, - 2 * j, 2 * i + 1, mv16.x, mv16.y, pParam->motion_flags, - pParam, pMBs, &pMB->mvs[2], &pMB->pmvs[2]); + continue; + } + + if (current->global_flags & XVID_INTER4V) + { + pMB->sad8[0] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, + 2 * j, 2 * i, mv16.x, mv16.y, + current->motion_flags, current->quant, current->fcode, + pParam, pMBs, prevMBs, &pMB->mvs[0], &pMB->pmvs[0]); + + pMB->sad8[1] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, + 2 * j + 1, 2 * i, mv16.x, mv16.y, + current->motion_flags, current->quant, current->fcode, + pParam, pMBs, prevMBs, &pMB->mvs[1], &pMB->pmvs[1]); + + pMB->sad8[2] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, + 2 * j, 2 * i + 1, mv16.x, mv16.y, + current->motion_flags, current->quant, current->fcode, + pParam, pMBs, prevMBs, &pMB->mvs[2], &pMB->pmvs[2]); - pMB->sad8[3] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, - 2 * j + 1, 2 * i + 1, mv16.x, mv16.y, pParam->motion_flags, - pParam, pMBs, &pMB->mvs[3], &pMB->pmvs[3]); + pMB->sad8[3] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, + 2 * j + 1, 2 * i + 1, mv16.x, mv16.y, + current->motion_flags, current->quant, current->fcode, + pParam, pMBs, prevMBs, &pMB->mvs[3], &pMB->pmvs[3]); - sad8 = pMB->sad8[0] + pMB->sad8[1] + pMB->sad8[2] + pMB->sad8[3]; - } + sad8 = pMB->sad8[0] + pMB->sad8[1] + pMB->sad8[2] + pMB->sad8[3]; + } - /* decide: MODE_INTER or MODE_INTER4V - mpeg4: if (sad8 < sad16 - nb/2+1) use_inter4v - */ - - if (pMB->dquant == NO_CHANGE) { - if (((pParam->global_flags & XVID_INTER4V)==0) || - (sad16 < (sad8 + (int32_t)(IMV16X16 * pParam->quant)))) { + /* decide: MODE_INTER or MODE_INTER4V + mpeg4: if (sad8 < sad16 - nb/2+1) use_inter4v + */ + + if (!(current->global_flags & XVID_LUMIMASKING) || pMB->dquant == NO_CHANGE) + { + if (((current->global_flags & XVID_INTER4V)==0) || + (sad16 < (sad8 + (int32_t)(IMV16X16 * current->quant)))) + { + sad8 = sad16; + pMB->mode = MODE_INTER; + pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = mv16.x; + pMB->mvs[0].y = pMB->mvs[1].y = pMB->mvs[2].y = pMB->mvs[3].y = mv16.y; + pMB->pmvs[0].x = pmv16.x; + pMB->pmvs[0].y = pmv16.y; + } + else + pMB->mode = MODE_INTER4V; + } + else + { sad8 = sad16; pMB->mode = MODE_INTER; pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = mv16.x; @@ -350,19 +396,26 @@ pMB->pmvs[0].x = pmv16.x; pMB->pmvs[0].y = pmv16.y; } - else - pMB->mode = MODE_INTER4V; } - else + +/* dprintf("*** AFTER ***", pMBs[0].b_mvs[0].x); + for (i = 0; i < iHcount; i++) + for (j = 0; j < iWcount; j++) { - sad8 = sad16; - pMB->mode = MODE_INTER; - pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = mv16.x; - pMB->mvs[0].y = pMB->mvs[1].y = pMB->mvs[2].y = pMB->mvs[3].y = mv16.y; - pMB->pmvs[0].x = pmv16.x; - pMB->pmvs[0].y = pmv16.y; + dprintf(" [%i,%i] mode=%i dquant=%i mvs=(%i %i %i %i) sad8=(%i %i %i %i) sad16=(%i)", j,i, + pMBs[j + i * iWcount].mode, + pMBs[j + i * iWcount].dquant, + pMBs[j + i * iWcount].mvs[0], + pMBs[j + i * iWcount].mvs[1], + pMBs[j + i * iWcount].mvs[2], + pMBs[j + i * iWcount].mvs[3], + pMBs[j + i * iWcount].sad8[0], + pMBs[j + i * iWcount].sad8[1], + pMBs[j + i * iWcount].sad8[2], + pMBs[j + i * iWcount].sad8[3], + pMBs[j + i * iWcount].sad16); } - } + */ return 0; } @@ -384,6 +437,12 @@ { iMinSAD=iSAD; currMV->x=0; currMV->y=0; } } \ } +#define NOCHECK_MV16_CANDIDATE(X,Y) { \ + iSAD = sad16( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 16, X, Y, iEdgedWidth),iEdgedWidth, iMinSAD); \ + iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode) * iQuant;\ + if (iSAD < iMinSAD) \ + { iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); } \ +} #define CHECK_MV16_CANDIDATE(X,Y) { \ if ( ((X) <= max_dx) && ((X) >= min_dx) \ @@ -423,6 +482,13 @@ { iMinSAD=iSAD; currMV->x=0; currMV->y=0; } \ } +#define NOCHECK_MV8_CANDIDATE(X,Y) \ + { \ + iSAD = sad8( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 8, (X), (Y), iEdgedWidth),iEdgedWidth); \ + iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode) * iQuant;\ + if (iSAD < iMinSAD) \ + { iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); } \ +} #define CHECK_MV8_CANDIDATE(X,Y) { \ if ( ((X) <= max_dx) && ((X) >= min_dx) \ @@ -464,13 +530,15 @@ const IMAGE * const pCur, const int x, const int y, const uint32_t MotionFlags, + const uint32_t iQuant, + const uint32_t iFcode, MBParam * const pParam, - MACROBLOCK * const pMBs, + const MACROBLOCK * const pMBs, + const MACROBLOCK * const prevMBs, VECTOR * const currMV, VECTOR * const currPMV) { const int32_t iEdgedWidth = pParam->edged_width; - const int32_t iQuant = pParam->quant; const uint8_t * cur = pCur->y + x*16 + y*16*iEdgedWidth; int32_t iSAD; int32_t pred_x,pred_y; @@ -493,7 +561,64 @@ } */ -int32_t PMVfastSearch16_MainSearch( +int32_t Diamond16_MainSearch( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const uint8_t * const cur, + const int x, const int y, + int32_t startx, int32_t starty, + int32_t iMinSAD, + VECTOR * const currMV, + const VECTOR * const pmv, + const int32_t min_dx, const int32_t max_dx, + const int32_t min_dy, const int32_t max_dy, + const int32_t iEdgedWidth, + const int32_t iDiamondSize, + const int32_t iFcode, + const int32_t iQuant, + int iFound) +{ +/* Do a diamond search around given starting point, return SAD of best */ + + int32_t iDirection=0; + int32_t iSAD; + VECTOR backupMV; + backupMV.x = startx; + backupMV.y = starty; + +/* It's one search with full Diamond pattern, and only 3 of 4 for all following diamonds */ + + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y,1); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y,2); + CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y-iDiamondSize,3); + CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y+iDiamondSize,4); + + if (iDirection) + while (!iFound) + { + iFound = 1; + backupMV=*currMV; + + if ( iDirection != 2) + CHECK_MV16_CANDIDATE_FOUND(backupMV.x-iDiamondSize,backupMV.y,1); + if ( iDirection != 1) + CHECK_MV16_CANDIDATE_FOUND(backupMV.x+iDiamondSize,backupMV.y,2); + if ( iDirection != 4) + CHECK_MV16_CANDIDATE_FOUND(backupMV.x,backupMV.y-iDiamondSize,3); + if ( iDirection != 3) + CHECK_MV16_CANDIDATE_FOUND(backupMV.x,backupMV.y+iDiamondSize,4); + } + else + { + currMV->x = startx; + currMV->y = starty; + } + return iMinSAD; +} + +int32_t Square16_MainSearch( const uint8_t * const pRef, const uint8_t * const pRefH, const uint8_t * const pRefV, @@ -512,7 +637,7 @@ const int32_t iQuant, int iFound) { -/* Do a diamond search around given starting point, return SAD of best */ +/* Do a square search around given starting point, return SAD of best */ int32_t iDirection=0; int32_t iSAD; @@ -520,27 +645,101 @@ backupMV.x = startx; backupMV.y = starty; -/* It's one search with full Diamond pattern, and only 3 of 4 for all following diamonds */ +/* It's one search with full square pattern, and new parts for all following diamonds */ + +/* new direction are extra, so 1-4 is normal diamond + 537 + 1*2 + 648 +*/ CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y,1); CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y,2); CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y-iDiamondSize,3); CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y+iDiamondSize,4); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y-iDiamondSize,5); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y+iDiamondSize,6); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y-iDiamondSize,7); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y+iDiamondSize,8); + + if (iDirection) while (!iFound) { iFound = 1; backupMV=*currMV; - if ( iDirection != 2) - CHECK_MV16_CANDIDATE_FOUND(backupMV.x-iDiamondSize,backupMV.y,1); - if ( iDirection != 1) - CHECK_MV16_CANDIDATE_FOUND(backupMV.x+iDiamondSize,backupMV.y,2); - if ( iDirection != 4) - CHECK_MV16_CANDIDATE_FOUND(backupMV.x,backupMV.y-iDiamondSize,3); - if ( iDirection != 3) - CHECK_MV16_CANDIDATE_FOUND(backupMV.x,backupMV.y+iDiamondSize,4); + switch (iDirection) + { + case 1: + CHECK_MV16_CANDIDATE_FOUND(backupMV.x-iDiamondSize,backupMV.y,1); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y-iDiamondSize,5); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y-iDiamondSize,7); + break; + case 2: + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y,2); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y+iDiamondSize,6); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y+iDiamondSize,8); + break; + + case 3: + CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y+iDiamondSize,4); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y-iDiamondSize,7); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y+iDiamondSize,8); + break; + + case 4: + CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y-iDiamondSize,3); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y-iDiamondSize,5); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y+iDiamondSize,6); + break; + + case 5: + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y,1); + CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y-iDiamondSize,3); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y-iDiamondSize,5); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y+iDiamondSize,6); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y-iDiamondSize,7); + break; + + case 6: + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y,2); + CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y-iDiamondSize,3); + + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y-iDiamondSize,5); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y+iDiamondSize,6); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y+iDiamondSize,8); + + break; + + case 7: + CHECK_MV16_CANDIDATE_FOUND(backupMV.x-iDiamondSize,backupMV.y,1); + CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y+iDiamondSize,4); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y-iDiamondSize,5); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y-iDiamondSize,7); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y+iDiamondSize,8); + break; + + case 8: + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y,2); + CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y+iDiamondSize,4); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y+iDiamondSize,6); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y-iDiamondSize,7); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y+iDiamondSize,8); + break; + default: + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y,1); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y,2); + CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y-iDiamondSize,3); + CHECK_MV16_CANDIDATE_DIR(backupMV.x,backupMV.y+iDiamondSize,4); + + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y-iDiamondSize,5); + CHECK_MV16_CANDIDATE_DIR(backupMV.x-iDiamondSize,backupMV.y+iDiamondSize,6); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y-iDiamondSize,7); + CHECK_MV16_CANDIDATE_DIR(backupMV.x+iDiamondSize,backupMV.y+iDiamondSize,8); + break; + } } else { @@ -550,21 +749,88 @@ return iMinSAD; } -int32_t PMVfastSearch16_Refine( + +int32_t Full16_MainSearch( const uint8_t * const pRef, const uint8_t * const pRefH, const uint8_t * const pRefV, const uint8_t * const pRefHV, const uint8_t * const cur, const int x, const int y, + int32_t startx, int32_t starty, + int32_t iMinSAD, VECTOR * const currMV, + const VECTOR * const pmv, + const int32_t min_dx, const int32_t max_dx, + const int32_t min_dy, const int32_t max_dy, + const int32_t iEdgedWidth, + const int32_t iDiamondSize, + const int32_t iFcode, + const int32_t iQuant, + int iFound) +{ + int32_t iSAD; + int32_t dx,dy; + VECTOR backupMV; + backupMV.x = startx; + backupMV.y = starty; + + for (dx = min_dx; dx<=max_dx; dx+=iDiamondSize) + for (dy = min_dy; dy<= max_dy; dy+=iDiamondSize) + NOCHECK_MV16_CANDIDATE(dx,dy); + + return iMinSAD; +} + +int32_t Full8_MainSearch( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const uint8_t * const cur, + const int x, const int y, + int32_t startx, int32_t starty, int32_t iMinSAD, + VECTOR * const currMV, const VECTOR * const pmv, const int32_t min_dx, const int32_t max_dx, const int32_t min_dy, const int32_t max_dy, + const int32_t iEdgedWidth, + const int32_t iDiamondSize, const int32_t iFcode, const int32_t iQuant, - const int32_t iEdgedWidth) + int iFound) +{ + int32_t iSAD; + int32_t dx,dy; + VECTOR backupMV; + backupMV.x = startx; + backupMV.y = starty; + + for (dx = min_dx; dx<=max_dx; dx+=iDiamondSize) + for (dy = min_dy; dy<= max_dy; dy+=iDiamondSize) + NOCHECK_MV8_CANDIDATE(dx,dy); + + return iMinSAD; +} + + + +int32_t Halfpel16_Refine( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const uint8_t * const cur, + const int x, const int y, + VECTOR * const currMV, + int32_t iMinSAD, + const VECTOR * const pmv, + const int32_t min_dx, const int32_t max_dx, + const int32_t min_dy, const int32_t max_dy, + const int32_t iFcode, + const int32_t iQuant, + const int32_t iEdgedWidth) { /* Do a half-pel refinement (or rather a "smallest possible amount" refinement) */ @@ -585,6 +851,7 @@ #define PMV_HALFPEL16 (PMV_HALFPELDIAMOND16|PMV_HALFPELREFINE16) + int32_t PMVfastSearch16( const uint8_t * const pRef, const uint8_t * const pRefH, @@ -592,15 +859,16 @@ const uint8_t * const pRefHV, const IMAGE * const pCur, const int x, const int y, - const uint32_t MotionFlags, - MBParam * const pParam, - MACROBLOCK * const pMBs, + const uint32_t MotionFlags, + const uint32_t iQuant, + const uint32_t iFcode, + const MBParam * const pParam, + const MACROBLOCK * const pMBs, + const MACROBLOCK * const prevMBs, VECTOR * const currMV, VECTOR * const currPMV) { - const uint32_t iWcount = pParam->mb_width; - const int32_t iFcode = pParam->fixed_code; - const int32_t iQuant = pParam->quant; + const uint32_t iWcount = pParam->mb_width; const int32_t iWidth = pParam->width; const int32_t iHeight = pParam->height; const int32_t iEdgedWidth = pParam->edged_width; @@ -622,7 +890,8 @@ VECTOR pmv[4]; int32_t psad[4]; - MACROBLOCK * const pMB = pMBs + x + y * iWcount; + const MACROBLOCK * const pMB = pMBs + x + y * iWcount; + const MACROBLOCK * const prevMB = prevMBs + x + y * iWcount; static int32_t threshA,threshB; int32_t bPredEq; @@ -630,15 +899,15 @@ /* Get maximum range */ get_range(&min_dx, &max_dx, &min_dy, &max_dy, - x, y, 16, iWidth, iHeight, iFcode); + x, y, 16, iWidth, iHeight, iFcode); /* we work with abs. MVs, not relative to prediction, so get_range is called relative to 0,0 */ if (!(MotionFlags & PMV_HALFPEL16 )) { min_dx = EVEN(min_dx); - max_dx = EVEN(max_dx); - min_dy = EVEN(min_dy); - max_dy = EVEN(max_dy); + max_dx = EVEN(max_dx); + min_dy = EVEN(min_dy); + max_dy = EVEN(max_dy); } /* because we might use something like IF (dx>max_dx) THEN dx=max_dx; */ @@ -662,15 +931,15 @@ iFound=0; /* Step 2: Calculate Distance= |MedianMVX| + |MedianMVY| where MedianMV is the motion - vector of the median. - If PredEq=1 and MVpredicted = Previous Frame MV, set Found=2 + vector of the median. + If PredEq=1 and MVpredicted = Previous Frame MV, set Found=2 */ - if ((bPredEq) && (MVequal(pmv[0],pMB->mvs[0]) ) ) + if ((bPredEq) && (MVequal(pmv[0],prevMB->mvs[0]) ) ) iFound=2; /* Step 3: If Distance>0 or thresb<1536 or PredEq=1 Select small Diamond Search. - Otherwise select large Diamond Search. + Otherwise select large Diamond Search. */ if ( (pmv[0].x != 0) || (pmv[0].y != 0) || (threshB<1536) || (bPredEq) ) @@ -682,10 +951,10 @@ iDiamondSize*=2; /* Step 4: Calculate SAD around the Median prediction. - MinSAD=SAD - If Motion Vector equal to Previous frame motion vector - and MinSADx > max_dx) - { - currMV->x=max_dx; - } + { + currMV->x=max_dx; + } if (currMV->x < min_dx) - { - currMV->x=min_dx; - } + { + currMV->x=min_dx; + } if (currMV->y > max_dy) - { - currMV->y=max_dy; - } + { + currMV->y=max_dy; + } if (currMV->y < min_dy) - { - currMV->y=min_dy; - } + { + currMV->y=min_dy; + } iMinSAD = sad16( cur, - get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 16, currMV, iEdgedWidth), - iEdgedWidth, MV_MAX_ERROR); + get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 16, currMV, iEdgedWidth), + iEdgedWidth, MV_MAX_ERROR); iMinSAD += calc_delta_16(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode) * iQuant; - if ( (iMinSAD < 256 ) || ( (MVequal(*currMV,pMB->mvs[0])) && (iMinSAD < pMB->sad16) ) ) - { + if ( (iMinSAD < 256 ) || ( (MVequal(*currMV,prevMB->mvs[0])) && ((uint32_t)iMinSAD < prevMB->sad16) ) ) + { - if (MotionFlags & PMV_QUICKSTOP16) - goto step10b; - if (MotionFlags & PMV_EARLYSTOP16) - goto step10; - } + if (MotionFlags & PMV_QUICKSTOP16) + goto PMVfast16_Terminate_without_Refine; + if (MotionFlags & PMV_EARLYSTOP16) + goto PMVfast16_Terminate_with_Refine; + } /* -Step 5: Calculate SAD for motion vectors taken from left block, top, top-right, and Previous frame block. - Also calculate (0,0) but do not subtract offset. - Let MinSAD be the smallest SAD up to this point. - If MV is (0,0) subtract offset. ******** WHAT'S THIS 'OFFSET' ??? *********** + Step 5: Calculate SAD for motion vectors taken from left block, top, top-right, and Previous frame block. + Also calculate (0,0) but do not subtract offset. + Let MinSAD be the smallest SAD up to this point. + If MV is (0,0) subtract offset. ******** WHAT'S THIS 'OFFSET' ??? *********** */ // (0,0) is always possible @@ -741,14 +1010,14 @@ CHECK_MV16_ZERO; // previous frame MV is always possible - CHECK_MV16_CANDIDATE(pMB->mvs[0].x,pMB->mvs[0].y); + CHECK_MV16_CANDIDATE(prevMB->mvs[0].x,prevMB->mvs[0].y); // left neighbour, if allowed if (x != 0) { if (!(MotionFlags & PMV_HALFPEL16 )) { pmv[1].x = EVEN(pmv[1].x); - pmv[1].y = EVEN(pmv[1].y); + pmv[1].y = EVEN(pmv[1].y); } CHECK_MV16_CANDIDATE(pmv[1].x,pmv[1].y); } @@ -758,16 +1027,16 @@ { if (!(MotionFlags & PMV_HALFPEL16 )) { pmv[2].x = EVEN(pmv[2].x); - pmv[2].y = EVEN(pmv[2].y); + pmv[2].y = EVEN(pmv[2].y); } CHECK_MV16_CANDIDATE(pmv[2].x,pmv[2].y); // top right neighbour, if allowed - if (x != (iWcount-1)) + if ((uint32_t)x != (iWcount-1)) { if (!(MotionFlags & PMV_HALFPEL16 )) { pmv[3].x = EVEN(pmv[3].x); - pmv[3].y = EVEN(pmv[3].y); + pmv[3].y = EVEN(pmv[3].y); } CHECK_MV16_CANDIDATE(pmv[3].x,pmv[3].y); } @@ -777,32 +1046,32 @@ If Motion Vector equal to Previous frame motion vector and MinSADmvs[0]) && (iMinSAD < pMB->sad16) ) ) - { - if (MotionFlags & PMV_QUICKSTOP16) - goto step10b; - if (MotionFlags & PMV_EARLYSTOP16) - goto step10; - } + if ( (iMinSAD <= threshA) || ( MVequal(*currMV,prevMB->mvs[0]) && ((uint32_t)iMinSAD < prevMB->sad16) ) ) + { + if (MotionFlags & PMV_QUICKSTOP16) + goto PMVfast16_Terminate_without_Refine; + if (MotionFlags & PMV_EARLYSTOP16) + goto PMVfast16_Terminate_with_Refine; + } /************ (Diamond Search) **************/ /* -Step 7: Perform Diamond search, with either the small or large diamond. - If Found=2 only examine one Diamond pattern, and afterwards goto step 10 -Step 8: If small diamond, iterate small diamond search pattern until motion vector lies in the center of the diamond. - If center then goto step 10. -Step 9: If large diamond, iterate large diamond search pattern until motion vector lies in the center. - Refine by using small diamond and goto step 10. + Step 7: Perform Diamond search, with either the small or large diamond. + If Found=2 only examine one Diamond pattern, and afterwards goto step 10 + Step 8: If small diamond, iterate small diamond search pattern until motion vector lies in the center of the diamond. + If center then goto step 10. + Step 9: If large diamond, iterate large diamond search pattern until motion vector lies in the center. + Refine by using small diamond and goto step 10. */ backupMV = *currMV; /* save best prediction, actually only for EXTSEARCH */ /* default: use best prediction as starting point for one call of PMVfast_MainSearch */ - iSAD = PMVfastSearch16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, - x, y, - currMV->x, currMV->y, iMinSAD, &newMV, - pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); + iSAD = Diamond16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + x, y, + currMV->x, currMV->y, iMinSAD, &newMV, + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); if (iSAD < iMinSAD) { @@ -815,44 +1084,44 @@ /* extended: search (up to) two more times: orignal prediction and (0,0) */ if (!(MVequal(pmv[0],backupMV)) ) - { iSAD = PMVfastSearch16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, - x, y, - pmv[0].x, pmv[0].y, iMinSAD, &newMV, - pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); + { iSAD = Diamond16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + x, y, + pmv[0].x, pmv[0].y, iMinSAD, &newMV, + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); - if (iSAD < iMinSAD) - { - *currMV = newMV; - iMinSAD = iSAD; - } + if (iSAD < iMinSAD) + { + *currMV = newMV; + iMinSAD = iSAD; + } } if ( (!(MVzero(pmv[0]))) && (!(MVzero(backupMV))) ) - { iSAD = PMVfastSearch16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, - x, y, - 0, 0, iMinSAD, &newMV, - pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); + { iSAD = Diamond16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + x, y, + 0, 0, iMinSAD, &newMV, + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); - if (iSAD < iMinSAD) - { - *currMV = newMV; - iMinSAD = iSAD; - } + if (iSAD < iMinSAD) + { + *currMV = newMV; + iMinSAD = iSAD; + } } } /* - Step 10: The motion vector is chosen according to the block corresponding to MinSAD. + Step 10: The motion vector is chosen according to the block corresponding to MinSAD. */ -step10: +PMVfast16_Terminate_with_Refine: if (MotionFlags & PMV_HALFPELREFINE16) // perform final half-pel step - iMinSAD = PMVfastSearch16_Refine( pRef, pRefH, pRefV, pRefHV, cur, - x, y, - currMV, iMinSAD, - pmv, min_dx, max_dx, min_dy, max_dy, iFcode, iQuant, iEdgedWidth); + iMinSAD = Halfpel16_Refine( pRef, pRefH, pRefV, pRefHV, cur, + x, y, + currMV, iMinSAD, + pmv, min_dx, max_dx, min_dy, max_dy, iFcode, iQuant, iEdgedWidth); -step10b: +PMVfast16_Terminate_without_Refine: currPMV->x = currMV->x - pmv[0].x; currPMV->y = currMV->y - pmv[0].y; return iMinSAD; @@ -863,24 +1132,24 @@ -int32_t PMVfastSearch8_MainSearch( - const uint8_t * const pRef, - const uint8_t * const pRefH, - const uint8_t * const pRefV, - const uint8_t * const pRefHV, - const uint8_t * const cur, - const int x, const int y, - int32_t startx, int32_t starty, - int32_t iMinSAD, - VECTOR * const currMV, - const VECTOR * const pmv, - const int32_t min_dx, const int32_t max_dx, - const int32_t min_dy, const int32_t max_dy, - const int32_t iEdgedWidth, - const int32_t iDiamondSize, - const int32_t iFcode, - const int32_t iQuant, - int iFound) +int32_t Diamond8_MainSearch( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const uint8_t * const cur, + const int x, const int y, + int32_t startx, int32_t starty, + int32_t iMinSAD, + VECTOR * const currMV, + const VECTOR * const pmv, + const int32_t min_dx, const int32_t max_dx, + const int32_t min_dy, const int32_t max_dy, + const int32_t iEdgedWidth, + const int32_t iDiamondSize, + const int32_t iFcode, + const int32_t iQuant, + int iFound) { /* Do a diamond search around given starting point, return SAD of best */ @@ -913,28 +1182,28 @@ CHECK_MV8_CANDIDATE_FOUND(backupMV.x,backupMV.y+iDiamondSize,4); } else - { - currMV->x = startx; - currMV->y = starty; - } + { + currMV->x = startx; + currMV->y = starty; + } return iMinSAD; } -int32_t PMVfastSearch8_Refine( - const uint8_t * const pRef, - const uint8_t * const pRefH, - const uint8_t * const pRefV, - const uint8_t * const pRefHV, - const uint8_t * const cur, - const int x, const int y, - VECTOR * const currMV, - int32_t iMinSAD, - const VECTOR * const pmv, - const int32_t min_dx, const int32_t max_dx, - const int32_t min_dy, const int32_t max_dy, - const int32_t iFcode, - const int32_t iQuant, - const int32_t iEdgedWidth) +int32_t Halfpel8_Refine( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const uint8_t * const cur, + const int x, const int y, + VECTOR * const currMV, + int32_t iMinSAD, + const VECTOR * const pmv, + const int32_t min_dx, const int32_t max_dx, + const int32_t min_dy, const int32_t max_dy, + const int32_t iFcode, + const int32_t iQuant, + const int32_t iEdgedWidth) { /* Do a half-pel refinement (or rather a "smallest possible amount" refinement) */ @@ -963,17 +1232,17 @@ const uint8_t * const pRefHV, const IMAGE * const pCur, const int x, const int y, - const int start_x, int start_y, + const int start_x, const int start_y, const uint32_t MotionFlags, - MBParam * const pParam, - MACROBLOCK * const pMBs, + const uint32_t iQuant, + const uint32_t iFcode, + const MBParam * const pParam, + const MACROBLOCK * const pMBs, + const MACROBLOCK * const prevMBs, VECTOR * const currMV, VECTOR * const currPMV) { - const uint32_t iWcount = pParam->mb_width; - - const int32_t iFcode = pParam->fixed_code; - const int32_t iQuant = pParam->quant; + const uint32_t iWcount = pParam->mb_width; const int32_t iWidth = pParam->width; const int32_t iHeight = pParam->height; const int32_t iEdgedWidth = pParam->edged_width; @@ -992,7 +1261,8 @@ VECTOR newMV; VECTOR backupMV; - MACROBLOCK * const pMB = pMBs + (x>>1) + (y>>1) * iWcount; + const MACROBLOCK * const pMB = pMBs + (x>>1) + (y>>1) * iWcount; + const MACROBLOCK * const prevMB = prevMBs + (x>>1) + (y>>1) * iWcount; static int32_t threshA,threshB; int32_t iFound,bPredEq; @@ -1001,16 +1271,16 @@ int32_t iSubBlock = ((y&1)<<1) + (x&1); /* Get maximum range */ - get_range(&min_dx, &max_dx, &min_dy, &max_dy, - x, y, 8, iWidth, iHeight, iFcode); + get_range(&min_dx, &max_dx, &min_dy, &max_dy, + x, y, 8, iWidth, iHeight, iFcode); /* we work with abs. MVs, not relative to prediction, so range is relative to 0,0 */ if (!(MotionFlags & PMV_HALFPELDIAMOND8 )) { min_dx = EVEN(min_dx); - max_dx = EVEN(max_dx); - min_dy = EVEN(min_dy); - max_dy = EVEN(max_dy); + max_dx = EVEN(max_dx); + min_dy = EVEN(min_dy); + max_dy = EVEN(max_dy); } /* because we might use IF (dx>max_dx) THEN dx=max_dx; */ @@ -1034,15 +1304,15 @@ iFound=0; /* Step 2: Calculate Distance= |MedianMVX| + |MedianMVY| where MedianMV is the motion - vector of the median. - If PredEq=1 and MVpredicted = Previous Frame MV, set Found=2 + vector of the median. + If PredEq=1 and MVpredicted = Previous Frame MV, set Found=2 */ if ((bPredEq) && (MVequal(pmv[0],pMB->mvs[iSubBlock]) ) ) iFound=2; /* Step 3: If Distance>0 or thresb<1536 or PredEq=1 Select small Diamond Search. - Otherwise select large Diamond Search. + Otherwise select large Diamond Search. */ if ( (pmv[0].x != 0) || (pmv[0].y != 0) || (threshB<1536/4) || (bPredEq) ) @@ -1054,10 +1324,10 @@ iDiamondSize*=2; /* Step 4: Calculate SAD around the Median prediction. - MinSAD=SAD - If Motion Vector equal to Previous frame motion vector - and MinSADy=start_y; iMinSAD = sad8( cur, - get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 8, currMV, iEdgedWidth), - iEdgedWidth); + get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 8, currMV, iEdgedWidth), + iEdgedWidth); iMinSAD += calc_delta_8(currMV->x - pmv[0].x, currMV->y - pmv[0].y, (uint8_t)iFcode) * iQuant; - if ( (iMinSAD < 256/4 ) || ( (MVequal(*currMV,pMB->mvs[iSubBlock])) && (iMinSAD < pMB->sad8[iSubBlock]) ) ) - { - if (MotionFlags & PMV_QUICKSTOP8) - goto step10_8b; - if (MotionFlags & PMV_EARLYSTOP8) - goto step10_8; - } + if ( (iMinSAD < 256/4 ) || ( (MVequal(*currMV,pMB->mvs[iSubBlock])) && ((uint32_t)iMinSAD < prevMB->sad8[iSubBlock]) ) ) + { + if (MotionFlags & PMV_QUICKSTOP16) + goto PMVfast8_Terminate_without_Refine; + if (MotionFlags & PMV_EARLYSTOP16) + goto PMVfast8_Terminate_with_Refine; + } + /* -Step 5: Calculate SAD for motion vectors taken from left block, top, top-right, and Previous frame block. - Also calculate (0,0) but do not subtract offset. - Let MinSAD be the smallest SAD up to this point. - If MV is (0,0) subtract offset. ******** WHAT'S THIS 'OFFSET' ??? *********** + Step 5: Calculate SAD for motion vectors taken from left block, top, top-right, and Previous frame block. + Also calculate (0,0) but do not subtract offset. + Let MinSAD be the smallest SAD up to this point. + If MV is (0,0) subtract offset. ******** WHAT'S THIS 'OFFSET' ??? *********** */ // the prediction might be even better than mv16 @@ -1100,7 +1371,7 @@ { if (!(MotionFlags & PMV_HALFPEL8 )) { pmv[1].x = EVEN(pmv[1].x); - pmv[1].y = EVEN(pmv[1].y); + pmv[1].y = EVEN(pmv[1].y); } CHECK_MV8_CANDIDATE(pmv[1].x,pmv[1].y); } @@ -1110,17 +1381,17 @@ { if (!(MotionFlags & PMV_HALFPEL8 )) { pmv[2].x = EVEN(pmv[2].x); - pmv[2].y = EVEN(pmv[2].y); + pmv[2].y = EVEN(pmv[2].y); } CHECK_MV8_CANDIDATE(pmv[2].x,pmv[2].y); // top right neighbour, if allowed if (psad[3] != MV_MAX_ERROR) { - if (!(MotionFlags & PMV_HALFPEL8 )) - { pmv[3].x = EVEN(pmv[3].x); + if (!(MotionFlags & PMV_HALFPEL8 )) + { pmv[3].x = EVEN(pmv[3].x); pmv[3].y = EVEN(pmv[3].y); - } + } CHECK_MV8_CANDIDATE(pmv[3].x,pmv[3].y); } } @@ -1129,31 +1400,520 @@ If Motion Vector equal to Previous frame motion vector and MinSADmvs[iSubBlock]) && (iMinSAD < pMB->sad8[iSubBlock]) ) ) - { - if (MotionFlags & PMV_QUICKSTOP8) - goto step10_8b; - if (MotionFlags & PMV_EARLYSTOP8) - goto step10_8; - } + if ( (iMinSAD <= threshA) || ( MVequal(*currMV,pMB->mvs[iSubBlock]) && ((uint32_t)iMinSAD < prevMB->sad8[iSubBlock]) ) ) + { + if (MotionFlags & PMV_QUICKSTOP16) + goto PMVfast8_Terminate_without_Refine; + if (MotionFlags & PMV_EARLYSTOP16) + goto PMVfast8_Terminate_with_Refine; + } /************ (Diamond Search) **************/ /* -Step 7: Perform Diamond search, with either the small or large diamond. - If Found=2 only examine one Diamond pattern, and afterwards goto step 10 -Step 8: If small diamond, iterate small diamond search pattern until motion vector lies in the center of the diamond. - If center then goto step 10. -Step 9: If large diamond, iterate large diamond search pattern until motion vector lies in the center. - Refine by using small diamond and goto step 10. + Step 7: Perform Diamond search, with either the small or large diamond. + If Found=2 only examine one Diamond pattern, and afterwards goto step 10 + Step 8: If small diamond, iterate small diamond search pattern until motion vector lies in the center of the diamond. + If center then goto step 10. + Step 9: If large diamond, iterate large diamond search pattern until motion vector lies in the center. + Refine by using small diamond and goto step 10. */ backupMV = *currMV; /* save best prediction, actually only for EXTSEARCH */ /* default: use best prediction as starting point for one call of PMVfast_MainSearch */ - iSAD = PMVfastSearch8_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + iSAD = Diamond8_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + x, y, + currMV->x, currMV->y, iMinSAD, &newMV, + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); + + if (iSAD < iMinSAD) + { + *currMV = newMV; + iMinSAD = iSAD; + } + + if (MotionFlags & PMV_EXTSEARCH8) + { +/* extended: search (up to) two more times: orignal prediction and (0,0) */ + + if (!(MVequal(pmv[0],backupMV)) ) + { iSAD = Diamond16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + x, y, + pmv[0].x, pmv[0].y, iMinSAD, &newMV, + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); + + if (iSAD < iMinSAD) + { + *currMV = newMV; + iMinSAD = iSAD; + } + } + + if ( (!(MVzero(pmv[0]))) && (!(MVzero(backupMV))) ) + { iSAD = Diamond16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + x, y, + 0, 0, iMinSAD, &newMV, + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); + + if (iSAD < iMinSAD) + { + *currMV = newMV; + iMinSAD = iSAD; + } + } + } + +/* Step 10: The motion vector is chosen according to the block corresponding to MinSAD. + By performing an optional local half-pixel search, we can refine this result even further. +*/ + +PMVfast8_Terminate_with_Refine: + if (MotionFlags & PMV_HALFPELREFINE8) // perform final half-pel step + iMinSAD = Halfpel8_Refine( pRef, pRefH, pRefV, pRefHV, cur, + x, y, + currMV, iMinSAD, + pmv, min_dx, max_dx, min_dy, max_dy, iFcode, iQuant, iEdgedWidth); + + +PMVfast8_Terminate_without_Refine: + currPMV->x = currMV->x - pmv[0].x; + currPMV->y = currMV->y - pmv[0].y; + + return iMinSAD; +} + +int32_t EPZSSearch16( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const IMAGE * const pCur, + const int x, const int y, + const uint32_t MotionFlags, + const uint32_t iQuant, + const uint32_t iFcode, + const MBParam * const pParam, + const MACROBLOCK * const pMBs, + const MACROBLOCK * const prevMBs, + VECTOR * const currMV, + VECTOR * const currPMV) +{ + const uint32_t iWcount = pParam->mb_width; + const uint32_t iHcount = pParam->mb_height; + + const int32_t iWidth = pParam->width; + const int32_t iHeight = pParam->height; + const int32_t iEdgedWidth = pParam->edged_width; + + const uint8_t * cur = pCur->y + x*16 + y*16*iEdgedWidth; + + int32_t min_dx; + int32_t max_dx; + int32_t min_dy; + int32_t max_dy; + + VECTOR newMV; + VECTOR backupMV; + + VECTOR pmv[4]; + int32_t psad[8]; + + static MACROBLOCK * oldMBs = NULL; + const MACROBLOCK * const pMB = pMBs + x + y * iWcount; + const MACROBLOCK * const prevMB = prevMBs + x + y * iWcount; + MACROBLOCK * oldMB = NULL; + + static int32_t thresh2; + int32_t bPredEq; + int32_t iMinSAD,iSAD=9999; + + MainSearch16FuncPtr EPZSMainSearchPtr; + + if (oldMBs == NULL) + { oldMBs = (MACROBLOCK*) calloc(1,iWcount*iHcount*sizeof(MACROBLOCK)); + fprintf(stderr,"allocated %d bytes for oldMBs\n",iWcount*iHcount*sizeof(MACROBLOCK)); + } + oldMB = oldMBs + x + y * iWcount; + +/* Get maximum range */ + get_range(&min_dx, &max_dx, &min_dy, &max_dy, + x, y, 16, iWidth, iHeight, iFcode); + +/* we work with abs. MVs, not relative to prediction, so get_range is called relative to 0,0 */ + + if (!(MotionFlags & PMV_HALFPEL16 )) + { min_dx = EVEN(min_dx); + max_dx = EVEN(max_dx); + min_dy = EVEN(min_dy); + max_dy = EVEN(max_dy); + } /* because we might use something like IF (dx>max_dx) THEN dx=max_dx; */ + + bPredEq = get_pmvdata(pMBs, x, y, iWcount, 0, pmv, psad); + +/* Step 4: Calculate SAD around the Median prediction. + MinSAD=SAD + If Motion Vector equal to Previous frame motion vector + and MinSADx = EVEN(currMV->x); + currMV->y = EVEN(currMV->y); + } + + if (currMV->x > max_dx) + currMV->x=max_dx; + if (currMV->x < min_dx) + currMV->x=min_dx; + if (currMV->y > max_dy) + currMV->y=max_dy; + if (currMV->y < min_dy) + currMV->y=min_dy; + +/***************** This is predictor SET A: only median prediction ******************/ + + iMinSAD = sad16( cur, + get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 16, currMV, iEdgedWidth), + iEdgedWidth, MV_MAX_ERROR); + iMinSAD += calc_delta_16(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode) * iQuant; + +// thresh1 is fixed to 256 + if ( (iMinSAD < 256 ) || ( (MVequal(*currMV,pMB->mvs[0])) && ((uint32_t)iMinSAD < prevMB->sad16) ) ) + { + if (MotionFlags & PMV_QUICKSTOP16) + goto EPZS16_Terminate_without_Refine; + if (MotionFlags & PMV_EARLYSTOP16) + goto EPZS16_Terminate_with_Refine; + } + +/************** This is predictor SET B: (0,0), prev.frame MV, neighbours **************/ + +// previous frame MV + CHECK_MV16_CANDIDATE(pMB->mvs[0].x,pMB->mvs[0].y); + +// set threshhold based on Min of Prediction and SAD of collocated block +// CHECK_MV16 always uses iSAD for the SAD of last vector to check, so now iSAD is what we want + + if ((x==0) && (y==0) ) + { + thresh2 = 512; + } + else + { +/* T_k = 1.2 * MIN(SAD_top,SAD_left,SAD_topleft,SAD_coll) +128; [Tourapis, 2002] */ + + thresh2 = MIN(psad[0],iSAD)*6/5 + 128; + } + +// MV=(0,0) is often a good choice + + CHECK_MV16_ZERO; + + +// left neighbour, if allowed + if (x != 0) + { + if (!(MotionFlags & PMV_HALFPEL16 )) + { pmv[1].x = EVEN(pmv[1].x); + pmv[1].y = EVEN(pmv[1].y); + } + CHECK_MV16_CANDIDATE(pmv[1].x,pmv[1].y); + } + +// top neighbour, if allowed + if (y != 0) + { + if (!(MotionFlags & PMV_HALFPEL16 )) + { pmv[2].x = EVEN(pmv[2].x); + pmv[2].y = EVEN(pmv[2].y); + } + CHECK_MV16_CANDIDATE(pmv[2].x,pmv[2].y); + +// top right neighbour, if allowed + if ((uint32_t)x != (iWcount-1)) + { + if (!(MotionFlags & PMV_HALFPEL16 )) + { pmv[3].x = EVEN(pmv[3].x); + pmv[3].y = EVEN(pmv[3].y); + } + CHECK_MV16_CANDIDATE(pmv[3].x,pmv[3].y); + } + } + +/* Terminate if MinSAD <= T_2 + Terminate if MV[t] == MV[t-1] and MinSAD[t] <= MinSAD[t-1] +*/ + + if ( (iMinSAD <= thresh2) + || ( MVequal(*currMV,pMB->mvs[0]) && ((uint32_t)iMinSAD <= prevMB->sad16) ) ) + { + if (MotionFlags & PMV_QUICKSTOP16) + goto EPZS16_Terminate_without_Refine; + if (MotionFlags & PMV_EARLYSTOP16) + goto EPZS16_Terminate_with_Refine; + } + +/***** predictor SET C: acceleration MV (new!), neighbours in prev. frame(new!) ****/ + + backupMV = pMB->mvs[0]; // last MV + backupMV.x += (pMB->mvs[0].x - oldMB->mvs[0].x ); // acceleration X + backupMV.y += (pMB->mvs[0].y - oldMB->mvs[0].y ); // acceleration Y + + CHECK_MV16_CANDIDATE(backupMV.x,backupMV.y); + +// left neighbour + if (x != 0) + CHECK_MV16_CANDIDATE((oldMB-1)->mvs[0].x,oldMB->mvs[0].y); + +// top neighbour + if (y != 0) + CHECK_MV16_CANDIDATE((oldMB-iWcount)->mvs[0].x,oldMB->mvs[0].y); + +// right neighbour, if allowed (this value is not written yet, so take it from pMB->mvs + + if ((uint32_t)x != iWcount-1) + CHECK_MV16_CANDIDATE((pMB+1)->mvs[0].x,oldMB->mvs[0].y); + +// bottom neighbour, dito + if ((uint32_t)y != iHcount-1) + CHECK_MV16_CANDIDATE((pMB+iWcount)->mvs[0].x,oldMB->mvs[0].y); + +/* Terminate if MinSAD <= T_3 (here T_3 = T_2) */ + if (iMinSAD <= thresh2) + { + if (MotionFlags & PMV_QUICKSTOP16) + goto EPZS16_Terminate_without_Refine; + if (MotionFlags & PMV_EARLYSTOP16) + goto EPZS16_Terminate_with_Refine; + } + +/************ (if Diamond Search) **************/ + + backupMV = *currMV; /* save best prediction, actually only for EXTSEARCH */ + +/* default: use best prediction as starting point for one call of PMVfast_MainSearch */ + + if (MotionFlags & PMV_USESQUARES16) + EPZSMainSearchPtr = Square16_MainSearch; + else + EPZSMainSearchPtr = Diamond16_MainSearch; + + iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, + x, y, + currMV->x, currMV->y, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, + 2, iFcode, iQuant, 0); + + if (iSAD < iMinSAD) + { + *currMV = newMV; + iMinSAD = iSAD; + } + + + if (MotionFlags & PMV_EXTSEARCH16) + { +/* extended mode: search (up to) two more times: orignal prediction and (0,0) */ + + if (!(MVequal(pmv[0],backupMV)) ) + { + iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, + x, y, + pmv[0].x, pmv[0].y, iMinSAD, &newMV, + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, 2, iFcode, iQuant, 0); + } + + if (iSAD < iMinSAD) + { + *currMV = newMV; + iMinSAD = iSAD; + } + + if ( (!(MVzero(pmv[0]))) && (!(MVzero(backupMV))) ) + { + iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, + x, y, + 0, 0, iMinSAD, &newMV, + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, /*iDiamondSize*/ 2, iFcode, iQuant, 0); + + if (iSAD < iMinSAD) + { + *currMV = newMV; + iMinSAD = iSAD; + } + } + } + +/*************** Choose best MV found **************/ + +EPZS16_Terminate_with_Refine: + if (MotionFlags & PMV_HALFPELREFINE16) // perform final half-pel step + iMinSAD = Halfpel16_Refine( pRef, pRefH, pRefV, pRefHV, cur, + x, y, + currMV, iMinSAD, + pmv, min_dx, max_dx, min_dy, max_dy, iFcode, iQuant, iEdgedWidth); + +EPZS16_Terminate_without_Refine: + + *oldMB = *pMB; + + currPMV->x = currMV->x - pmv[0].x; + currPMV->y = currMV->y - pmv[0].y; + return iMinSAD; +} + + +int32_t EPZSSearch8( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const IMAGE * const pCur, + const int x, const int y, + const int start_x, const int start_y, + const uint32_t MotionFlags, + const uint32_t iQuant, + const uint32_t iFcode, + const MBParam * const pParam, + const MACROBLOCK * const pMBs, + const MACROBLOCK * const prevMBs, + VECTOR * const currMV, + VECTOR * const currPMV) +{ + const uint32_t iWcount = pParam->mb_width; + const int32_t iWidth = pParam->width; + const int32_t iHeight = pParam->height; + const int32_t iEdgedWidth = pParam->edged_width; + + const uint8_t * cur = pCur->y + x*8 + y*8*iEdgedWidth; + + int32_t iDiamondSize=1; + + int32_t min_dx; + int32_t max_dx; + int32_t min_dy; + int32_t max_dy; + + VECTOR newMV; + VECTOR backupMV; + + VECTOR pmv[4]; + int32_t psad[8]; + + const int32_t iSubBlock = ((y&1)<<1) + (x&1); + + const MACROBLOCK * const pMB = pMBs + (x>>1) + (y>>1) * iWcount; + const MACROBLOCK * const prevMB = prevMBs + (x>>1) + (y>>1) * iWcount; + + int32_t bPredEq; + int32_t iMinSAD,iSAD=9999; + + MainSearch8FuncPtr EPZSMainSearchPtr; + +/* Get maximum range */ + get_range(&min_dx, &max_dx, &min_dy, &max_dy, + x, y, 8, iWidth, iHeight, iFcode); + +/* we work with abs. MVs, not relative to prediction, so get_range is called relative to 0,0 */ + + if (!(MotionFlags & PMV_HALFPEL8 )) + { min_dx = EVEN(min_dx); + max_dx = EVEN(max_dx); + min_dy = EVEN(min_dy); + max_dy = EVEN(max_dy); + } /* because we might use something like IF (dx>max_dx) THEN dx=max_dx; */ + + bPredEq = get_pmvdata(pMBs, x>>1, y>>1, iWcount, iSubBlock, pmv, psad); + + +/* Step 4: Calculate SAD around the Median prediction. + MinSAD=SAD + If Motion Vector equal to Previous frame motion vector + and MinSADx = EVEN(currMV->x); + currMV->y = EVEN(currMV->y); + } + + if (currMV->x > max_dx) + currMV->x=max_dx; + if (currMV->x < min_dx) + currMV->x=min_dx; + if (currMV->y > max_dy) + currMV->y=max_dy; + if (currMV->y < min_dy) + currMV->y=min_dy; + +/***************** This is predictor SET A: only median prediction ******************/ + + + iMinSAD = sad8( cur, + get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 8, currMV, iEdgedWidth), + iEdgedWidth); + iMinSAD += calc_delta_8(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode) * iQuant; + + +// thresh1 is fixed to 256 + if (iMinSAD < 256/4 ) + { + if (MotionFlags & PMV_QUICKSTOP8) + goto EPZS8_Terminate_without_Refine; + if (MotionFlags & PMV_EARLYSTOP8) + goto EPZS8_Terminate_with_Refine; + } + +/************** This is predictor SET B: (0,0), prev.frame MV, neighbours **************/ + +// previous frame MV + CHECK_MV8_CANDIDATE(pMB->mvs[0].x,pMB->mvs[0].y); + +// MV=(0,0) is often a good choice + + CHECK_MV8_ZERO; + +/* Terminate if MinSAD <= T_2 + Terminate if MV[t] == MV[t-1] and MinSAD[t] <= MinSAD[t-1] +*/ + + if (iMinSAD < 512/4) /* T_2 == 512/4 hardcoded */ + { + if (MotionFlags & PMV_QUICKSTOP8) + goto EPZS8_Terminate_without_Refine; + if (MotionFlags & PMV_EARLYSTOP8) + goto EPZS8_Terminate_with_Refine; + } + +/************ (if Diamond Search) **************/ + + backupMV = *currMV; /* save best prediction, actually only for EXTSEARCH */ + + if (!(MotionFlags & PMV_HALFPELDIAMOND8)) + iDiamondSize *= 2; + +/* default: use best prediction as starting point for one call of PMVfast_MainSearch */ + +// if (MotionFlags & PMV_USESQUARES8) +// EPZSMainSearchPtr = Square8_MainSearch; +// else + EPZSMainSearchPtr = Diamond8_MainSearch; + + iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, currMV->x, currMV->y, iMinSAD, &newMV, - pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, + iDiamondSize, iFcode, iQuant, 00); + if (iSAD < iMinSAD) { @@ -1163,13 +1923,14 @@ if (MotionFlags & PMV_EXTSEARCH8) { -/* extended: search (up to) two more times: orignal prediction and (0,0) */ +/* extended mode: search (up to) two more times: orignal prediction and (0,0) */ if (!(MVequal(pmv[0],backupMV)) ) - { iSAD = PMVfastSearch16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + { + iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, pmv[0].x, pmv[0].y, iMinSAD, &newMV, - pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, 0); if (iSAD < iMinSAD) { @@ -1179,10 +1940,11 @@ } if ( (!(MVzero(pmv[0]))) && (!(MVzero(backupMV))) ) - { iSAD = PMVfastSearch16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + { + iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, 0, 0, iMinSAD, &newMV, - pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, 0); if (iSAD < iMinSAD) { @@ -1192,21 +1954,143 @@ } } -/* Step 10: The motion vector is chosen according to the block corresponding to MinSAD. - By performing an optional local half-pixel search, we can refine this result even further. -*/ - -step10_8: +/*************** Choose best MV found **************/ + +EPZS8_Terminate_with_Refine: if (MotionFlags & PMV_HALFPELREFINE8) // perform final half-pel step - iMinSAD = PMVfastSearch8_Refine( pRef, pRefH, pRefV, pRefHV, cur, + iMinSAD = Halfpel8_Refine( pRef, pRefH, pRefV, pRefHV, cur, x, y, currMV, iMinSAD, pmv, min_dx, max_dx, min_dy, max_dy, iFcode, iQuant, iEdgedWidth); -step10_8b: +EPZS8_Terminate_without_Refine: currPMV->x = currMV->x - pmv[0].x; currPMV->y = currMV->y - pmv[0].y; - return iMinSAD; } + + + + + +/* *********************************************************** + bvop motion estimation +// TODO: need to incorporate prediction here (eg. sad += calc_delta_16) +***************************************************************/ + +/* +void MotionEstimationBVOP( + MBParam * const pParam, + FRAMEINFO * const frame, + + // forward (past) reference + const MACROBLOCK * const f_mbs, + const IMAGE * const f_ref, + const IMAGE * const f_refH, + const IMAGE * const f_refV, + const IMAGE * const f_refHV, + // backward (future) reference + const MACROBLOCK * const b_mbs, + const IMAGE * const b_ref, + const IMAGE * const b_refH, + const IMAGE * const b_refV, + const IMAGE * const b_refHV) +{ + const uint32_t mb_width = pParam->mb_width; + const uint32_t mb_height = pParam->mb_height; + const int32_t edged_width = pParam->edged_width; + + int32_t i,j; + + int32_t f_sad16; + int32_t b_sad16; + int32_t i_sad16; + int32_t d_sad16; + int32_t best_sad; + + VECTOR pmv_dontcare; + + // note: i==horizontal, j==vertical + for (j = 0; j < mb_height; j++) + { + for (i = 0; i < mb_width; i++) + { + MACROBLOCK *mb = &frame->mbs[i + j*mb_width]; + const MACROBLOCK *f_mb = &f_mbs[i + j*mb_width]; + const MACROBLOCK *b_mb = &b_mbs[i + j*mb_width]; + + if (b_mb->mode == MODE_INTER + && b_mb->cbp == 0 + && b_mb->mvs[0].x == 0 + && b_mb->mvs[0].y == 0) + { + mb->mode = MB_IGNORE; + mb->mvs[0].x = 0; + mb->mvs[0].y = 0; + mb->b_mvs[0].x = 0; + mb->b_mvs[0].y = 0; + continue; + } + + + // forward search + f_sad16 = SEARCH16(f_ref->y, f_refH->y, f_refV->y, f_refHV->y, + &frame->image, + i, j, + frame->motion_flags, frame->quant, frame->fcode, + pParam, + f_mbs, + &mb->mvs[0], &pmv_dontcare); // ignore pmv + + // backward search + b_sad16 = SEARCH16(b_ref->y, b_refH->y, b_refV->y, b_refHV->y, + &frame->image, + i, j, + frame->motion_flags, frame->quant, frame->bcode, + pParam, + b_mbs, + &mb->b_mvs[0], &pmv_dontcare); // ignore pmv + + // interpolate search (simple, but effective) + i_sad16 = sad16bi_c( + frame->image.y + i*16 + j*16*edged_width, + get_ref(f_ref->y, f_refH->y, f_refV->y, f_refHV->y, + i, j, 16, mb->mvs[0].x, mb->mvs[0].y, edged_width), + get_ref(b_ref->y, b_refH->y, b_refV->y, b_refHV->y, + i, j, 16, mb->b_mvs[0].x, mb->b_mvs[0].x, edged_width), + edged_width); + + // TODO: direct search + // predictor + range of [-32,32] + d_sad16 = 65535; + + + if (f_sad16 < b_sad16) + { + best_sad = f_sad16; + mb->mode = MB_FORWARD; + } + else + { + best_sad = b_sad16; + mb->mode = MB_BACKWARD; + } + + if (i_sad16 < best_sad) + { + best_sad = i_sad16; + mb->mode = MB_INTERPOLATE; + } + + if (d_sad16 < best_sad) + { + best_sad = d_sad16; + mb->mode = MB_DIRECT; + } + + } + } +} + +*/