--- trunk/xvidcore/src/motion/motion_est.c 2002/04/25 06:55:00 136 +++ trunk/xvidcore/src/motion/motion_est.c 2002/05/27 18:07:38 184 @@ -2,6 +2,7 @@ * * Modifications: * + * 01.05.2002 updated MotionEstimationBVOP * 25.04.2002 partial prevMB conversion * 22.04.2002 remove some compile warning by chenm001 * 14.04.2002 added MotionEstimationBVOP() @@ -53,12 +54,17 @@ #define MV16_THRESHOLD 192 #define MV8_THRESHOLD 56 +#define NEIGH_MOVE_THRESH 0 +// how much a block's MV must differ from his neighbour +// to be search for INTER4V. The more, the faster... + /* sad16(0,0) bias; mpeg4 spec suggests nb/2+1 */ /* nb = vop pixels * 2^(bpp-8) */ #define MV16_00_BIAS (128+1) +#define MV8_00_BIAS (0) /* INTER bias for INTER/INTRA decision; mpeg4 spec suggests 2*nb */ -#define INTER_BIAS 512 +#define MV16_INTER_BIAS 512 /* Parameters which control inter/inter4v decision */ #define IMV16X16 5 @@ -67,10 +73,11 @@ #define NEIGH_TEND_16X16 2 #define NEIGH_TEND_8X8 2 - // fast ((A)/2)*2 #define EVEN(A) (((A)<0?(A)+1:(A)) & ~1) +#define MVzero(A) ( ((A).x)==(0) && ((A).y)==(0) ) +#define MVequal(A,B) ( ((A).x)==((B).x) && ((A).y)==((B).y) ) int32_t PMVfastSearch16( const uint8_t * const pRef, @@ -183,6 +190,19 @@ typedef MainSearch8Func* MainSearch8FuncPtr; +static int32_t lambda_vec16[32] = /* rounded values for lambda param for weight of motion bits as in modified H.26L */ + { 0 ,(int)(1.00235+0.5), (int)(1.15582+0.5), (int)(1.31976+0.5), (int)(1.49591+0.5), (int)(1.68601+0.5), + (int)(1.89187+0.5), (int)(2.11542+0.5), (int)(2.35878+0.5), (int)(2.62429+0.5), (int)(2.91455+0.5), + (int)(3.23253+0.5), (int)(3.58158+0.5), (int)(3.96555+0.5), (int)(4.38887+0.5), (int)(4.85673+0.5), + (int)(5.37519+0.5), (int)(5.95144+0.5), (int)(6.59408+0.5), (int)(7.31349+0.5), (int)(8.12242+0.5), + (int)(9.03669+0.5), (int)(10.0763+0.5), (int)(11.2669+0.5), (int)(12.6426+0.5), (int)(14.2493+0.5), + (int)(16.1512+0.5), (int)(18.442+0.5), (int)(21.2656+0.5), (int)(24.8580+0.5), (int)(29.6436+0.5), + 
(int)(36.4949+0.5) }; + +static int32_t *lambda_vec8 = lambda_vec16; /* same table for INTER and INTER4V for now*/ + + + // mv.length table static const uint32_t mvtab[33] = { 1, 2, 3, 4, 6, 7, 7, 7, @@ -218,15 +238,15 @@ } -static __inline uint32_t calc_delta_16(const int32_t dx, const int32_t dy, const uint32_t iFcode) +static __inline uint32_t calc_delta_16(const int32_t dx, const int32_t dy, const uint32_t iFcode, const uint32_t iQuant) { - return NEIGH_TEND_16X16 * (mv_bits(dx, iFcode) + mv_bits(dy, iFcode)); + return NEIGH_TEND_16X16 * lambda_vec16[iQuant] * (mv_bits(dx, iFcode) + mv_bits(dy, iFcode)); } -static __inline uint32_t calc_delta_8(const int32_t dx, const int32_t dy, const uint32_t iFcode) +static __inline uint32_t calc_delta_8(const int32_t dx, const int32_t dy, const uint32_t iFcode, const uint32_t iQuant) { - return NEIGH_TEND_8X8 * (mv_bits(dx, iFcode) + mv_bits(dy, iFcode)); + return NEIGH_TEND_8X8 * lambda_vec8[iQuant] * (mv_bits(dx, iFcode) + mv_bits(dy, iFcode)); } @@ -251,195 +271,119 @@ const IMAGE * const pRefH, const IMAGE * const pRefV, const IMAGE * const pRefHV, - const uint32_t iLimit) - + const uint32_t iLimit) { const uint32_t iWcount = pParam->mb_width; const uint32_t iHcount = pParam->mb_height; - MACROBLOCK * pMBs = current->mbs; - IMAGE * pCurrent = &current->image; - - MACROBLOCK * prevMBs = reference->mbs; // previous frame - IMAGE * pRef = &reference->image; - - - uint32_t i, j, iIntra = 0; - - VECTOR mv16; - VECTOR pmv16; - - int32_t sad8 = 0; - int32_t sad16; - int32_t deviation; + MACROBLOCK * const pMBs = current->mbs; + MACROBLOCK * const prevMBs = reference->mbs; + const IMAGE * const pCurrent = &current->image; + const IMAGE * const pRef = &reference->image; + + const VECTOR zeroMV = {0,0}; + + int32_t x, y; + int32_t iIntra = 0; + VECTOR pmv; if (sadInit) - (*sadInit)(); - - - /* eventhough we have a seperate prevMBs, - pmvfast/epsz does something "funny" with the previous frames data */ - - for (i = 0; i < iHcount; i++) - for 
(j = 0; j < iWcount; j++) - { - pMBs[j + i * iWcount].mvs[0] = prevMBs[j + i * iWcount].mvs[0]; - pMBs[j + i * iWcount].mvs[1] = prevMBs[j + i * iWcount].mvs[1]; - pMBs[j + i * iWcount].mvs[2] = prevMBs[j + i * iWcount].mvs[2]; - pMBs[j + i * iWcount].mvs[3] = prevMBs[j + i * iWcount].mvs[3]; - } - - /*dprintf("*** BEFORE ***"); - for (i = 0; i < iHcount; i++) - for (j = 0; j < iWcount; j++) - { - dprintf(" [%i,%i] mode=%i dquant=%i mvs=(%i %i %i %i) sad8=(%i %i %i %i) sad16=(%i)", j,i, - pMBs[j + i * iWcount].mode, - pMBs[j + i * iWcount].dquant, - pMBs[j + i * iWcount].mvs[0], - pMBs[j + i * iWcount].mvs[1], - pMBs[j + i * iWcount].mvs[2], - pMBs[j + i * iWcount].mvs[3], - prevMBs[j + i * iWcount].sad8[0], - prevMBs[j + i * iWcount].sad8[1], - prevMBs[j + i * iWcount].sad8[2], - prevMBs[j + i * iWcount].sad8[3], - prevMBs[j + i * iWcount].sad16); - } - */ + (*sadInit)(); - // note: i==horizontal, j==vertical - for (i = 0; i < iHcount; i++) - for (j = 0; j < iWcount; j++) + for (y = 0; y < iHcount; y++) + for (x = 0; x < iWcount; x++) { - MACROBLOCK *pMB = &pMBs[j + i * iWcount]; - MACROBLOCK *prevMB = &prevMBs[j + i * iWcount]; + MACROBLOCK* const pMB = &pMBs[x + y * iWcount]; - sad16 = SEARCH16(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, - j, i, current->motion_flags, current->quant, current->fcode, - pParam, pMBs, prevMBs, &mv16, &pmv16); - pMB->sad16=sad16; + pMB->sad16 = SEARCH16(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, + x, y, current->motion_flags, current->quant, current->fcode, + pParam, pMBs, prevMBs, &pMB->mv16, &pMB->pmvs[0]); - - /* decide: MODE_INTER or MODE_INTRA - if (dev_intra < sad_inter - 2 * nb) use_intra - */ - - deviation = dev16(pCurrent->y + j*16 + i*16*pParam->edged_width, pParam->edged_width); - - if (deviation < (sad16 - INTER_BIAS)) + if (0 < (pMB->sad16 - MV16_INTER_BIAS)) { - pMB->mode = MODE_INTRA; - pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = 0; - pMB->mvs[0].y = pMB->mvs[1].y = pMB->mvs[2].y = 
pMB->mvs[3].y = 0; - - pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = 0; - - iIntra++; - if(iIntra >= iLimit) - return 1; - - continue; - } + int32_t deviation; + deviation = dev16(pCurrent->y + x*16 + y*16*pParam->edged_width, pParam->edged_width); - if (current->global_flags & XVID_INTER4V) - { - pMB->sad8[0] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, - 2 * j, 2 * i, mv16.x, mv16.y, - current->motion_flags, current->quant, current->fcode, - pParam, pMBs, prevMBs, &pMB->mvs[0], &pMB->pmvs[0]); - - pMB->sad8[1] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, - 2 * j + 1, 2 * i, mv16.x, mv16.y, - current->motion_flags, current->quant, current->fcode, - pParam, pMBs, prevMBs, &pMB->mvs[1], &pMB->pmvs[1]); - - pMB->sad8[2] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, - 2 * j, 2 * i + 1, mv16.x, mv16.y, - current->motion_flags, current->quant, current->fcode, - pParam, pMBs, prevMBs, &pMB->mvs[2], &pMB->pmvs[2]); + if (deviation < (pMB->sad16 - MV16_INTER_BIAS)) + { + pMB->mode = MODE_INTRA; + pMB->mv16 = pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = zeroMV; + pMB->sad16 = pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = 0; + + iIntra++; + if (iIntra >= iLimit) + return 1; + + continue; + } + } - pMB->sad8[3] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, - 2 * j + 1, 2 * i + 1, mv16.x, mv16.y, - current->motion_flags, current->quant, current->fcode, - pParam, pMBs, prevMBs, &pMB->mvs[3], &pMB->pmvs[3]); - - sad8 = pMB->sad8[0] + pMB->sad8[1] + pMB->sad8[2] + pMB->sad8[3]; - } - - - /* decide: MODE_INTER or MODE_INTER4V - mpeg4: if (sad8 < sad16 - nb/2+1) use_inter4v - */ - - if (!(current->global_flags & XVID_LUMIMASKING) || pMB->dquant == NO_CHANGE) - { - if (((current->global_flags & XVID_INTER4V)==0) || - (sad16 < (sad8 + (int32_t)(IMV16X16 * current->quant)))) - { + pmv = pMB->pmvs[0]; + if (current->global_flags & XVID_INTER4V) + if ( (!(current->global_flags & 
XVID_LUMIMASKING) || pMB->dquant == NO_CHANGE) ) + { + int32_t sad8 = IMV16X16 * current->quant; if (sad8 < pMB->sad16) + + sad8 += pMB->sad8[0] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, + 2*x, 2*y, pMB->mv16.x, pMB->mv16.y, + current->motion_flags, current->quant, current->fcode, + pParam, pMBs, prevMBs, &pMB->mvs[0], &pMB->pmvs[0]); + + if (sad8 < pMB->sad16) + sad8 += pMB->sad8[1] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, + 2*x+1, 2*y, pMB->mv16.x, pMB->mv16.y, + current->motion_flags, current->quant, current->fcode, + pParam, pMBs, prevMBs, &pMB->mvs[1], &pMB->pmvs[1]); + + if (sad8 < pMB->sad16) + sad8 += pMB->sad8[2] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, + 2*x, 2*y+1, pMB->mv16.x, pMB->mv16.y, + current->motion_flags, current->quant, current->fcode, + pParam, pMBs, prevMBs, &pMB->mvs[2], &pMB->pmvs[2]); + + if (sad8 < pMB->sad16) + sad8 += pMB->sad8[3] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, + 2*x+1, 2*y+1, pMB->mv16.x, pMB->mv16.y, + current->motion_flags, current->quant, current->fcode, + pParam, pMBs, prevMBs, &pMB->mvs[3], &pMB->pmvs[3]); + + /* decide: MODE_INTER or MODE_INTER4V + mpeg4: if (sad8 < pMB->sad16 - nb/2+1) use_inter4v + */ + + if (sad8 < pMB->sad16) + { + pMB->mode = MODE_INTER4V; + pMB->sad8[0] *= 4; + pMB->sad8[1] *= 4; + pMB->sad8[2] *= 4; + pMB->sad8[3] *= 4; + continue; + } - sad8 = sad16; - pMB->mode = MODE_INTER; - pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = mv16.x; - pMB->mvs[0].y = pMB->mvs[1].y = pMB->mvs[2].y = pMB->mvs[3].y = mv16.y; - pMB->pmvs[0].x = pmv16.x; - pMB->pmvs[0].y = pmv16.y; - } - else - pMB->mode = MODE_INTER4V; - } - else - { - sad8 = sad16; + } + pMB->mode = MODE_INTER; - pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = mv16.x; - pMB->mvs[0].y = pMB->mvs[1].y = pMB->mvs[2].y = pMB->mvs[3].y = mv16.y; - pMB->pmvs[0].x = pmv16.x; - pMB->pmvs[0].y = pmv16.y; - } - } + pMB->pmvs[0] = pmv; /* pMB->pmvs[1] = 
pMB->pmvs[2] = pMB->pmvs[3] are not needed for INTER */ + pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = pMB->mv16; + pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = pMB->sad16; -/* dprintf("*** AFTER ***", pMBs[0].b_mvs[0].x); - for (i = 0; i < iHcount; i++) - for (j = 0; j < iWcount; j++) - { - dprintf(" [%i,%i] mode=%i dquant=%i mvs=(%i %i %i %i) sad8=(%i %i %i %i) sad16=(%i)", j,i, - pMBs[j + i * iWcount].mode, - pMBs[j + i * iWcount].dquant, - pMBs[j + i * iWcount].mvs[0], - pMBs[j + i * iWcount].mvs[1], - pMBs[j + i * iWcount].mvs[2], - pMBs[j + i * iWcount].mvs[3], - pMBs[j + i * iWcount].sad8[0], - pMBs[j + i * iWcount].sad8[1], - pMBs[j + i * iWcount].sad8[2], - pMBs[j + i * iWcount].sad8[3], - pMBs[j + i * iWcount].sad16); } - */ - - return 0; + return 0; } -#define MVzero(A) ( ((A).x)==(0) && ((A).y)==(0) ) - -#define MVequal(A,B) ( ((A).x)==((B).x) && ((A).y)==((B).y) ) - - #define CHECK_MV16_ZERO {\ if ( (0 <= max_dx) && (0 >= min_dx) \ && (0 <= max_dy) && (0 >= min_dy) ) \ { \ iSAD = sad16( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 16, 0, 0 , iEdgedWidth), iEdgedWidth, MV_MAX_ERROR); \ - iSAD += calc_delta_16(-pmv[0].x, -pmv[0].y, (uint8_t)iFcode) * iQuant;\ - if (iSAD <= iQuant * 96) \ - iSAD -= MV16_00_BIAS; \ + iSAD += calc_delta_16(-pmv[0].x, -pmv[0].y, (uint8_t)iFcode, iQuant);\ if (iSAD < iMinSAD) \ { iMinSAD=iSAD; currMV->x=0; currMV->y=0; } } \ } #define NOCHECK_MV16_CANDIDATE(X,Y) { \ iSAD = sad16( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 16, X, Y, iEdgedWidth),iEdgedWidth, iMinSAD); \ - iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode) * iQuant;\ + iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode, iQuant);\ if (iSAD < iMinSAD) \ { iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); } \ } @@ -449,7 +393,7 @@ && ((Y) <= max_dy) && ((Y) >= min_dy) ) \ { \ iSAD = sad16( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 16, X, Y, iEdgedWidth),iEdgedWidth, iMinSAD); \ - iSAD += 
calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode) * iQuant;\ + iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode, iQuant);\ if (iSAD < iMinSAD) \ { iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); } } \ } @@ -459,7 +403,7 @@ && ((Y) <= max_dy) && ((Y) >= min_dy) ) \ { \ iSAD = sad16( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 16, X, Y, iEdgedWidth),iEdgedWidth, iMinSAD); \ - iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode) * iQuant;\ + iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode, iQuant);\ if (iSAD < iMinSAD) \ { iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); iDirection=(D); } } \ } @@ -469,7 +413,7 @@ && ((Y) <= max_dy) && ((Y) >= min_dy) ) \ { \ iSAD = sad16( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 16, X, Y, iEdgedWidth),iEdgedWidth, iMinSAD); \ - iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode) * iQuant;\ + iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode, iQuant);\ if (iSAD < iMinSAD) \ { iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); iDirection=(D); iFound=0; } } \ } @@ -477,7 +421,7 @@ #define CHECK_MV8_ZERO {\ iSAD = sad8( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 8, 0, 0 , iEdgedWidth), iEdgedWidth); \ - iSAD += calc_delta_8(-pmv[0].x, -pmv[0].y, (uint8_t)iFcode) * iQuant;\ + iSAD += calc_delta_8(-pmv[0].x, -pmv[0].y, (uint8_t)iFcode, iQuant);\ if (iSAD < iMinSAD) \ { iMinSAD=iSAD; currMV->x=0; currMV->y=0; } \ } @@ -485,7 +429,7 @@ #define NOCHECK_MV8_CANDIDATE(X,Y) \ { \ iSAD = sad8( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 8, (X), (Y), iEdgedWidth),iEdgedWidth); \ - iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode) * iQuant;\ + iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode, iQuant);\ if (iSAD < iMinSAD) \ { iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); } \ } @@ -495,7 +439,7 @@ && ((Y) <= max_dy) && ((Y) >= min_dy) ) \ { \ iSAD = sad8( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 8, (X), 
(Y), iEdgedWidth),iEdgedWidth); \ - iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode) * iQuant;\ + iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode, iQuant);\ if (iSAD < iMinSAD) \ { iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); } } \ } @@ -505,7 +449,7 @@ && ((Y) <= max_dy) && ((Y) >= min_dy) ) \ { \ iSAD = sad8( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 8, (X), (Y), iEdgedWidth),iEdgedWidth); \ - iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode) * iQuant;\ + iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode, iQuant);\ if (iSAD < iMinSAD) \ { iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); iDirection=(D); } } \ } @@ -515,7 +459,7 @@ && ((Y) <= max_dy) && ((Y) >= min_dy) ) \ { \ iSAD = sad8( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 8, (X), (Y), iEdgedWidth),iEdgedWidth); \ - iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode) * iQuant;\ + iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode, iQuant);\ if (iSAD < iMinSAD) \ { iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); iDirection=(D); iFound=0; } } \ } @@ -782,6 +726,269 @@ return iMinSAD; } +int32_t AdvDiamond16_MainSearch( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const uint8_t * const cur, + const int x, const int y, + int32_t startx, int32_t starty, + int32_t iMinSAD, + VECTOR * const currMV, + const VECTOR * const pmv, + const int32_t min_dx, const int32_t max_dx, + const int32_t min_dy, const int32_t max_dy, + const int32_t iEdgedWidth, + const int32_t iDiamondSize, + const int32_t iFcode, + const int32_t iQuant, + int iDirection) +{ + + int32_t iSAD; + +/* directions: 1 - left (x-1); 2 - right (x+1), 4 - up (y-1); 8 - down (y+1) */ + + if (iDirection) + { + CHECK_MV16_CANDIDATE(startx-iDiamondSize, starty); + CHECK_MV16_CANDIDATE(startx+iDiamondSize, starty); + CHECK_MV16_CANDIDATE(startx, starty-iDiamondSize); + 
CHECK_MV16_CANDIDATE(startx, starty+iDiamondSize); + } + else + { + int bDirection = 1+2+4+8; + do + { + iDirection = 0; + if (bDirection&1) //we only want to check left if we came from the right (our last motion was to the left, up-left or down-left) + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize,starty,1); + + if (bDirection&2) + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize,starty,2); + + if (bDirection&4) + CHECK_MV16_CANDIDATE_DIR(startx,starty-iDiamondSize,4); + + if (bDirection&8) + CHECK_MV16_CANDIDATE_DIR(startx,starty+iDiamondSize,8); + + /* now we're doing diagonal checks near our candidate */ + + if (iDirection) //checking if anything found + { + bDirection = iDirection; + iDirection = 0; + startx=currMV->x; starty=currMV->y; + if (bDirection & 3) //our candidate is left or right + { + CHECK_MV16_CANDIDATE_DIR(startx,starty+iDiamondSize, 8); + CHECK_MV16_CANDIDATE_DIR(startx,starty-iDiamondSize, 4); + } + else // what remains here is up or down + { + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty, 2); + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty, 1); + } + + if (iDirection) + { bDirection+=iDirection; + startx=currMV->x; starty=currMV->y; + } + } + else //about to quit, eh? not so fast.... 
+ { + switch (bDirection) + { + case 2: + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + break; + case 1: + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty+iDiamondSize, 1+8); + break; + case 2+4: + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + break; + case 4: + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + break; + case 8: + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty+iDiamondSize, 1+8); + break; + case 1+4: + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty+iDiamondSize, 1+8); + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + break; + case 2+8: + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty+iDiamondSize, 1+8); + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + break; + case 1+8: + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty+iDiamondSize, 1+8); + break; + default: //1+2+4+8 == we didn't find anything at all + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + CHECK_MV16_CANDIDATE_DIR(startx-iDiamondSize, starty+iDiamondSize, 1+8); + CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + 
CHECK_MV16_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + break; + } + if (!iDirection) break; //ok, the end. really + else + { bDirection=iDirection; + startx=currMV->x; starty=currMV->y; + } + } + } + while (1); //forever + } + return iMinSAD; +} + +int32_t AdvDiamond8_MainSearch( + const uint8_t * const pRef, + const uint8_t * const pRefH, + const uint8_t * const pRefV, + const uint8_t * const pRefHV, + const uint8_t * const cur, + const int x, const int y, + int32_t startx, int32_t starty, + int32_t iMinSAD, + VECTOR * const currMV, + const VECTOR * const pmv, + const int32_t min_dx, const int32_t max_dx, + const int32_t min_dy, const int32_t max_dy, + const int32_t iEdgedWidth, + const int32_t iDiamondSize, + const int32_t iFcode, + const int32_t iQuant, + int iDirection) +{ + + int32_t iSAD; + +/* directions: 1 - left (x-1); 2 - right (x+1), 4 - up (y-1); 8 - down (y+1) */ + + if (iDirection) + { + CHECK_MV8_CANDIDATE(startx-iDiamondSize, starty); + CHECK_MV8_CANDIDATE(startx+iDiamondSize, starty); + CHECK_MV8_CANDIDATE(startx, starty-iDiamondSize); + CHECK_MV8_CANDIDATE(startx, starty+iDiamondSize); + } + else + { + int bDirection = 1+2+4+8; + do + { + iDirection = 0; + if (bDirection&1) //we only want to check left if we came from the right (our last motion was to the left, up-left or down-left) + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize,starty,1); + + if (bDirection&2) + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize,starty,2); + + if (bDirection&4) + CHECK_MV8_CANDIDATE_DIR(startx,starty-iDiamondSize,4); + + if (bDirection&8) + CHECK_MV8_CANDIDATE_DIR(startx,starty+iDiamondSize,8); + + /* now we're doing diagonal checks near our candidate */ + + if (iDirection) //checking if anything found + { + bDirection = iDirection; + iDirection = 0; + startx=currMV->x; starty=currMV->y; + if (bDirection & 3) //our candidate is left or right + { + CHECK_MV8_CANDIDATE_DIR(startx,starty+iDiamondSize, 8); + 
CHECK_MV8_CANDIDATE_DIR(startx,starty-iDiamondSize, 4); + } + else // what remains here is up or down + { + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty, 2); + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty, 1); + } + + if (iDirection) + { bDirection+=iDirection; + startx=currMV->x; starty=currMV->y; + } + } + else //about to quit, eh? not so fast.... + { + switch (bDirection) + { + case 2: + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + break; + case 1: + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty+iDiamondSize, 1+8); + break; + case 2+4: + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + break; + case 4: + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + break; + case 8: + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty+iDiamondSize, 1+8); + break; + case 1+4: + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty+iDiamondSize, 1+8); + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + break; + case 2+8: + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty+iDiamondSize, 1+8); + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + break; + case 1+8: + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, 
starty+iDiamondSize, 1+8); + break; + default: //1+2+4+8 == we didn't find anything at all + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty-iDiamondSize, 1+4); + CHECK_MV8_CANDIDATE_DIR(startx-iDiamondSize, starty+iDiamondSize, 1+8); + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty-iDiamondSize, 2+4); + CHECK_MV8_CANDIDATE_DIR(startx+iDiamondSize, starty+iDiamondSize, 2+8); + break; + } + if (!(iDirection)) break; //ok, the end. really + else + { bDirection=iDirection; + startx=currMV->x; starty=currMV->y; + } + } + } + while (1); //forever + } + return iMinSAD; +} + + int32_t Full8_MainSearch( const uint8_t * const pRef, const uint8_t * const pRefH, @@ -889,8 +1096,10 @@ VECTOR pmv[4]; int32_t psad[4]; + + MainSearch16FuncPtr MainSearchPtr; - const MACROBLOCK * const pMB = pMBs + x + y * iWcount; +// const MACROBLOCK * const pMB = pMBs + x + y * iWcount; const MACROBLOCK * const prevMB = prevMBs + x + y * iWcount; static int32_t threshA,threshB; @@ -930,26 +1139,6 @@ iFound=0; -/* Step 2: Calculate Distance= |MedianMVX| + |MedianMVY| where MedianMV is the motion - vector of the median. - If PredEq=1 and MVpredicted = Previous Frame MV, set Found=2 -*/ - - if ((bPredEq) && (MVequal(pmv[0],prevMB->mvs[0]) ) ) - iFound=2; - -/* Step 3: If Distance>0 or thresb<1536 or PredEq=1 Select small Diamond Search. - Otherwise select large Diamond Search. -*/ - - if ( (pmv[0].x != 0) || (pmv[0].y != 0) || (threshB<1536) || (bPredEq) ) - iDiamondSize=1; // halfpel! - else - iDiamondSize=2; // halfpel! - - if (!(MotionFlags & PMV_HALFPELDIAMOND16) ) - iDiamondSize*=2; - /* Step 4: Calculate SAD around the Median prediction. MinSAD=SAD If Motion Vector equal to Previous frame motion vector @@ -957,9 +1146,6 @@ If SAD<=256 goto Step 10. */ - -// Prepare for main loop - *currMV=pmv[0]; /* current best := prediction */ if (!(MotionFlags & PMV_HALFPEL16 )) { /* This should NOT be necessary! 
*/ @@ -987,61 +1173,110 @@ iMinSAD = sad16( cur, get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 16, currMV, iEdgedWidth), iEdgedWidth, MV_MAX_ERROR); - iMinSAD += calc_delta_16(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode) * iQuant; + iMinSAD += calc_delta_16(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode, iQuant); if ( (iMinSAD < 256 ) || ( (MVequal(*currMV,prevMB->mvs[0])) && ((uint32_t)iMinSAD < prevMB->sad16) ) ) { - + if (iMinSAD < 2*iQuant) // high chances for SKIP-mode + { + if (!MVzero(*currMV)) + { + iMinSAD += MV16_00_BIAS; + CHECK_MV16_ZERO; // (0,0) saves space for letterboxed pictures + iMinSAD -= MV16_00_BIAS; + } + } + if (MotionFlags & PMV_QUICKSTOP16) goto PMVfast16_Terminate_without_Refine; if (MotionFlags & PMV_EARLYSTOP16) goto PMVfast16_Terminate_with_Refine; } + +/* Step 2 (lazy eval): Calculate Distance= |MedianMVX| + |MedianMVY| where MedianMV is the motion + vector of the median. + If PredEq=1 and MVpredicted = Previous Frame MV, set Found=2 +*/ + + if ((bPredEq) && (MVequal(pmv[0],prevMB->mvs[0]) ) ) + iFound=2; + +/* Step 3 (lazy eval): If Distance>0 or thresb<1536 or PredEq=1 Select small Diamond Search. + Otherwise select large Diamond Search. +*/ + + if ( (!MVzero(pmv[0])) || (threshB<1536) || (bPredEq) ) + iDiamondSize=1; // halfpel! + else + iDiamondSize=2; // halfpel! + + if (!(MotionFlags & PMV_HALFPELDIAMOND16) ) + iDiamondSize*=2; + /* Step 5: Calculate SAD for motion vectors taken from left block, top, top-right, and Previous frame block. Also calculate (0,0) but do not subtract offset. Let MinSAD be the smallest SAD up to this point. - If MV is (0,0) subtract offset. ******** WHAT'S THIS 'OFFSET' ??? *********** + If MV is (0,0) subtract offset. 
*/ // (0,0) is always possible - CHECK_MV16_ZERO; + if (!MVzero(pmv[0])) + CHECK_MV16_ZERO; // previous frame MV is always possible - CHECK_MV16_CANDIDATE(prevMB->mvs[0].x,prevMB->mvs[0].y); + + if (!MVzero(prevMB->mvs[0])) + if (!MVequal(prevMB->mvs[0],pmv[0])) + CHECK_MV16_CANDIDATE(prevMB->mvs[0].x,prevMB->mvs[0].y); // left neighbour, if allowed - if (x != 0) + + if (!MVzero(pmv[1])) + if (!MVequal(pmv[1],prevMB->mvs[0])) + if (!MVequal(pmv[1],pmv[0])) { if (!(MotionFlags & PMV_HALFPEL16 )) { pmv[1].x = EVEN(pmv[1].x); - pmv[1].y = EVEN(pmv[1].y); + pmv[1].y = EVEN(pmv[1].y); } + CHECK_MV16_CANDIDATE(pmv[1].x,pmv[1].y); } // top neighbour, if allowed - if (y != 0) + if (!MVzero(pmv[2])) + if (!MVequal(pmv[2],prevMB->mvs[0])) + if (!MVequal(pmv[2],pmv[0])) + if (!MVequal(pmv[2],pmv[1])) { if (!(MotionFlags & PMV_HALFPEL16 )) { pmv[2].x = EVEN(pmv[2].x); - pmv[2].y = EVEN(pmv[2].y); + pmv[2].y = EVEN(pmv[2].y); } CHECK_MV16_CANDIDATE(pmv[2].x,pmv[2].y); // top right neighbour, if allowed - if ((uint32_t)x != (iWcount-1)) + if (!MVzero(pmv[3])) + if (!MVequal(pmv[3],prevMB->mvs[0])) + if (!MVequal(pmv[3],pmv[0])) + if (!MVequal(pmv[3],pmv[1])) + if (!MVequal(pmv[3],pmv[2])) { if (!(MotionFlags & PMV_HALFPEL16 )) { pmv[3].x = EVEN(pmv[3].x); - pmv[3].y = EVEN(pmv[3].y); + pmv[3].y = EVEN(pmv[3].y); } CHECK_MV16_CANDIDATE(pmv[3].x,pmv[3].y); } } + + if ( (MVzero(*currMV)) && (!MVzero(pmv[0])) /* && (iMinSAD <= iQuant * 96)*/ ) + iMinSAD -= MV16_00_BIAS; + /* Step 6: If MinSAD <= thresa goto Step 10. 
If Motion Vector equal to Previous frame motion vector and MinSADx, currMV->y, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); @@ -1084,7 +1327,7 @@ /* extended: search (up to) two more times: orignal prediction and (0,0) */ if (!(MVequal(pmv[0],backupMV)) ) - { iSAD = Diamond16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + { iSAD = (*MainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, pmv[0].x, pmv[0].y, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); @@ -1097,7 +1340,7 @@ } if ( (!(MVzero(pmv[0]))) && (!(MVzero(backupMV))) ) - { iSAD = Diamond16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + { iSAD = (*MainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, 0, 0, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); @@ -1260,27 +1503,32 @@ int32_t psad[4]; VECTOR newMV; VECTOR backupMV; + VECTOR startMV; - const MACROBLOCK * const pMB = pMBs + (x>>1) + (y>>1) * iWcount; +// const MACROBLOCK * const pMB = pMBs + (x>>1) + (y>>1) * iWcount; const MACROBLOCK * const prevMB = prevMBs + (x>>1) + (y>>1) * iWcount; static int32_t threshA,threshB; int32_t iFound,bPredEq; int32_t iMinSAD,iSAD; - int32_t iSubBlock = ((y&1)<<1) + (x&1); + int32_t iSubBlock = (y&1)+(y&1) + (x&1); -/* Get maximum range */ + MainSearch8FuncPtr MainSearchPtr; + + /* Init variables */ + startMV.x = start_x; + startMV.y = start_y; + + /* Get maximum range */ get_range(&min_dx, &max_dx, &min_dy, &max_dy, x, y, 8, iWidth, iHeight, iFcode); -/* we work with abs. 
MVs, not relative to prediction, so range is relative to 0,0 */ - if (!(MotionFlags & PMV_HALFPELDIAMOND8 )) { min_dx = EVEN(min_dx); - max_dx = EVEN(max_dx); - min_dy = EVEN(min_dy); - max_dy = EVEN(max_dy); + max_dx = EVEN(max_dx); + min_dy = EVEN(min_dy); + max_dy = EVEN(max_dy); } /* because we might use IF (dx>max_dx) THEN dx=max_dx; */ @@ -1303,26 +1551,6 @@ iFound=0; -/* Step 2: Calculate Distance= |MedianMVX| + |MedianMVY| where MedianMV is the motion - vector of the median. - If PredEq=1 and MVpredicted = Previous Frame MV, set Found=2 -*/ - - if ((bPredEq) && (MVequal(pmv[0],pMB->mvs[iSubBlock]) ) ) - iFound=2; - -/* Step 3: If Distance>0 or thresb<1536 or PredEq=1 Select small Diamond Search. - Otherwise select large Diamond Search. -*/ - - if ( (pmv[0].x != 0) || (pmv[0].y != 0) || (threshB<1536/4) || (bPredEq) ) - iDiamondSize=1; // 1 halfpel! - else - iDiamondSize=2; // 2 halfpel = 1 full pixel! - - if (!(MotionFlags & PMV_HALFPELDIAMOND8) ) - iDiamondSize*=2; - /* Step 4: Calculate SAD around the Median prediction. 
MinSAD=SAD If Motion Vector equal to Previous frame motion vector @@ -1333,15 +1561,25 @@ // Prepare for main loop - currMV->x=start_x; /* start with mv16 */ - currMV->y=start_y; +// if (MotionFlags & PMV_USESQUARES8) +// MainSearchPtr = Square8_MainSearch; +// else + + if (MotionFlags & PMV_ADVANCEDDIAMOND8) + MainSearchPtr = AdvDiamond8_MainSearch; + else + MainSearchPtr = Diamond8_MainSearch; + + + *currMV = startMV; iMinSAD = sad8( cur, get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 8, currMV, iEdgedWidth), iEdgedWidth); - iMinSAD += calc_delta_8(currMV->x - pmv[0].x, currMV->y - pmv[0].y, (uint8_t)iFcode) * iQuant; + iMinSAD += calc_delta_8(currMV->x - pmv[0].x, currMV->y - pmv[0].y, (uint8_t)iFcode, iQuant); - if ( (iMinSAD < 256/4 ) || ( (MVequal(*currMV,pMB->mvs[iSubBlock])) && ((uint32_t)iMinSAD < prevMB->sad8[iSubBlock]) ) ) + if ( (iMinSAD < 256/4 ) || ( (MVequal(*currMV,prevMB->mvs[iSubBlock])) + && ((uint32_t)iMinSAD < prevMB->sad8[iSubBlock]) ) ) { if (MotionFlags & PMV_QUICKSTOP16) goto PMVfast8_Terminate_without_Refine; @@ -1349,58 +1587,110 @@ goto PMVfast8_Terminate_with_Refine; } +/* Step 2 (lazy eval): Calculate Distance= |MedianMVX| + |MedianMVY| where MedianMV is the motion + vector of the median. + If PredEq=1 and MVpredicted = Previous Frame MV, set Found=2 +*/ + + if ((bPredEq) && (MVequal(pmv[0],prevMB->mvs[iSubBlock]) ) ) + iFound=2; + +/* Step 3 (lazy eval): If Distance>0 or thresb<1536 or PredEq=1 Select small Diamond Search. + Otherwise select large Diamond Search. +*/ + + if ( (!MVzero(pmv[0])) || (threshB<1536/4) || (bPredEq) ) + iDiamondSize=1; // 1 halfpel! + else + iDiamondSize=2; // 2 halfpel = 1 full pixel! + + if (!(MotionFlags & PMV_HALFPELDIAMOND8) ) + iDiamondSize*=2; + /* Step 5: Calculate SAD for motion vectors taken from left block, top, top-right, and Previous frame block. Also calculate (0,0) but do not subtract offset. Let MinSAD be the smallest SAD up to this point. - If MV is (0,0) subtract offset. 
******** WHAT'S THIS 'OFFSET' ??? *********** + If MV is (0,0) subtract offset. */ -// the prediction might be even better than mv16 - CHECK_MV8_CANDIDATE(pmv[0].x,pmv[0].y); +// the median prediction might be even better than mv16 -// (0,0) is always possible + if (!MVequal(pmv[0],startMV)) + CHECK_MV8_CANDIDATE(pmv[0].x,pmv[0].y); + +// (0,0) if needed + if (!MVzero(pmv[0])) + if (!MVzero(startMV)) CHECK_MV8_ZERO; -// previous frame MV is always possible - CHECK_MV8_CANDIDATE(pMB->mvs[iSubBlock].x,pMB->mvs[iSubBlock].y); - -// left neighbour, if allowed - if (psad[1] != MV_MAX_ERROR) +// previous frame MV if needed + if (!MVzero(prevMB->mvs[iSubBlock])) + if (!MVequal(prevMB->mvs[iSubBlock],startMV)) + if (!MVequal(prevMB->mvs[iSubBlock],pmv[0])) + CHECK_MV8_CANDIDATE(prevMB->mvs[iSubBlock].x,prevMB->mvs[iSubBlock].y); + + if ( (iMinSAD <= threshA) || ( MVequal(*currMV,prevMB->mvs[iSubBlock]) && ((uint32_t)iMinSAD < prevMB->sad8[iSubBlock]) ) ) + { + if (MotionFlags & PMV_QUICKSTOP16) + goto PMVfast8_Terminate_without_Refine; + if (MotionFlags & PMV_EARLYSTOP16) + goto PMVfast8_Terminate_with_Refine; + } + + +// left neighbour, if allowed and needed + if (!MVzero(pmv[1])) + if (!MVequal(pmv[1],startMV)) + if (!MVequal(pmv[1],prevMB->mvs[iSubBlock])) + if (!MVequal(pmv[1],pmv[0])) { if (!(MotionFlags & PMV_HALFPEL8 )) { pmv[1].x = EVEN(pmv[1].x); - pmv[1].y = EVEN(pmv[1].y); + pmv[1].y = EVEN(pmv[1].y); } CHECK_MV8_CANDIDATE(pmv[1].x,pmv[1].y); } -// top neighbour, if allowed - if (psad[2] != MV_MAX_ERROR) +// top neighbour, if allowed and needed + if (!MVzero(pmv[2])) + if (!MVequal(pmv[2],startMV)) + if (!MVequal(pmv[2],prevMB->mvs[iSubBlock])) + if (!MVequal(pmv[2],pmv[0])) + if (!MVequal(pmv[2],pmv[1])) { if (!(MotionFlags & PMV_HALFPEL8 )) { pmv[2].x = EVEN(pmv[2].x); - pmv[2].y = EVEN(pmv[2].y); + pmv[2].y = EVEN(pmv[2].y); } CHECK_MV8_CANDIDATE(pmv[2].x,pmv[2].y); -// top right neighbour, if allowed - if (psad[3] != MV_MAX_ERROR) +// top right neighbour, if 
allowed and needed + if (!MVzero(pmv[3])) + if (!MVequal(pmv[3],startMV)) + if (!MVequal(pmv[3],prevMB->mvs[iSubBlock])) + if (!MVequal(pmv[3],pmv[0])) + if (!MVequal(pmv[3],pmv[1])) + if (!MVequal(pmv[3],pmv[2])) { if (!(MotionFlags & PMV_HALFPEL8 )) { pmv[3].x = EVEN(pmv[3].x); - pmv[3].y = EVEN(pmv[3].y); + pmv[3].y = EVEN(pmv[3].y); } CHECK_MV8_CANDIDATE(pmv[3].x,pmv[3].y); } } + if ( (MVzero(*currMV)) && (!MVzero(pmv[0])) /* && (iMinSAD <= iQuant * 96) */ ) + iMinSAD -= MV8_00_BIAS; + + /* Step 6: If MinSAD <= thresa goto Step 10. If Motion Vector equal to Previous frame motion vector and MinSADmvs[iSubBlock]) && ((uint32_t)iMinSAD < prevMB->sad8[iSubBlock]) ) ) + if ( (iMinSAD <= threshA) || ( MVequal(*currMV,prevMB->mvs[iSubBlock]) && ((uint32_t)iMinSAD < prevMB->sad8[iSubBlock]) ) ) { if (MotionFlags & PMV_QUICKSTOP16) goto PMVfast8_Terminate_without_Refine; @@ -1421,7 +1711,7 @@ backupMV = *currMV; /* save best prediction, actually only for EXTSEARCH */ /* default: use best prediction as starting point for one call of PMVfast_MainSearch */ - iSAD = Diamond8_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + iSAD = (*MainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, currMV->x, currMV->y, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); @@ -1437,7 +1727,7 @@ /* extended: search (up to) two more times: orignal prediction and (0,0) */ if (!(MVequal(pmv[0],backupMV)) ) - { iSAD = Diamond16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + { iSAD = (*MainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, pmv[0].x, pmv[0].y, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, iFound); @@ -1450,7 +1740,7 @@ } if ( (!(MVzero(pmv[0]))) && (!(MVzero(backupMV))) ) - { iSAD = Diamond16_MainSearch(pRef, pRefH, pRefV, pRefHV, cur, + { iSAD = (*MainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, 0, 0, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, 
iDiamondSize, iFcode, iQuant, iFound); @@ -1519,7 +1809,7 @@ int32_t psad[8]; static MACROBLOCK * oldMBs = NULL; - const MACROBLOCK * const pMB = pMBs + x + y * iWcount; +// const MACROBLOCK * const pMB = pMBs + x + y * iWcount; const MACROBLOCK * const prevMB = prevMBs + x + y * iWcount; MACROBLOCK * oldMB = NULL; @@ -1527,11 +1817,11 @@ int32_t bPredEq; int32_t iMinSAD,iSAD=9999; - MainSearch16FuncPtr EPZSMainSearchPtr; + MainSearch16FuncPtr MainSearchPtr; if (oldMBs == NULL) - { oldMBs = (MACROBLOCK*) calloc(1,iWcount*iHcount*sizeof(MACROBLOCK)); - fprintf(stderr,"allocated %d bytes for oldMBs\n",iWcount*iHcount*sizeof(MACROBLOCK)); + { oldMBs = (MACROBLOCK*) calloc(iWcount*iHcount,sizeof(MACROBLOCK)); +// fprintf(stderr,"allocated %d bytes for oldMBs\n",iWcount*iHcount*sizeof(MACROBLOCK)); } oldMB = oldMBs + x + y * iWcount; @@ -1539,8 +1829,6 @@ get_range(&min_dx, &max_dx, &min_dy, &max_dy, x, y, 16, iWidth, iHeight, iFcode); -/* we work with abs. MVs, not relative to prediction, so get_range is called relative to 0,0 */ - if (!(MotionFlags & PMV_HALFPEL16 )) { min_dx = EVEN(min_dx); max_dx = EVEN(max_dx); @@ -1580,10 +1868,10 @@ iMinSAD = sad16( cur, get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 16, currMV, iEdgedWidth), iEdgedWidth, MV_MAX_ERROR); - iMinSAD += calc_delta_16(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode) * iQuant; + iMinSAD += calc_delta_16(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode, iQuant); // thresh1 is fixed to 256 - if ( (iMinSAD < 256 ) || ( (MVequal(*currMV,pMB->mvs[0])) && ((uint32_t)iMinSAD < prevMB->sad16) ) ) + if ( (iMinSAD < 256 ) || ( (MVequal(*currMV, prevMB->mvs[0])) && ((uint32_t)iMinSAD < prevMB->sad16) ) ) { if (MotionFlags & PMV_QUICKSTOP16) goto EPZS16_Terminate_without_Refine; @@ -1594,7 +1882,7 @@ /************** This is predictor SET B: (0,0), prev.frame MV, neighbours **************/ // previous frame MV - CHECK_MV16_CANDIDATE(pMB->mvs[0].x,pMB->mvs[0].y); + 
CHECK_MV16_CANDIDATE(prevMB->mvs[0].x,prevMB->mvs[0].y); // set threshhold based on Min of Prediction and SAD of collocated block // CHECK_MV16 always uses iSAD for the SAD of last vector to check, so now iSAD is what we want @@ -1650,7 +1938,7 @@ */ if ( (iMinSAD <= thresh2) - || ( MVequal(*currMV,pMB->mvs[0]) && ((uint32_t)iMinSAD <= prevMB->sad16) ) ) + || ( MVequal(*currMV,prevMB->mvs[0]) && ((uint32_t)iMinSAD <= prevMB->sad16) ) ) { if (MotionFlags & PMV_QUICKSTOP16) goto EPZS16_Terminate_without_Refine; @@ -1660,28 +1948,28 @@ /***** predictor SET C: acceleration MV (new!), neighbours in prev. frame(new!) ****/ - backupMV = pMB->mvs[0]; // last MV - backupMV.x += (pMB->mvs[0].x - oldMB->mvs[0].x ); // acceleration X - backupMV.y += (pMB->mvs[0].y - oldMB->mvs[0].y ); // acceleration Y + backupMV = prevMB->mvs[0]; // collocated MV + backupMV.x += (prevMB->mvs[0].x - oldMB->mvs[0].x ); // acceleration X + backupMV.y += (prevMB->mvs[0].y - oldMB->mvs[0].y ); // acceleration Y - CHECK_MV16_CANDIDATE(backupMV.x,backupMV.y); + CHECK_MV16_CANDIDATE(backupMV.x,backupMV.y); // left neighbour if (x != 0) - CHECK_MV16_CANDIDATE((oldMB-1)->mvs[0].x,oldMB->mvs[0].y); + CHECK_MV16_CANDIDATE((prevMB-1)->mvs[0].x,(prevMB-1)->mvs[0].y); // top neighbour if (y != 0) - CHECK_MV16_CANDIDATE((oldMB-iWcount)->mvs[0].x,oldMB->mvs[0].y); + CHECK_MV16_CANDIDATE((prevMB-iWcount)->mvs[0].x,(prevMB-iWcount)->mvs[0].y); // right neighbour, if allowed (this value is not written yet, so take it from pMB->mvs if ((uint32_t)x != iWcount-1) - CHECK_MV16_CANDIDATE((pMB+1)->mvs[0].x,oldMB->mvs[0].y); + CHECK_MV16_CANDIDATE((prevMB+1)->mvs[0].x,(prevMB+1)->mvs[0].y); // bottom neighbour, dito if ((uint32_t)y != iHcount-1) - CHECK_MV16_CANDIDATE((pMB+iWcount)->mvs[0].x,oldMB->mvs[0].y); + CHECK_MV16_CANDIDATE((prevMB+iWcount)->mvs[0].x,(prevMB+iWcount)->mvs[0].y); /* Terminate if MinSAD <= T_3 (here T_3 = T_2) */ if (iMinSAD <= thresh2) @@ -1696,14 +1984,18 @@ backupMV = *currMV; /* save best 
prediction, actually only for EXTSEARCH */ -/* default: use best prediction as starting point for one call of PMVfast_MainSearch */ + if (MotionFlags & PMV_USESQUARES8) + MainSearchPtr = Square16_MainSearch; + else - if (MotionFlags & PMV_USESQUARES16) - EPZSMainSearchPtr = Square16_MainSearch; + if (MotionFlags & PMV_ADVANCEDDIAMOND8) + MainSearchPtr = AdvDiamond16_MainSearch; else - EPZSMainSearchPtr = Diamond16_MainSearch; + MainSearchPtr = Diamond16_MainSearch; - iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, +/* default: use best prediction as starting point for one call of PMVfast_MainSearch */ + + iSAD = (*MainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, currMV->x, currMV->y, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, 2, iFcode, iQuant, 0); @@ -1721,7 +2013,7 @@ if (!(MVequal(pmv[0],backupMV)) ) { - iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, + iSAD = (*MainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, pmv[0].x, pmv[0].y, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, 2, iFcode, iQuant, 0); @@ -1735,10 +2027,10 @@ if ( (!(MVzero(pmv[0]))) && (!(MVzero(backupMV))) ) { - iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, + iSAD = (*MainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, 0, 0, iMinSAD, &newMV, - pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, /*iDiamondSize*/ 2, iFcode, iQuant, 0); + pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, 2, iFcode, iQuant, 0); if (iSAD < iMinSAD) { @@ -1759,7 +2051,7 @@ EPZS16_Terminate_without_Refine: - *oldMB = *pMB; + *oldMB = *prevMB; currPMV->x = currMV->x - pmv[0].x; currPMV->y = currMV->y - pmv[0].y; @@ -1784,7 +2076,9 @@ VECTOR * const currMV, VECTOR * const currPMV) { - const uint32_t iWcount = pParam->mb_width; +/* Please not that EPZS might not be a good choice for 8x8-block motion search ! 
*/ + + const uint32_t iWcount = pParam->mb_width; const int32_t iWidth = pParam->width; const int32_t iHeight = pParam->height; const int32_t iEdgedWidth = pParam->edged_width; @@ -1806,13 +2100,13 @@ const int32_t iSubBlock = ((y&1)<<1) + (x&1); - const MACROBLOCK * const pMB = pMBs + (x>>1) + (y>>1) * iWcount; +// const MACROBLOCK * const pMB = pMBs + (x>>1) + (y>>1) * iWcount; const MACROBLOCK * const prevMB = prevMBs + (x>>1) + (y>>1) * iWcount; int32_t bPredEq; int32_t iMinSAD,iSAD=9999; - MainSearch8FuncPtr EPZSMainSearchPtr; + MainSearch8FuncPtr MainSearchPtr; /* Get maximum range */ get_range(&min_dx, &max_dx, &min_dy, &max_dy, @@ -1861,7 +2155,7 @@ iMinSAD = sad8( cur, get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 8, currMV, iEdgedWidth), iEdgedWidth); - iMinSAD += calc_delta_8(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode) * iQuant; + iMinSAD += calc_delta_8(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode, iQuant); // thresh1 is fixed to 256 @@ -1875,13 +2169,50 @@ /************** This is predictor SET B: (0,0), prev.frame MV, neighbours **************/ -// previous frame MV - CHECK_MV8_CANDIDATE(pMB->mvs[0].x,pMB->mvs[0].y); // MV=(0,0) is often a good choice - CHECK_MV8_ZERO; +// previous frame MV + CHECK_MV8_CANDIDATE(prevMB->mvs[iSubBlock].x,prevMB->mvs[iSubBlock].y); + +// left neighbour, if allowed + if (psad[1] != MV_MAX_ERROR) + { + if (!(MotionFlags & PMV_HALFPEL8 )) + { pmv[1].x = EVEN(pmv[1].x); + pmv[1].y = EVEN(pmv[1].y); + } + CHECK_MV8_CANDIDATE(pmv[1].x,pmv[1].y); + } + +// top neighbour, if allowed + if (psad[2] != MV_MAX_ERROR) + { + if (!(MotionFlags & PMV_HALFPEL8 )) + { pmv[2].x = EVEN(pmv[2].x); + pmv[2].y = EVEN(pmv[2].y); + } + CHECK_MV8_CANDIDATE(pmv[2].x,pmv[2].y); + +// top right neighbour, if allowed + if (psad[3] != MV_MAX_ERROR) + { + if (!(MotionFlags & PMV_HALFPEL8 )) + { pmv[3].x = EVEN(pmv[3].x); + pmv[3].y = EVEN(pmv[3].y); + } + CHECK_MV8_CANDIDATE(pmv[3].x,pmv[3].y); + } + } + +/* // this bias is zero 
anyway, at the moment! + + if ( (MVzero(*currMV)) && (!MVzero(pmv[0])) ) // && (iMinSAD <= iQuant * 96) + iMinSAD -= MV8_00_BIAS; + +*/ + /* Terminate if MinSAD <= T_2 Terminate if MV[t] == MV[t-1] and MinSAD[t] <= MinSAD[t-1] */ @@ -1894,27 +2225,33 @@ goto EPZS8_Terminate_with_Refine; } -/************ (if Diamond Search) **************/ +/************ (Diamond Search) **************/ backupMV = *currMV; /* save best prediction, actually only for EXTSEARCH */ if (!(MotionFlags & PMV_HALFPELDIAMOND8)) iDiamondSize *= 2; -/* default: use best prediction as starting point for one call of PMVfast_MainSearch */ +/* default: use best prediction as starting point for one call of EPZS_MainSearch */ + +// there is no EPZS^2 for inter4v at the moment // if (MotionFlags & PMV_USESQUARES8) -// EPZSMainSearchPtr = Square8_MainSearch; +// MainSearchPtr = Square8_MainSearch; // else - EPZSMainSearchPtr = Diamond8_MainSearch; - - iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, + + if (MotionFlags & PMV_ADVANCEDDIAMOND8) + MainSearchPtr = AdvDiamond8_MainSearch; + else + MainSearchPtr = Diamond8_MainSearch; + + iSAD = (*MainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, currMV->x, currMV->y, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, - iDiamondSize, iFcode, iQuant, 00); + iDiamondSize, iFcode, iQuant, 0); + - if (iSAD < iMinSAD) { *currMV = newMV; @@ -1927,7 +2264,7 @@ if (!(MVequal(pmv[0],backupMV)) ) { - iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, + iSAD = (*MainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, pmv[0].x, pmv[0].y, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, iQuant, 0); @@ -1941,7 +2278,7 @@ if ( (!(MVzero(pmv[0]))) && (!(MVzero(backupMV))) ) { - iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, + iSAD = (*MainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur, x, y, 0, 0, iMinSAD, &newMV, pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, iDiamondSize, iFcode, 
iQuant, 0); @@ -1979,7 +2316,7 @@ // TODO: need to incorporate prediction here (eg. sad += calc_delta_16) ***************************************************************/ -/* + void MotionEstimationBVOP( MBParam * const pParam, FRAMEINFO * const frame, @@ -2001,7 +2338,7 @@ const uint32_t mb_height = pParam->mb_height; const int32_t edged_width = pParam->edged_width; - int32_t i,j; + uint32_t i,j; int32_t f_sad16; int32_t b_sad16; @@ -2025,7 +2362,7 @@ && b_mb->mvs[0].x == 0 && b_mb->mvs[0].y == 0) { - mb->mode = MB_IGNORE; + mb->mode = MODE_NOT_CODED; mb->mvs[0].x = 0; mb->mvs[0].y = 0; mb->b_mvs[0].x = 0; @@ -2040,7 +2377,7 @@ i, j, frame->motion_flags, frame->quant, frame->fcode, pParam, - f_mbs, + f_mbs, f_mbs /* todo */, &mb->mvs[0], &pmv_dontcare); // ignore pmv // backward search @@ -2049,7 +2386,7 @@ i, j, frame->motion_flags, frame->quant, frame->bcode, pParam, - b_mbs, + b_mbs, b_mbs, /* todo */ &mb->b_mvs[0], &pmv_dontcare); // ignore pmv // interpolate search (simple, but effective) @@ -2069,28 +2406,26 @@ if (f_sad16 < b_sad16) { best_sad = f_sad16; - mb->mode = MB_FORWARD; + mb->mode = MODE_FORWARD; } else { best_sad = b_sad16; - mb->mode = MB_BACKWARD; + mb->mode = MODE_BACKWARD; } if (i_sad16 < best_sad) { best_sad = i_sad16; - mb->mode = MB_INTERPOLATE; + mb->mode = MODE_INTERPOLATE; } if (d_sad16 < best_sad) { best_sad = d_sad16; - mb->mode = MB_DIRECT; + mb->mode = MODE_DIRECT; } } } } - -*/