--- branches/dev-api-3/xvidcore/src/motion/motion_est.c 2002/12/24 16:44:24 739 +++ branches/dev-api-3/xvidcore/src/motion/motion_est.c 2003/01/08 14:37:52 767 @@ -1,7 +1,7 @@ /************************************************************************** * * XVID MPEG-4 VIDEO CODEC - * motion estimation + * motion estimation * * This program is an implementation of a part of one or more MPEG-4 * Video tools as specified in ISO/IEC 14496-2 standard. Those intending @@ -58,71 +58,67 @@ int xb, yb; if (qpel) { x *= 2; y *= 2;} else if (rrv) { x = RRV_MV_SCALEDOWN(x); y = RRV_MV_SCALEDOWN(y); } - x = pred.x - x; - y = pred.y - y; + x -= pred.x; + y -= pred.y; - if (x == 0) xb = 1; - else { + if (x) { if (x < 0) x = -x; x += (1 << (iFcode - 1)) - 1; x >>= (iFcode - 1); if (x > 32) x = 32; xb = mvtab[x] + iFcode; - } + } else xb = 1; - if (y == 0) yb = 1; - else { + if (y) { if (y < 0) y = -y; y += (1 << (iFcode - 1)) - 1; y >>= (iFcode - 1); if (y > 32) y = 32; yb = mvtab[y] + iFcode; - } + } else yb = 1; return xb + yb; } -static int32_t +static int32_t ChromaSAD(int dx, int dy, const SearchData * const data) { int sad; - dx = (dx >> 1) + roundtab_79[dx & 0x3]; - dy = (dy >> 1) + roundtab_79[dy & 0x3]; + const uint32_t stride = data->iEdgedWidth/2; if (dx == data->temp[5] && dy == data->temp[6]) return data->temp[7]; //it has been checked recently + data->temp[5] = dx; data->temp[6] = dy; // backup switch (((dx & 1) << 1) | (dy & 1)) { case 0: - sad = sad8(data->CurU, data->RefCU + (dy/2) * (data->iEdgedWidth/2) + dx/2, data->iEdgedWidth/2); - sad += sad8(data->CurV, data->RefCV + (dy/2) * (data->iEdgedWidth/2) + dx/2, data->iEdgedWidth/2); + dx = dx / 2; dy = dy / 2; + sad = sad8(data->CurU, data->RefCU + dy * stride + dx, stride); + sad += sad8(data->CurV, data->RefCV + dy * stride + dx, stride); break; case 1: dx = dx / 2; dy = (dy - 1) / 2; - sad = sad8bi(data->CurU, data->RefCU + dy * (data->iEdgedWidth/2) + dx, data->RefCU + (dy+1) * (data->iEdgedWidth/2) + dx, data->iEdgedWidth/2); - sad += sad8bi(data->CurV, data->RefCV + dy * (data->iEdgedWidth/2) + dx, data->RefCV + (dy+1) * (data->iEdgedWidth/2) + dx, data->iEdgedWidth/2); + sad = sad8bi(data->CurU, data->RefCU + dy * stride + dx, data->RefCU + (dy+1) * stride + dx, stride); + sad += sad8bi(data->CurV, data->RefCV + dy * stride + dx, data->RefCV + (dy+1) * stride + dx, stride); break; case 2: dx = (dx - 1) / 2; dy = dy / 2; - sad = sad8bi(data->CurU, data->RefCU + dy * (data->iEdgedWidth/2) + dx, data->RefCU + dy * (data->iEdgedWidth/2) + dx+1, data->iEdgedWidth/2); - sad += sad8bi(data->CurV, data->RefCV + dy * (data->iEdgedWidth/2) + dx, data->RefCV + dy * (data->iEdgedWidth/2) + dx+1, data->iEdgedWidth/2); + sad = sad8bi(data->CurU, data->RefCU + dy * stride + dx, data->RefCU + dy * stride + dx+1, stride); + sad += sad8bi(data->CurV, data->RefCV + dy * stride + dx, data->RefCV + dy * stride + dx+1, stride); break; default: dx = (dx - 1) / 2; dy = (dy - 1) / 2; - interpolate8x8_halfpel_hv(data->RefQ, - data->RefCU + dy * (data->iEdgedWidth/2) + dx, data->iEdgedWidth/2, - data->rounding); - sad = sad8(data->CurU, data->RefQ, data->iEdgedWidth/2); - interpolate8x8_halfpel_hv(data->RefQ, - data->RefCV + dy * (data->iEdgedWidth/2) + dx, data->iEdgedWidth/2, - data->rounding); - sad += sad8(data->CurV, data->RefQ, data->iEdgedWidth/2); + interpolate8x8_halfpel_hv(data->RefQ, data->RefCU + dy * stride + dx, stride, data->rounding); + sad = sad8(data->CurU, data->RefQ, stride); + + interpolate8x8_halfpel_hv(data->RefQ, data->RefCV + dy * stride + dx, stride, data->rounding); + sad += sad8(data->CurV, data->RefQ, stride); break; } - data->temp[5] = dx; data->temp[6] = dy; data->temp[7] = sad; //backup + data->temp[7] = sad; //backup, part 2 return sad; } static __inline const uint8_t * -GetReference(const int x, const int y, const int dir, const SearchData * const data) +GetReferenceB(const int x, const int y, const int dir, const SearchData * const data) { // dir : 0 = forward, 1 = backward switch ( (dir << 2) | ((x&1)<<1) | (y&1) ) { @@ -137,7 +133,19 @@ } } -static uint8_t * +// this is a simpler copy of GetReferenceB, but as it's __inline anyway, we can keep the two separate +static __inline const uint8_t * +GetReference(const int x, const int y, const SearchData * const data) +{ + switch ( ((x&1)<<1) | (y&1) ) { + case 0 : return data->Ref + x/2 + (y/2)*(data->iEdgedWidth); + case 1 : return data->RefV + x/2 + ((y-1)/2)*(data->iEdgedWidth); + case 2 : return data->RefH + (x-1)/2 + (y/2)*(data->iEdgedWidth); + default : return data->RefHV + (x-1)/2 + ((y-1)/2)*(data->iEdgedWidth); + } +} + +static uint8_t * Interpolate8x8qpel(const int x, const int y, const int block, const int dir, const SearchData * const data) { // create or find a qpel-precision reference picture; return pointer to it @@ -148,31 +156,30 @@ const int halfpel_y = y/2; const uint8_t *ref1, *ref2, *ref3, *ref4; - ref1 = GetReference(halfpel_x, halfpel_y, dir, data); // this reference is used in all cases + ref1 = GetReferenceB(halfpel_x, halfpel_y, dir, data); ref1 += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth; switch( ((x&1)<<1) + (y&1) ) { case 0: // pure halfpel position - Reference = (uint8_t *) GetReference(halfpel_x, halfpel_y, dir, data); - Reference += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth; + return (uint8_t *) ref1; break; case 1: // x halfpel, y qpel - top or bottom during qpel refinement - ref2 = GetReference(halfpel_x, y - halfpel_y, dir, data); + ref2 = GetReferenceB(halfpel_x, y - halfpel_y, dir, data); ref2 += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth; interpolate8x8_avg2(Reference, ref1, ref2, iEdgedWidth, rounding, 8); break; case 2: // x qpel, y halfpel - left or right during qpel refinement - ref2 = GetReference(x - halfpel_x, halfpel_y, dir, data); + ref2 = GetReferenceB(x - halfpel_x, halfpel_y, dir, data); ref2 += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth; interpolate8x8_avg2(Reference, ref1, ref2, iEdgedWidth, rounding, 8); break; default: // x and y in qpel resolution - the "corners" (top left/right and // bottom left/right) during qpel refinement - ref2 = GetReference(halfpel_x, y - halfpel_y, dir, data); - ref3 = GetReference(x - halfpel_x, halfpel_y, dir, data); - ref4 = GetReference(x - halfpel_x, y - halfpel_y, dir, data); + ref2 = GetReferenceB(halfpel_x, y - halfpel_y, dir, data); + ref3 = GetReferenceB(x - halfpel_x, halfpel_y, dir, data); + ref4 = GetReferenceB(x - halfpel_x, y - halfpel_y, dir, data); ref2 += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth; ref3 += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth; ref4 += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth; @@ -182,7 +189,7 @@ return Reference; } -static uint8_t * +static uint8_t * Interpolate16x16qpel(const int x, const int y, const int dir, const SearchData * const data) { // create or find a qpel-precision reference picture; return pointer to it @@ -193,12 +200,12 @@ const int halfpel_y = y/2; const uint8_t *ref1, *ref2, *ref3, *ref4; - ref1 = GetReference(halfpel_x, halfpel_y, dir, data); // this reference is used in all cases + ref1 = GetReferenceB(halfpel_x, halfpel_y, dir, data); switch( ((x&1)<<1) + (y&1) ) { case 0: // pure halfpel position - return (uint8_t *) GetReference(halfpel_x, halfpel_y, dir, data); + return (uint8_t *) ref1; case 1: // x halfpel, y qpel - top or bottom during qpel refinement - ref2 = GetReference(halfpel_x, y - halfpel_y, dir, data); + ref2 = GetReferenceB(halfpel_x, y - halfpel_y, dir, data); interpolate8x8_avg2(Reference, ref1, ref2, iEdgedWidth, rounding, 8); interpolate8x8_avg2(Reference+8, ref1+8, ref2+8, iEdgedWidth, rounding, 8); interpolate8x8_avg2(Reference+8*iEdgedWidth, ref1+8*iEdgedWidth, ref2+8*iEdgedWidth, iEdgedWidth, rounding, 8); @@ -206,7 +213,7 @@ break; case 2: // x qpel, y halfpel - left or right during qpel refinement - ref2 = GetReference(x - halfpel_x, halfpel_y, dir, data); + ref2 = GetReferenceB(x - halfpel_x, halfpel_y, dir, data); interpolate8x8_avg2(Reference, ref1, ref2, iEdgedWidth, rounding, 8); interpolate8x8_avg2(Reference+8, ref1+8, ref2+8, iEdgedWidth, rounding, 8); interpolate8x8_avg2(Reference+8*iEdgedWidth, ref1+8*iEdgedWidth, ref2+8*iEdgedWidth, iEdgedWidth, rounding, 8); @@ -215,9 +222,9 @@ default: // x and y in qpel resolution - the "corners" (top left/right and // bottom left/right) during qpel refinement - ref2 = GetReference(halfpel_x, y - halfpel_y, dir, data); - ref3 = GetReference(x - halfpel_x, halfpel_y, dir, data); - ref4 = GetReference(x - halfpel_x, y - halfpel_y, dir, data); + ref2 = GetReferenceB(halfpel_x, y - halfpel_y, dir, data); + ref3 = GetReferenceB(x - halfpel_x, halfpel_y, dir, data); + ref4 = GetReferenceB(x - halfpel_x, y - halfpel_y, dir, data); interpolate8x8_avg4(Reference, ref1, ref2, ref3, ref4, iEdgedWidth, rounding); interpolate8x8_avg4(Reference+8, ref1+8, ref2+8, ref3+8, ref4+8, iEdgedWidth, rounding); interpolate8x8_avg4(Reference+8*iEdgedWidth, ref1+8*iEdgedWidth, ref2+8*iEdgedWidth, ref3+8*iEdgedWidth, ref4+8*iEdgedWidth, iEdgedWidth, rounding); @@ -229,7 +236,7 @@ /* CHECK_CANDIATE FUNCTIONS START */ -static void +static void CheckCandidate16(const int x, const int y, const int Direction, int * const dir, const SearchData * const data) { int t, xc, yc; @@ -244,7 +251,7 @@ xc = x/2; yc = y/2; //for chroma sad current = data->currentQMV; } else { - Reference = GetReference(x, y, 0, data); + Reference = GetReference(x, y, data); current = data->currentMV; xc = x; yc = y; } @@ -255,7 +262,8 @@ data->temp[0] += (data->lambda16 * t * data->temp[0])/1000; data->temp[1] += (data->lambda8 * t * (data->temp[1] + NEIGH_8X8_BIAS))/100; - if (data->chroma) data->temp[0] += ChromaSAD(xc, yc, data); + if (data->chroma) data->temp[0] += ChromaSAD((xc >> 1) + roundtab_79[xc & 0x3], + (yc >> 1) + roundtab_79[yc & 0x3], data); if (data->temp[0] < data->iMinSAD[0]) { data->iMinSAD[0] = data->temp[0]; @@ -273,7 +281,7 @@ } -static void +static void CheckCandidate32(const int x, const int y, const int Direction, int * const dir, const SearchData * const data) { int t; @@ -283,7 +291,7 @@ ( x > data->max_dx) || ( x < data->min_dx) || ( y > data->max_dy) || (y < data->min_dy)) return; - Reference = GetReference(x, y, 0, data); + Reference = GetReference(x, y, data); t = d_mv_bits(x, y, data->predMV, data->iFcode, 0, 1); data->temp[0] = sad32v_c(data->Cur, Reference, data->iEdgedWidth, data->temp + 1); @@ -306,7 +314,7 @@ data->iMinSAD[4] = data->temp[4]; data->currentMV[4].x = x; data->currentMV[4].y = y; } } -static void +static void CheckCandidate16no4v(const int x, const int y, const int Direction, int * const dir, const SearchData * const data) { int32_t sad; @@ -324,11 +332,11 @@ Reference = Interpolate16x16qpel(x, y, 0, data); current = data->currentQMV; } else { - Reference = GetReference(x, y, 0, data); + Reference = GetReference(x, y, data); current = data->currentMV; } t = d_mv_bits(x, y, data->predMV, data->iFcode, - data->qpel && !data->qpel_precision && !data->rrv, data->rrv); + data->qpel && !data->qpel_precision, data->rrv); sad = sad16(data->Cur, Reference, data->iEdgedWidth, 256*4096); sad += (data->lambda16 * t * sad)/1000; @@ -339,7 +347,7 @@ *dir = Direction; } } -static void +static void CheckCandidate32I(const int x, const int y, const int Direction, int * const dir, const SearchData * const data) { // maximum speed - for P/B/I decision @@ -364,8 +372,7 @@ } - -static void +static void CheckCandidateInt(const int xf, const int yf, const int Direction, int * const dir, const SearchData * const data) { int32_t sad; @@ -382,9 +389,9 @@ current = data->currentQMV; ReferenceB = Interpolate16x16qpel(xb, yb, 1, data); } else { - ReferenceF = GetReference(xf, yf, 0, data); + ReferenceF = GetReference(xf, yf, data); xb = data->currentMV[1].x; yb = data->currentMV[1].y; - ReferenceB = GetReference(xb, yb, 1, data); + ReferenceB = GetReferenceB(xb, yb, 1, data); current = data->currentMV; } @@ -400,7 +407,7 @@ *dir = Direction; } } -static void +static void CheckCandidateDirect(const int x, const int y, const int Direction, int * const dir, const SearchData * const data) { int32_t sad = 0; @@ -428,8 +435,8 @@ || ( b_mvs.x > data->max_dx ) || ( b_mvs.x < data->min_dx ) || ( b_mvs.y > data->max_dy ) || ( b_mvs.y < data->min_dy )) return; - if (!data->qpel) { - mvs.x *= 2; mvs.y *= 2; + if (!data->qpel) { + mvs.x *= 2; mvs.y *= 2; b_mvs.x *= 2; b_mvs.y *= 2; //we move to qpel precision anyway } ReferenceF = Interpolate8x8qpel(mvs.x, mvs.y, k, 0, data); @@ -449,7 +456,7 @@ *dir = Direction; } } -static void +static void CheckCandidateDirectno4v(const int x, const int y, const int Direction, int * const dir, const SearchData * const data) { int32_t sad; @@ -475,8 +482,8 @@ || ( b_mvs.x > data->max_dx ) || ( b_mvs.x < data->min_dx ) || ( b_mvs.y > data->max_dy ) || ( b_mvs.y < data->min_dy )) return; - if (!data->qpel) { - mvs.x *= 2; mvs.y *= 2; + if (!data->qpel) { + mvs.x *= 2; mvs.y *= 2; b_mvs.x *= 2; b_mvs.y *= 2; //we move to qpel precision anyway } ReferenceF = Interpolate16x16qpel(mvs.x, mvs.y, 0, data); @@ -501,7 +508,7 @@ || ( y > data->max_dy) || (y < data->min_dy)) return; if (data->qpel) Reference = Interpolate16x16qpel(x, y, 0, data); - else Reference = GetReference(x, y, 0, data); + else Reference = GetReference(x, y, data); sad = sad8(data->Cur, Reference, data->iEdgedWidth); t = d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel && !data->qpel_precision, 0); @@ -544,12 +551,12 @@ CHECK_CANDIDATE(x, y - iDiamondSize, 4); } else { // what remains here is up or down CHECK_CANDIDATE(x + iDiamondSize, y, 2); - CHECK_CANDIDATE(x - iDiamondSize, y, 1); + CHECK_CANDIDATE(x - iDiamondSize, y, 1); } if (iDirection) { bDirection += iDirection; - x = data->currentMV->x; y = data->currentMV->y; + x = data->currentMV->x; y = data->currentMV->y; } } else { //about to quit, eh? not so fast.... switch (bDirection) { @@ -650,7 +657,7 @@ CHECK_CANDIDATE(x, y - iDiamondSize, 4); } else { // what remains here is up or down CHECK_CANDIDATE(x + iDiamondSize, y, 2); - CHECK_CANDIDATE(x - iDiamondSize, y, 1); + CHECK_CANDIDATE(x - iDiamondSize, y, 1); } bDirection += iDirection; x = data->currentMV->x; y = data->currentMV->y; @@ -674,16 +681,14 @@ backupMV = *(data->currentQMV); else backupMV = *(data->currentMV); - CHECK_CANDIDATE(backupMV.x - 1, backupMV.y - 1, 0); + CHECK_CANDIDATE(backupMV.x, backupMV.y - 1, 0); CHECK_CANDIDATE(backupMV.x + 1, backupMV.y - 1, 0); - CHECK_CANDIDATE(backupMV.x - 1, backupMV.y + 1, 0); - CHECK_CANDIDATE(backupMV.x + 1, backupMV.y + 1, 0); - - CHECK_CANDIDATE(backupMV.x - 1, backupMV.y, 0); CHECK_CANDIDATE(backupMV.x + 1, backupMV.y, 0); - + CHECK_CANDIDATE(backupMV.x + 1, backupMV.y + 1, 0); CHECK_CANDIDATE(backupMV.x, backupMV.y + 1, 0); - CHECK_CANDIDATE(backupMV.x, backupMV.y - 1, 0); + CHECK_CANDIDATE(backupMV.x - 1, backupMV.y + 1, 0); + CHECK_CANDIDATE(backupMV.x - 1, backupMV.y, 0); + CHECK_CANDIDATE(backupMV.x - 1, backupMV.y - 1, 0); } static __inline int @@ -748,7 +753,6 @@ uint32_t x, y; uint32_t iIntra = 0; int32_t InterBias, quant = current->quant, sad00; - uint8_t *qimage; // some pre-initialized thingies for SearchP int32_t temp[8]; @@ -774,23 +778,19 @@ Data.qpel = Data.chroma = 0; } - if((qimage = (uint8_t *) malloc(32 * pParam->edged_width)) == NULL) - return 1; // allocate some mem for qpel interpolated blocks - // somehow this is dirty since I think we shouldn't use malloc outside - // encoder_create() - so please fix me! - Data.RefQ = qimage; + Data.RefQ = pRefV->u; // a good place, also used in MC (for similar purpose) if (sadInit) (*sadInit) (); for (y = 0; y < mb_height; y++) { for (x = 0; x < mb_width; x++) { MACROBLOCK *pMB = &pMBs[x + y * pParam->mb_width]; - if (Data.rrv) pMB->sad16 = + if (Data.rrv) pMB->sad16 = sad32v_c(pCurrent->y + (x + y * pParam->edged_width) * 32, pRef->y + (x + y * pParam->edged_width) * 32, pParam->edged_width, pMB->sad8 ); - else pMB->sad16 = + else pMB->sad16 = sad16v(pCurrent->y + (x + y * pParam->edged_width) * 16, pRef->y + (x + y * pParam->edged_width) * 16, pParam->edged_width, pMB->sad8 ); @@ -807,7 +807,7 @@ if (!(current->global_flags & XVID_LUMIMASKING)) { pMB->dquant = NO_CHANGE; - pMB->quant = current->quant; + pMB->quant = current->quant; } else { if (pMB->dquant != NO_CHANGE) { quant += DQtab[pMB->dquant]; @@ -833,7 +833,7 @@ current->global_flags & XVID_INTER4V, pMB); /* final skip decision, a.k.a. "the vector you found, really that good?" */ - if (current->coding_type == P_VOP) { + if (current->coding_type == P_VOP) { if ( (pMB->dquant == NO_CHANGE) && (sad00 < pMB->quant * MAX_SAD00_FOR_SKIP) && ((100*pMB->sad16)/(sad00+1) > FINAL_SKIP_THRESH * (Data.rrv ? 4:1)) ) if (Data.chroma || SkipDecisionP(pCurrent, pRef, x, y, pParam->edged_width, pMB->quant, Data.rrv)) { @@ -870,7 +870,7 @@ pParam->edged_width); if (deviation < (pMB->sad16 - InterBias)) { - if (++iIntra >= iLimit) { free(qimage); return 1; } + if (++iIntra >= iLimit) return 1; pMB->mode = MODE_INTRA; pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = zeroMV; @@ -882,7 +882,6 @@ } } } - free(qimage); if (current->coding_type == S_VOP) /* first GMC step only for S(GMC)-VOPs */ current->GMC_MV = GlobalMotionEst( pMBs, pParam, current->fcode ); @@ -904,7 +903,7 @@ if (pmv[i].x == pmv[j].x) { if (pmv[i].y == pmv[j].y + iDiamondSize) { mask &= ~4; continue; } if (pmv[i].y == pmv[j].y - iDiamondSize) { mask &= ~8; continue; } - } else + } else if (pmv[i].y == pmv[j].y) { if (pmv[i].x == pmv[j].x + iDiamondSize) { mask &= ~1; continue; } if (pmv[i].x == pmv[j].x - iDiamondSize) { mask &= ~2; continue; } @@ -913,7 +912,7 @@ return mask; } -static __inline void +static __inline void PreparePredictionsP(VECTOR * const pmv, int x, int y, int iWcount, int iHcount, const MACROBLOCK * const prevMB, int rrv) { @@ -923,7 +922,7 @@ if ( (y != 0) && (x < (iWcount-1)) ) { // [5] top-right neighbour pmv[5].x = EVEN(pmv[3].x); - pmv[5].y = EVEN(pmv[3].y); + pmv[5].y = EVEN(pmv[3].y); } else pmv[5].x = pmv[5].y = 0; if (x != 0) { pmv[3].x = EVEN(pmv[1].x); pmv[3].y = EVEN(pmv[1].y); }// pmv[3] is left neighbour @@ -933,10 +932,7 @@ else pmv[4].x = pmv[4].y = 0; // [1] median prediction - if (rrv) { //median is in halfzero-precision - pmv[1].x = RRV_MV_SCALEUP(pmv[0].x); - pmv[1].y = RRV_MV_SCALEUP(pmv[0].y); - } else { pmv[1].x = EVEN(pmv[0].x); pmv[1].y = EVEN(pmv[0].y); } + pmv[1].x = EVEN(pmv[0].x); pmv[1].y = EVEN(pmv[0].y); pmv[0].x = pmv[0].y = 0; // [0] is zero; not used in the loop (checked before) but needed here for make_mask @@ -945,14 +941,14 @@ if ((x < iWcount-1) && (y < iHcount-1)) { pmv[6].x = EVEN((prevMB+1+iWcount)->mvs[0].x); //[6] right-down neighbour in last frame - pmv[6].y = EVEN((prevMB+1+iWcount)->mvs[0].y); + pmv[6].y = EVEN((prevMB+1+iWcount)->mvs[0].y); } else pmv[6].x = pmv[6].y = 0; if (rrv) { int i; for (i = 0; i < 7; i++) { - pmv[i].x = RRV_MV_SCALEDOWN(pmv[i].x); - pmv[i].x = RRV_MV_SCALEUP(pmv[i].x); // a trick + pmv[i].x = RRV_MV_SCALEUP(pmv[i].x); // halfzero->halfpel + pmv[i].y = RRV_MV_SCALEUP(pmv[i].y); } } } @@ -1042,7 +1038,7 @@ inter4v = 0; } else { - MainSearchFunc * MainSearchPtr; + MainSearchFunc * MainSearchPtr; if (MotionFlags & PMV_USESQUARES16) MainSearchPtr = SquareSearch; else if (MotionFlags & PMV_ADVANCEDDIAMOND16) MainSearchPtr = AdvDiamondSearch; else MainSearchPtr = DiamondSearch; @@ -1059,7 +1055,7 @@ if (Data->rrv) { startMV.x = RRV_MV_SCALEUP(startMV.x); startMV.y = RRV_MV_SCALEUP(startMV.y); - } else + } else if (!(MotionFlags & PMV_HALFPELREFINE16)) // who's gonna use extsearch and no halfpel? startMV.x = EVEN(startMV.x); startMV.y = EVEN(startMV.y); if (!(MVequal(startMV, backupMV))) { @@ -1094,7 +1090,7 @@ Data->currentQMV[i].y = 2 * Data->currentMV[i].y; } - if((!Data->rrv) && (pParam->m_quarterpel) && (MotionFlags & PMV_QUARTERPELREFINE16)) { + if((Data->qpel) && (MotionFlags & PMV_QUARTERPELREFINE16)) { Data->qpel_precision = 1; get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, @@ -1118,19 +1114,18 @@ Search8(Data, 2*x + 1, 2*y + 1, MotionFlags, pParam, pMB, pMBs, 3, &Data8); if (Data->chroma) { - int sumx, sumy, dx, dy; + int sumx, sumy; if(pParam->m_quarterpel) { - sumx= pMB->qmvs[0].x/2 + pMB->qmvs[1].x/2 + pMB->qmvs[2].x/2 + pMB->qmvs[3].x/2; + sumx = pMB->qmvs[0].x/2 + pMB->qmvs[1].x/2 + pMB->qmvs[2].x/2 + pMB->qmvs[3].x/2; sumy = pMB->qmvs[0].y/2 + pMB->qmvs[1].y/2 + pMB->qmvs[2].y/2 + pMB->qmvs[3].y/2; } else { sumx = pMB->mvs[0].x + pMB->mvs[1].x + pMB->mvs[2].x + pMB->mvs[3].x; sumy = pMB->mvs[0].y + pMB->mvs[1].y + pMB->mvs[2].y + pMB->mvs[3].y; } - dx = (sumx >> 3) + roundtab_76[sumx & 0xf]; - dy = (sumy >> 3) + roundtab_76[sumy & 0xf]; - Data->iMinSAD[1] += ChromaSAD(dx, dy, Data); + Data->iMinSAD[1] += ChromaSAD( (sumx >> 3) + roundtab_76[sumx & 0xf], + (sumy >> 3) + roundtab_76[sumy & 0xf], Data); } } @@ -1138,8 +1133,9 @@ Data->currentMV[0].x = RRV_MV_SCALEDOWN(Data->currentMV[0].x); Data->currentMV[0].y = RRV_MV_SCALEDOWN(Data->currentMV[0].y); } + if (!(inter4v) || - (Data->iMinSAD[0] < Data->iMinSAD[1] + Data->iMinSAD[2] + + (Data->iMinSAD[0] < Data->iMinSAD[1] + Data->iMinSAD[2] + Data->iMinSAD[3] + Data->iMinSAD[4] + IMV16X16 * (int32_t)iQuant )) { // INTER MODE pMB->mode = MODE_INTER; @@ -1159,9 +1155,9 @@ pMB->pmvs[0].y = Data->currentMV[0].y - Data->predMV.y; } } else { -// INTER4V MODE; all other things are already set in Search8 +// INTER4V MODE; all other things are already set in Search8 pMB->mode = MODE_INTER4V; - pMB->sad16 = Data->iMinSAD[1] + Data->iMinSAD[2] + + pMB->sad16 = Data->iMinSAD[1] + Data->iMinSAD[2] + Data->iMinSAD[3] + Data->iMinSAD[4] + IMV16X16 * iQuant; } } @@ -1240,7 +1236,7 @@ } } - if(!Data->rrv && Data->qpel) { + if(Data->qpel) { if((!(Data->currentQMV->x & 1)) && (!(Data->currentQMV->y & 1)) && (MotionFlags & PMV_QUARTERPELREFINE8)) { Data->qpel_precision = 1; @@ -1296,7 +1292,7 @@ if ((y != 0)&&(x != (int)(iWcount+1))) { // [3] top-right neighbour pmv[3] = ChoosePred(pMB+1-iWcount, mode_curr); - pmv[3].x = EVEN(pmv[3].x); pmv[3].y = EVEN(pmv[3].y); + pmv[3].x = EVEN(pmv[3].x); pmv[3].y = EVEN(pmv[3].y); } else pmv[3].x = pmv[3].y = 0; if (y != 0) { @@ -1311,7 +1307,7 @@ if ((x != 0)&&(y != 0)) { pmv[6] = ChoosePred(pMB-1-iWcount, mode_curr); - pmv[6].x = EVEN(pmv[5].x); pmv[5].y = EVEN(pmv[5].y); + pmv[6].x = EVEN(pmv[6].x); pmv[6].y = EVEN(pmv[6].y); } else pmv[6].x = pmv[6].y = 0; // more? @@ -1338,7 +1334,7 @@ const int32_t iEdgedWidth = pParam->edged_width; - int i, iDirection, mask; + int i, iDirection = 255, mask; VECTOR pmv[7]; MainSearchFunc *MainSearchPtr; *Data->iMinSAD = MV_MAX_ERROR; @@ -1363,7 +1359,7 @@ CheckCandidate = CheckCandidate16no4v; // main loop. checking all predictions - for (i = 0; i < 8; i++) { + for (i = 0; i < 7; i++) { if (!(mask = make_mask(pmv, i)) ) continue; CheckCandidate16no4v(pmv[i].x, pmv[i].y, mask, &iDirection, Data); } @@ -1374,11 +1370,11 @@ MainSearchPtr = AdvDiamondSearch; else MainSearchPtr = DiamondSearch; - (*MainSearchPtr)(Data->currentMV->x, Data->currentMV->y, Data, 255); + (*MainSearchPtr)(Data->currentMV->x, Data->currentMV->y, Data, iDirection); SubpelRefine(Data); - if (Data->qpel) { + if (Data->qpel && *Data->iMinSAD < *best_sad + 300) { Data->currentQMV->x = 2*Data->currentMV->x; Data->currentQMV->y = 2*Data->currentMV->y; Data->qpel_precision = 1; @@ -1388,7 +1384,7 @@ } // three bits are needed to code backward mode. four for forward -// we treat the bits just like they were vector's + if (mode_current == MODE_FORWARD) *Data->iMinSAD += 4 * Data->lambda16; else *Data->iMinSAD += 3 * Data->lambda16; @@ -1398,9 +1394,9 @@ if (Data->qpel) { pMB->pmvs[0].x = Data->currentQMV->x - predMV->x; pMB->pmvs[0].y = Data->currentQMV->y - predMV->y; - if (mode_current == MODE_FORWARD) + if (mode_current == MODE_FORWARD) pMB->qmvs[0] = *Data->currentQMV; - else + else pMB->b_qmvs[0] = *Data->currentQMV; } else { pMB->pmvs[0].x = Data->currentMV->x - predMV->x; @@ -1414,46 +1410,28 @@ } -static void +static void SkipDecisionB(const IMAGE * const pCur, - const IMAGE * const f_Ref, - const IMAGE * const b_Ref, - MACROBLOCK * const pMB, - const uint32_t quant, - const uint32_t x, const uint32_t y, - const SearchData * const Data) + const IMAGE * const f_Ref, + const IMAGE * const b_Ref, + MACROBLOCK * const pMB, + const uint32_t x, const uint32_t y, + const SearchData * const Data) { - int dx, dy, b_dx, b_dy; + int dx = 0, dy = 0, b_dx = 0, b_dy = 0; uint32_t sum; + const int div = 1 + Data->qpel; + int k; + const uint32_t quant = pMB->quant; //this is not full chroma compensation, only it's fullpel approximation. should work though - if (Data->qpel) { - dy = Data->directmvF[0].y/2 + Data->directmvF[1].y/2 + - Data->directmvF[2].y/2 + Data->directmvF[3].y/2; - - dx = Data->directmvF[0].x/2 + Data->directmvF[1].x/2 + - Data->directmvF[2].x/2 + Data->directmvF[3].x/2; - - b_dy = Data->directmvB[0].y/2 + Data->directmvB[1].y/2 + - Data->directmvB[2].y/2 + Data->directmvB[3].y/2; - - b_dx = Data->directmvB[0].x/2 + Data->directmvB[1].x/2 + - Data->directmvB[2].x/2 + Data->directmvB[3].x/2; - - } else { - dy = Data->directmvF[0].y + Data->directmvF[1].y + - Data->directmvF[2].y + Data->directmvF[3].y; - - dx = Data->directmvF[0].x + Data->directmvF[1].x + - Data->directmvF[2].x + Data->directmvF[3].x; - - b_dy = Data->directmvB[0].y + Data->directmvB[1].y + - Data->directmvB[2].y + Data->directmvB[3].y; - b_dx = Data->directmvB[0].x + Data->directmvB[1].x + - Data->directmvB[2].x + Data->directmvB[3].x; + for (k = 0; k < 4; k++) { + dy += Data->directmvF[k].y / div; + dx += Data->directmvF[0].x / div; + b_dy += Data->directmvB[0].y / div; + b_dx += Data->directmvB[0].x / div; } - dy = (dy >> 3) + roundtab_76[dy & 0xf]; dx = (dx >> 3) + roundtab_76[dx & 0xf]; b_dy = (b_dy >> 3) + roundtab_76[b_dy & 0xf]; @@ -1463,12 +1441,15 @@ f_Ref->u + (y*8 + dy/2) * (Data->iEdgedWidth/2) + x*8 + dx/2, b_Ref->u + (y*8 + b_dy/2) * (Data->iEdgedWidth/2) + x*8 + b_dx/2, Data->iEdgedWidth/2); + + if (sum >= 2 * MAX_CHROMA_SAD_FOR_SKIP * quant) return; //no skip + sum += sad8bi(pCur->v + 8*x + 8*y*(Data->iEdgedWidth/2), f_Ref->v + (y*8 + dy/2) * (Data->iEdgedWidth/2) + x*8 + dx/2, b_Ref->v + (y*8 + b_dy/2) * (Data->iEdgedWidth/2) + x*8 + b_dx/2, Data->iEdgedWidth/2); - if (sum < 2*MAX_CHROMA_SAD_FOR_SKIP * quant) pMB->mode = MODE_DIRECT_NONE_MV; //skipped + if (sum < 2 * MAX_CHROMA_SAD_FOR_SKIP * quant) pMB->mode = MODE_DIRECT_NONE_MV; //skipped } @@ -1552,7 +1533,7 @@ // initial (fast) skip decision if (*Data->iMinSAD < pMB->quant * INITIAL_SKIP_THRESH*2) { - SkipDecisionB(pCur, f_Ref, b_Ref, pMB, x, y, Data->chroma, Data); //possible skip - checking chroma + SkipDecisionB(pCur, f_Ref, b_Ref, pMB, x, y, Data); //possible skip - checking chroma if (pMB->mode == MODE_DIRECT_NONE_MV) return *Data->iMinSAD; // skip. } @@ -1571,7 +1552,7 @@ *best_sad = *Data->iMinSAD; - if (b_mb->mode == MODE_INTER4V) pMB->mode = MODE_DIRECT; + if (b_mb->mode == MODE_INTER4V || Data->qpel) pMB->mode = MODE_DIRECT; else pMB->mode = MODE_DIRECT_NO4V; //for faster compensation pMB->pmvs[3] = *Data->currentMV; @@ -1688,6 +1669,7 @@ } while (!(iDirection)); if (fData->qpel) { + if (*fData->iMinSAD > *best_sad + 500) return; CheckCandidate = CheckCandidateInt; fData->qpel_precision = bData.qpel_precision = 1; get_range(&fData->min_dx, &fData->max_dx, &fData->min_dy, &fData->max_dy, x, y, 16, pParam->width, pParam->height, fcode, 1, 0); @@ -1697,11 +1679,12 @@ fData->currentQMV[1].x = 2 * fData->currentMV[1].x; fData->currentQMV[1].y = 2 * fData->currentMV[1].y; SubpelRefine(fData); + if (*fData->iMinSAD > *best_sad + 300) return; fData->currentQMV[2] = fData->currentQMV[0]; SubpelRefine(&bData); } - *fData->iMinSAD += (2+2) * fData->lambda16; // two bits are needed to code interpolate mode. + *fData->iMinSAD += (2+3) * fData->lambda16; // two bits are needed to code interpolate mode. if (*fData->iMinSAD < *best_sad) { *best_sad = *fData->iMinSAD; @@ -1753,7 +1736,6 @@ const int32_t TRB = time_pp - time_bp; const int32_t TRD = time_pp; - uint8_t * qimage; // some pre-inintialized data for the rest of the search @@ -1770,12 +1752,7 @@ Data.qpel = pParam->m_quarterpel; Data.rounding = 0; - if((qimage = (uint8_t *) malloc(32 * pParam->edged_width)) == NULL) - return; // allocate some mem for qpel interpolated blocks - // somehow this is dirty since I think we shouldn't use malloc outside - // encoder_create() - so please fix me! - Data.RefQ = qimage; - + Data.RefQ = f_refV->u; // a good place, also used in MC (for similar purpose) // note: i==horizontal, j==vertical for (j = 0; j < pParam->mb_height; j++) { @@ -1841,7 +1818,7 @@ // final skip decision if ( (skip_sad < frame->quant * MAX_SAD00_FOR_SKIP*2) && ((100*best_sad)/(skip_sad+1) > FINAL_SKIP_THRESH) ) - SkipDecisionB(&frame->image, f_ref, b_ref, pMB,frame->quant, i, j, &Data); + SkipDecisionB(&frame->image, f_ref, b_ref, pMB, i, j, &Data); switch (pMB->mode) { case MODE_FORWARD: @@ -1859,7 +1836,7 @@ if (Data.qpel) { f_predMV = pMB->qmvs[0]; b_predMV = pMB->b_qmvs[0]; - } else { + } else { f_predMV = pMB->mvs[0]; b_predMV = pMB->b_mvs[0]; } @@ -1872,7 +1849,6 @@ } } } - free(qimage); } static __inline void @@ -1896,7 +1872,7 @@ else if (x == 1) //left macroblock does not have any vector now Data->predMV = (pMB - pParam->mb_width)->mvs[0]; // top instead of median - else if (y == 1) // top macroblock doesn't have it's vector + else if (y == 1) // top macroblock doesn't have it's vector Data->predMV = (pMB - 1)->mvs[0]; // left instead of median else Data->predMV = get_pmv2(pMBs, pParam->mb_width, 0, x, y, 0); //else median @@ -2007,7 +1983,7 @@ } static void -CheckGMC(int x, int y, const int dir, int * iDirection, +CheckGMC(int x, int y, const int dir, int * iDirection, const MACROBLOCK * const pMBs, uint32_t * bestcount, VECTOR * GMC, const MBParam * const pParam) { @@ -2026,7 +2002,7 @@ if (count > *bestcount) { *bestcount = count; *iDirection = dir; - GMC->x = x; GMC->y = y; + GMC->x = x; GMC->y = y; } } @@ -2057,7 +2033,7 @@ const MACROBLOCK *pMB = &pMBs[mx + my * pParam->mb_width]; VECTOR mv; - if (pMB->mode == MODE_INTRA || pMB->mode == MODE_NOT_CODED) + if (pMB->mode == MODE_INTRA || pMB->mode == MODE_NOT_CODED) continue; mv = pMB->mvs[0]; @@ -2070,13 +2046,12 @@ max_x = gmc.x + step; min_y = gmc.y - step; max_y = gmc.y + step; - } - if (bestcount < (pParam->mb_height-2)*(pParam->mb_width-2)/10) + if (bestcount < (pParam->mb_height-2)*(pParam->mb_width-2)/10) gmc.x = gmc.y = 0; //no camara pan, no GMC -// step2: let's refine camera panning using gradiend-descent approach. +// step2: let's refine camera panning using gradiend-descent approach // TODO: more warping points may be evaluated here (like in interpolate mode search - two vectors in one diamond) bestcount = 0; CheckGMC(gmc.x, gmc.y, 255, &iDirection, pMBs, &bestcount, &gmc, pParam);