--- trunk/xvidcore/src/motion/estimation_bvop.c 2004/03/22 22:36:25 1382 +++ trunk/xvidcore/src/motion/estimation_bvop.c 2004/05/26 09:15:02 1456 @@ -21,7 +21,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: estimation_bvop.c,v 1.2 2004-03-22 22:36:24 edgomez Exp $ + * $Id: estimation_bvop.c,v 1.5 2004-05-26 09:15:02 edgomez Exp $ * ****************************************************************************/ @@ -46,6 +46,7 @@ int sad; const uint32_t stride = data->iEdgedWidth/2; uint8_t *f_refu, *f_refv, *b_refu, *b_refv; + int offset, filter; const INTERPOLATE8X8_PTR interpolate8x8_halfpel[] = { NULL, @@ -53,19 +54,26 @@ interpolate8x8_halfpel_h, interpolate8x8_halfpel_hv }; + + if (data->chromaX == fx && data->chromaY == fy && + data->b_chromaX == bx && data->b_chromaY == by) + return data->chromaSAD; - int offset = (fx>>1) + (fy>>1)*stride; - int filter = ((fx & 1) << 1) | (fy & 1); + offset = (fx>>1) + (fy>>1)*stride; + filter = ((fx & 1) << 1) | (fy & 1); if (filter != 0) { f_refu = data->RefQ; f_refv = data->RefQ + 8; - interpolate8x8_halfpel[filter](f_refu, data->RefP[4] + offset, stride, data->rounding); - interpolate8x8_halfpel[filter](f_refv, data->RefP[5] + offset, stride, data->rounding); + if (data->chromaX != fx || data->chromaY != fy) { + interpolate8x8_halfpel[filter](f_refu, data->RefP[4] + offset, stride, data->rounding); + interpolate8x8_halfpel[filter](f_refv, data->RefP[5] + offset, stride, data->rounding); + } } else { f_refu = (uint8_t*)data->RefP[4] + offset; f_refv = (uint8_t*)data->RefP[5] + offset; } + data->chromaX = fx; data->chromaY = fy; offset = (bx>>1) + (by>>1)*stride; filter = ((bx & 1) << 1) | (by & 1); @@ -73,16 +81,20 @@ if (filter != 0) { b_refu = data->RefQ + 16; b_refv = data->RefQ + 24; - interpolate8x8_halfpel[filter](b_refu, data->b_RefP[4] + offset, stride, data->rounding); - interpolate8x8_halfpel[filter](b_refv, data->b_RefP[5] + offset, stride, data->rounding); + if (data->b_chromaX != bx || data->b_chromaY != by) { + interpolate8x8_halfpel[filter](b_refu, data->b_RefP[4] + offset, stride, data->rounding); + interpolate8x8_halfpel[filter](b_refv, data->b_RefP[5] + offset, stride, data->rounding); + } } else { b_refu = (uint8_t*)data->b_RefP[4] + offset; b_refv = (uint8_t*)data->b_RefP[5] + offset; } + data->b_chromaX = bx; data->b_chromaY = by; sad = sad8bi(data->CurU, b_refu, f_refu, stride); sad += sad8bi(data->CurV, b_refv, f_refv, stride); + data->chromaSAD = sad; return sad; } @@ -91,7 +103,7 @@ { int32_t sad, xf, yf, xb, yb, xcf, ycf, xcb, ycb; uint32_t t; - + const uint8_t *ReferenceF, *ReferenceB; VECTOR *current; @@ -145,68 +157,6 @@ } static void -CheckCandidateInt_qpel(const int x, const int y, SearchData * const data, const unsigned int Direction) -{ - int32_t sad, xf, yf, xb, yb, xcf, ycf, xcb, ycb; - uint32_t t; - - const uint8_t *ReferenceF, *ReferenceB; - VECTOR *current; - - if ((x > data->max_dx) || (x < data->min_dx) || - (y > data->max_dy) || (y < data->min_dy)) - return; - - if (Direction == 1) { /* x and y mean forward vector */ - VECTOR backward = data->qpel_precision ? data->currentQMV[1] : data->currentMV[1]; - xb = backward.x; - yb = backward.y; - xf = x; yf = y; - } else { /* x and y mean backward vector */ - VECTOR forward = data->qpel_precision ? data->currentQMV[0] : data->currentMV[0]; - xf = forward.x; - yf = forward.y; - xb = x; yb = y; - } - - ReferenceF = xvid_me_interpolate16x16qpel(xf, yf, 0, data); - current = data->currentQMV + Direction - 1; - ReferenceB = xvid_me_interpolate16x16qpel(xb, yb, 1, data); - xcf = xf/2; ycf = yf/2; - xcb = xb/2; ycb = yb/2; - - t = d_mv_bits(xf, yf, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0) - + d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision, 0); - - sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); - sad += (data->lambda16 * t * sad)>>10; - - if (data->chroma && sad < *data->iMinSAD) - sad += ChromaSAD2((xcf >> 1) + roundtab_79[xcf & 0x3], - (ycf >> 1) + roundtab_79[ycf & 0x3], - (xcb >> 1) + roundtab_79[xcb & 0x3], - (ycb >> 1) + roundtab_79[ycb & 0x3], data); - - if (sad < *(data->iMinSAD)) { - *data->iMinSAD = sad; - current->x = x; current->y = y; - data->dir = Direction; - } - - if (sad < *(data->iMinSAD)) { - data->iMinSAD2 = *(data->iMinSAD); - data->currentQMV2.x = current->x; - data->currentQMV2.y = current->y; - - *data->iMinSAD = sad; - current->x = x; current->y = y; - } else if (sad < data->iMinSAD2) { - data->iMinSAD2 = sad; - data->currentQMV2.x = x; data->currentQMV2.y = y; - } -} - -static void CheckCandidateDirect(const int x, const int y, SearchData * const data, const unsigned int Direction) { int32_t sad = 0, xcf = 0, ycf = 0, xcb = 0, ycb = 0; @@ -247,8 +197,9 @@ ReferenceF = xvid_me_interpolate8x8qpel(mvs.x, mvs.y, k, 0, data); ReferenceB = xvid_me_interpolate8x8qpel(b_mvs.x, b_mvs.y, k, 1, data); - sad += sad8bi(data->Cur + 8*(k&1) + 8*(k>>1)*(data->iEdgedWidth), - ReferenceF, ReferenceB, data->iEdgedWidth); + sad += data->iMinSAD[k+1] = + sad8bi(data->Cur + 8*(k&1) + 8*(k>>1)*(data->iEdgedWidth), + ReferenceF, ReferenceB, data->iEdgedWidth); if (sad > *(data->iMinSAD)) return; } @@ -359,44 +310,6 @@ } } -void -CheckCandidate16no4v_qpel(const int x, const int y, SearchData * const data, const unsigned int Direction) -{ - int32_t sad, xc, yc; - const uint8_t * Reference; - uint32_t t; - - if ( (x > data->max_dx) || ( x < data->min_dx) - || (y > data->max_dy) || (y < data->min_dy) ) return; - - if (data->rrv && (!(x&1) && x !=0) | (!(y&1) && y !=0) ) return; /* non-zero even value */ - - Reference = xvid_me_interpolate16x16qpel(x, y, 0, data); - - xc = x/2; yc = y/2; - t = d_mv_bits(x, y, data->predMV, data->iFcode, - data->qpel^data->qpel_precision, data->rrv); - - sad = sad16(data->Cur, Reference, data->iEdgedWidth, 256*4096); - sad += (data->lambda16 * t * sad)>>10; - - if (data->chroma && sad < *data->iMinSAD) - sad += xvid_me_ChromaSAD((xc >> 1) + roundtab_79[xc & 0x3], - (yc >> 1) + roundtab_79[yc & 0x3], data); - - if (sad < *(data->iMinSAD)) { - data->iMinSAD2 = *(data->iMinSAD); - data->currentQMV2.x = data->currentQMV->x; - data->currentQMV2.y = data->currentQMV->y; - - data->iMinSAD[0] = sad; - data->currentQMV[0].x = x; data->currentQMV[0].y = y; - } else if (sad < data->iMinSAD2) { - data->iMinSAD2 = sad; - data->currentQMV2.x = x; data->currentQMV2.y = y; - } -} - static __inline VECTOR ChoosePred(const MACROBLOCK * const pMB, const uint32_t mode) { @@ -501,20 +414,37 @@ MainSearchPtr(Data->currentMV->x, Data->currentMV->y, Data, mask, CheckCandidate16no4v); } - xvid_me_SubpelRefine(Data, CheckCandidate16no4v); - if (Data->qpel && (*Data->iMinSAD < *best_sad + threshA)) { - Data->currentQMV->x = 2*Data->currentMV->x; - Data->currentQMV->y = 2*Data->currentMV->y; - Data->qpel_precision = 1; - get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, - pParam->width, pParam->height, iFcode, 2, 0); - if (MotionFlags & XVID_ME_QUARTERPELREFINE16) { - if (MotionFlags & XVID_ME_FASTREFINE16) - SubpelRefine_Fast(Data, CheckCandidate16no4v_qpel); - else - xvid_me_SubpelRefine(Data, CheckCandidate16no4v); + + if(!Data->qpel) { + /* halfpel mode */ + if (MotionFlags & XVID_ME_HALFPELREFINE16) + xvid_me_SubpelRefine(Data, CheckCandidate16no4v, 0); + } else { + /* qpel mode */ + if(MotionFlags & XVID_ME_FASTREFINE16) { + /* fast */ + get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, + pParam->width, pParam->height, Data->iFcode, 2, 0); + if (*Data->iMinSAD < *best_sad + 2*threshA) + FullRefine_Fast(Data, CheckCandidate16no4v, 0); + } else { + Data->currentQMV->x = 2*Data->currentMV->x; + Data->currentQMV->y = 2*Data->currentMV->y; + if(MotionFlags & XVID_ME_QUARTERPELREFINE16) { + /* full */ + if (MotionFlags & XVID_ME_HALFPELREFINE16) { + xvid_me_SubpelRefine(Data, CheckCandidate16no4v, 0); /* hpel part */ + Data->currentQMV->x = 2*Data->currentMV->x; + Data->currentQMV->y = 2*Data->currentMV->y; + } + get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, + pParam->width, pParam->height, Data->iFcode, 2, 0); + Data->qpel_precision = 1; + if (*Data->iMinSAD < *best_sad + threshA) + xvid_me_SubpelRefine(Data, CheckCandidate16no4v, 0); /* qpel part */ + } } } @@ -640,6 +570,7 @@ Data->max_dy = k * (pParam->height - y * 16); Data->min_dx = -k * (16 + x * 16); Data->min_dy = -k * (16 + y * 16); + Data->chromaX = Data->chromaY = Data->b_chromaX = Data->b_chromaY = 256*4096; Data->referencemv = Data->qpel ? b_mb->qmvs : b_mb->mvs; Data->qpel_precision = 0; @@ -667,19 +598,23 @@ } } - CheckCandidate = b_mb->mode == MODE_INTER4V ? CheckCandidateDirect : CheckCandidateDirectno4v; - - CheckCandidate(0, 0, Data, 255); + CheckCandidateDirect(0, 0, Data, 255); /* will also fill iMinSAD[1..4] with 8x8 SADs */ /* initial (fast) skip decision */ - if (*Data->iMinSAD < (int)Data->iQuant * INITIAL_SKIP_THRESH) { + if (Data->iMinSAD[1] < (int)Data->iQuant * INITIAL_SKIP_THRESH + && Data->iMinSAD[2] < (int)Data->iQuant * INITIAL_SKIP_THRESH + && Data->iMinSAD[3] < (int)Data->iQuant * INITIAL_SKIP_THRESH + && Data->iMinSAD[4] < (int)Data->iQuant * INITIAL_SKIP_THRESH) { /* possible skip */ SkipDecisionB(pCur, f_Ref, b_Ref, pMB, x, y, Data); - if (pMB->mode == MODE_DIRECT_NONE_MV) return *Data->iMinSAD; /* skipped */ + if (pMB->mode == MODE_DIRECT_NONE_MV) + return *Data->iMinSAD; /* skipped */ } - *Data->iMinSAD += Data->lambda16; - skip_sad = *Data->iMinSAD; + skip_sad = 4*MAX(MAX(Data->iMinSAD[1],Data->iMinSAD[2]), MAX(Data->iMinSAD[3],Data->iMinSAD[4])); + if (Data->chroma) skip_sad += Data->chromaSAD; + + CheckCandidate = b_mb->mode == MODE_INTER4V ? CheckCandidateDirect : CheckCandidateDirectno4v; if (!(MotionFlags & XVID_ME_SKIP_DELTASEARCH)) { if (MotionFlags & XVID_ME_USESQUARES16) MainSearchPtr = xvid_me_SquareSearch; @@ -688,9 +623,10 @@ MainSearchPtr(0, 0, Data, 255, CheckCandidate); - xvid_me_SubpelRefine(Data, CheckCandidate); + xvid_me_SubpelRefine(Data, CheckCandidate, 0); } + *Data->iMinSAD += Data->lambda16; *best_sad = *Data->iMinSAD; if (Data->qpel || b_mb->mode == MODE_INTER4V) pMB->mode = MODE_DIRECT; @@ -735,108 +671,6 @@ } static void -SubpelRefine_dir(SearchData * const data, CheckFunc * const CheckCandidate, const int dir) -{ -/* Do a half-pel or q-pel refinement */ - const VECTOR centerMV = data->qpel_precision ? - data->currentQMV[dir-1] : data->currentMV[dir-1]; - - CHECK_CANDIDATE(centerMV.x, centerMV.y - 1, dir); - CHECK_CANDIDATE(centerMV.x + 1, centerMV.y - 1, dir); - CHECK_CANDIDATE(centerMV.x + 1, centerMV.y, dir); - CHECK_CANDIDATE(centerMV.x + 1, centerMV.y + 1, dir); - CHECK_CANDIDATE(centerMV.x, centerMV.y + 1, dir); - CHECK_CANDIDATE(centerMV.x - 1, centerMV.y + 1, dir); - CHECK_CANDIDATE(centerMV.x - 1, centerMV.y, dir); - CHECK_CANDIDATE(centerMV.x - 1, centerMV.y - 1, dir); -} - -/* Pretty much redundant code, just as SubpelRefine_dir above too - * - * TODO: Get rid off all the redundancy (SubpelRefine_Fast_dir, - * CheckCandidate16no4v_qpel etc.) */ - -void -SubpelRefine_Fast_dir(SearchData * data, CheckFunc * CheckCandidate, const int dir) -{ -/* Do a fast q-pel refinement */ - VECTOR centerMV; - VECTOR second_best; - int best_sad = *data->iMinSAD; - int xo, yo, xo2, yo2; - int size = 2; - data->iMinSAD2 = 0; - - /* check all halfpixel positions near our best halfpel position */ - centerMV = data->currentQMV[dir-1]; - *data->iMinSAD = 256 * 4096; - - CHECK_CANDIDATE(centerMV.x, centerMV.y - size, dir); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y - size, dir); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y, dir); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y + size, dir); - - CHECK_CANDIDATE(centerMV.x, centerMV.y + size, dir); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y + size, dir); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y, dir); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y - size, dir); - - second_best = data->currentQMV[dir-1]; - - /* after second_best has been found, go back to the vector we began with */ - - data->currentQMV[dir-1] = centerMV; - *data->iMinSAD = best_sad; - - xo = centerMV.x; - yo = centerMV.y; - xo2 = second_best.x; - yo2 = second_best.y; - - data->iMinSAD2 = 256 * 4096; - - if (yo == yo2) { - CHECK_CANDIDATE((xo+xo2)>>1, yo, dir); - CHECK_CANDIDATE(xo, yo-1, dir); - CHECK_CANDIDATE(xo, yo+1, dir); - - if(best_sad <= data->iMinSAD2) return; - - if(data->currentQMV[dir-1].x == data->currentQMV2.x) { - CHECK_CANDIDATE((xo+xo2)>>1, yo-1, dir); - CHECK_CANDIDATE((xo+xo2)>>1, yo+1, dir); - } else { - CHECK_CANDIDATE((xo+xo2)>>1, - (data->currentQMV[dir-1].x == xo) ? data->currentQMV[dir-1].y : data->currentQMV2.y, dir); - } - return; - } - - if (xo == xo2) { - CHECK_CANDIDATE(xo, (yo+yo2)>>1, dir); - CHECK_CANDIDATE(xo-1, yo, dir); - CHECK_CANDIDATE(xo+1, yo, dir); - - if(best_sad < data->iMinSAD2) return; - - if(data->currentQMV[dir-1].y == data->currentQMV2.y) { - CHECK_CANDIDATE(xo-1, (yo+yo2)>>1, dir); - CHECK_CANDIDATE(xo+1, (yo+yo2)>>1, dir); - } else { - CHECK_CANDIDATE((data->currentQMV[dir-1].y == yo) ? data->currentQMV[dir-1].x : data->currentQMV2.x, (yo+yo2)>>1, dir); - } - return; - } - - CHECK_CANDIDATE(xo, (yo+yo2)>>1, dir); - CHECK_CANDIDATE((xo+xo2)>>1, yo, dir); - - if(best_sad <= data->iMinSAD2) return; - - CHECK_CANDIDATE((xo+xo2)>>1, (yo+yo2)>>1, dir); -} - -static void SearchInterpolate(const IMAGE * const f_Ref, const uint8_t * const f_RefH, const uint8_t * const f_RefV, @@ -881,6 +715,8 @@ Data->b_RefP[4] = b_Ref->u + (x + (Data->iEdgedWidth/2) * y) * 8; Data->b_RefP[5] = b_Ref->v + (x + (Data->iEdgedWidth/2) * y) * 8; + Data->chromaX = Data->chromaY = 256*4096; + Data->predMV = *f_predMV; Data->bpredMV = *b_predMV; @@ -938,22 +774,14 @@ Data->currentQMV[1].x = 2 * Data->currentMV[1].x; Data->currentQMV[1].y = 2 * Data->currentMV[1].y; - if (MotionFlags & XVID_ME_QUARTERPELREFINE16) { - if (MotionFlags & XVID_ME_FASTREFINE16) - SubpelRefine_Fast_dir(Data, CheckCandidateInt_qpel, 1); - else - SubpelRefine_dir(Data, CheckCandidateInt, 1); - } + if (MotionFlags & XVID_ME_QUARTERPELREFINE16) + xvid_me_SubpelRefine(Data, CheckCandidateInt, 1); if (*Data->iMinSAD > *best_sad + threshB) return; get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, pParam->width, pParam->height, bcode, 2, 0); - if (MotionFlags & XVID_ME_QUARTERPELREFINE16) { - if (MotionFlags & XVID_ME_FASTREFINE16) - SubpelRefine_Fast_dir(Data, CheckCandidateInt_qpel, 2); - else - SubpelRefine_dir(Data, CheckCandidateInt, 2); - } + if (MotionFlags & XVID_ME_QUARTERPELREFINE16) + xvid_me_SubpelRefine(Data, CheckCandidateInt, 2); } *Data->iMinSAD += 2 * Data->lambda16; /* two bits are needed to code interpolate mode. */ @@ -1042,9 +870,8 @@ pMB->sad16 = 0; continue; } - - Data.lambda16 = xvid_me_lambda_vec16[b_mb->quant]; + Data.lambda16 = xvid_me_lambda_vec16[b_mb->quant]; Data.Cur = frame->image.y + (j * Data.iEdgedWidth + i) * 16; Data.CurU = frame->image.u + (j * Data.iEdgedWidth/2 + i) * 8; Data.CurV = frame->image.v + (j * Data.iEdgedWidth/2 + i) * 8; @@ -1141,7 +968,7 @@ } /* final skip decision */ - if ( (skip_sad < Data.iQuant * MAX_SAD00_FOR_SKIP * (Data.chroma ? 3:2) ) + if ( (skip_sad < Data.iQuant * MAX_SAD00_FOR_SKIP ) && ((100*best_sad)/(skip_sad+1) > FINAL_SKIP_THRESH) ) SkipDecisionB(&frame->image, f_ref, b_ref, pMB, i, j, &Data);