--- branches/dev-api-4/xvidcore/src/motion/estimation_pvop.c 2003/10/28 23:47:29 1194 +++ branches/dev-api-4/xvidcore/src/motion/estimation_pvop.c 2003/12/18 21:31:32 1280 @@ -21,7 +21,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: estimation_pvop.c,v 1.1.2.5 2003-10-28 23:47:29 Isibaar Exp $ + * $Id: estimation_pvop.c,v 1.1.2.13 2003-12-18 21:31:32 Isibaar Exp $ * ****************************************************************************/ @@ -60,7 +60,7 @@ }; static void -CheckCandidate16(const int x, const int y, const SearchData * const data, const unsigned int Direction) +CheckCandidate16(const int x, const int y, SearchData * const data, const unsigned int Direction) { const uint8_t * Reference; int32_t sad; uint32_t t; @@ -85,7 +85,7 @@ if (sad < data->iMinSAD[0]) { data->iMinSAD[0] = sad; data->currentMV[0].x = x; data->currentMV[0].y = y; - *data->dir = Direction; + data->dir = Direction; } no16: @@ -100,7 +100,7 @@ } static void -CheckCandidate16_qpel(const int x, const int y, const SearchData * const data, const unsigned int Direction) +CheckCandidate16_qpel(const int x, const int y, SearchData * const data, const unsigned int Direction) { const uint8_t *Reference; int32_t sad; uint32_t t; @@ -116,7 +116,7 @@ sad += (data->lambda16 * t * sad)>>10; data->temp[0] += (data->lambda8 * t * (data->temp[0] + NEIGH_8X8_BIAS))>>10; - if (data->chroma && (sad < data->iMinSAD[0] || sad < data->iMinSAD2[0]) ) + if (data->chroma && (sad < data->iMinSAD[0] || sad < data->iMinSAD2) ) sad += xvid_me_ChromaSAD(((x/2) >> 1) + roundtab_79[(x/2) & 0x3], ((y/2) >> 1) + roundtab_79[(y/2) & 0x3], data); @@ -130,20 +130,20 @@ data->iMinSAD[4] = data->temp[3]; data->currentQMV[4].x = x; data->currentQMV[4].y = y; } if (sad < data->iMinSAD[0]) { - *(data->iMinSAD2) = *(data->iMinSAD); - data->currentQMV2->x = data->currentQMV->x; - data->currentQMV2->y = data->currentQMV->y; 
+ data->iMinSAD2 = *(data->iMinSAD); + data->currentQMV2.x = data->currentQMV->x; + data->currentQMV2.y = data->currentQMV->y; data->iMinSAD[0] = sad; data->currentQMV[0].x = x; data->currentQMV[0].y = y; - } else if (sad < *(data->iMinSAD2)) { - *(data->iMinSAD2) = sad; - data->currentQMV2->x = x; data->currentQMV2->y = y; + } else if (sad < data->iMinSAD2) { + data->iMinSAD2 = sad; + data->currentQMV2.x = x; data->currentQMV2.y = y; } } static void -CheckCandidate8(const int x, const int y, const SearchData * const data, const unsigned int Direction) +CheckCandidate8(const int x, const int y, SearchData * const data, const unsigned int Direction) { int32_t sad; uint32_t t; const uint8_t * Reference; @@ -168,12 +168,45 @@ if (sad < *(data->iMinSAD)) { *(data->iMinSAD) = sad; current->x = x; current->y = y; - *data->dir = Direction; + data->dir = Direction; } } static void -CheckCandidate32(const int x, const int y, const SearchData * const data, const unsigned int Direction) +CheckCandidate8_qpel(const int x, const int y, SearchData * const data, const unsigned int Direction) +{ + int32_t sad; uint32_t t; + const uint8_t * Reference; + VECTOR * current; + + if ( (x > data->max_dx) || (x < data->min_dx) + || (y > data->max_dy) || (y < data->min_dy) ) return; + + /* x and y are in 1/4 precision */ + Reference = xvid_me_interpolate8x8qpel(x, y, 0, 0, data); + current = data->currentQMV; + + sad = sad8(data->Cur, Reference, data->iEdgedWidth); + t = d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0); + + sad += (data->lambda8 * t * (sad+NEIGH_8X8_BIAS))>>10; + + if (sad < *(data->iMinSAD)) { + data->iMinSAD2 = *(data->iMinSAD); + data->currentQMV2.x = data->currentQMV->x; + data->currentQMV2.y = data->currentQMV->y; + + *(data->iMinSAD) = sad; + data->currentQMV->x = x; data->currentQMV->y = y; + data->dir = Direction; + } else if (sad < data->iMinSAD2) { + data->iMinSAD2 = sad; + data->currentQMV2.x = x; data->currentQMV2.y = y; + } +} + 
+static void +CheckCandidate32(const int x, const int y, SearchData * const data, const unsigned int Direction) { uint32_t t; const uint8_t * Reference; @@ -194,7 +227,7 @@ if (sad < data->iMinSAD[0]) { data->iMinSAD[0] = sad; data->currentMV[0].x = x; data->currentMV[0].y = y; - *data->dir = Direction; + data->dir = Direction; } if (data->temp[0] < data->iMinSAD[1]) { @@ -207,86 +240,6 @@ data->iMinSAD[4] = data->temp[3]; data->currentMV[4].x = x; data->currentMV[4].y = y; } } -static void -SubpelRefine_Fast(SearchData * data, CheckFunc * CheckCandidate) -{ -/* Do a fast q-pel refinement */ - VECTOR centerMV; - VECTOR second_best; - int best_sad = *data->iMinSAD; - int xo, yo, xo2, yo2; - int size = 2; - *data->iMinSAD2 = 0; - - /* check all halfpixel positions near our best halfpel position */ - centerMV = *data->currentQMV; - *data->iMinSAD = 256 * 4096; - - CHECK_CANDIDATE(centerMV.x, centerMV.y - size, 0); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y - size, 0); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y, 0); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y + size, 0); - - CHECK_CANDIDATE(centerMV.x, centerMV.y + size, 0); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y + size, 0); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y, 0); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y - size, 0); - - second_best = *data->currentQMV; - - /* after second_best has been found, go back to the vector we began with */ - - data->currentQMV[0] = centerMV; - *data->iMinSAD = best_sad; - - xo = centerMV.x; - yo = centerMV.y; - xo2 = second_best.x; - yo2 = second_best.y; - - *data->iMinSAD2 = 256 * 4096; - - if (yo == yo2) { - CHECK_CANDIDATE((xo+xo2)>>1, yo, 0); - CHECK_CANDIDATE(xo, yo-1, 0); - CHECK_CANDIDATE(xo, yo+1, 0); - - if(best_sad <= *data->iMinSAD2) return; - - if(data->currentQMV[0].x == data->currentQMV2[0].x) { - CHECK_CANDIDATE((xo+xo2)>>1, yo-1, 0); - CHECK_CANDIDATE((xo+xo2)>>1, yo+1, 0); - } else { - CHECK_CANDIDATE((xo+xo2)>>1, - 
(data->currentQMV[0].x == xo) ? data->currentQMV[0].y : data->currentQMV2[0].y, 0); - } - return; - } - - if (xo == xo2) { - CHECK_CANDIDATE(xo, (yo+yo2)>>1, 0); - CHECK_CANDIDATE(xo-1, yo, 0); - CHECK_CANDIDATE(xo+1, yo, 0); - - if(best_sad < *data->iMinSAD2) return; - - if(data->currentQMV[0].y == data->currentQMV2[0].y) { - CHECK_CANDIDATE(xo-1, (yo+yo2)>>1, 0); - CHECK_CANDIDATE(xo+1, (yo+yo2)>>1, 0); - } else { - CHECK_CANDIDATE((data->currentQMV[0].y == yo) ? data->currentQMV[0].x : data->currentQMV2[0].x, (yo+yo2)>>1, 0); - } - return; - } - - CHECK_CANDIDATE(xo, (yo+yo2)>>1, 0); - CHECK_CANDIDATE((xo+xo2)>>1, yo, 0); - - if(best_sad <= *data->iMinSAD2) return; - - CHECK_CANDIDATE((xo+xo2)>>1, (yo+yo2)>>1, 0); -} - int xvid_me_SkipDecisionP(const IMAGE * current, const IMAGE * reference, const int x, const int y, @@ -479,7 +432,7 @@ /* intra decision */ - if (iQuant > 8) InterBias += 100 * (iQuant - 8); /* to make high quants work */ + if (iQuant > 10) InterBias += 60 * (iQuant - 10); /* to make high quants work */ if (y != 0) if ((pMB - pParam->mb_width)->mode == MODE_INTRA ) InterBias -= 80; if (x != 0) @@ -581,7 +534,7 @@ } static void -Search8(const SearchData * const OldData, +Search8(SearchData * const OldData, const int x, const int y, const uint32_t MotionFlags, const MBParam * const pParam, @@ -592,9 +545,9 @@ { int i = 0; CheckFunc * CheckCandidate; - Data->iMinSAD = OldData->iMinSAD + 1 + block; - Data->currentMV = OldData->currentMV + 1 + block; - Data->currentQMV = OldData->currentQMV + 1 + block; + *Data->iMinSAD = *(OldData->iMinSAD + 1 + block); + *Data->currentMV = *(OldData->currentMV + 1 + block); + *Data->currentQMV = *(OldData->currentQMV + 1 + block); if(Data->qpel) { Data->predMV = get_qpmv2(pMBs, pParam->mb_width, 0, x/2, y/2, block); @@ -653,11 +606,15 @@ } } - if (Data->qpel && MotionFlags & XVID_ME_QUARTERPELREFINE8) { + if ((Data->qpel && MotionFlags & XVID_ME_QUARTERPELREFINE8)) { Data->qpel_precision = 1; 
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 3, pParam->width, pParam->height, Data->iFcode, 2, 0); - xvid_me_SubpelRefine(Data, CheckCandidate); + + if((MotionFlags & XVID_ME_FASTREFINE8) && (!Data->rrv)) + SubpelRefine_Fast(Data, CheckCandidate8_qpel); + else + xvid_me_SubpelRefine(Data, CheckCandidate); } } @@ -675,6 +632,10 @@ pMB->pmvs[block].y = Data->currentMV->y - Data->predMV.y; } + *(OldData->iMinSAD + 1 + block) = *Data->iMinSAD; + *(OldData->currentMV + 1 + block) = *Data->currentMV; + *(OldData->currentQMV + 1 + block) = *Data->currentQMV; + pMB->mvs[block] = *Data->currentMV; pMB->sad8[block] = 4 * *Data->iMinSAD; } @@ -708,7 +669,7 @@ get_pmvdata2(pMBs, pParam->mb_width, 0, x, y, pmv, Data->temp); - Data->temp[5] = Data->temp[6] = 0; /* chroma-sad cache */ + Data->chromaX = Data->chromaY = 0; /* chroma-sad cache */ i = Data->rrv ? 2 : 1; Data->Cur = pCur->y + (x + y * Data->iEdgedWidth) * 16*i; Data->CurV = pCur->v + (x + y * (Data->iEdgedWidth/2)) * 8*i; @@ -724,7 +685,7 @@ Data->lambda16 = xvid_me_lambda_vec16[pMB->quant]; Data->lambda8 = xvid_me_lambda_vec8[pMB->quant]; Data->qpel_precision = 0; - *Data->dir = 0; + Data->dir = 0; memset(Data->currentMV, 0, 5*sizeof(VECTOR)); @@ -768,7 +729,7 @@ else { MainSearchFunc * MainSearchPtr; - int mask = make_mask(pmv, i, *Data->dir); /* all vectors pmv[0..i-1] have been checked */ + int mask = make_mask(pmv, i, Data->dir); /* all vectors pmv[0..i-1] have been checked */ if (MotionFlags & XVID_ME_USESQUARES16) MainSearchPtr = xvid_me_SquareSearch; else if (MotionFlags & XVID_ME_ADVANCEDDIAMOND16) MainSearchPtr = xvid_me_AdvDiamondSearch; @@ -794,7 +755,7 @@ bSAD = Data->iMinSAD[0]; Data->iMinSAD[0] = MV_MAX_ERROR; CheckCandidate(startMV.x, startMV.y, Data, 255); - MainSearchPtr(startMV.x, startMV.y, Data, 255, CheckCandidate); + xvid_me_DiamondSearch(startMV.x, startMV.y, Data, 255, CheckCandidate); if (bSAD < Data->iMinSAD[0]) { Data->currentMV[0] = backupMV; 
Data->iMinSAD[0] = bSAD; } @@ -806,7 +767,7 @@ bSAD = Data->iMinSAD[0]; Data->iMinSAD[0] = MV_MAX_ERROR; CheckCandidate(startMV.x, startMV.y, Data, 255); - MainSearchPtr(startMV.x, startMV.y, Data, 255, CheckCandidate); + xvid_me_DiamondSearch(startMV.x, startMV.y, Data, 255, CheckCandidate); if (bSAD < Data->iMinSAD[0]) { Data->currentMV[0] = backupMV; Data->iMinSAD[0] = bSAD; @@ -835,7 +796,7 @@ } } - if (Data->iMinSAD[0] < (int32_t)pMB->quant * 30) + if (Data->iMinSAD[0] < (int32_t)pMB->quant * 30*((MotionFlags & XVID_ME_FASTREFINE16) ? 8 : 1)) inter4v = 0; if (inter4v) { @@ -933,23 +894,10 @@ (current->vop_flags & XVID_VOP_MODEDECISION_RD ? 2:1); /* some pre-initialized thingies for SearchP */ - int32_t temp[8]; uint32_t dir; - VECTOR currentMV[5]; - VECTOR currentQMV[5]; - VECTOR currentQMV2; - int32_t iMinSAD[5]; - int32_t iMinSAD2; DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); SearchData Data; memset(&Data, 0, sizeof(SearchData)); Data.iEdgedWidth = iEdgedWidth; - Data.currentMV = currentMV; - Data.currentQMV = currentQMV; - Data.currentQMV2 = &currentQMV2; - Data.iMinSAD = iMinSAD; - Data.iMinSAD2 = &iMinSAD2; - Data.temp = temp; - Data.dir = &dir; Data.iFcode = current->fcode; Data.rounding = pParam->m_rounding_type; Data.qpel = (current->vol_flags & XVID_VOL_QUARTERPEL ? 1:0); @@ -957,6 +905,8 @@ Data.rrv = (current->vop_flags & XVID_VOP_REDUCED) ? 
1:0; Data.dctSpace = dct_space; Data.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); + Data.mpeg_quant_matrices = pParam->mpeg_quant_matrices; + Data.iMinSAD2 = 0; if ((current->vop_flags & XVID_VOP_REDUCED)) { mb_width = (pParam->width + 31) / 32; @@ -983,11 +933,11 @@ pParam->edged_width, pMB->sad8 ); if (Data.chroma) { - Data.temp[7] = sad8(pCurrent->u + x*8 + y*(iEdgedWidth/2)*8, + Data.chromaSAD = sad8(pCurrent->u + x*8 + y*(iEdgedWidth/2)*8, pRef->u + x*8 + y*(iEdgedWidth/2)*8, iEdgedWidth/2) + sad8(pCurrent->v + (x + y*(iEdgedWidth/2))*8, pRef->v + (x + y*(iEdgedWidth/2))*8, iEdgedWidth/2); - pMB->sad16 += Data.temp[7]; + pMB->sad16 += Data.chromaSAD; } sad00 = pMB->sad16; @@ -1013,8 +963,7 @@ MAX((&pMBs[x + (y-1) * pParam->mb_width])->sad16, MAX((&pMBs[(x+1) + (y-1) * pParam->mb_width])->sad16, prevMB->sad16))); - } - else { + } else { stat_thresh = MIN((&pMBs[(x-1) + y * pParam->mb_width])->sad16, MIN((&pMBs[x + (y-1) * pParam->mb_width])->sad16, MIN((&pMBs[(x+1) + (y-1) * pParam->mb_width])->sad16,