--- branches/dev-api-3/xvidcore/src/motion/motion_est.c 2002/11/21 10:33:33 668 +++ branches/dev-api-3/xvidcore/src/motion/motion_est.c 2002/12/10 11:13:50 702 @@ -54,6 +54,11 @@ #define iDiamondSize 2 +static VECTOR +GlobalMotionEst(const MACROBLOCK * const pMBs, + const MBParam * const pParam, const uint32_t iFcode); + + static __inline int d_mv_bits(int x, int y, const uint32_t iFcode) { @@ -703,6 +708,9 @@ const VECTOR zeroMV = { 0, 0 }; + int mb_width = pParam->mb_width; + int mb_height = pParam->mb_height; + uint32_t x, y; uint32_t iIntra = 0; int32_t InterBias, quant = current->quant, sad00; @@ -724,6 +732,12 @@ Data.qpel = pParam->m_quarterpel; Data.chroma = current->global_flags & XVID_ME_COLOUR; + if ((current->global_flags & XVID_REDUCED)) + { + mb_width = (pParam->width + 31) / 32; + mb_height = (pParam->height + 31) / 32; + } + if((qimage = (uint8_t *) malloc(32 * pParam->edged_width)) == NULL) return 1; // allocate some mem for qpel interpolated blocks // somehow this is dirty since I think we shouldn't use malloc outside @@ -731,8 +745,8 @@ Data.RefQ = qimage; if (sadInit) (*sadInit) (); - for (y = 0; y < pParam->mb_height; y++) { - for (x = 0; x < pParam->mb_width; x++) { + for (y = 0; y < mb_height; y++) { + for (x = 0; x < mb_width; x++) { MACROBLOCK *pMB = &pMBs[x + y * pParam->mb_width]; pMB->sad16 @@ -902,7 +916,7 @@ get_pmvdata2(pMBs, pParam->mb_width, 0, x, y, 0, pmv, Data->temp); //has to be changed to get_pmv(2)() get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, - pParam->width, pParam->height, Data->iFcode, pParam->m_quarterpel); + pParam->width, pParam->height, Data->iFcode - pParam->m_quarterpel); Data->Cur = pCur->y + (x + y * Data->iEdgedWidth) * 16; Data->CurV = pCur->v + (x + y * (Data->iEdgedWidth/2)) * 8; @@ -1017,8 +1031,8 @@ if((pParam->m_quarterpel) && (MotionFlags & PMV_QUARTERPELREFINE16)) { Data->qpel_precision = 1; - get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, - pParam->width, pParam->height, Data->iFcode, 0); + get_range_qpel(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, + pParam->width, pParam->height, Data->iFcode); SubpelRefine(Data); } @@ -1120,7 +1134,7 @@ Data->qpel_precision = 0; get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 8, - pParam->width, pParam->height, OldData->iFcode, pParam->m_quarterpel); + pParam->width, pParam->height, OldData->iFcode - pParam->m_quarterpel); CheckCandidate = CheckCandidate8; if (MotionFlags & PMV_EXTSEARCH8) { @@ -1154,8 +1168,8 @@ if((!(Data->currentQMV->x & 1)) && (!(Data->currentQMV->y & 1)) && (MotionFlags & PMV_QUARTERPELREFINE8)) { Data->qpel_precision = 1; - get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 8, - pParam->width, pParam->height, OldData->iFcode, 0); + get_range_qpel(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 8, + pParam->width, pParam->height, OldData->iFcode); SubpelRefine(Data); } } @@ -1259,7 +1273,7 @@ Data->predMV = *predMV; get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, - pParam->width, pParam->height, iFcode, pParam->m_quarterpel); + pParam->width, pParam->height, iFcode - pParam->m_quarterpel); pmv[0] = Data->predMV; if (Data->qpel) { pmv[0].x /= 2; pmv[0].y /= 2; } @@ -1288,8 +1302,8 @@ Data->currentQMV->x = 2*Data->currentMV->x; Data->currentQMV->y = 2*Data->currentMV->y; Data->qpel_precision = 1; - get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, - pParam->width, pParam->height, iFcode, 0); + get_range_qpel(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, + pParam->width, pParam->height, iFcode); SubpelRefine(Data); } @@ -1368,7 +1382,7 @@ Data->min_dy *= 2; Data->referencemv = b_mb->qmvs; } else Data->referencemv = b_mb->mvs; - Data->qpel_precision = 0; // it'm a trick. it's 1 not 0, but we need 0 here + Data->qpel_precision = 0; // it's a trick. it's 1 not 0, but we need 0 here for (k = 0; k < 4; k++) { pMB->mvs[k].x = Data->directmvF[k].x = ((TRB * Data->referencemv[k].x) / TRD); @@ -1455,12 +1469,12 @@ SubpelRefine(Data); - *Data->iMinSAD += 1 * Data->lambda16; // one bit is needed to code direct mode +// *Data->iMinSAD += 1 * Data->lambda16; // one bit is needed to code direct mode *best_sad = *Data->iMinSAD; -// if (b_mb->mode == MODE_INTER4V) + if (b_mb->mode == MODE_INTER4V) pMB->mode = MODE_DIRECT; -// else pMB->mode = MODE_DIRECT_NO4V; //for faster compensation + else pMB->mode = MODE_DIRECT_NO4V; //for faster compensation pMB->pmvs[3] = *Data->currentMV; @@ -1535,25 +1549,25 @@ bData.RefV = fData->bRefV = b_RefV + (x + y * iEdgedWidth) * 16; bData.RefHV = fData->bRefHV = b_RefHV + (x + y * iEdgedWidth) * 16; bData.RefQ = fData->RefQ; - fData->qpel_precision = bData.qpel_precision = 0; + fData->qpel_precision = bData.qpel_precision = 0; bData.qpel = fData->qpel; bData.rounding = 0; bData.bpredMV = fData->predMV = *f_predMV; fData->bpredMV = bData.predMV = *b_predMV; fData->currentMV[0] = fData->currentMV[2]; - get_range(&fData->min_dx, &fData->max_dx, &fData->min_dy, &fData->max_dy, x, y, 16, pParam->width, pParam->height, fcode, pParam->m_quarterpel); - get_range(&bData.min_dx, &bData.max_dx, &bData.min_dy, &bData.max_dy, x, y, 16, pParam->width, pParam->height, bcode, pParam->m_quarterpel); + get_range(&fData->min_dx, &fData->max_dx, &fData->min_dy, &fData->max_dy, x, y, 16, pParam->width, pParam->height, fcode - pParam->m_quarterpel); + get_range(&bData.min_dx, &bData.max_dx, &bData.min_dy, &bData.max_dy, x, y, 16, pParam->width, pParam->height, bcode - pParam->m_quarterpel); if (fData->currentMV[0].x > fData->max_dx) fData->currentMV[0].x = fData->max_dx; - if (fData->currentMV[0].x < fData->min_dx) fData->currentMV[0].x = fData->min_dy; - if (fData->currentMV[0].y > fData->max_dy) fData->currentMV[0].y = fData->max_dx; - if (fData->currentMV[0].y > fData->min_dy) fData->currentMV[0].y = fData->min_dy; + if (fData->currentMV[0].x < fData->min_dx) fData->currentMV[0].x = fData->min_dx; + if (fData->currentMV[0].y > fData->max_dy) fData->currentMV[0].y = fData->max_dy; + if (fData->currentMV[0].y < fData->min_dy) fData->currentMV[0].y = fData->min_dy; if (fData->currentMV[1].x > bData.max_dx) fData->currentMV[1].x = bData.max_dx; - if (fData->currentMV[1].x < bData.min_dx) fData->currentMV[1].x = bData.min_dy; - if (fData->currentMV[1].y > bData.max_dy) fData->currentMV[1].y = bData.max_dx; - if (fData->currentMV[1].y > bData.min_dy) fData->currentMV[1].y = bData.min_dy; + if (fData->currentMV[1].x < bData.min_dx) fData->currentMV[1].x = bData.min_dx; + if (fData->currentMV[1].y > bData.max_dy) fData->currentMV[1].y = bData.max_dy; + if (fData->currentMV[1].y < bData.min_dy) fData->currentMV[1].y = bData.min_dy; CheckCandidateInt(fData->currentMV[0].x, fData->currentMV[0].y, 255, &iDirection, fData); @@ -1579,12 +1593,11 @@ } while (!(iDirection)); - *fData->iMinSAD += 2 * fData->lambda16; // two bits are needed to code interpolate mode. - if (fData->qpel) { + CheckCandidate = CheckCandidateInt; fData->qpel_precision = bData.qpel_precision = 1; - get_range(&fData->min_dx, &fData->max_dx, &fData->min_dy, &fData->max_dy, x, y, 16, pParam->width, pParam->height, fcode, 0); - get_range(&bData.min_dx, &bData.max_dx, &bData.min_dy, &bData.max_dy, x, y, 16, pParam->width, pParam->height, bcode, 0); + get_range_qpel(&fData->min_dx, &fData->max_dx, &fData->min_dy, &fData->max_dy, x, y, 16, pParam->width, pParam->height, fcode); + get_range_qpel(&bData.min_dx, &bData.max_dx, &bData.min_dy, &bData.max_dy, x, y, 16, pParam->width, pParam->height, bcode); fData->currentQMV[2].x = fData->currentQMV[0].x = 2 * fData->currentMV[0].x; fData->currentQMV[2].y = fData->currentQMV[0].y = 2 * fData->currentMV[0].y; fData->currentQMV[1].x = 2 * fData->currentMV[1].x; @@ -1593,6 +1606,8 @@ fData->currentQMV[2] = fData->currentQMV[0]; SubpelRefine(&bData); } + + *fData->iMinSAD += 2 * fData->lambda16; // two bits are needed to code interpolate mode. if (*fData->iMinSAD < *best_sad) { *best_sad = *fData->iMinSAD; @@ -1654,7 +1669,7 @@ Data.iEdgedWidth = pParam->edged_width; Data.currentMV = currentMV; Data.currentQMV = currentQMV; Data.iMinSAD = &iMinSAD; - Data.lambda16 = lambda_vec16[frame->quant]; + Data.lambda16 = lambda_vec16[frame->quant] + 2; Data.qpel = pParam->m_quarterpel; Data.rounding = 0; @@ -1751,7 +1766,6 @@ case MODE_DIRECT: case MODE_DIRECT_NO4V: d_count++; - break; default: break; } @@ -1783,7 +1797,7 @@ MainSearchFunc * MainSearchPtr; get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, - pParam->width, pParam->height, Data->iFcode, pParam->m_quarterpel); + pParam->width, pParam->height, Data->iFcode - pParam->m_quarterpel); Data->Cur = pCur->y + (x + y * Data->iEdgedWidth) * 16; Data->CurV = pCur->v + (x + y * (Data->iEdgedWidth/2)) * 8; @@ -1846,8 +1860,8 @@ } if((pParam->m_quarterpel) && (MotionFlags & PMV_QUARTERPELREFINE16)) { - get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, - pParam->width, pParam->height, Data->iFcode, 0); + get_range_qpel(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, + pParam->width, pParam->height, Data->iFcode); Data->qpel_precision = 1; SubpelRefine(Data); } @@ -2003,7 +2017,7 @@ else Data->predMV = get_pmv2(pMBs, pParam->mb_width, 0, x, y, 0); //else median get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, - pParam->width, pParam->height, Data->iFcode, pParam->m_quarterpel); + pParam->width, pParam->height, Data->iFcode - pParam->m_quarterpel); Data->Cur = pCur + (x + y * pParam->edged_width) * 16; Data->Ref = pRef + (x + y * pParam->edged_width) * 16; @@ -2037,7 +2051,7 @@ } #define INTRA_THRESH 1350 -#define INTER_THRESH 900 +#define INTER_THRESH 1200 int @@ -2048,6 +2062,9 @@ int intraCount, //number of non-I frames after last I frame; 0 if we force P/B frame int bCount) // number if B frames in a row { + int mb_width = pParam->mb_width; + int mb_height = pParam->mb_height; + uint32_t x, y, intra = 0; int sSAD = 0; MACROBLOCK * const pMBs = Current->mbs; @@ -2063,6 +2080,13 @@ Data.iFcode = Current->fcode; CheckCandidate = CheckCandidate16no4vI; + if ((Current->global_flags & XVID_REDUCED)) + { + mb_width = (pParam->width + 31) / 32; + mb_height = (pParam->height + 31) / 32; + } + + if (intraCount < 10) // we're right after an I frame IntraThresh += 4 * (intraCount - 10) * (intraCount - 10); else @@ -2070,13 +2094,13 @@ IntraThresh -= (IntraThresh * (maxIntra - 5*(maxIntra - intraCount)))/maxIntra; - InterThresh += 300 * (1 - bCount); + InterThresh += 400 * (1 - bCount); if (InterThresh < 200) InterThresh = 200; if (sadInit) (*sadInit) (); - for (y = 1; y < pParam->mb_height-1; y++) { - for (x = 1; x < pParam->mb_width-1; x++) { + for (y = 1; y < mb_height-1; y++) { + for (x = 1; x < mb_width-1; x++) { int sad, dev; MACROBLOCK *pMB = &pMBs[x + y * pParam->mb_width]; @@ -2088,13 +2112,13 @@ pParam->edged_width); if (dev + IntraThresh < sad) { pMB->mode = MODE_INTRA; - if (++intra > (pParam->mb_height-2)*(pParam->mb_width-2)/2) return 2; // I frame + if (++intra > (mb_height-2)*(mb_width-2)/2) return 2; // I frame } } sSAD += sad; } } - sSAD /= (pParam->mb_height-2)*(pParam->mb_width-2); + sSAD /= (mb_height-2)*(mb_width-2); if (sSAD > InterThresh ) return 1; //P frame emms(); return 0; // B frame @@ -2105,11 +2129,22 @@ FindFcode( const MBParam * const pParam, const FRAMEINFO * const current) { + int mb_width = pParam->mb_width; + int mb_height = pParam->mb_height; + uint32_t x, y; int max = 0, min = 0, i; - for (y = 0; y < pParam->mb_height; y++) { - for (x = 0; x < pParam->mb_width; x++) { + + if ((current->global_flags & XVID_REDUCED)) + { + mb_width = (pParam->width + 31) / 32; + mb_height = (pParam->height + 31) / 32; + } + + + for (y = 0; y < mb_height; y++) { + for (x = 0; x < mb_width; x++) { MACROBLOCK *pMB = ¤t->mbs[x + y * pParam->mb_width]; for(i = 0; i < (pMB->mode == MODE_INTER4V ? 4:1); i++) {