--- trunk/xvidcore/src/motion/estimation_rd_based.c 2004/12/05 04:53:01 1564 +++ trunk/xvidcore/src/motion/estimation_rd_based.c 2004/12/18 12:06:43 1579 @@ -20,7 +20,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: estimation_rd_based.c,v 1.9 2004-12-05 04:53:01 syskin Exp $ + * $Id: estimation_rd_based.c,v 1.12 2004-12-18 12:06:43 syskin Exp $ * ****************************************************************************/ @@ -57,7 +57,8 @@ const int block, const uint16_t * scan_table, const unsigned int lambda, - const uint16_t * mpeg_quant_matrices) + const uint16_t * mpeg_quant_matrices, + const unsigned int quant_sq) { int sum; int bits; @@ -93,7 +94,7 @@ } - return bits + (lambda*distortion)/(quant*quant); + return bits + (lambda*distortion)/quant_sq; } static __inline unsigned int @@ -111,7 +112,8 @@ unsigned int bits[2], unsigned int cbp[2], unsigned int lambda, - const uint16_t * mpeg_quant_matrices) + const uint16_t * mpeg_quant_matrices, + const unsigned int quant_sq) { int direction; int16_t *pCurrent; @@ -146,8 +148,8 @@ /* dc prediction */ qcoeff[0] = qcoeff[0] - predictors[0]; - if (block < 4) bits[1] = bits[0] = dcy_tab[qcoeff[0] + 255].len; - else bits[1] = bits[0] = dcc_tab[qcoeff[0] + 255].len; + if (block < 4) bits[1] = bits[0] = dcy_tab[qcoeff[0] + 255].len - 3; /* 3 bits added before (4 times) */ + else bits[1] = bits[0] = dcc_tab[qcoeff[0] + 255].len - 2; /* 2 bits added before (2 times)*/ /* calc cost before ac prediction */ bits[0] += coded = CodeCoeffIntra_CalcBits(qcoeff, scan_tables[0]); @@ -171,7 +173,7 @@ distortion = sse8_16bit(coeff, dqcoeff, 8*sizeof(int16_t)); - return (lambda*distortion)/(quant*quant); + return (lambda*distortion)/quant_sq; } @@ -181,7 +183,8 @@ { int16_t *in = data->dctSpace, *coeff = data->dctSpace + 64; - int32_t rd = 0; + /* minimum number of bits INTER can take is 1 (mcbpc) + 2 (cbpy) + 2 (vector) */ + 
int32_t rd = BITS_MULT * (1+2+2); VECTOR * current; const uint8_t * ptr; int i, t, xc, yc; @@ -203,10 +206,12 @@ for(i = 0; i < 4; i++) { int s = 8*((i&1) + (i>>1)*data->iEdgedWidth); transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth); - rd += data->temp[i] = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, i, data->scan_table, data->lambda[i], data->mpeg_quant_matrices); + rd += data->temp[i] = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, + data->quant_type, &cbp, i, data->scan_table, data->lambda[i], + data->mpeg_quant_matrices, data->quant_sq); } - rd += t = BITS_MULT*d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision); + rd += t = BITS_MULT * (d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision) - 2); if (data->temp[0] + t < data->iMinSAD[1]) { data->iMinSAD[1] = data->temp[0] + t; current[1].x = x; current[1].y = y; data->cbp[1] = (data->cbp[1]&~32) | (cbp&32); } @@ -217,7 +222,7 @@ if (data->temp[3] < data->iMinSAD[4]) { data->iMinSAD[4] = data->temp[3]; current[4].x = x; current[4].y = y; data->cbp[1] = (data->cbp[1]&~4) | (cbp&4); } - rd += BITS_MULT*xvid_cbpy_tab[15-(cbp>>2)].len; + rd += BITS_MULT * (xvid_cbpy_tab[15-(cbp>>2)].len - 2); if (rd >= data->iMinSAD[0]) return; @@ -228,15 +233,19 @@ /* chroma U */ ptr = interpolate8x8_switch2(data->RefQ, data->RefP[4], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding); transfer_8to16subro(in, data->CurU, ptr, data->iEdgedWidth/2); - rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices); + rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, + &cbp, 4, data->scan_table, data->lambda[4], + data->mpeg_quant_matrices, data->quant_sq); if (rd >= data->iMinSAD[0]) return; /* chroma V */ ptr = interpolate8x8_switch2(data->RefQ, data->RefP[5], 0, 0, xc, yc, 
data->iEdgedWidth/2, data->rounding); transfer_8to16subro(in, data->CurV, ptr, data->iEdgedWidth/2); - rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices); + rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, + &cbp, 5, data->scan_table, data->lambda[5], + data->mpeg_quant_matrices, data->quant_sq); - rd += BITS_MULT*mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len; + rd += BITS_MULT * (mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len - 1); /* one was added before */ if (rd < data->iMinSAD[0]) { data->iMinSAD[0] = rd; @@ -268,8 +277,12 @@ } transfer_8to16subro(in, data->Cur, ptr, data->iEdgedWidth); - rd = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5, data->scan_table, data->lambda[0], data->mpeg_quant_matrices); - rd += BITS_MULT*d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision); + rd = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, + &cbp, 5, data->scan_table, data->lambda[0], + data->mpeg_quant_matrices, data->quant_sq); + /* we took 2 bits into account before */ + rd += BITS_MULT * (d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision) - 2); + if (rd < data->iMinSAD[0]) { *data->cbp = cbp; @@ -353,7 +366,10 @@ const VECTOR * const backup) { - unsigned int cbp = 0, bits = 0, t = 0, i; + unsigned int cbp = 0, t = 0, i; + + /* minimum number of bits INTER4V can take is 2 (cbpy) + 3 (mcbpc) + 4*2 (vectors)*/ + int bits = (2+3+4*2)*BITS_MULT; SearchData Data2, *Data8 = &Data2; int sumx = 0, sumy = 0; int16_t *in = Data->dctSpace, *coeff = Data->dctSpace + 64; @@ -377,17 +393,17 @@ if(Data->qpel) { Data8->predMV = get_qpmv2(pMBs, pParam->mb_width, 0, x, y, i); if (i != 0) t = d_mv_bits( Data8->currentQMV->x, Data8->currentQMV->y, - Data8->predMV, Data8->iFcode, 0); + Data8->predMV, 
Data8->iFcode, 0) - 2; } else { Data8->predMV = get_pmv2(pMBs, pParam->mb_width, 0, x, y, i); if (i != 0) t = d_mv_bits( Data8->currentMV->x, Data8->currentMV->y, - Data8->predMV, Data8->iFcode, 0); + Data8->predMV, Data8->iFcode, 0) - 2; } get_range(&Data8->min_dx, &Data8->max_dx, &Data8->min_dy, &Data8->max_dy, 2*x + (i&1), 2*y + (i>>1), 3, pParam->width, pParam->height, Data8->iFcode, Data8->qpel+1); - *Data8->iMinSAD += BITS_MULT*t; + *Data8->iMinSAD += BITS_MULT * t; Data8->qpel_precision = Data8->qpel; /* checking the vector which has been found by SAD-based 8x8 search (if it's different than the one found so far) */ @@ -482,7 +498,7 @@ } /* end - for all luma blocks */ - bits += BITS_MULT*xvid_cbpy_tab[15-(cbp>>2)].len; + bits += BITS_MULT * (xvid_cbpy_tab[15-(cbp>>2)].len - 2); /* 2 were added before */ /* let's check chroma */ sumx = (sumx >> 3) + roundtab_76[sumx & 0xf]; @@ -491,16 +507,18 @@ /* chroma U */ ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[4], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); transfer_8to16subro(in, Data->CurU, ptr, Data->iEdgedWidth/2); - bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices); + bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, + Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq); if (bits >= *Data->iMinSAD) return bits; /* chroma V */ ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[5], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); transfer_8to16subro(in, Data->CurV, ptr, Data->iEdgedWidth/2); - bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, Data->scan_table, Data->lambda[5], Data->mpeg_quant_matrices); + bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, + Data->scan_table, Data->lambda[5], 
Data->mpeg_quant_matrices, Data->quant_sq); - bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTER4V & 7) | ((cbp & 3) << 3)].len; + bits += BITS_MULT*(mcbpc_inter_tab[(MODE_INTER4V & 7) | ((cbp & 3) << 3)].len - 3); /* 3 were added before */ *Data->cbp = cbp; return bits; @@ -511,7 +529,8 @@ const int x, const int y, const int mb_width) { unsigned int cbp[2] = {0, 0}, bits[2], i; - unsigned int bits1 = BITS_MULT*1, bits2 = BITS_MULT*1; /* this one is ac/dc prediction flag bit */ + /* minimum number of bits that WILL be coded in intra - mcbpc 5, cby 2 acdc flag - 1 and DC coeffs - 4*3+2*2 */ + int bits1 = BITS_MULT*(5+2+1+4*3+2*2), bits2 = BITS_MULT*(5+2+1+4*3+2*2); unsigned int distortion = 0; int16_t *in = Data->dctSpace, * coeff = Data->dctSpace + 64, * dqcoeff = Data->dctSpace + 128; @@ -524,7 +543,8 @@ distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, i, in, coeff, dqcoeff, - predictors[i], iQuant, Data->quant_type, bits, cbp, Data->lambda[i], Data->mpeg_quant_matrices); + predictors[i], iQuant, Data->quant_type, bits, cbp, + Data->lambda[i], Data->mpeg_quant_matrices, Data->quant_sq); bits1 += distortion + BITS_MULT * bits[0]; bits2 += distortion + BITS_MULT * bits[1]; @@ -532,13 +552,14 @@ return bits1; } - bits1 += BITS_MULT*xvid_cbpy_tab[cbp[0]>>2].len; - bits2 += BITS_MULT*xvid_cbpy_tab[cbp[1]>>2].len; + bits1 += BITS_MULT * (xvid_cbpy_tab[cbp[0]>>2].len - 2); /* two bits were added before */ + bits2 += BITS_MULT * (xvid_cbpy_tab[cbp[1]>>2].len - 2); /*chroma U */ transfer_8to16copy(in, Data->CurU, Data->iEdgedWidth/2); distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, 4, in, coeff, dqcoeff, - predictors[4], iQuant, Data->quant_type, bits, cbp, Data->lambda[4], Data->mpeg_quant_matrices); + predictors[4], iQuant, Data->quant_type, bits, cbp, + Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq); bits1 += distortion + BITS_MULT * bits[0]; bits2 += distortion + BITS_MULT * bits[1]; @@ -548,13 +569,14 @@ /* chroma V */ transfer_8to16copy(in, 
Data->CurV, Data->iEdgedWidth/2); distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, 5, in, coeff, dqcoeff, - predictors[5], iQuant, Data->quant_type, bits, cbp, Data->lambda[5], Data->mpeg_quant_matrices); + predictors[5], iQuant, Data->quant_type, bits, cbp, + Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq); bits1 += distortion + BITS_MULT * bits[0]; bits2 += distortion + BITS_MULT * bits[1]; - bits1 += BITS_MULT*mcbpc_inter_tab[(MODE_INTRA & 7) | ((cbp[0] & 3) << 3)].len; - bits2 += BITS_MULT*mcbpc_inter_tab[(MODE_INTRA & 7) | ((cbp[1] & 3) << 3)].len; + bits1 += BITS_MULT * (mcbpc_inter_tab[(MODE_INTRA & 7) | ((cbp[0] & 3) << 3)].len - 5); /* 5 bits were added before */ + bits2 += BITS_MULT * (mcbpc_inter_tab[(MODE_INTRA & 7) | ((cbp[1] & 3) << 3)].len - 5); *Data->cbp = bits1 <= bits2 ? cbp[0] : cbp[1]; @@ -565,30 +587,34 @@ static int findRD_gmc(SearchData * const Data, const IMAGE * const vGMC, const int x, const int y) { - int bits = BITS_MULT*1; /* this one is mcsel */ + /* minimum number of bits - 1 (mcbpc) + 2 (cbpy) + 1 (mcsel) */ + int bits = BITS_MULT * (1+2+1); unsigned int cbp = 0, i; int16_t *in = Data->dctSpace, * coeff = Data->dctSpace + 64; for(i = 0; i < 4; i++) { int s = 8*((i&1) + (i>>1)*Data->iEdgedWidth); transfer_8to16subro(in, Data->Cur + s, vGMC->y + s + 16*(x+y*Data->iEdgedWidth), Data->iEdgedWidth); - bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, i, Data->scan_table, Data->lambda[i], Data->mpeg_quant_matrices); + bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, i, + Data->scan_table, Data->lambda[i], Data->mpeg_quant_matrices, Data->quant_sq); if (bits >= Data->iMinSAD[0]) return bits; } - bits += BITS_MULT*xvid_cbpy_tab[15-(cbp>>2)].len; + bits += BITS_MULT * (xvid_cbpy_tab[15-(cbp>>2)].len - 2); /*chroma U */ transfer_8to16subro(in, Data->CurU, vGMC->u + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2); - bits += 
Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices); + bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, + Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq); if (bits >= Data->iMinSAD[0]) return bits; /* chroma V */ transfer_8to16subro(in, Data->CurV , vGMC->v + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2); - bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, Data->scan_table, Data->lambda[5], Data->mpeg_quant_matrices); + bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, + Data->scan_table, Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq); - bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len; + bits += BITS_MULT * (mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len - 1); *Data->cbp = cbp; @@ -613,10 +639,10 @@ int mcsel = 0; int inter4v = (VopFlags & XVID_VOP_INTER4V) && (pMB->dquant == 0); const uint32_t iQuant = pMB->quant; - int min_rd, intra_rd, i, cbp; VECTOR backup[5], *v; Data->iQuant = iQuant; + Data->quant_sq = iQuant*iQuant; Data->scan_table = VopFlags & XVID_VOP_ALTERNATESCAN ? 
scan_tables[2] : scan_tables[0]; @@ -657,12 +683,15 @@ cbp = *Data->cbp; } } - - intra_rd = findRD_intra(Data, pMB, x, y, pParam->mb_width); - if (intra_rd < min_rd) { - *Data->iMinSAD = min_rd = intra_rd; - mode = MODE_INTRA; - cbp = *Data->cbp; + + /* there is no way for INTRA to take less than 24 bits - go to findRD_intra() for calculations */ + if (min_rd > 24*BITS_MULT) { + intra_rd = findRD_intra(Data, pMB, x, y, pParam->mb_width); + if (intra_rd < min_rd) { + *Data->iMinSAD = min_rd = intra_rd; + mode = MODE_INTRA; + cbp = *Data->cbp; + } } pMB->sad16 = pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = 0; @@ -729,6 +758,8 @@ scan_tables[2] : scan_tables[0]; pMB->mcsel = 0; + Data->iQuant = iQuant; + Data->quant_sq = iQuant*iQuant; for (i = 0; i < 6; i++) { /* HVS models, anyone ? */