--- trunk/xvidcore/src/motion/estimation_rd_based.c 2005/12/09 04:45:35 1660 +++ trunk/xvidcore/src/motion/estimation_rd_based.c 2010/11/28 15:19:07 1909 @@ -4,7 +4,7 @@ * - Rate-Distortion Based Motion Estimation for P- and S- VOPs - * * Copyright(C) 2003 Radoslaw Czyz - * 2003 Michael Militzer + * 2003-2010 Michael Militzer * * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: estimation_rd_based.c,v 1.14 2005-12-09 04:45:35 syskin Exp $ + * $Id: estimation_rd_based.c,v 1.15 2010-11-28 15:18:21 Isibaar Exp $ * ****************************************************************************/ @@ -49,7 +49,7 @@ #define LAMBDA ( (int)(BITS_MULT*1.0) ) static __inline unsigned int -Block_CalcBits( int16_t * const coeff, +Block_CalcBits( int16_t * const coeff, int16_t * const data, int16_t * const dqcoeff, const uint32_t quant, const int quant_type, @@ -58,7 +58,9 @@ const uint16_t * scan_table, const unsigned int lambda, const uint16_t * mpeg_quant_matrices, - const unsigned int quant_sq) + const unsigned int quant_sq, + const unsigned int rel_var8, + const unsigned int metric) { int sum; int bits; @@ -76,7 +78,9 @@ if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); - distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t)); + if (metric) distortion = masked_sseh8_16bit(data, dqcoeff, rel_var8); + else distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t)); + } else { const static int16_t zero_block[64] = { @@ -90,9 +94,11 @@ 0, 0, 0, 0, 0, 0, 0, 0, }; bits = 0; - distortion = sse8_16bit(data, zero_block, 8*sizeof(int16_t)); - } + if (metric) distortion = masked_sseh8_16bit(data, (int16_t * const) zero_block, rel_var8); + else distortion = sse8_16bit(data, (int16_t * const) zero_block, 8*sizeof(int16_t)); + + } return bits + (lambda*distortion)/quant_sq; } @@ -113,7 +119,8 @@ unsigned int cbp[2], unsigned int lambda, const uint16_t * mpeg_quant_matrices, - const unsigned int quant_sq) + const unsigned int quant_sq, + const unsigned int metric) { int direction; int16_t *pCurrent; @@ -171,7 +178,8 @@ bits[1] += coded = CodeCoeffIntra_CalcBits(qcoeff, scan_tables[direction]); if (coded > 0) cbp[1] |= 1 << (5 - block); - distortion = sse8_16bit(coeff, dqcoeff, 8*sizeof(int16_t)); + if (metric) distortion = masked_sseh8_16bit(coeff, dqcoeff, pMB->rel_var8[block]); + else distortion = sse8_16bit(coeff, dqcoeff, 8*sizeof(int16_t)); return (lambda*distortion)/quant_sq; } @@ -208,7 +216,7 @@ transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth); rd += data->temp[i] = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, i, data->scan_table, data->lambda[i], - data->mpeg_quant_matrices, data->quant_sq); + data->mpeg_quant_matrices, data->quant_sq, data->rel_var8[i], data->metric); } rd += t = BITS_MULT * (d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision) - 2); @@ -235,7 +243,7 @@ transfer_8to16subro(in, data->CurU, ptr, data->iEdgedWidth/2); rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 4, data->scan_table, data->lambda[4], - data->mpeg_quant_matrices, data->quant_sq); + data->mpeg_quant_matrices, data->quant_sq, data->rel_var8[4], data->metric); if (rd >= data->iMinSAD[0]) return; /* chroma V */ @@ -243,7 +251,7 @@ transfer_8to16subro(in, data->CurV, ptr, data->iEdgedWidth/2); rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5, data->scan_table, data->lambda[5], - data->mpeg_quant_matrices, data->quant_sq); + data->mpeg_quant_matrices, data->quant_sq, data->rel_var8[5], data->metric); rd += BITS_MULT * (mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len - 1); /* one was added before */ @@ -279,7 +287,7 @@ transfer_8to16subro(in, data->Cur, ptr, data->iEdgedWidth); rd = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5, data->scan_table, data->lambda[0], - data->mpeg_quant_matrices, data->quant_sq); + data->mpeg_quant_matrices, data->quant_sq, data->rel_var8[0], data->metric); /* we took 2 bits into account before */ rd += BITS_MULT * (d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision) - 2); @@ -295,9 +303,9 @@ static int findRD_inter(SearchData * const Data, - const int x, const int y, - const MBParam * const pParam, - const uint32_t MotionFlags) + const int x, const int y, + const MBParam * const pParam, + const uint32_t MotionFlags) { int i; int32_t bsad[5]; @@ -389,6 +397,7 @@ Data8->RefP[3] = Data->RefP[3] + 8*((i&1) + (i>>1)*Data->iEdgedWidth); *Data8->cbp = (Data->cbp[1] & (1<<(5-i))) ? 1:0; /* copy corresponding cbp bit */ Data8->lambda[0] = Data->lambda[i]; + Data8->rel_var8[0] = Data->rel_var8[i]; if(Data->qpel) { Data8->predMV = get_qpmv2(pMBs, pParam->mb_width, 0, x, y, i); @@ -508,7 +517,7 @@ ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[4], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); transfer_8to16subro(in, Data->CurU, ptr, Data->iEdgedWidth/2); bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, - Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq); + Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq, Data->rel_var8[4], Data->metric); if (bits >= *Data->iMinSAD) return bits; @@ -516,7 +525,7 @@ ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[5], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); transfer_8to16subro(in, Data->CurV, ptr, Data->iEdgedWidth/2); bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, - Data->scan_table, Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq); + Data->scan_table, Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq, Data->rel_var8[5], Data->metric); bits += BITS_MULT*(mcbpc_inter_tab[(MODE_INTER4V & 7) | ((cbp & 3) << 3)].len - 3); /* 3 were added before */ @@ -544,7 +553,7 @@ distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, i, in, coeff, dqcoeff, predictors[i], iQuant, Data->quant_type, bits, cbp, - Data->lambda[i], Data->mpeg_quant_matrices, Data->quant_sq); + Data->lambda[i], Data->mpeg_quant_matrices, Data->quant_sq, Data->metric); bits1 += distortion + BITS_MULT * bits[0]; bits2 += distortion + BITS_MULT * bits[1]; @@ -559,7 +568,7 @@ transfer_8to16copy(in, Data->CurU, Data->iEdgedWidth/2); distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, 4, in, coeff, dqcoeff, predictors[4], iQuant, Data->quant_type, bits, cbp, - Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq); + Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq, Data->metric); bits1 += distortion + BITS_MULT * bits[0]; bits2 += distortion + BITS_MULT * bits[1]; @@ -570,7 +579,7 @@ transfer_8to16copy(in, Data->CurV, Data->iEdgedWidth/2); distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, 5, in, coeff, dqcoeff, predictors[5], iQuant, Data->quant_type, bits, cbp, - Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq); + Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq, Data->metric); bits1 += distortion + BITS_MULT * bits[0]; bits2 += distortion + BITS_MULT * bits[1]; @@ -596,7 +605,8 @@ int s = 8*((i&1) + (i>>1)*Data->iEdgedWidth); transfer_8to16subro(in, Data->Cur + s, vGMC->y + s + 16*(x+y*Data->iEdgedWidth), Data->iEdgedWidth); bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, i, - Data->scan_table, Data->lambda[i], Data->mpeg_quant_matrices, Data->quant_sq); + Data->scan_table, Data->lambda[i], Data->mpeg_quant_matrices, + Data->quant_sq, Data->rel_var8[i], Data->metric); if (bits >= Data->iMinSAD[0]) return bits; } @@ -605,14 +615,16 @@ /*chroma U */ transfer_8to16subro(in, Data->CurU, vGMC->u + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2); bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, - Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq); + Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices, + Data->quant_sq, Data->rel_var8[4], Data->metric); if (bits >= Data->iMinSAD[0]) return bits; /* chroma V */ transfer_8to16subro(in, Data->CurV , vGMC->v + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2); bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, - Data->scan_table, Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq); + Data->scan_table, Data->lambda[5], Data->mpeg_quant_matrices, + Data->quant_sq, Data->rel_var8[5], Data->metric); bits += BITS_MULT * (mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len - 1); @@ -645,6 +657,7 @@ Data->quant_sq = iQuant*iQuant; Data->scan_table = VopFlags & XVID_VOP_ALTERNATESCAN ? scan_tables[2] : scan_tables[0]; + Data->metric = !!(VopFlags & XVID_VOP_RD_PSNRHVSM); pMB->mcsel = 0; @@ -656,6 +669,7 @@ for (i = 0; i < 6; i++) { Data->lambda[i] = (LAMBDA*pMB->lambda[i])>>LAMBDA_EXP; + Data->rel_var8[i] = pMB->rel_var8[i]; } min_rd = findRD_inter(Data, x, y, pParam, MotionFlags); @@ -755,6 +769,7 @@ int top = 0, top_right = 0, left = 0; Data->scan_table = VopFlags & XVID_VOP_ALTERNATESCAN ? scan_tables[2] : scan_tables[0]; + Data->metric = !!(VopFlags & XVID_VOP_RD_PSNRHVSM); pMB->mcsel = 0; Data->iQuant = iQuant; @@ -762,6 +777,7 @@ for (i = 0; i < 6; i++) { Data->lambda[i] = (LAMBDA*pMB->lambda[i])>>LAMBDA_EXP; + Data->rel_var8[i] = pMB->rel_var8[i]; } /* INTER <-> INTER4V decision */