4 |
* - Rate-Distortion Based Motion Estimation for P- and S- VOPs - |
* - Rate-Distortion Based Motion Estimation for P- and S- VOPs - |
5 |
* |
* |
6 |
* Copyright(C) 2003 Radoslaw Czyz <xvid@syskin.cjb.net> |
* Copyright(C) 2003 Radoslaw Czyz <xvid@syskin.cjb.net> |
7 |
* 2003 Michael Militzer <michael@xvid.org> |
* 2003-2010 Michael Militzer <michael@xvid.org> |
8 |
* |
* |
9 |
* This program is free software ; you can redistribute it and/or modify |
* This program is free software ; you can redistribute it and/or modify |
10 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
20 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
21 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 |
* |
* |
23 |
* $Id: estimation_rd_based.c,v 1.14 2005-12-09 04:45:35 syskin Exp $ |
* $Id: estimation_rd_based.c,v 1.15 2010-11-28 15:18:21 Isibaar Exp $ |
24 |
* |
* |
25 |
****************************************************************************/ |
****************************************************************************/ |
26 |
|
|
58 |
const uint16_t * scan_table, |
const uint16_t * scan_table, |
59 |
const unsigned int lambda, |
const unsigned int lambda, |
60 |
const uint16_t * mpeg_quant_matrices, |
const uint16_t * mpeg_quant_matrices, |
61 |
const unsigned int quant_sq) |
const unsigned int quant_sq, |
62 |
|
const unsigned int rel_var8, |
63 |
|
const unsigned int metric) |
64 |
{ |
{ |
65 |
int sum; |
int sum; |
66 |
int bits; |
int bits; |
78 |
if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
79 |
else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
80 |
|
|
81 |
distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t)); |
if (metric) distortion = masked_sseh8_16bit(data, dqcoeff, rel_var8); |
82 |
|
else distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t)); |
83 |
|
|
84 |
} else { |
} else { |
85 |
const static int16_t zero_block[64] = |
const static int16_t zero_block[64] = |
86 |
{ |
{ |
94 |
0, 0, 0, 0, 0, 0, 0, 0, |
0, 0, 0, 0, 0, 0, 0, 0, |
95 |
}; |
}; |
96 |
bits = 0; |
bits = 0; |
|
distortion = sse8_16bit(data, zero_block, 8*sizeof(int16_t)); |
|
|
} |
|
97 |
|
|
98 |
|
if (metric) distortion = masked_sseh8_16bit(data, (int16_t * const) zero_block, rel_var8); |
99 |
|
else distortion = sse8_16bit(data, (int16_t * const) zero_block, 8*sizeof(int16_t)); |
100 |
|
|
101 |
|
} |
102 |
|
|
103 |
return bits + (lambda*distortion)/quant_sq; |
return bits + (lambda*distortion)/quant_sq; |
104 |
} |
} |
119 |
unsigned int cbp[2], |
unsigned int cbp[2], |
120 |
unsigned int lambda, |
unsigned int lambda, |
121 |
const uint16_t * mpeg_quant_matrices, |
const uint16_t * mpeg_quant_matrices, |
122 |
const unsigned int quant_sq) |
const unsigned int quant_sq, |
123 |
|
const unsigned int metric) |
124 |
{ |
{ |
125 |
int direction; |
int direction; |
126 |
int16_t *pCurrent; |
int16_t *pCurrent; |
178 |
bits[1] += coded = CodeCoeffIntra_CalcBits(qcoeff, scan_tables[direction]); |
bits[1] += coded = CodeCoeffIntra_CalcBits(qcoeff, scan_tables[direction]); |
179 |
if (coded > 0) cbp[1] |= 1 << (5 - block); |
if (coded > 0) cbp[1] |= 1 << (5 - block); |
180 |
|
|
181 |
distortion = sse8_16bit(coeff, dqcoeff, 8*sizeof(int16_t)); |
if (metric) distortion = masked_sseh8_16bit(coeff, dqcoeff, pMB->rel_var8[block]); |
182 |
|
else distortion = sse8_16bit(coeff, dqcoeff, 8*sizeof(int16_t)); |
183 |
|
|
184 |
return (lambda*distortion)/quant_sq; |
return (lambda*distortion)/quant_sq; |
185 |
} |
} |
216 |
transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth); |
transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth); |
217 |
rd += data->temp[i] = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, |
rd += data->temp[i] = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, |
218 |
data->quant_type, &cbp, i, data->scan_table, data->lambda[i], |
data->quant_type, &cbp, i, data->scan_table, data->lambda[i], |
219 |
data->mpeg_quant_matrices, data->quant_sq); |
data->mpeg_quant_matrices, data->quant_sq, data->rel_var8[i], data->metric); |
220 |
} |
} |
221 |
|
|
222 |
rd += t = BITS_MULT * (d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision) - 2); |
rd += t = BITS_MULT * (d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision) - 2); |
243 |
transfer_8to16subro(in, data->CurU, ptr, data->iEdgedWidth/2); |
transfer_8to16subro(in, data->CurU, ptr, data->iEdgedWidth/2); |
244 |
rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
245 |
&cbp, 4, data->scan_table, data->lambda[4], |
&cbp, 4, data->scan_table, data->lambda[4], |
246 |
data->mpeg_quant_matrices, data->quant_sq); |
data->mpeg_quant_matrices, data->quant_sq, data->rel_var8[4], data->metric); |
247 |
if (rd >= data->iMinSAD[0]) return; |
if (rd >= data->iMinSAD[0]) return; |
248 |
|
|
249 |
/* chroma V */ |
/* chroma V */ |
251 |
transfer_8to16subro(in, data->CurV, ptr, data->iEdgedWidth/2); |
transfer_8to16subro(in, data->CurV, ptr, data->iEdgedWidth/2); |
252 |
rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
253 |
&cbp, 5, data->scan_table, data->lambda[5], |
&cbp, 5, data->scan_table, data->lambda[5], |
254 |
data->mpeg_quant_matrices, data->quant_sq); |
data->mpeg_quant_matrices, data->quant_sq, data->rel_var8[5], data->metric); |
255 |
|
|
256 |
rd += BITS_MULT * (mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len - 1); /* one was added before */ |
rd += BITS_MULT * (mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len - 1); /* one was added before */ |
257 |
|
|
287 |
transfer_8to16subro(in, data->Cur, ptr, data->iEdgedWidth); |
transfer_8to16subro(in, data->Cur, ptr, data->iEdgedWidth); |
288 |
rd = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
rd = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
289 |
&cbp, 5, data->scan_table, data->lambda[0], |
&cbp, 5, data->scan_table, data->lambda[0], |
290 |
data->mpeg_quant_matrices, data->quant_sq); |
data->mpeg_quant_matrices, data->quant_sq, data->rel_var8[0], data->metric); |
291 |
/* we took 2 bits into account before */ |
/* we took 2 bits into account before */ |
292 |
rd += BITS_MULT * (d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision) - 2); |
rd += BITS_MULT * (d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision) - 2); |
293 |
|
|
397 |
Data8->RefP[3] = Data->RefP[3] + 8*((i&1) + (i>>1)*Data->iEdgedWidth); |
Data8->RefP[3] = Data->RefP[3] + 8*((i&1) + (i>>1)*Data->iEdgedWidth); |
398 |
*Data8->cbp = (Data->cbp[1] & (1<<(5-i))) ? 1:0; /* copy corresponding cbp bit */ |
*Data8->cbp = (Data->cbp[1] & (1<<(5-i))) ? 1:0; /* copy corresponding cbp bit */ |
399 |
Data8->lambda[0] = Data->lambda[i]; |
Data8->lambda[0] = Data->lambda[i]; |
400 |
|
Data8->rel_var8[0] = Data->rel_var8[i]; |
401 |
|
|
402 |
if(Data->qpel) { |
if(Data->qpel) { |
403 |
Data8->predMV = get_qpmv2(pMBs, pParam->mb_width, 0, x, y, i); |
Data8->predMV = get_qpmv2(pMBs, pParam->mb_width, 0, x, y, i); |
517 |
ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[4], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); |
ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[4], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); |
518 |
transfer_8to16subro(in, Data->CurU, ptr, Data->iEdgedWidth/2); |
transfer_8to16subro(in, Data->CurU, ptr, Data->iEdgedWidth/2); |
519 |
bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, |
bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, |
520 |
Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq); |
Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq, Data->rel_var8[4], Data->metric); |
521 |
|
|
522 |
if (bits >= *Data->iMinSAD) return bits; |
if (bits >= *Data->iMinSAD) return bits; |
523 |
|
|
525 |
ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[5], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); |
ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[5], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); |
526 |
transfer_8to16subro(in, Data->CurV, ptr, Data->iEdgedWidth/2); |
transfer_8to16subro(in, Data->CurV, ptr, Data->iEdgedWidth/2); |
527 |
bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, |
bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, |
528 |
Data->scan_table, Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq); |
Data->scan_table, Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq, Data->rel_var8[5], Data->metric); |
529 |
|
|
530 |
bits += BITS_MULT*(mcbpc_inter_tab[(MODE_INTER4V & 7) | ((cbp & 3) << 3)].len - 3); /* 3 were added before */ |
bits += BITS_MULT*(mcbpc_inter_tab[(MODE_INTER4V & 7) | ((cbp & 3) << 3)].len - 3); /* 3 were added before */ |
531 |
|
|
553 |
|
|
554 |
distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, i, in, coeff, dqcoeff, |
distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, i, in, coeff, dqcoeff, |
555 |
predictors[i], iQuant, Data->quant_type, bits, cbp, |
predictors[i], iQuant, Data->quant_type, bits, cbp, |
556 |
Data->lambda[i], Data->mpeg_quant_matrices, Data->quant_sq); |
Data->lambda[i], Data->mpeg_quant_matrices, Data->quant_sq, Data->metric); |
557 |
bits1 += distortion + BITS_MULT * bits[0]; |
bits1 += distortion + BITS_MULT * bits[0]; |
558 |
bits2 += distortion + BITS_MULT * bits[1]; |
bits2 += distortion + BITS_MULT * bits[1]; |
559 |
|
|
568 |
transfer_8to16copy(in, Data->CurU, Data->iEdgedWidth/2); |
transfer_8to16copy(in, Data->CurU, Data->iEdgedWidth/2); |
569 |
distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, 4, in, coeff, dqcoeff, |
distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, 4, in, coeff, dqcoeff, |
570 |
predictors[4], iQuant, Data->quant_type, bits, cbp, |
predictors[4], iQuant, Data->quant_type, bits, cbp, |
571 |
Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq); |
Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq, Data->metric); |
572 |
bits1 += distortion + BITS_MULT * bits[0]; |
bits1 += distortion + BITS_MULT * bits[0]; |
573 |
bits2 += distortion + BITS_MULT * bits[1]; |
bits2 += distortion + BITS_MULT * bits[1]; |
574 |
|
|
579 |
transfer_8to16copy(in, Data->CurV, Data->iEdgedWidth/2); |
transfer_8to16copy(in, Data->CurV, Data->iEdgedWidth/2); |
580 |
distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, 5, in, coeff, dqcoeff, |
distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, 5, in, coeff, dqcoeff, |
581 |
predictors[5], iQuant, Data->quant_type, bits, cbp, |
predictors[5], iQuant, Data->quant_type, bits, cbp, |
582 |
Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq); |
Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq, Data->metric); |
583 |
|
|
584 |
bits1 += distortion + BITS_MULT * bits[0]; |
bits1 += distortion + BITS_MULT * bits[0]; |
585 |
bits2 += distortion + BITS_MULT * bits[1]; |
bits2 += distortion + BITS_MULT * bits[1]; |
605 |
int s = 8*((i&1) + (i>>1)*Data->iEdgedWidth); |
int s = 8*((i&1) + (i>>1)*Data->iEdgedWidth); |
606 |
transfer_8to16subro(in, Data->Cur + s, vGMC->y + s + 16*(x+y*Data->iEdgedWidth), Data->iEdgedWidth); |
transfer_8to16subro(in, Data->Cur + s, vGMC->y + s + 16*(x+y*Data->iEdgedWidth), Data->iEdgedWidth); |
607 |
bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, i, |
bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, i, |
608 |
Data->scan_table, Data->lambda[i], Data->mpeg_quant_matrices, Data->quant_sq); |
Data->scan_table, Data->lambda[i], Data->mpeg_quant_matrices, |
609 |
|
Data->quant_sq, Data->rel_var8[i], Data->metric); |
610 |
if (bits >= Data->iMinSAD[0]) return bits; |
if (bits >= Data->iMinSAD[0]) return bits; |
611 |
} |
} |
612 |
|
|
615 |
/*chroma U */ |
/*chroma U */ |
616 |
transfer_8to16subro(in, Data->CurU, vGMC->u + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2); |
transfer_8to16subro(in, Data->CurU, vGMC->u + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2); |
617 |
bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, |
bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, |
618 |
Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices, Data->quant_sq); |
Data->scan_table, Data->lambda[4], Data->mpeg_quant_matrices, |
619 |
|
Data->quant_sq, Data->rel_var8[4], Data->metric); |
620 |
|
|
621 |
if (bits >= Data->iMinSAD[0]) return bits; |
if (bits >= Data->iMinSAD[0]) return bits; |
622 |
|
|
623 |
/* chroma V */ |
/* chroma V */ |
624 |
transfer_8to16subro(in, Data->CurV , vGMC->v + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2); |
transfer_8to16subro(in, Data->CurV , vGMC->v + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2); |
625 |
bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, |
bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, |
626 |
Data->scan_table, Data->lambda[5], Data->mpeg_quant_matrices, Data->quant_sq); |
Data->scan_table, Data->lambda[5], Data->mpeg_quant_matrices, |
627 |
|
Data->quant_sq, Data->rel_var8[5], Data->metric); |
628 |
|
|
629 |
bits += BITS_MULT * (mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len - 1); |
bits += BITS_MULT * (mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len - 1); |
630 |
|
|
657 |
Data->quant_sq = iQuant*iQuant; |
Data->quant_sq = iQuant*iQuant; |
658 |
Data->scan_table = VopFlags & XVID_VOP_ALTERNATESCAN ? |
Data->scan_table = VopFlags & XVID_VOP_ALTERNATESCAN ? |
659 |
scan_tables[2] : scan_tables[0]; |
scan_tables[2] : scan_tables[0]; |
660 |
|
Data->metric = !!(VopFlags & XVID_VOP_RD_PSNRHVSM); |
661 |
|
|
662 |
pMB->mcsel = 0; |
pMB->mcsel = 0; |
663 |
|
|
669 |
|
|
670 |
for (i = 0; i < 6; i++) { |
for (i = 0; i < 6; i++) { |
671 |
Data->lambda[i] = (LAMBDA*pMB->lambda[i])>>LAMBDA_EXP; |
Data->lambda[i] = (LAMBDA*pMB->lambda[i])>>LAMBDA_EXP; |
672 |
|
Data->rel_var8[i] = pMB->rel_var8[i]; |
673 |
} |
} |
674 |
|
|
675 |
min_rd = findRD_inter(Data, x, y, pParam, MotionFlags); |
min_rd = findRD_inter(Data, x, y, pParam, MotionFlags); |
769 |
int top = 0, top_right = 0, left = 0; |
int top = 0, top_right = 0, left = 0; |
770 |
Data->scan_table = VopFlags & XVID_VOP_ALTERNATESCAN ? |
Data->scan_table = VopFlags & XVID_VOP_ALTERNATESCAN ? |
771 |
scan_tables[2] : scan_tables[0]; |
scan_tables[2] : scan_tables[0]; |
772 |
|
Data->metric = !!(VopFlags & XVID_VOP_RD_PSNRHVSM); |
773 |
|
|
774 |
pMB->mcsel = 0; |
pMB->mcsel = 0; |
775 |
Data->iQuant = iQuant; |
Data->iQuant = iQuant; |
777 |
|
|
778 |
for (i = 0; i < 6; i++) { |
for (i = 0; i < 6; i++) { |
779 |
Data->lambda[i] = (LAMBDA*pMB->lambda[i])>>LAMBDA_EXP; |
Data->lambda[i] = (LAMBDA*pMB->lambda[i])>>LAMBDA_EXP; |
780 |
|
Data->rel_var8[i] = pMB->rel_var8[i]; |
781 |
} |
} |
782 |
|
|
783 |
/* INTER <-> INTER4V decision */ |
/* INTER <-> INTER4V decision */ |