4 |
* - Rate-Distortion Based Motion Estimation for B- VOPs - |
* - Rate-Distortion Based Motion Estimation for B- VOPs - |
5 |
* |
* |
6 |
* Copyright(C) 2004 Radoslaw Czyz <xvid@syskin.cjb.net> |
* Copyright(C) 2004 Radoslaw Czyz <xvid@syskin.cjb.net> |
7 |
|
* Copyright(C) 2010 Michael Militzer <michael@xvid.org> |
8 |
* |
* |
9 |
* This program is free software ; you can redistribute it and/or modify |
* This program is free software ; you can redistribute it and/or modify |
10 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
20 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
21 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 |
* |
* |
23 |
* $Id: estimation_rd_based_bvop.c,v 1.10 2005-12-09 04:45:35 syskin Exp $ |
* $Id: estimation_rd_based_bvop.c,v 1.11 2010-11-28 15:18:21 Isibaar Exp $ |
24 |
* |
* |
25 |
****************************************************************************/ |
****************************************************************************/ |
26 |
|
|
46 |
/* rd = BITS_MULT*bits + LAMBDA*distortion */ |
/* rd = BITS_MULT*bits + LAMBDA*distortion */ |
47 |
#define LAMBDA ( (int)(BITS_MULT*1.0) ) |
#define LAMBDA ( (int)(BITS_MULT*1.0) ) |
48 |
|
|
|
|
|
49 |
static __inline unsigned int |
static __inline unsigned int |
50 |
Block_CalcBits_BVOP(int16_t * const coeff, |
Block_CalcBits_BVOP(int16_t * const coeff, |
51 |
int16_t * const data, |
int16_t * const data, |
57 |
const unsigned int lambda, |
const unsigned int lambda, |
58 |
const uint16_t * mpeg_quant_matrices, |
const uint16_t * mpeg_quant_matrices, |
59 |
const unsigned int quant_sq, |
const unsigned int quant_sq, |
60 |
int * const cbpcost) |
int * const cbpcost, |
61 |
|
const unsigned int rel_var8, |
62 |
|
const unsigned int metric) |
63 |
{ |
{ |
64 |
int sum; |
int sum; |
65 |
int bits; |
int bits; |
79 |
if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
80 |
else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
81 |
|
|
82 |
distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t)); |
if (metric) distortion = masked_sseh8_16bit(data, dqcoeff, rel_var8); |
83 |
|
else distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t)); |
84 |
|
|
85 |
} else { |
} else { |
86 |
const static int16_t zero_block[64] = |
const static int16_t zero_block[64] = |
87 |
{ |
{ |
95 |
0, 0, 0, 0, 0, 0, 0, 0, |
0, 0, 0, 0, 0, 0, 0, 0, |
96 |
}; |
}; |
97 |
bits = 0; |
bits = 0; |
98 |
distortion = sse8_16bit(data, zero_block, 8*sizeof(int16_t)); |
|
99 |
|
if (metric) distortion = masked_sseh8_16bit(data, (int16_t * const) zero_block, rel_var8); |
100 |
|
else distortion = sse8_16bit(data, (int16_t * const) zero_block, 8*sizeof(int16_t)); |
101 |
|
|
102 |
} |
} |
103 |
|
|
104 |
return bits + (lambda*distortion)/quant_sq; |
return bits + (lambda*distortion)/quant_sq; |
116 |
const unsigned int lambda, |
const unsigned int lambda, |
117 |
const uint16_t * mpeg_quant_matrices, |
const uint16_t * mpeg_quant_matrices, |
118 |
const unsigned int quant_sq, |
const unsigned int quant_sq, |
119 |
int * const cbpcost) |
int * const cbpcost, |
120 |
|
const unsigned int rel_var8, |
121 |
|
const unsigned int metric) |
122 |
{ |
{ |
123 |
int sum; |
int sum; |
124 |
int bits; |
int bits; |
138 |
if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
139 |
else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices); |
140 |
|
|
141 |
distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t)); |
if (metric) distortion = masked_sseh8_16bit(data, dqcoeff, rel_var8); |
142 |
|
else distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t)); |
143 |
|
|
144 |
} else { |
} else { |
145 |
const static int16_t zero_block[64] = |
const static int16_t zero_block[64] = |
146 |
{ |
{ |
154 |
0, 0, 0, 0, 0, 0, 0, 0, |
0, 0, 0, 0, 0, 0, 0, 0, |
155 |
}; |
}; |
156 |
bits = 0; |
bits = 0; |
157 |
distortion = sse8_16bit(data, zero_block, 8*sizeof(int16_t)); |
|
158 |
|
if (metric) distortion = masked_sseh8_16bit(data, (int16_t * const) zero_block, rel_var8); |
159 |
|
else distortion = sse8_16bit(data, (int16_t * const) zero_block, 8*sizeof(int16_t)); |
160 |
|
|
161 |
} |
} |
162 |
|
|
163 |
return bits + (lambda*distortion)/quant_sq; |
return bits + (lambda*distortion)/quant_sq; |
195 |
transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth); |
transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth); |
196 |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
197 |
&cbp, i, data->scan_table, data->lambda[i], data->mpeg_quant_matrices, |
&cbp, i, data->scan_table, data->lambda[i], data->mpeg_quant_matrices, |
198 |
data->quant_sq, &cbpcost); |
data->quant_sq, &cbpcost, data->rel_var8[i], data->metric); |
199 |
if (rd >= data->iMinSAD[0]) return; |
if (rd >= data->iMinSAD[0]) return; |
200 |
} |
} |
201 |
|
|
208 |
transfer_8to16subro(in, data->CurU, ptr, data->iEdgedWidth/2); |
transfer_8to16subro(in, data->CurU, ptr, data->iEdgedWidth/2); |
209 |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
210 |
&cbp, 4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices, |
&cbp, 4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices, |
211 |
data->quant_sq, &cbpcost); |
data->quant_sq, &cbpcost, data->rel_var8[4], data->metric); |
212 |
if (rd >= data->iMinSAD[0]) return; |
if (rd >= data->iMinSAD[0]) return; |
213 |
|
|
214 |
/* chroma V */ |
/* chroma V */ |
216 |
transfer_8to16subro(in, data->CurV, ptr, data->iEdgedWidth/2); |
transfer_8to16subro(in, data->CurV, ptr, data->iEdgedWidth/2); |
217 |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
218 |
&cbp, 5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices, |
&cbp, 5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices, |
219 |
data->quant_sq, &cbpcost); |
data->quant_sq, &cbpcost, data->rel_var8[5], data->metric); |
220 |
|
|
221 |
if (rd < data->iMinSAD[0]) { |
if (rd < data->iMinSAD[0]) { |
222 |
data->iMinSAD[0] = rd; |
data->iMinSAD[0] = rd; |
275 |
transfer_8to16sub2ro(in, data->Cur + s, ReferenceF, ReferenceB, data->iEdgedWidth); |
transfer_8to16sub2ro(in, data->Cur + s, ReferenceF, ReferenceB, data->iEdgedWidth); |
276 |
rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
277 |
&cbp, k, data->scan_table, data->lambda[k], data->mpeg_quant_matrices, |
&cbp, k, data->scan_table, data->lambda[k], data->mpeg_quant_matrices, |
278 |
data->quant_sq, &cbpcost); |
data->quant_sq, &cbpcost, data->rel_var8[k], data->metric); |
279 |
if (rd > *(data->iMinSAD)) return; |
if (rd > *(data->iMinSAD)) return; |
280 |
} |
} |
281 |
|
|
291 |
transfer_8to16sub2ro(in, data->CurU, ReferenceF, ReferenceB, data->iEdgedWidth/2); |
transfer_8to16sub2ro(in, data->CurU, ReferenceF, ReferenceB, data->iEdgedWidth/2); |
292 |
rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
293 |
&cbp, 4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices, |
&cbp, 4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices, |
294 |
data->quant_sq, &cbpcost); |
data->quant_sq, &cbpcost, data->rel_var8[4], data->metric); |
295 |
if (rd >= data->iMinSAD[0]) return; |
if (rd >= data->iMinSAD[0]) return; |
296 |
|
|
297 |
/* chroma V */ |
/* chroma V */ |
300 |
transfer_8to16sub2ro(in, data->CurV, ReferenceF, ReferenceB, data->iEdgedWidth/2); |
transfer_8to16sub2ro(in, data->CurV, ReferenceF, ReferenceB, data->iEdgedWidth/2); |
301 |
rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, |
302 |
&cbp, 5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices, |
&cbp, 5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices, |
303 |
data->quant_sq, &cbpcost); |
data->quant_sq, &cbpcost, data->rel_var8[5], data->metric); |
304 |
|
|
305 |
if (cbp || x != 0 || y != 0) |
if (cbp || x != 0 || y != 0) |
306 |
rd += BITS_MULT * d_mv_bits(x, y, zeroMV, 1, 0); |
rd += BITS_MULT * d_mv_bits(x, y, zeroMV, 1, 0); |
365 |
transfer_8to16sub2ro(in, data->Cur + s, ReferenceF + s, ReferenceB + s, data->iEdgedWidth); |
transfer_8to16sub2ro(in, data->Cur + s, ReferenceF + s, ReferenceB + s, data->iEdgedWidth); |
366 |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, |
367 |
i, data->scan_table, data->lambda[i], data->mpeg_quant_matrices, |
i, data->scan_table, data->lambda[i], data->mpeg_quant_matrices, |
368 |
data->quant_sq, &cbpcost); |
data->quant_sq, &cbpcost, data->rel_var8[i], data->metric); |
369 |
} |
} |
370 |
|
|
371 |
/* chroma */ |
/* chroma */ |
380 |
transfer_8to16sub2ro(in, data->CurU, ReferenceF, ReferenceB, data->iEdgedWidth/2); |
transfer_8to16sub2ro(in, data->CurU, ReferenceF, ReferenceB, data->iEdgedWidth/2); |
381 |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, |
382 |
4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices, |
4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices, |
383 |
data->quant_sq, &cbpcost); |
data->quant_sq, &cbpcost, data->rel_var8[4], data->metric); |
384 |
if (rd >= data->iMinSAD[0]) return; |
if (rd >= data->iMinSAD[0]) return; |
385 |
|
|
386 |
|
|
390 |
transfer_8to16sub2ro(in, data->CurV, ReferenceF, ReferenceB, data->iEdgedWidth/2); |
transfer_8to16sub2ro(in, data->CurV, ReferenceF, ReferenceB, data->iEdgedWidth/2); |
391 |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, |
rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, |
392 |
5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices, |
5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices, |
393 |
data->quant_sq, &cbpcost); |
data->quant_sq, &cbpcost, data->rel_var8[5], data->metric); |
394 |
|
|
395 |
if (rd < *(data->iMinSAD)) { |
if (rd < *(data->iMinSAD)) { |
396 |
*data->iMinSAD = rd; |
*data->iMinSAD = rd; |
496 |
VECTOR * f_predMV, |
VECTOR * f_predMV, |
497 |
VECTOR * b_predMV, |
VECTOR * b_predMV, |
498 |
const uint32_t MotionFlags, |
const uint32_t MotionFlags, |
499 |
|
const uint32_t VopFlags, |
500 |
const MBParam * const pParam, |
const MBParam * const pParam, |
501 |
int x, int y, |
int x, int y, |
502 |
int best_sad) |
int best_sad) |
511 |
|
|
512 |
int order[4] = {MODE_DIRECT, MODE_FORWARD, MODE_BACKWARD, MODE_INTERPOLATE}; |
int order[4] = {MODE_DIRECT, MODE_FORWARD, MODE_BACKWARD, MODE_INTERPOLATE}; |
513 |
|
|
514 |
|
Data_d->metric = Data_b->metric = Data_f->metric = Data_i->metric = !!(VopFlags & XVID_VOP_RD_PSNRHVSM); |
515 |
|
|
516 |
Data_d->scan_table = Data_b->scan_table = Data_f->scan_table = Data_i->scan_table |
Data_d->scan_table = Data_b->scan_table = Data_f->scan_table = Data_i->scan_table |
517 |
= /*VopFlags & XVID_VOP_ALTERNATESCAN ? scan_tables[2] : */scan_tables[0]; |
= /*VopFlags & XVID_VOP_ALTERNATESCAN ? scan_tables[2] : */scan_tables[0]; |
518 |
*Data_f->cbp = *Data_b->cbp = *Data_i->cbp = *Data_d->cbp = 63; |
*Data_f->cbp = *Data_b->cbp = *Data_i->cbp = *Data_d->cbp = 63; |
527 |
Data_b->lambda[i] = lam; |
Data_b->lambda[i] = lam; |
528 |
Data_f->lambda[i] = lam; |
Data_f->lambda[i] = lam; |
529 |
Data_i->lambda[i] = lam; |
Data_i->lambda[i] = lam; |
530 |
|
|
531 |
|
Data_d->rel_var8[i] = pMB->rel_var8[i]; |
532 |
|
Data_b->rel_var8[i] = pMB->rel_var8[i]; |
533 |
|
Data_f->rel_var8[i] = pMB->rel_var8[i]; |
534 |
|
Data_i->rel_var8[i] = pMB->rel_var8[i]; |
535 |
} |
} |
536 |
|
|
537 |
/* find the best order of evaluation - smallest SAD comes first, because *if* it means smaller RD, |
/* find the best order of evaluation - smallest SAD comes first, because *if* it means smaller RD, |