--- branches/dev-api-4/xvidcore/src/bitstream/mbcoding.c 2003/04/27 15:40:50 994 +++ branches/dev-api-4/xvidcore/src/bitstream/mbcoding.c 2003/04/27 19:47:48 995 @@ -51,6 +51,8 @@ #include #include +#include + #include "../portab.h" #include "../global.h" #include "bitstream.h" @@ -1267,6 +1269,305 @@ } + + + + + + +/************************************************************************ + * Trellis based R-D optimal quantization * + * not really "bitstream" or "mbcoding" related, but needs VLC tables * + * * + * The four CalcBits helpers below return the .len field that coeff_VLC * + * stores for a (run, level) pair, or esc_length (30) when the level is * + * outside the range they index. The inter variants accept levels from * + * -32 to 31 and index with level+32; the intra variants index with * + * abs(level) for values from 0 to 63 and also fall back to esc_length * + * when the stored length is 128. The RunLevelLast variants read entry * + * [1] of the second table index, the other two read entry [0]. * + * NOTE(review): esc_length is presumably the bit cost of an escape * + * code - confirm against the VLC writer. * + * * + ************************************************************************/ + + +int __inline +RunLevel_CalcBits_inter(const int16_t run, int16_t level) +{ + const int esc_length = 30; + + if (!((level+32) & -64)) + return coeff_VLC[0][0][level+32][run].len; + else + return esc_length; +} + +int __inline +RunLevelLast_CalcBits_inter(const int16_t run, const int16_t level) +{ + const int esc_length = 30; + + if (!((level+32) & -64)) + return coeff_VLC[0][1][level+32][run].len; + else + return esc_length; +} + + +int __inline +RunLevel_CalcBits_intra(const int16_t run, int16_t level) +{ + const int esc_length = 30; + int bits; + + level = abs(level); + if (!(level & -64)) { + bits = coeff_VLC[1][0][level][run].len; + if (bits!=128) + return bits; + } + return esc_length; +} + +int __inline +RunLevelLast_CalcBits_intra(const int16_t run, int16_t level) +{ + const int esc_length = 30; + int bits; + + level = abs(level); + if (!(level & -64)) { + bits = coeff_VLC[1][1][level][run].len; + if (bits!=128) + return bits; + } + return esc_length; +} + +/* based on ffmpeg's trellis quant, thanks! 
*/ +/* (C) 2003 Michael Niedermayer */ + +int +dct_quantize_trellis_inter_h263_c (int16_t *qcoeff, const int16_t *data, int quant) +{ + +/* input: data = original DCT coefficients (to calc distortion); they are compared with dequantized levels below, so presumably they are not yet quantized - confirm with the caller */ +/* qcoeff = already quantized DCT coefficients */ +/* quant = quantizer */ +/* output: qcoeff = modified table of quantized DCT coefficients; the return value is the zigzag position of the last non-zero coefficient, or a negative value when nothing is left to code (both negative returns happen before the memset, so qcoeff is left exactly as it was passed in) */ + +/* maybe combining quantize&Trellis would be faster (even though that would disable MMX quant) */ + + int run_tab[65]; + int level_tab[65]; + int score_tab[65]; + int last_run = 0; + int last_level = 0; + int last_score = 0; + int last_i = 0; + int coeff[64]; + int coeff_count[64]; /* 1 when coeff[i]-1 is tried as a second candidate level, 0 otherwise; is a table useful for this 0-1 (or 1-2) table? */ + int last_non_zero, i; + + const uint16_t *const zigzag = &scan_tables[0][0]; + /* ordinary zigzag order, so it's not INTERLACE compatible, yet */ + + const int qmul = 2*quant; + const int qadd = ((quant-1)|1); + +/* apart from the lambda computation below, quant is not needed anymore after this */ + + int score_limit = 0; + int left_limit = 0; + + const int lambda = (quant * quant * 123 + 64) >> 7; // default lagrangian +/* control lambda through an ENVIRONMENT variable (for automatic optimization) */ + +/* + const int lfact=123; // better control of the lagrangian lambda + int lambda = (quant * quant * 123 + 64) >> 7; // default lagrangian + + const char * const trellis_lambda = getenv("TRELLIS_LAMBDA"); + if(trellis_lambda) + lfact = atoi(trellis_lambda); + if (lfact < 1) + lfact = 123; // why this value? Who knows? 
But 123 seems better than 109 = 0.85<<7 + + lambda = (quant * quant * lfact + 64) >> 7; // lagrangian +*/ + + last_non_zero = -1; + for (i = 0; i < 64; i++) + { + const int level = qcoeff[zigzag[i]]; + + if (level) { + last_non_zero = i; + + if (level>0) { + if (level==1) { + coeff[i] = 1; + coeff_count[i] = 0; + } else { + coeff[i] = level; + coeff_count[i] = 1; + } + } else { + if (level==-1) { + coeff[i] = -1; + coeff_count[i] = 0; + } else { + coeff[i] = level+1; // because we check coeff[i] and coeff[i]-1 + coeff_count[i] = 1; + } + } + } else { + coeff[i] = ((data[zigzag[i]]>>31)|1); /* +- 1 because of gap; the sign follows data, assuming an arithmetic right shift */ + coeff_count[i] = 0; + } + } + + if (last_non_zero < 0) + return last_non_zero; + + score_tab[0] = 0; + + for (i = 0; i <= last_non_zero; i++) { + int level, run, j; + const int dct_coeff = data[zigzag[i]]; + const int zero_distortion = dct_coeff * dct_coeff; + int best_score = 256 * 256 * 256 * 120; /* large sentinel: the first candidate score is expected to be lower */ + + int distortion; + int dequant_err; + + last_score += zero_distortion; + + +/****************** level loop unrolled: first check coeff[i] *********/ + level = coeff[i]; + + if (level > 0) // coeff[i]==0 is not possible here + dequant_err = level * qmul + qadd - dct_coeff; + else + dequant_err = level * qmul - qadd - dct_coeff; + + distortion = dequant_err*dequant_err; + + for (run = 0; run <= i - left_limit; run++) { + + int score = distortion + lambda*RunLevel_CalcBits_inter(run, level) + score_tab[i - run]; + + if (score < best_score) + { + best_score = score_tab[i + 1] = score; + run_tab[i + 1] = run; + level_tab[i + 1] = level; + } + } + + for (run = 0; run <= i - left_limit; run++) { + int score = distortion + lambda*RunLevelLast_CalcBits_inter(run, level) + score_tab[i - run]; + + if (score < last_score) + { + last_score = score; + last_run = run; + last_level = level; + last_i = i + 1; + } + } + +/****************** level loop unrolled: if possible, check coeff[i]-1 *********/ + + if (coeff_count[i]) { + + level--; + dequant_err -= qmul; + 
distortion = dequant_err*dequant_err; + + for (run = 0; run <= i - left_limit; run++) { + int score = distortion + lambda*RunLevel_CalcBits_inter(run, level) + score_tab[i-run]; + + if (score < best_score) + { + best_score = score_tab[i + 1] = score; + run_tab[i + 1] = run; + level_tab[i + 1] = level; + } + } + + for (run = 0; run <= i - left_limit; run++) { + int score = distortion + lambda*RunLevelLast_CalcBits_inter(run, level) + score_tab[i-run]; + + if (score < last_score) + { + last_score = score; + last_run = run; + last_level = level; + last_i = i + 1; + } + + } + } // of check coeff[i]-1 + + +/****************** checking coeff[i]-2 isn't supported *********/ + +/****************** add distortion for higher RUN (-> coeff[i]==0) *******/ + for (j = left_limit; j <= i; j++) + score_tab[j] += zero_distortion; + + score_limit += zero_distortion; + + if (score_tab[i + 1] < score_limit) + score_limit = score_tab[i + 1]; + + // there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level + // so we finalize only if we have no chance of getting lower than score_limit + 1*lambda anymore + + while (score_tab[left_limit] > score_limit + lambda) + left_limit++; + + + } // end of (i=0;i<=last_non_zero;i++) -- NOTE(review): last_score is re-initialised right below, so the last_* values collected inside this loop look unused afterwards; confirm that this is intended + + + last_score = 256 * 256 * 256 * 120; + for (i = left_limit; i <= last_non_zero + 1; i++) + { + int score = score_tab[i]; + if (i) + score += 2*lambda; /* coded block means 2 extra bits (roughly) */ + + if (score < last_score) + { + last_score = score; + last_i = i; + last_level = level_tab[i]; + last_run = run_tab[i]; + } + } + + last_non_zero = last_i - 1; + if (last_non_zero < 0) + return last_non_zero; + + i = last_i; + + memset(qcoeff,0x00,64*sizeof(int16_t)); + + qcoeff[zigzag[last_non_zero]] = last_level; + i -= last_run + 1; /* step back over the run stored for the last coefficient */ + + for (; i > 0; i -= run_tab[i] + 1) /* follow the stored runs back towards the start of the block */ + { + qcoeff[zigzag[i-1]] = level_tab[i]; + } + + return last_non_zero; +} + +/* Stub for the MPEG quantization variant: it does not read or write its arguments and always returns 64. */ int +dct_quantize_trellis_inter_mpeg_c (int16_t *qcoeff, const 
int16_t *data, int quant) +{ return 64; } + + + + + /***************************************************************************** * VLC tables and other constant arrays ****************************************************************************/