[svn] / branches / dev-api-4 / xvidcore / src / utils / mbtransquant.c Repository:
ViewVC logotype

Annotation of /branches/dev-api-4/xvidcore/src/utils/mbtransquant.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1232 - (view) (download)

1 : edgomez 965 /*****************************************************************************
2 :     *
3 :     * XVID MPEG-4 VIDEO CODEC
4 :     * - MB Transfert/Quantization functions -
5 :     *
6 :     * Copyright(C) 2001-2003 Peter Ross <pross@xvid.org>
7 :     * 2001-2003 Michael Militzer <isibaar@xvid.org>
8 :     * 2003 Edouard Gomez <ed.gomez@free.fr>
9 :     *
10 :     * This program is free software ; you can redistribute it and/or modify
11 :     * it under the terms of the GNU General Public License as published by
12 :     * the Free Software Foundation ; either version 2 of the License, or
13 :     * (at your option) any later version.
14 :     *
15 :     * This program is distributed in the hope that it will be useful,
16 :     * but WITHOUT ANY WARRANTY ; without even the implied warranty of
17 :     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 :     * GNU General Public License for more details.
19 :     *
20 :     * You should have received a copy of the GNU General Public License
21 :     * along with this program ; if not, write to the Free Software
22 :     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 :     *
24 : syskin 1232 * $Id: mbtransquant.c,v 1.21.2.22 2003-12-01 10:46:40 syskin Exp $
25 : edgomez 965 *
26 :     ****************************************************************************/
27 : Isibaar 3
28 : chl 1012 #include <stdio.h>
29 :     #include <stdlib.h>
30 : edgomez 78 #include <string.h>
31 :    
32 : Isibaar 3 #include "../portab.h"
33 :     #include "mbfunctions.h"
34 :    
35 :     #include "../global.h"
36 :     #include "mem_transfer.h"
37 :     #include "timer.h"
38 : chl 995 #include "../bitstream/mbcoding.h"
39 : chl 1011 #include "../bitstream/zigzag.h"
40 : Isibaar 3 #include "../dct/fdct.h"
41 :     #include "../dct/idct.h"
42 : edgomez 1174 #include "../quant/quant.h"
43 : Isibaar 3 #include "../encoder.h"
44 :    
45 : edgomez 851 #include "../image/reduced.h"
46 : edgomez 1223 #include "../quant/quant_matrix.h"
47 : Isibaar 3
48 : edgomez 851 MBFIELDTEST_PTR MBFieldTest;
49 : Isibaar 3
50 : edgomez 965 /*
51 :     * Skip blocks having a coefficient sum below this value. This value will be
52 :     * corrected according to the MB quantizer to avoid artifacts for quant==1
53 :     */
54 :     #define PVOP_TOOSMALL_LIMIT 1
55 :     #define BVOP_TOOSMALL_LIMIT 3
56 : Isibaar 3
57 : edgomez 965 /*****************************************************************************
58 :     * Local functions
59 :     ****************************************************************************/
60 :    
61 :     /* permute block and return field dct choice */
62 :     static __inline uint32_t
63 :     MBDecideFieldDCT(int16_t data[6 * 64])
64 : Isibaar 3 {
65 : edgomez 965 uint32_t field = MBFieldTest(data);
66 : edgomez 78
67 : edgomez 965 if (field)
68 :     MBFrameToField(data);
69 : Isibaar 3
70 : edgomez 965 return field;
71 :     }
72 : h 69
73 : edgomez 965 /* Performs Forward DCT on all blocks */
74 :     static __inline void
75 : syskin 984 MBfDCT(const MBParam * const pParam,
76 :     const FRAMEINFO * const frame,
77 :     MACROBLOCK * const pMB,
78 : edgomez 965 uint32_t x_pos,
79 :     uint32_t y_pos,
80 :     int16_t data[6 * 64])
81 : syskin 984 {
82 : edgomez 965 /* Handles interlacing */
83 : h 69 start_timer();
84 :     pMB->field_dct = 0;
85 : edgomez 949 if ((frame->vol_flags & XVID_VOL_INTERLACING) &&
86 : h 390 (x_pos>0) && (x_pos<pParam->mb_width-1) &&
87 :     (y_pos>0) && (y_pos<pParam->mb_height-1)) {
88 : h 69 pMB->field_dct = MBDecideFieldDCT(data);
89 :     }
90 :     stop_interlacing_timer();
91 :    
92 : edgomez 965 /* Perform DCT */
93 :     start_timer();
94 :     fdct(&data[0 * 64]);
95 :     fdct(&data[1 * 64]);
96 :     fdct(&data[2 * 64]);
97 :     fdct(&data[3 * 64]);
98 :     fdct(&data[4 * 64]);
99 :     fdct(&data[5 * 64]);
100 :     stop_dct_timer();
101 :     }
102 :    
103 :     /* Performs Inverse DCT on all blocks */
104 :     static __inline void
105 :     MBiDCT(int16_t data[6 * 64],
106 :     const uint8_t cbp)
107 :     {
108 :     start_timer();
109 :     if(cbp & (1 << (5 - 0))) idct(&data[0 * 64]);
110 :     if(cbp & (1 << (5 - 1))) idct(&data[1 * 64]);
111 :     if(cbp & (1 << (5 - 2))) idct(&data[2 * 64]);
112 :     if(cbp & (1 << (5 - 3))) idct(&data[3 * 64]);
113 :     if(cbp & (1 << (5 - 4))) idct(&data[4 * 64]);
114 :     if(cbp & (1 << (5 - 5))) idct(&data[5 * 64]);
115 :     stop_idct_timer();
116 :     }
117 :    
118 :     /* Quantize all blocks -- Intra mode */
119 :     static __inline void
120 :     MBQuantIntra(const MBParam * pParam,
121 : chl 995 const FRAMEINFO * const frame,
122 : edgomez 965 const MACROBLOCK * pMB,
123 : syskin 984 int16_t qcoeff[6 * 64],
124 : edgomez 965 int16_t data[6*64])
125 :     {
126 : edgomez 1139 int mpeg;
127 :     int scaler_lum, scaler_chr;
128 : edgomez 965
129 : edgomez 1174 quant_intraFuncPtr const quant[2] =
130 : edgomez 1139 {
131 : edgomez 1174 quant_h263_intra,
132 :     quant_mpeg_intra
133 : edgomez 1139 };
134 : edgomez 965
135 : edgomez 1139 mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
136 :     scaler_lum = get_dc_scaler(pMB->quant, 1);
137 :     scaler_chr = get_dc_scaler(pMB->quant, 0);
138 :    
139 :     /* Quantize the block */
140 :     start_timer();
141 : edgomez 1230 quant[mpeg](&data[0 * 64], &qcoeff[0 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
142 :     quant[mpeg](&data[1 * 64], &qcoeff[1 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
143 :     quant[mpeg](&data[2 * 64], &qcoeff[2 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
144 :     quant[mpeg](&data[3 * 64], &qcoeff[3 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
145 :     quant[mpeg](&data[4 * 64], &qcoeff[4 * 64], pMB->quant, scaler_chr, pParam->mpeg_quant_matrices);
146 :     quant[mpeg](&data[5 * 64], &qcoeff[5 * 64], pMB->quant, scaler_chr, pParam->mpeg_quant_matrices);
147 : edgomez 1139 stop_quant_timer();
148 : edgomez 965 }
149 :    
150 :     /* DeQuantize all blocks -- Intra mode */
151 :     static __inline void
152 :     MBDeQuantIntra(const MBParam * pParam,
153 :     const int iQuant,
154 :     int16_t qcoeff[6 * 64],
155 :     int16_t data[6*64])
156 :     {
157 : edgomez 1139 int mpeg;
158 :     int scaler_lum, scaler_chr;
159 : edgomez 965
160 : edgomez 1174 quant_intraFuncPtr const dequant[2] =
161 : edgomez 1139 {
162 : edgomez 1174 dequant_h263_intra,
163 :     dequant_mpeg_intra
164 : edgomez 1139 };
165 : Isibaar 3
166 : edgomez 1139 mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
167 :     scaler_lum = get_dc_scaler(iQuant, 1);
168 :     scaler_chr = get_dc_scaler(iQuant, 0);
169 :    
170 :     start_timer();
171 : edgomez 1230 dequant[mpeg](&qcoeff[0 * 64], &data[0 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
172 :     dequant[mpeg](&qcoeff[1 * 64], &data[1 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
173 :     dequant[mpeg](&qcoeff[2 * 64], &data[2 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
174 :     dequant[mpeg](&qcoeff[3 * 64], &data[3 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
175 :     dequant[mpeg](&qcoeff[4 * 64], &data[4 * 64], iQuant, scaler_chr, pParam->mpeg_quant_matrices);
176 :     dequant[mpeg](&qcoeff[5 * 64], &data[5 * 64], iQuant, scaler_chr, pParam->mpeg_quant_matrices);
177 : edgomez 1139 stop_iquant_timer();
178 : edgomez 965 }
179 : Isibaar 3
180 : edgomez 1161 static int
181 : edgomez 1223 dct_quantize_trellis_c(int16_t *const Out,
182 :     const int16_t *const In,
183 :     int Q,
184 :     const uint16_t * const Zigzag,
185 :     const uint16_t * const QuantMatrix,
186 :     int Non_Zero);
187 : chl 1011
188 : edgomez 965 /* Quantize all blocks -- Inter mode */
189 :     static __inline uint8_t
190 :     MBQuantInter(const MBParam * pParam,
191 : chl 995 const FRAMEINFO * const frame,
192 : edgomez 965 const MACROBLOCK * pMB,
193 :     int16_t data[6 * 64],
194 :     int16_t qcoeff[6 * 64],
195 :     int bvop,
196 :     int limit)
197 :     {
198 :    
199 :     int i;
200 :     uint8_t cbp = 0;
201 :     int sum;
202 : edgomez 1139 int code_block, mpeg;
203 : edgomez 965
204 : edgomez 1174 quant_interFuncPtr const quant[2] =
205 : edgomez 1139 {
206 : edgomez 1174 quant_h263_inter,
207 :     quant_mpeg_inter
208 : edgomez 1139 };
209 :    
210 :     mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
211 :    
212 : edgomez 965 for (i = 0; i < 6; i++) {
213 : syskin 984
214 : edgomez 965 /* Quantize the block */
215 :     start_timer();
216 : edgomez 1139
217 : edgomez 1230 sum = quant[mpeg](&qcoeff[i*64], &data[i*64], pMB->quant, pParam->mpeg_quant_matrices);
218 : edgomez 1139
219 :     if(sum && (frame->vop_flags & XVID_VOP_TRELLISQUANT)) {
220 : edgomez 1230 const uint16_t *matrix;
221 : edgomez 1223 const static uint16_t h263matrix[] =
222 :     {
223 :     16, 16, 16, 16, 16, 16, 16, 16,
224 :     16, 16, 16, 16, 16, 16, 16, 16,
225 :     16, 16, 16, 16, 16, 16, 16, 16,
226 :     16, 16, 16, 16, 16, 16, 16, 16,
227 :     16, 16, 16, 16, 16, 16, 16, 16,
228 :     16, 16, 16, 16, 16, 16, 16, 16,
229 :     16, 16, 16, 16, 16, 16, 16, 16,
230 :     16, 16, 16, 16, 16, 16, 16, 16
231 :     };
232 : edgomez 1230
233 :     matrix = (mpeg)?get_inter_matrix(pParam->mpeg_quant_matrices):h263matrix;
234 : edgomez 1223 sum = dct_quantize_trellis_c(&qcoeff[i*64], &data[i*64],
235 :     pMB->quant, &scan_tables[0][0],
236 : edgomez 1230 matrix,
237 : edgomez 1223 63);
238 : chl 995 }
239 : edgomez 965 stop_quant_timer();
240 :    
241 :     /*
242 :     * We code the block if the sum is higher than the limit and if the first
243 :     * two AC coefficients in zig zag order are not zero.
244 :     */
245 :     code_block = 0;
246 :     if ((sum >= limit) || (qcoeff[i*64+1] != 0) || (qcoeff[i*64+8] != 0)) {
247 :     code_block = 1;
248 : edgomez 195 } else {
249 : Isibaar 3
250 : edgomez 965 if (bvop && (pMB->mode == MODE_DIRECT || pMB->mode == MODE_DIRECT_NO4V)) {
251 :     /* dark blocks prevention for direct mode */
252 :     if ((qcoeff[i*64] < -1) || (qcoeff[i*64] > 0))
253 :     code_block = 1;
254 : edgomez 851 } else {
255 : edgomez 965 /* not direct mode */
256 :     if (qcoeff[i*64] != 0)
257 :     code_block = 1;
258 : edgomez 851 }
259 : Isibaar 3 }
260 :    
261 : edgomez 965 /* Set the corresponding cbp bit */
262 :     cbp |= code_block << (5 - i);
263 :     }
264 : edgomez 851
265 : edgomez 965 return(cbp);
266 :     }
267 : Isibaar 3
268 : edgomez 965 /* DeQuantize all blocks -- Inter mode */
269 : syskin 984 static __inline void
270 : edgomez 965 MBDeQuantInter(const MBParam * pParam,
271 :     const int iQuant,
272 :     int16_t data[6 * 64],
273 :     int16_t qcoeff[6 * 64],
274 :     const uint8_t cbp)
275 :     {
276 : edgomez 1139 int mpeg;
277 : edgomez 965
278 : edgomez 1174 quant_interFuncPtr const dequant[2] =
279 : edgomez 1139 {
280 : edgomez 1174 dequant_h263_inter,
281 :     dequant_mpeg_inter
282 : edgomez 1139 };
283 :    
284 :     mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
285 :    
286 :     start_timer();
287 : edgomez 1230 if(cbp & (1 << (5 - 0))) dequant[mpeg](&data[0 * 64], &qcoeff[0 * 64], iQuant, pParam->mpeg_quant_matrices);
288 :     if(cbp & (1 << (5 - 1))) dequant[mpeg](&data[1 * 64], &qcoeff[1 * 64], iQuant, pParam->mpeg_quant_matrices);
289 :     if(cbp & (1 << (5 - 2))) dequant[mpeg](&data[2 * 64], &qcoeff[2 * 64], iQuant, pParam->mpeg_quant_matrices);
290 :     if(cbp & (1 << (5 - 3))) dequant[mpeg](&data[3 * 64], &qcoeff[3 * 64], iQuant, pParam->mpeg_quant_matrices);
291 :     if(cbp & (1 << (5 - 4))) dequant[mpeg](&data[4 * 64], &qcoeff[4 * 64], iQuant, pParam->mpeg_quant_matrices);
292 :     if(cbp & (1 << (5 - 5))) dequant[mpeg](&data[5 * 64], &qcoeff[5 * 64], iQuant, pParam->mpeg_quant_matrices);
293 : edgomez 1139 stop_iquant_timer();
294 : Isibaar 3 }
295 :    
296 : edgomez 965 typedef void (transfer_operation_8to16_t) (int16_t *Dst, const uint8_t *Src, int BpS);
297 :     typedef void (transfer_operation_16to8_t) (uint8_t *Dst, const int16_t *Src, int BpS);
298 : Isibaar 3
299 : edgomez 78
300 : edgomez 965 static __inline void
301 : syskin 984 MBTrans8to16(const MBParam * const pParam,
302 :     const FRAMEINFO * const frame,
303 :     const MACROBLOCK * const pMB,
304 : edgomez 965 const uint32_t x_pos,
305 :     const uint32_t y_pos,
306 :     int16_t data[6 * 64])
307 :     {
308 : h 82 uint32_t stride = pParam->edged_width;
309 :     uint32_t stride2 = stride / 2;
310 : edgomez 965 uint32_t next_block = stride * 8;
311 : syskin 984 int32_t cst;
312 : edgomez 1139 int vop_reduced;
313 : Isibaar 3 uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
314 : syskin 984 const IMAGE * const pCurrent = &frame->image;
315 : edgomez 1139 transfer_operation_8to16_t * const functions[2] =
316 :     {
317 :     (transfer_operation_8to16_t *)transfer_8to16copy,
318 :     (transfer_operation_8to16_t *)filter_18x18_to_8x8
319 :     };
320 : edgomez 965 transfer_operation_8to16_t *transfer_op = NULL;
321 : edgomez 195
322 : edgomez 1139 vop_reduced = !!(frame->vop_flags & XVID_VOP_REDUCED);
323 : edgomez 965
324 : edgomez 1139 /* Image pointers */
325 :     pY_Cur = pCurrent->y + (y_pos << (4+vop_reduced)) * stride + (x_pos << (4+vop_reduced));
326 :     pU_Cur = pCurrent->u + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
327 :     pV_Cur = pCurrent->v + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
328 : edgomez 965
329 : edgomez 1139 /* Block size */
330 :     cst = 8<<vop_reduced;
331 : edgomez 965
332 : edgomez 1139 /* Operation function */
333 :     transfer_op = functions[vop_reduced];
334 : edgomez 965
335 :     /* Do the transfer */
336 : h 69 start_timer();
337 : edgomez 965 transfer_op(&data[0 * 64], pY_Cur, stride);
338 :     transfer_op(&data[1 * 64], pY_Cur + cst, stride);
339 :     transfer_op(&data[2 * 64], pY_Cur + next_block, stride);
340 :     transfer_op(&data[3 * 64], pY_Cur + next_block + cst, stride);
341 :     transfer_op(&data[4 * 64], pU_Cur, stride2);
342 :     transfer_op(&data[5 * 64], pV_Cur, stride2);
343 :     stop_transfer_timer();
344 : syskin 984 }
345 : edgomez 965
346 :     static __inline void
347 : syskin 984 MBTrans16to8(const MBParam * const pParam,
348 :     const FRAMEINFO * const frame,
349 :     const MACROBLOCK * const pMB,
350 : edgomez 965 const uint32_t x_pos,
351 :     const uint32_t y_pos,
352 :     int16_t data[6 * 64],
353 : edgomez 1139 const uint32_t add, /* Must be 1 or 0 */
354 : edgomez 965 const uint8_t cbp)
355 :     {
356 :     uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
357 :     uint32_t stride = pParam->edged_width;
358 :     uint32_t stride2 = stride / 2;
359 :     uint32_t next_block = stride * 8;
360 : syskin 984 uint32_t cst;
361 : edgomez 1139 int vop_reduced;
362 : syskin 984 const IMAGE * const pCurrent = &frame->image;
363 : edgomez 1174
364 : edgomez 1139 /* Array of function pointers, indexed by [vop_reduced<<1+add] */
365 :     transfer_operation_16to8_t * const functions[4] =
366 :     {
367 :     (transfer_operation_16to8_t*)transfer_16to8copy,
368 :     (transfer_operation_16to8_t*)transfer_16to8add,
369 :     (transfer_operation_16to8_t*)copy_upsampled_8x8_16to8,
370 :     (transfer_operation_16to8_t*)add_upsampled_8x8_16to8
371 :     };
372 : edgomez 1161
373 : edgomez 965 transfer_operation_16to8_t *transfer_op = NULL;
374 :    
375 : edgomez 1139 /* Makes this vars booleans */
376 :     vop_reduced = !!(frame->vop_flags & XVID_VOP_REDUCED);
377 : edgomez 851
378 : edgomez 1139 /* Image pointers */
379 :     pY_Cur = pCurrent->y + (y_pos << (4+vop_reduced)) * stride + (x_pos << (4+vop_reduced));
380 :     pU_Cur = pCurrent->u + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
381 :     pV_Cur = pCurrent->v + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
382 : Isibaar 3
383 : syskin 1232 if (pMB->field_dct) {
384 :     next_block = stride;
385 :     stride *= 2;
386 :     }
387 :    
388 : edgomez 1139 /* Block size */
389 :     cst = 8<<vop_reduced;
390 : Isibaar 3
391 : edgomez 1139 /* Operation function */
392 :     transfer_op = functions[(vop_reduced<<1) + add];
393 : Isibaar 3
394 : edgomez 965 /* Do the operation */
395 : h 69 start_timer();
396 : edgomez 1139 if (cbp&32) transfer_op(pY_Cur, &data[0 * 64], stride);
397 :     if (cbp&16) transfer_op(pY_Cur + cst, &data[1 * 64], stride);
398 :     if (cbp& 8) transfer_op(pY_Cur + next_block, &data[2 * 64], stride);
399 : edgomez 965 if (cbp& 4) transfer_op(pY_Cur + next_block + cst, &data[3 * 64], stride);
400 : edgomez 1139 if (cbp& 2) transfer_op(pU_Cur, &data[4 * 64], stride2);
401 :     if (cbp& 1) transfer_op(pV_Cur, &data[5 * 64], stride2);
402 : h 69 stop_transfer_timer();
403 : Isibaar 3 }
404 : h 69
405 : edgomez 965 /*****************************************************************************
406 :     * Module functions
407 :     ****************************************************************************/
408 :    
409 : syskin 984 void
410 :     MBTransQuantIntra(const MBParam * const pParam,
411 :     const FRAMEINFO * const frame,
412 :     MACROBLOCK * const pMB,
413 : chl 368 const uint32_t x_pos,
414 :     const uint32_t y_pos,
415 :     int16_t data[6 * 64],
416 :     int16_t qcoeff[6 * 64])
417 :     {
418 : h 69
419 : edgomez 965 /* Transfer data */
420 :     MBTrans8to16(pParam, frame, pMB, x_pos, y_pos, data);
421 : chl 368
422 : edgomez 965 /* Perform DCT (and field decision) */
423 :     MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
424 : chl 368
425 : edgomez 965 /* Quantize the block */
426 : chl 995 MBQuantIntra(pParam, frame, pMB, data, qcoeff);
427 : edgomez 965
428 :     /* DeQuantize the block */
429 :     MBDeQuantIntra(pParam, pMB->quant, data, qcoeff);
430 :    
431 :     /* Perform inverse DCT*/
432 :     MBiDCT(data, 0x3F);
433 :    
434 :     /* Transfer back the data -- Don't add data */
435 :     MBTrans16to8(pParam, frame, pMB, x_pos, y_pos, data, 0, 0x3F);
436 : chl 368 }
437 :    
438 : edgomez 965
439 : chl 368 uint8_t
440 : syskin 984 MBTransQuantInter(const MBParam * const pParam,
441 :     const FRAMEINFO * const frame,
442 :     MACROBLOCK * const pMB,
443 : edgomez 914 const uint32_t x_pos,
444 :     const uint32_t y_pos,
445 : chl 368 int16_t data[6 * 64],
446 :     int16_t qcoeff[6 * 64])
447 :     {
448 :     uint8_t cbp;
449 : edgomez 965 uint32_t limit;
450 : chl 368
451 : edgomez 1174 /* There is no MBTrans8to16 for Inter block, that's done in motion compensation
452 :     * already */
453 : chl 368
454 : edgomez 965 /* Perform DCT (and field decision) */
455 :     MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
456 : edgomez 914
457 : edgomez 965 /* Set the limit threshold */
458 :     limit = PVOP_TOOSMALL_LIMIT + ((pMB->quant == 1)? 1 : 0);
459 : chl 368
460 : Isibaar 1095 if (frame->vop_flags & XVID_VOP_CARTOON)
461 :     limit *= 3;
462 :    
463 : edgomez 965 /* Quantize the block */
464 : chl 995 cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 0, limit);
465 : chl 368
466 : edgomez 965 /* DeQuantize the block */
467 :     MBDeQuantInter(pParam, pMB->quant, data, qcoeff, cbp);
468 : chl 368
469 : edgomez 965 /* Perform inverse DCT*/
470 :     MBiDCT(data, cbp);
471 : chl 368
472 : edgomez 965 /* Transfer back the data -- Add the data */
473 :     MBTrans16to8(pParam, frame, pMB, x_pos, y_pos, data, 1, cbp);
474 : syskin 984
475 : edgomez 965 return(cbp);
476 : chl 368 }
477 :    
478 : edgomez 965 uint8_t
479 :     MBTransQuantInterBVOP(const MBParam * pParam,
480 : edgomez 1139 FRAMEINFO * frame,
481 :     MACROBLOCK * pMB,
482 :     const uint32_t x_pos,
483 :     const uint32_t y_pos,
484 :     int16_t data[6 * 64],
485 :     int16_t qcoeff[6 * 64])
486 : chl 368 {
487 : edgomez 965 uint8_t cbp;
488 :     uint32_t limit;
489 : syskin 984
490 : edgomez 1174 /* There is no MBTrans8to16 for Inter block, that's done in motion compensation
491 :     * already */
492 : chl 368
493 : edgomez 965 /* Perform DCT (and field decision) */
494 :     MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
495 : chl 368
496 : edgomez 965 /* Set the limit threshold */
497 :     limit = BVOP_TOOSMALL_LIMIT;
498 : chl 368
499 : Isibaar 1095 if (frame->vop_flags & XVID_VOP_CARTOON)
500 :     limit *= 2;
501 :    
502 : edgomez 965 /* Quantize the block */
503 : chl 995 cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 1, limit);
504 : chl 368
505 : edgomez 965 /*
506 :     * History comment:
507 :     * We don't have to DeQuant, iDCT and Transfer back data for B-frames.
508 :     *
509 : edgomez 1174 * BUT some plugins require the rebuilt original frame to be passed so we
510 :     * have to take care of that here
511 : edgomez 965 */
512 :     if((pParam->plugin_flags & XVID_REQORIGINAL)) {
513 : chl 368
514 : edgomez 965 /* DeQuantize the block */
515 :     MBDeQuantInter(pParam, pMB->quant, data, qcoeff, cbp);
516 : chl 368
517 : edgomez 965 /* Perform inverse DCT*/
518 :     MBiDCT(data, cbp);
519 : h 69
520 : edgomez 965 /* Transfer back the data -- Add the data */
521 :     MBTrans16to8(pParam, frame, pMB, x_pos, y_pos, data, 1, cbp);
522 : edgomez 851 }
523 :    
524 : edgomez 965 return(cbp);
525 : edgomez 851 }
526 :    
527 :     /* if sum(diff between field lines) < sum(diff between frame lines), use field dct */
528 :     uint32_t
529 :     MBFieldTest_c(int16_t data[6 * 64])
530 :     {
531 : edgomez 195 const uint8_t blocks[] =
532 :     { 0 * 64, 0 * 64, 0 * 64, 0 * 64, 2 * 64, 2 * 64, 2 * 64, 2 * 64 };
533 :     const uint8_t lines[] = { 0, 16, 32, 48, 0, 16, 32, 48 };
534 : edgomez 78
535 : h 69 int frame = 0, field = 0;
536 :     int i, j;
537 :    
538 : edgomez 195 for (i = 0; i < 7; ++i) {
539 :     for (j = 0; j < 8; ++j) {
540 :     frame +=
541 : edgomez 982 abs(data[0 * 64 + (i + 1) * 8 + j] - data[0 * 64 + i * 8 + j]);
542 : edgomez 195 frame +=
543 : edgomez 982 abs(data[1 * 64 + (i + 1) * 8 + j] - data[1 * 64 + i * 8 + j]);
544 : edgomez 195 frame +=
545 : edgomez 982 abs(data[2 * 64 + (i + 1) * 8 + j] - data[2 * 64 + i * 8 + j]);
546 : edgomez 195 frame +=
547 : edgomez 982 abs(data[3 * 64 + (i + 1) * 8 + j] - data[3 * 64 + i * 8 + j]);
548 : h 69
549 : edgomez 195 field +=
550 : edgomez 982 abs(data[blocks[i + 1] + lines[i + 1] + j] -
551 : edgomez 195 data[blocks[i] + lines[i] + j]);
552 :     field +=
553 : edgomez 982 abs(data[blocks[i + 1] + lines[i + 1] + 8 + j] -
554 : edgomez 195 data[blocks[i] + lines[i] + 8 + j]);
555 :     field +=
556 : edgomez 982 abs(data[blocks[i + 1] + 64 + lines[i + 1] + j] -
557 : edgomez 195 data[blocks[i] + 64 + lines[i] + j]);
558 :     field +=
559 : edgomez 982 abs(data[blocks[i + 1] + 64 + lines[i + 1] + 8 + j] -
560 : edgomez 195 data[blocks[i] + 64 + lines[i] + 8 + j]);
561 : h 69 }
562 :     }
563 :    
564 : edgomez 851 return (frame >= (field + 350));
565 : h 69 }
566 :    
567 :    
568 :     /* deinterlace Y blocks vertically */
569 :    
570 :     #define MOVLINE(X,Y) memcpy(X, Y, sizeof(tmp))
571 : syskin 984 #define LINE(X,Y) &data[X*64 + Y*8]
572 : h 69
573 : edgomez 195 void
574 :     MBFrameToField(int16_t data[6 * 64])
575 : h 69 {
576 :     int16_t tmp[8];
577 :    
578 :     /* left blocks */
579 :    
580 : edgomez 1053 /* 1=2, 2=4, 4=8, 8=1 */
581 : edgomez 195 MOVLINE(tmp, LINE(0, 1));
582 :     MOVLINE(LINE(0, 1), LINE(0, 2));
583 :     MOVLINE(LINE(0, 2), LINE(0, 4));
584 :     MOVLINE(LINE(0, 4), LINE(2, 0));
585 :     MOVLINE(LINE(2, 0), tmp);
586 : h 69
587 : edgomez 1053 /* 3=6, 6=12, 12=9, 9=3 */
588 : edgomez 195 MOVLINE(tmp, LINE(0, 3));
589 :     MOVLINE(LINE(0, 3), LINE(0, 6));
590 :     MOVLINE(LINE(0, 6), LINE(2, 4));
591 :     MOVLINE(LINE(2, 4), LINE(2, 1));
592 :     MOVLINE(LINE(2, 1), tmp);
593 : h 69
594 : edgomez 1053 /* 5=10, 10=5 */
595 : edgomez 195 MOVLINE(tmp, LINE(0, 5));
596 :     MOVLINE(LINE(0, 5), LINE(2, 2));
597 :     MOVLINE(LINE(2, 2), tmp);
598 : h 69
599 : edgomez 1053 /* 7=14, 14=13, 13=11, 11=7 */
600 : edgomez 195 MOVLINE(tmp, LINE(0, 7));
601 :     MOVLINE(LINE(0, 7), LINE(2, 6));
602 :     MOVLINE(LINE(2, 6), LINE(2, 5));
603 :     MOVLINE(LINE(2, 5), LINE(2, 3));
604 :     MOVLINE(LINE(2, 3), tmp);
605 : h 69
606 :     /* right blocks */
607 :    
608 : edgomez 1053 /* 1=2, 2=4, 4=8, 8=1 */
609 : edgomez 195 MOVLINE(tmp, LINE(1, 1));
610 :     MOVLINE(LINE(1, 1), LINE(1, 2));
611 :     MOVLINE(LINE(1, 2), LINE(1, 4));
612 :     MOVLINE(LINE(1, 4), LINE(3, 0));
613 :     MOVLINE(LINE(3, 0), tmp);
614 : h 69
615 : edgomez 1053 /* 3=6, 6=12, 12=9, 9=3 */
616 : edgomez 195 MOVLINE(tmp, LINE(1, 3));
617 :     MOVLINE(LINE(1, 3), LINE(1, 6));
618 :     MOVLINE(LINE(1, 6), LINE(3, 4));
619 :     MOVLINE(LINE(3, 4), LINE(3, 1));
620 :     MOVLINE(LINE(3, 1), tmp);
621 : h 69
622 : edgomez 1053 /* 5=10, 10=5 */
623 : edgomez 195 MOVLINE(tmp, LINE(1, 5));
624 :     MOVLINE(LINE(1, 5), LINE(3, 2));
625 :     MOVLINE(LINE(3, 2), tmp);
626 : h 69
627 : edgomez 1053 /* 7=14, 14=13, 13=11, 11=7 */
628 : edgomez 195 MOVLINE(tmp, LINE(1, 7));
629 :     MOVLINE(LINE(1, 7), LINE(3, 6));
630 :     MOVLINE(LINE(3, 6), LINE(3, 5));
631 :     MOVLINE(LINE(3, 5), LINE(3, 3));
632 :     MOVLINE(LINE(3, 3), tmp);
633 : h 69 }
634 : chl 1011
635 : edgomez 1053 /*****************************************************************************
636 :     * Trellis based R-D optimal quantization
637 :     *
638 :     * Trellis Quant code (C) 2003 Pascal Massimino skal(at)planet-d.net
639 :     *
640 :     ****************************************************************************/
641 : chl 1011
642 : edgomez 1053 /*----------------------------------------------------------------------------
643 :     *
644 :     * Trellis-Based quantization
645 :     *
646 :     * So far I understand this paper:
647 :     *
648 :     * "Trellis-Based R-D Optimal Quantization in H.263+"
649 :     * J.Wen, M.Luttrell, J.Villasenor
650 :     * IEEE Transactions on Image Processing, Vol.9, No.8, Aug. 2000.
651 :     *
652 :     * we are at stake with a simplified Bellmand-Ford / Dijkstra Single
653 : edgomez 1224 * Source Shortest Path algo. But due to the underlying graph structure
654 : edgomez 1053 * ("Trellis"), it can be turned into a dynamic programming algo,
655 : edgomez 1161 * partially saving the explicit graph's nodes representation. And
656 : edgomez 1053 * without using a heap, since the open frontier of the DAG is always
657 : edgomez 1224 * known, and of fixed size.
658 : edgomez 1053 *--------------------------------------------------------------------------*/
659 : chl 1011
660 :    
661 :    
662 : edgomez 1053 /* Codes lengths for relevant levels. */
663 : chl 1011
664 : edgomez 1139 /* let's factorize: */
665 : chl 1011 static const uint8_t Code_Len0[64] = {
666 : edgomez 1139 30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
667 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
668 : chl 1011 static const uint8_t Code_Len1[64] = {
669 : edgomez 1139 20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
670 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
671 : chl 1011 static const uint8_t Code_Len2[64] = {
672 : edgomez 1139 19,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
673 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
674 : chl 1011 static const uint8_t Code_Len3[64] = {
675 : edgomez 1139 18,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
676 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
677 : chl 1011 static const uint8_t Code_Len4[64] = {
678 : edgomez 1139 17,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
679 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
680 : chl 1011 static const uint8_t Code_Len5[64] = {
681 : edgomez 1139 16,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
682 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
683 : chl 1011 static const uint8_t Code_Len6[64] = {
684 : edgomez 1139 15,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
685 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
686 : chl 1011 static const uint8_t Code_Len7[64] = {
687 : edgomez 1139 13,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
688 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
689 : chl 1011 static const uint8_t Code_Len8[64] = {
690 : edgomez 1139 11,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
691 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
692 : chl 1011 static const uint8_t Code_Len9[64] = {
693 : edgomez 1139 12,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
694 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
695 : chl 1011 static const uint8_t Code_Len10[64] = {
696 : edgomez 1139 12,20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
697 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
698 : chl 1011 static const uint8_t Code_Len11[64] = {
699 : edgomez 1139 12,19,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
700 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
701 : chl 1011 static const uint8_t Code_Len12[64] = {
702 : edgomez 1139 11,17,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
703 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
704 : chl 1011 static const uint8_t Code_Len13[64] = {
705 : edgomez 1139 11,15,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
706 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
707 : chl 1011 static const uint8_t Code_Len14[64] = {
708 : edgomez 1139 10,12,19,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
709 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
710 : chl 1011 static const uint8_t Code_Len15[64] = {
711 : edgomez 1139 10,13,17,19,21,21,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
712 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
713 : chl 1011 static const uint8_t Code_Len16[64] = {
714 : edgomez 1139 9,12,13,18,18,19,19,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
715 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};
716 : chl 1011 static const uint8_t Code_Len17[64] = {
717 : edgomez 1139 8,11,13,14,14,14,15,19,19,19,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
718 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
719 : chl 1011 static const uint8_t Code_Len18[64] = {
720 : edgomez 1139 7, 9,11,11,13,13,13,15,15,15,16,22,22,22,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
721 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
722 : chl 1011 static const uint8_t Code_Len19[64] = {
723 : edgomez 1139 5, 7, 9,10,10,11,11,11,11,11,13,14,16,17,17,18,18,18,18,18,18,18,18,20,20,21,21,30,30,30,30,30,
724 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
725 : chl 1011 static const uint8_t Code_Len20[64] = {
726 : edgomez 1139 3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9,10,10,10,10,10,10,10,10,12,12,13,13,12,13,14,15,15,
727 :     15,16,16,16,16,17,17,17,18,18,19,19,19,19,19,19,19,19,21,21,22,22,30,30,30,30,30,30,30,30,30,30 };
728 : chl 1011
729 : edgomez 1139 /* a few more table for LAST table: */
730 : chl 1011 static const uint8_t Code_Len21[64] = {
731 : edgomez 1139 13,20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
732 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};
733 : chl 1011 static const uint8_t Code_Len22[64] = {
734 : edgomez 1139 12,15,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
735 :     30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};
736 : chl 1011 static const uint8_t Code_Len23[64] = {
737 : edgomez 1139 10,12,15,15,15,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,20,20,20,
738 :     20,21,21,21,21,21,21,21,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};
739 : chl 1011 static const uint8_t Code_Len24[64] = {
740 : edgomez 1139 5, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,10,10,10,10,10,10,10,10,11,11,11,11,12,12,12,
741 :     12,13,13,13,13,13,13,13,13,14,16,16,16,16,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19};
742 : chl 1011
743 :    
744 : edgomez 1053 static const uint8_t * const B16_17_Code_Len[24] = { /* levels [1..24] */
745 : edgomez 1139 Code_Len20,Code_Len19,Code_Len18,Code_Len17,
746 :     Code_Len16,Code_Len15,Code_Len14,Code_Len13,
747 :     Code_Len12,Code_Len11,Code_Len10,Code_Len9,
748 :     Code_Len8, Code_Len7 ,Code_Len6 ,Code_Len5,
749 :     Code_Len4, Code_Len3, Code_Len3 ,Code_Len2,
750 :     Code_Len2, Code_Len1, Code_Len1, Code_Len1,
751 : chl 1011 };
752 :    
753 : edgomez 1053 static const uint8_t * const B16_17_Code_Len_Last[6] = { /* levels [1..6] */
754 : edgomez 1161 Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1,
755 : chl 1011 };
756 :    
757 : edgomez 1223 /* TL_SHIFT controls the precision of the RD optimizations in trellis
758 :     * valid range is [10..16]. The bigger, the more trellis is vulnerable
759 :     * to overflows in cost formulas.
760 :     * - 10 allows ac values up to 2^11 == 2048
761 :     * - 16 allows ac values up to 2^8 == 256
762 :     */
763 :     #define TL_SHIFT 11
764 :     #define TL(q) ((0xfe00>>(16-TL_SHIFT))/(q*q))
765 : chl 1011
766 :     static const int Trellis_Lambda_Tabs[31] = {
767 : edgomez 1139 TL( 1),TL( 2),TL( 3),TL( 4),TL( 5),TL( 6), TL( 7),
768 :     TL( 8),TL( 9),TL(10),TL(11),TL(12),TL(13),TL(14), TL(15),
769 :     TL(16),TL(17),TL(18),TL(19),TL(20),TL(21),TL(22), TL(23),
770 :     TL(24),TL(25),TL(26),TL(27),TL(28),TL(29),TL(30), TL(31)
771 : chl 1011 };
772 :     #undef TL
773 :    
774 : edgomez 1139 static int __inline
775 :     Find_Last(const int16_t *C, const uint16_t *Zigzag, int i)
776 : chl 1011 {
777 : edgomez 1139 while(i>=0)
778 :     if (C[Zigzag[i]])
779 :     return i;
780 :     else i--;
781 :     return -1;
782 : chl 1011 }
783 :    
784 : edgomez 1139 static int __inline
785 :     Compute_Sum(const int16_t *C, int last)
786 :     {
787 :     int sum = 0;
788 :    
789 :     while(last--)
790 :     sum += abs(C[last]);
791 :    
792 :     return(sum);
793 :     }
794 : edgomez 1223
795 : edgomez 1053 /* this routine has been strippen of all debug code */
796 : edgomez 1161 static int
797 : edgomez 1223 dct_quantize_trellis_c(int16_t *const Out,
798 :     const int16_t *const In,
799 :     int Q,
800 :     const uint16_t * const Zigzag,
801 :     const uint16_t * const QuantMatrix,
802 :     int Non_Zero)
803 : chl 1012 {
804 :    
805 : edgomez 1224 /* Note: We should search last non-zero coeffs on *real* DCT input coeffs
806 :     * (In[]), not quantized one (Out[]). However, it only improves the result
807 :     * *very* slightly (~0.01dB), whereas speed drops to crawling level :)
808 :     * Well, actually, taking 1 more coeff past Non_Zero into account sometimes
809 :     * helps. */
810 : edgomez 1139 typedef struct { int16_t Run, Level; } NODE;
811 : edgomez 1161
812 : edgomez 1139 NODE Nodes[65], Last;
813 :     uint32_t Run_Costs0[64+1];
814 :     uint32_t * const Run_Costs = Run_Costs0 + 1;
815 : edgomez 1223
816 : edgomez 1224 /* it's 1/lambda, actually */
817 :     const int Lambda = Trellis_Lambda_Tabs[Q-1];
818 : chl 1012
819 : edgomez 1139 int Run_Start = -1;
820 : edgomez 1223 uint32_t Min_Cost = 2<<TL_SHIFT;
821 : chl 1012
822 : edgomez 1139 int Last_Node = -1;
823 :     uint32_t Last_Cost = 0;
824 : chl 1012
825 : edgomez 1139 int i, j, sum;
826 : chl 1012
827 : edgomez 1224 /* source (w/ CBP penalty) */
828 :     Run_Costs[-1] = 2<<TL_SHIFT;
829 :    
830 : edgomez 1139 Non_Zero = Find_Last(Out, Zigzag, Non_Zero);
831 :     if (Non_Zero<0)
832 :     return 0; /* Sum is zero if there are only zero coeffs */
833 : chl 1012
834 : edgomez 1139 for(i=0; i<=Non_Zero; i++) {
835 : edgomez 1223 const int q = ((Q*QuantMatrix[Zigzag[i]])>>4);
836 :     const int Mult = 2*q;
837 :     const int Bias = (q-1) | 1;
838 :     const int Lev0 = Mult + Bias;
839 :    
840 : edgomez 1139 const int AC = In[Zigzag[i]];
841 :     const int Level1 = Out[Zigzag[i]];
842 : edgomez 1223 const unsigned int Dist0 = Lambda* AC*AC;
843 : edgomez 1139 uint32_t Best_Cost = 0xf0000000;
844 :     Last_Cost += Dist0;
845 : chl 1012
846 : edgomez 1139 /* very specialized loop for -1,0,+1 */
847 :     if ((uint32_t)(Level1+1)<3) {
848 :     int dQ;
849 :     int Run;
850 :     uint32_t Cost0;
851 : chl 1012
852 : edgomez 1139 if (AC<0) {
853 :     Nodes[i].Level = -1;
854 :     dQ = Lev0 + AC;
855 :     } else {
856 :     Nodes[i].Level = 1;
857 :     dQ = Lev0 - AC;
858 :     }
859 :     Cost0 = Lambda*dQ*dQ;
860 : edgomez 1161
861 : edgomez 1139 Nodes[i].Run = 1;
862 : edgomez 1223 Best_Cost = (Code_Len20[0]<<TL_SHIFT) + Run_Costs[i-1]+Cost0;
863 : edgomez 1139 for(Run=i-Run_Start; Run>0; --Run) {
864 :     const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];
865 : edgomez 1223 const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<TL_SHIFT);
866 :     const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<TL_SHIFT);
867 : chl 1012
868 : edgomez 1224 /* TODO: what about tie-breaks? Should we favor short runs or
869 : edgomez 1139 * long runs? Although the error is the same, it would not be
870 : edgomez 1224 * spread the same way along high and low frequencies... */
871 : chl 1012
872 : edgomez 1224 /* Gruel: I'd say, favour short runs => hifreq errors (HVS) */
873 : chl 1012
874 : edgomez 1139 if (Cost<Best_Cost) {
875 : edgomez 1224 Best_Cost = Cost;
876 : edgomez 1139 Nodes[i].Run = Run;
877 :     }
878 : chl 1012
879 : edgomez 1139 if (lCost<Last_Cost) {
880 :     Last_Cost = lCost;
881 :     Last.Run = Run;
882 :     Last_Node = i;
883 :     }
884 :     }
885 : edgomez 1161 if (Last_Node==i)
886 : edgomez 1139 Last.Level = Nodes[i].Level;
887 : edgomez 1224 } else if (51U>(uint32_t)(Level1+25)) {
888 :     /* "big" levels (not less than ESC3, though) */
889 : edgomez 1139 const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;
890 :     int Level2;
891 :     int dQ1, dQ2;
892 :     int Run;
893 :     uint32_t Dist1,Dist2;
894 :     int dDist21;
895 : edgomez 1161
896 : edgomez 1139 if (Level1>1) {
897 :     dQ1 = Level1*Mult-AC + Bias;
898 :     dQ2 = dQ1 - Mult;
899 :     Level2 = Level1-1;
900 : edgomez 1224 Tbl_L1 = (Level1<=24) ? B16_17_Code_Len[Level1-1] : Code_Len0;
901 :     Tbl_L2 = (Level2<=24) ? B16_17_Code_Len[Level2-1] : Code_Len0;
902 : edgomez 1139 Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;
903 :     Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;
904 :     } else { /* Level1<-1 */
905 :     dQ1 = Level1*Mult-AC - Bias;
906 :     dQ2 = dQ1 + Mult;
907 :     Level2 = Level1 + 1;
908 : edgomez 1224 Tbl_L1 = (Level1>=-24) ? B16_17_Code_Len[Level1^-1] : Code_Len0;
909 :     Tbl_L2 = (Level2>=-24) ? B16_17_Code_Len[Level2^-1] : Code_Len0;
910 : edgomez 1139 Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0;
911 :     Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0;
912 :     }
913 : chl 1012
914 : edgomez 1139 Dist1 = Lambda*dQ1*dQ1;
915 :     Dist2 = Lambda*dQ2*dQ2;
916 :     dDist21 = Dist2-Dist1;
917 : chl 1012
918 : edgomez 1139 for(Run=i-Run_Start; Run>0; --Run)
919 :     {
920 :     const uint32_t Cost_Base = Dist1 + Run_Costs[i-Run];
921 :     uint32_t Cost1, Cost2;
922 :     int bLevel;
923 : chl 1012
924 : edgomez 1224 /* for sub-optimal (but slightly worth it, speed-wise) search,
925 :     * uncomment the following:
926 :     * if (Cost_Base>=Best_Cost) continue;
927 :     * (? doesn't seem to have any effect -- gruel ) */
928 : chl 1012
929 : edgomez 1223 Cost1 = Cost_Base + (Tbl_L1[Run-1]<<TL_SHIFT);
930 :     Cost2 = Cost_Base + (Tbl_L2[Run-1]<<TL_SHIFT) + dDist21;
931 : chl 1012
932 : edgomez 1161 if (Cost2<Cost1) {
933 :     Cost1 = Cost2;
934 :     bLevel = Level2;
935 : edgomez 1139 } else {
936 :     bLevel = Level1;
937 :     }
938 : chl 1012
939 : edgomez 1139 if (Cost1<Best_Cost) {
940 :     Best_Cost = Cost1;
941 :     Nodes[i].Run = Run;
942 :     Nodes[i].Level = bLevel;
943 :     }
944 : chl 1012
945 : edgomez 1223 Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<TL_SHIFT);
946 :     Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<TL_SHIFT) + dDist21;
947 : edgomez 1139
948 : edgomez 1161 if (Cost2<Cost1) {
949 :     Cost1 = Cost2;
950 :     bLevel = Level2;
951 : edgomez 1139 } else {
952 :     bLevel = Level1;
953 :     }
954 : edgomez 1161
955 : edgomez 1139 if (Cost1<Last_Cost) {
956 :     Last_Cost = Cost1;
957 :     Last.Run = Run;
958 :     Last.Level = bLevel;
959 :     Last_Node = i;
960 :     }
961 :     } /* end of "for Run" */
962 : edgomez 1224 } else {
963 :     /* Very very high levels, with no chance of being optimizable
964 :     * => Simply pick best Run. */
965 :     int Run;
966 :     for(Run=i-Run_Start; Run>0; --Run) {
967 :     /* 30 bits + no distortion */
968 :     const uint32_t Cost = (30<<TL_SHIFT) + Run_Costs[i-Run];
969 :     if (Cost<Best_Cost) {
970 :     Best_Cost = Cost;
971 :     Nodes[i].Run = Run;
972 :     Nodes[i].Level = Level1;
973 :     }
974 : chl 1012
975 : edgomez 1224 if (Cost<Last_Cost) {
976 :     Last_Cost = Cost;
977 :     Last.Run = Run;
978 :     Last.Level = Level1;
979 :     Last_Node = i;
980 :     }
981 :     }
982 : edgomez 1139 }
983 : chl 1012
984 : edgomez 1224
985 : edgomez 1139 Run_Costs[i] = Best_Cost;
986 : chl 1012
987 : edgomez 1139 if (Best_Cost < Min_Cost + Dist0) {
988 :     Min_Cost = Best_Cost;
989 :     Run_Start = i;
990 :     } else {
991 : edgomez 1224 /* as noticed by Michael Niedermayer (michaelni at gmx.at),
992 :     * there's a code shorter by 1 bit for a larger run (!), same
993 :     * level. We give it a chance by not moving the left barrier too
994 :     * much. */
995 : edgomez 1223 while( Run_Costs[Run_Start]>Min_Cost+(1<<TL_SHIFT) )
996 : edgomez 1139 Run_Start++;
997 : chl 1012
998 : edgomez 1224 /* spread on preceding coeffs the cost incurred by skipping this
999 :     * one */
1000 : edgomez 1139 for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;
1001 :     Min_Cost += Dist0;
1002 :     }
1003 :     }
1004 : chl 1012
1005 : edgomez 1224 /* It seems trellis doesn't give good results... just compute the Out sum
1006 :     * and quit */
1007 : edgomez 1139 if (Last_Node<0)
1008 :     return Compute_Sum(Out, Non_Zero);
1009 : chl 1012
1010 : edgomez 1139 /* reconstruct optimal sequence backward with surviving paths */
1011 :     memset(Out, 0x00, 64*sizeof(*Out));
1012 :     Out[Zigzag[Last_Node]] = Last.Level;
1013 :     i = Last_Node - Last.Run;
1014 :     sum = 0;
1015 :     while(i>=0) {
1016 :     Out[Zigzag[i]] = Nodes[i].Level;
1017 :     sum += abs(Nodes[i].Level);
1018 :     i -= Nodes[i].Run;
1019 :     }
1020 :    
1021 :     return sum;
1022 : chl 1012 }
1023 :    
1024 : edgomez 1053 /* original version including heavy debugging info */
1025 : chl 1012
1026 :     #ifdef DBGTRELL
1027 :    
1028 : chl 1011 #define DBG 0
1029 :    
1030 : suxen_drol 1014 static __inline uint32_t Evaluate_Cost(const int16_t *C, int Mult, int Bias,
1031 : edgomez 1139 const uint16_t * Zigzag, int Max, int Lambda)
1032 : chl 1011 {
1033 :     #if (DBG>0)
1034 : edgomez 1139 const int16_t * const Ref = C + 6*64;
1035 :     int Last = Max;
1036 :     int Bits = 0;
1037 :     int Dist = 0;
1038 : edgomez 1161 int i;
1039 : edgomez 1139 uint32_t Cost;
1040 : edgomez 1161
1041 :     while(Last>=0 && C[Zigzag[Last]]==0)
1042 : edgomez 1139 Last--;
1043 : edgomez 1161
1044 : edgomez 1139 if (Last>=0) {
1045 :     int j=0, j0=0;
1046 :     int Run, Level;
1047 : chl 1012
1048 : edgomez 1139 Bits = 2; /* CBP */
1049 :     while(j<Last) {
1050 : edgomez 1161 while(!C[Zigzag[j]])
1051 : edgomez 1139 j++;
1052 : edgomez 1161 if (j==Last)
1053 : edgomez 1139 break;
1054 :     Level=C[Zigzag[j]];
1055 :     Run = j - j0;
1056 :     j0 = ++j;
1057 : edgomez 1161 if (Level>=-24 && Level<=24)
1058 : edgomez 1139 Bits += B16_17_Code_Len[(Level<0) ? -Level-1 : Level-1][Run];
1059 : edgomez 1161 else
1060 : edgomez 1139 Bits += 30;
1061 :     }
1062 :     Level = C[Zigzag[Last]];
1063 :     Run = j - j0;
1064 : edgomez 1161 if (Level>=-6 && Level<=6)
1065 : edgomez 1139 Bits += B16_17_Code_Len_Last[(Level<0) ? -Level-1 : Level-1][Run];
1066 : edgomez 1161 else
1067 : chl 1012 Bits += 30;
1068 : edgomez 1139 }
1069 : chl 1011
1070 : edgomez 1139 for(i=0; i<=Last; ++i) {
1071 :     int V = C[Zigzag[i]]*Mult;
1072 : edgomez 1161 if (V>0)
1073 : edgomez 1139 V += Bias;
1074 : edgomez 1161 else
1075 :     if (V<0)
1076 : edgomez 1139 V -= Bias;
1077 :     V -= Ref[Zigzag[i]];
1078 :     Dist += V*V;
1079 :     }
1080 : edgomez 1223 Cost = Lambda*Dist + (Bits<<TL_SHIFT);
1081 : edgomez 1139 if (DBG==1)
1082 :     printf( " Last:%2d/%2d Cost = [(Bits=%5.0d) + Lambda*(Dist=%6.0d) = %d ] >>12= %d ", Last,Max, Bits, Dist, Cost, Cost>>12 );
1083 :     return Cost;
1084 : chl 1011
1085 :     #else
1086 : edgomez 1139 return 0;
1087 : chl 1011 #endif
1088 :     }
1089 :    
1090 :    
1091 : edgomez 1161 static int
1092 : chl 1011 dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero)
1093 :     {
1094 :    
1095 : edgomez 1053 /*
1096 :     * Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]),
1097 :     * not quantized one (Out[]). However, it only improves the result *very*
1098 :     * slightly (~0.01dB), whereas speed drops to crawling level :)
1099 :     * Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps.
1100 :     */
1101 : edgomez 1139 typedef struct { int16_t Run, Level; } NODE;
1102 : edgomez 1161
1103 : edgomez 1139 NODE Nodes[65], Last;
1104 :     uint32_t Run_Costs0[64+1];
1105 :     uint32_t * const Run_Costs = Run_Costs0 + 1;
1106 :     const int Mult = 2*Q;
1107 :     const int Bias = (Q-1) | 1;
1108 :     const int Lev0 = Mult + Bias;
1109 :     const int Lambda = Trellis_Lambda_Tabs[Q-1]; /* it's 1/lambda, actually */
1110 : chl 1011
1111 : edgomez 1139 int Run_Start = -1;
1112 : edgomez 1223 Run_Costs[-1] = 2<<TL_SHIFT; /* source (w/ CBP penalty) */
1113 :     uint32_t Min_Cost = 2<<TL_SHIFT;
1114 : chl 1011
1115 : edgomez 1139 int Last_Node = -1;
1116 :     uint32_t Last_Cost = 0;
1117 : chl 1011
1118 : edgomez 1139 int i, j;
1119 : chl 1012
1120 : chl 1011 #if (DBG>0)
1121 : edgomez 1139 Last.Level = 0; Last.Run = -1; /* just initialize to smthg */
1122 : chl 1011 #endif
1123 :    
1124 : edgomez 1139 Non_Zero = Find_Last(Out, Zigzag, Non_Zero);
1125 :     if (Non_Zero<0)
1126 : edgomez 1161 return -1;
1127 : chl 1011
1128 : edgomez 1139 for(i=0; i<=Non_Zero; i++)
1129 :     {
1130 :     const int AC = In[Zigzag[i]];
1131 :     const int Level1 = Out[Zigzag[i]];
1132 :     const int Dist0 = Lambda* AC*AC;
1133 :     uint32_t Best_Cost = 0xf0000000;
1134 :     Last_Cost += Dist0;
1135 : chl 1011
1136 : edgomez 1139 if ((uint32_t)(Level1+1)<3) /* very specialized loop for -1,0,+1 */
1137 :     {
1138 :     int dQ;
1139 :     int Run;
1140 :     uint32_t Cost0;
1141 : chl 1011
1142 : edgomez 1139 if (AC<0) {
1143 :     Nodes[i].Level = -1;
1144 :     dQ = Lev0 + AC;
1145 :     } else {
1146 :     Nodes[i].Level = 1;
1147 :     dQ = Lev0 - AC;
1148 :     }
1149 :     Cost0 = Lambda*dQ*dQ;
1150 : edgomez 1161
1151 : edgomez 1139 Nodes[i].Run = 1;
1152 : edgomez 1223 Best_Cost = (Code_Len20[0]<<TL_SHIFT) + Run_Costs[i-1]+Cost0;
1153 : edgomez 1139 for(Run=i-Run_Start; Run>0; --Run)
1154 :     {
1155 :     const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];
1156 : edgomez 1223 const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<TL_SHIFT);
1157 :     const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<TL_SHIFT);
1158 : chl 1012
1159 : edgomez 1139 /*
1160 :     * TODO: what about tie-breaks? Should we favor short runs or
1161 :     * long runs? Although the error is the same, it would not be
1162 :     * spread the same way along high and low frequencies...
1163 :     */
1164 :     if (Cost<Best_Cost) {
1165 :     Best_Cost = Cost;
1166 :     Nodes[i].Run = Run;
1167 :     }
1168 : chl 1012
1169 : edgomez 1139 if (lCost<Last_Cost) {
1170 :     Last_Cost = lCost;
1171 :     Last.Run = Run;
1172 :     Last_Node = i;
1173 :     }
1174 :     }
1175 : edgomez 1161 if (Last_Node==i)
1176 : edgomez 1139 Last.Level = Nodes[i].Level;
1177 : chl 1011
1178 : edgomez 1139 if (DBG==1) {
1179 :     Run_Costs[i] = Best_Cost;
1180 :     printf( "Costs #%2d: ", i);
1181 :     for(j=-1;j<=Non_Zero;++j) {
1182 :     if (j==Run_Start) printf( " %3.0d|", Run_Costs[j]>>12 );
1183 :     else if (j>Run_Start && j<i) printf( " %3.0d|", Run_Costs[j]>>12 );
1184 :     else if (j==i) printf( "(%3.0d)", Run_Costs[j]>>12 );
1185 :     else printf( " - |" );
1186 :     }
1187 :     printf( "<%3.0d %2d %d>", Min_Cost>>12, Nodes[i].Level, Nodes[i].Run );
1188 :     printf( " Last:#%2d {%3.0d %2d %d}", Last_Node, Last_Cost>>12, Last.Level, Last.Run );
1189 :     printf( " AC:%3.0d Dist0:%3d Dist(%d)=%d", AC, Dist0>>12, Nodes[i].Level, Cost0>>12 );
1190 :     printf( "\n" );
1191 :     }
1192 :     }
1193 :     else /* "big" levels */
1194 :     {
1195 :     const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;
1196 :     int Level2;
1197 :     int dQ1, dQ2;
1198 :     int Run;
1199 :     uint32_t Dist1,Dist2;
1200 :     int dDist21;
1201 : edgomez 1161
1202 : edgomez 1139 if (Level1>1) {
1203 :     dQ1 = Level1*Mult-AC + Bias;
1204 :     dQ2 = dQ1 - Mult;
1205 :     Level2 = Level1-1;
1206 :     Tbl_L1 = (Level1<=24) ? B16_17_Code_Len[Level1-1] : Code_Len0;
1207 :     Tbl_L2 = (Level2<=24) ? B16_17_Code_Len[Level2-1] : Code_Len0;
1208 :     Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;
1209 :     Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;
1210 :     } else { /* Level1<-1 */
1211 :     dQ1 = Level1*Mult-AC - Bias;
1212 :     dQ2 = dQ1 + Mult;
1213 :     Level2 = Level1 + 1;
1214 :     Tbl_L1 = (Level1>=-24) ? B16_17_Code_Len[Level1^-1] : Code_Len0;
1215 :     Tbl_L2 = (Level2>=-24) ? B16_17_Code_Len[Level2^-1] : Code_Len0;
1216 :     Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0;
1217 :     Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0;
1218 :     }
1219 :     Dist1 = Lambda*dQ1*dQ1;
1220 :     Dist2 = Lambda*dQ2*dQ2;
1221 :     dDist21 = Dist2-Dist1;
1222 : chl 1011
1223 : edgomez 1139 for(Run=i-Run_Start; Run>0; --Run)
1224 :     {
1225 :     const uint32_t Cost_Base = Dist1 + Run_Costs[i-Run];
1226 :     uint32_t Cost1, Cost2;
1227 :     int bLevel;
1228 : chl 1011
1229 : edgomez 1053 /*
1230 :     * for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:
1231 :     * if (Cost_Base>=Best_Cost) continue;
1232 :     */
1233 : edgomez 1223 Cost1 = Cost_Base + (Tbl_L1[Run-1]<<TL_SHIFT);
1234 :     Cost2 = Cost_Base + (Tbl_L2[Run-1]<<TL_SHIFT) + dDist21;
1235 : chl 1011
1236 : edgomez 1161 if (Cost2<Cost1) {
1237 :     Cost1 = Cost2;
1238 :     bLevel = Level2;
1239 :     } else
1240 : edgomez 1139 bLevel = Level1;
1241 : chl 1011
1242 : edgomez 1139 if (Cost1<Best_Cost) {
1243 :     Best_Cost = Cost1;
1244 :     Nodes[i].Run = Run;
1245 :     Nodes[i].Level = bLevel;
1246 :     }
1247 : chl 1011
1248 : edgomez 1223 Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<TL_SHIFT);
1249 :     Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<TL_SHIFT) + dDist21;
1250 : chl 1011
1251 : edgomez 1161 if (Cost2<Cost1) {
1252 :     Cost1 = Cost2;
1253 :     bLevel = Level2;
1254 :     } else
1255 : edgomez 1139 bLevel = Level1;
1256 : edgomez 1161
1257 : edgomez 1139 if (Cost1<Last_Cost) {
1258 :     Last_Cost = Cost1;
1259 :     Last.Run = Run;
1260 :     Last.Level = bLevel;
1261 :     Last_Node = i;
1262 :     }
1263 :     } /* end of "for Run" */
1264 : chl 1011
1265 : edgomez 1139 if (DBG==1) {
1266 :     Run_Costs[i] = Best_Cost;
1267 :     printf( "Costs #%2d: ", i);
1268 :     for(j=-1;j<=Non_Zero;++j) {
1269 :     if (j==Run_Start) printf( " %3.0d|", Run_Costs[j]>>12 );
1270 :     else if (j>Run_Start && j<i) printf( " %3.0d|", Run_Costs[j]>>12 );
1271 :     else if (j==i) printf( "(%3.0d)", Run_Costs[j]>>12 );
1272 :     else printf( " - |" );
1273 :     }
1274 :     printf( "<%3.0d %2d %d>", Min_Cost>>12, Nodes[i].Level, Nodes[i].Run );
1275 :     printf( " Last:#%2d {%3.0d %2d %d}", Last_Node, Last_Cost>>12, Last.Level, Last.Run );
1276 :     printf( " AC:%3.0d Dist0:%3d Dist(%2d):%3d Dist(%2d):%3d", AC, Dist0>>12, Level1, Dist1>>12, Level2, Dist2>>12 );
1277 :     printf( "\n" );
1278 :     }
1279 :     }
1280 : chl 1011
1281 : edgomez 1139 Run_Costs[i] = Best_Cost;
1282 : chl 1011
1283 : edgomez 1139 if (Best_Cost < Min_Cost + Dist0) {
1284 :     Min_Cost = Best_Cost;
1285 :     Run_Start = i;
1286 :     }
1287 :     else
1288 :     {
1289 :     /*
1290 :     * as noticed by Michael Niedermayer (michaelni at gmx.at), there's
1291 :     * a code shorter by 1 bit for a larger run (!), same level. We give
1292 :     * it a chance by not moving the left barrier too much.
1293 :     */
1294 : chl 1012
1295 : edgomez 1223 while( Run_Costs[Run_Start]>Min_Cost+(1<<TL_SHIFT) )
1296 : edgomez 1139 Run_Start++;
1297 : chl 1011
1298 : edgomez 1139 /* spread on preceding coeffs the cost incurred by skipping this one */
1299 :     for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;
1300 :     Min_Cost += Dist0;
1301 :     }
1302 :     }
1303 : chl 1011
1304 : edgomez 1139 if (DBG) {
1305 :     Last_Cost = Evaluate_Cost(Out,Mult,Bias, Zigzag,Non_Zero, Lambda);
1306 :     if (DBG==1) {
1307 :     printf( "=> " );
1308 :     for(i=0; i<=Non_Zero; ++i) printf( "[%3.0d] ", Out[Zigzag[i]] );
1309 :     printf( "\n" );
1310 :     }
1311 :     }
1312 : chl 1011
1313 : edgomez 1139 if (Last_Node<0)
1314 :     return -1;
1315 : chl 1011
1316 : edgomez 1139 /* reconstruct optimal sequence backward with surviving paths */
1317 :     memset(Out, 0x00, 64*sizeof(*Out));
1318 :     Out[Zigzag[Last_Node]] = Last.Level;
1319 :     i = Last_Node - Last.Run;
1320 :     while(i>=0) {
1321 :     Out[Zigzag[i]] = Nodes[i].Level;
1322 :     i -= Nodes[i].Run;
1323 :     }
1324 : chl 1011
1325 : edgomez 1139 if (DBG) {
1326 :     uint32_t Cost = Evaluate_Cost(Out,Mult,Bias, Zigzag,Non_Zero, Lambda);
1327 :     if (DBG==1) {
1328 : edgomez 1161 printf( "<= " );
1329 : edgomez 1139 for(i=0; i<=Last_Node; ++i) printf( "[%3.0d] ", Out[Zigzag[i]] );
1330 :     printf( "\n--------------------------------\n" );
1331 :     }
1332 :     if (Cost>Last_Cost) printf( "!!! %u > %u\n", Cost, Last_Cost );
1333 :     }
1334 :     return Last_Node;
1335 : chl 1011 }
1336 :    
1337 :     #undef DBG
1338 : chl 1012
1339 :     #endif

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4