[svn] / branches / dev-api-4 / xvidcore / src / utils / mbtransquant.c Repository:
ViewVC logotype

Diff of /branches/dev-api-4/xvidcore/src/utils/mbtransquant.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1014, Mon May 12 12:33:16 2003 UTC revision 1230, Sun Nov 30 16:13:16 2003 UTC
# Line 21  Line 21 
21   *  along with this program ; if not, write to the Free Software   *  along with this program ; if not, write to the Free Software
22   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
23   *   *
24   * $Id: mbtransquant.c,v 1.21.2.12 2003-05-12 12:33:16 suxen_drol Exp $   * $Id: mbtransquant.c,v 1.21.2.21 2003-11-30 16:13:16 edgomez Exp $
25   *   *
26   ****************************************************************************/   ****************************************************************************/
27    
# Line 39  Line 39 
39  #include "../bitstream/zigzag.h"  #include "../bitstream/zigzag.h"
40  #include "../dct/fdct.h"  #include "../dct/fdct.h"
41  #include "../dct/idct.h"  #include "../dct/idct.h"
42  #include "../quant/quant_mpeg4.h"  #include "../quant/quant.h"
 #include "../quant/quant_h263.h"  
43  #include "../encoder.h"  #include "../encoder.h"
44    
45  #include "../image/reduced.h"  #include "../image/reduced.h"
46    #include  "../quant/quant_matrix.h"
47    
48  MBFIELDTEST_PTR MBFieldTest;  MBFIELDTEST_PTR MBFieldTest;
49    
# Line 123  Line 123 
123                           int16_t qcoeff[6 * 64],                           int16_t qcoeff[6 * 64],
124                           int16_t data[6*64])                           int16_t data[6*64])
125  {  {
126          int i;          int mpeg;
127            int scaler_lum, scaler_chr;
128    
129          for (i = 0; i < 6; i++) {          quant_intraFuncPtr const quant[2] =
130                  uint32_t iDcScaler = get_dc_scaler(pMB->quant, i < 4);                  {
131                            quant_h263_intra,
132                            quant_mpeg_intra
133                    };
134    
135            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
136            scaler_lum = get_dc_scaler(pMB->quant, 1);
137            scaler_chr = get_dc_scaler(pMB->quant, 0);
138    
139                  /* Quantize the block */                  /* Quantize the block */
140                  start_timer();                  start_timer();
141                  if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT)) {          quant[mpeg](&data[0 * 64], &qcoeff[0 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
142                          quant_intra(&data[i * 64], &qcoeff[i * 64], pMB->quant, iDcScaler);          quant[mpeg](&data[1 * 64], &qcoeff[1 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
143                  } else {          quant[mpeg](&data[2 * 64], &qcoeff[2 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
144                          quant4_intra(&data[i * 64], &qcoeff[i * 64], pMB->quant, iDcScaler);          quant[mpeg](&data[3 * 64], &qcoeff[3 * 64], pMB->quant, scaler_lum, pParam->mpeg_quant_matrices);
145                  }          quant[mpeg](&data[4 * 64], &qcoeff[4 * 64], pMB->quant, scaler_chr, pParam->mpeg_quant_matrices);
146            quant[mpeg](&data[5 * 64], &qcoeff[5 * 64], pMB->quant, scaler_chr, pParam->mpeg_quant_matrices);
147                  stop_quant_timer();                  stop_quant_timer();
148          }          }
 }  
149    
150  /* DeQuantize all blocks -- Intra mode */  /* DeQuantize all blocks -- Intra mode */
151  static __inline void  static __inline void
# Line 146  Line 154 
154                             int16_t qcoeff[6 * 64],                             int16_t qcoeff[6 * 64],
155                             int16_t data[6*64])                             int16_t data[6*64])
156  {  {
157          int i;          int mpeg;
158            int scaler_lum, scaler_chr;
159    
160          for (i = 0; i < 6; i++) {          quant_intraFuncPtr const dequant[2] =
161                  uint32_t iDcScaler = get_dc_scaler(iQuant, i < 4);                  {
162                            dequant_h263_intra,
163                            dequant_mpeg_intra
164                    };
165    
166            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
167            scaler_lum = get_dc_scaler(iQuant, 1);
168            scaler_chr = get_dc_scaler(iQuant, 0);
169    
170                  start_timer();                  start_timer();
171                  if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT))          dequant[mpeg](&qcoeff[0 * 64], &data[0 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
172                          dequant_intra(&qcoeff[i * 64], &data[i * 64], iQuant, iDcScaler);          dequant[mpeg](&qcoeff[1 * 64], &data[1 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
173                  else          dequant[mpeg](&qcoeff[2 * 64], &data[2 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
174                          dequant4_intra(&qcoeff[i * 64], &data[i * 64], iQuant, iDcScaler);          dequant[mpeg](&qcoeff[3 * 64], &data[3 * 64], iQuant, scaler_lum, pParam->mpeg_quant_matrices);
175            dequant[mpeg](&qcoeff[4 * 64], &data[4 * 64], iQuant, scaler_chr, pParam->mpeg_quant_matrices);
176            dequant[mpeg](&qcoeff[5 * 64], &data[5 * 64], iQuant, scaler_chr, pParam->mpeg_quant_matrices);
177                  stop_iquant_timer();                  stop_iquant_timer();
178          }          }
 }  
   
   
 static int  
 dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero);  
179    
180  static int  static int
181  dct_quantize_trellis_mpeg_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero);  dct_quantize_trellis_c(int16_t *const Out,
182                                               const int16_t *const In,
183                                               int Q,
184                                               const uint16_t * const Zigzag,
185                                               const uint16_t * const QuantMatrix,
186                                               int Non_Zero);
187    
188  /* Quantize all blocks -- Inter mode */  /* Quantize all blocks -- Inter mode */
189  static __inline uint8_t  static __inline uint8_t
# Line 182  Line 199 
199          int i;          int i;
200          uint8_t cbp = 0;          uint8_t cbp = 0;
201          int sum;          int sum;
202          int code_block;          int code_block, mpeg;
203    
204            quant_interFuncPtr const quant[2] =
205                    {
206                            quant_h263_inter,
207                            quant_mpeg_inter
208                    };
209    
210            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
211    
212          for (i = 0; i < 6; i++) {          for (i = 0; i < 6; i++) {
213    
214                  /* Quantize the block */                  /* Quantize the block */
215                  start_timer();                  start_timer();
216                  if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT)) {  
217                          sum = quant_inter(&qcoeff[i*64], &data[i*64], pMB->quant);                  sum = quant[mpeg](&qcoeff[i*64], &data[i*64], pMB->quant, pParam->mpeg_quant_matrices);
218                          if ( (sum) && (frame->vop_flags & XVID_VOP_TRELLISQUANT) ) {  
219                                  sum = dct_quantize_trellis_h263_c(&qcoeff[i*64], &data[i*64], pMB->quant, &scan_tables[0][0], 63)+1;                  if(sum && (frame->vop_flags & XVID_VOP_TRELLISQUANT)) {
220                                  limit = 1;                          const uint16_t *matrix;
221                          }                          const static uint16_t h263matrix[] =
222                  } else {                                  {
223                          sum = quant4_inter(&qcoeff[i * 64], &data[i * 64], pMB->quant);                                          16, 16, 16, 16, 16, 16, 16, 16,
224  //                      if ( (sum) && (frame->vop_flags & XVID_VOP_TRELLISQUANT) )                                          16, 16, 16, 16, 16, 16, 16, 16,
225  //                              sum = dct_quantize_trellis_mpeg_c (&qcoeff[i*64], &data[i*64], pMB->quant)+1;                                          16, 16, 16, 16, 16, 16, 16, 16,
226                                            16, 16, 16, 16, 16, 16, 16, 16,
227                                            16, 16, 16, 16, 16, 16, 16, 16,
228                                            16, 16, 16, 16, 16, 16, 16, 16,
229                                            16, 16, 16, 16, 16, 16, 16, 16,
230                                            16, 16, 16, 16, 16, 16, 16, 16
231                                    };
232    
233                            matrix = (mpeg)?get_inter_matrix(pParam->mpeg_quant_matrices):h263matrix;
234                            sum = dct_quantize_trellis_c(&qcoeff[i*64], &data[i*64],
235                                                                                     pMB->quant, &scan_tables[0][0],
236                                                                                     matrix,
237                                                                                     63);
238                  }                  }
239                  stop_quant_timer();                  stop_quant_timer();
240    
# Line 236  Line 273 
273                             int16_t qcoeff[6 * 64],                             int16_t qcoeff[6 * 64],
274                             const uint8_t cbp)                             const uint8_t cbp)
275  {  {
276          int i;          int mpeg;
277    
278            quant_interFuncPtr const dequant[2] =
279                    {
280                            dequant_h263_inter,
281                            dequant_mpeg_inter
282                    };
283    
284            mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
285    
         for (i = 0; i < 6; i++) {  
                 if (cbp & (1 << (5 - i))) {  
286                          start_timer();                          start_timer();
287                          if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT))          if(cbp & (1 << (5 - 0))) dequant[mpeg](&data[0 * 64], &qcoeff[0 * 64], iQuant, pParam->mpeg_quant_matrices);
288                                  dequant_inter(&data[i * 64], &qcoeff[i * 64], iQuant);          if(cbp & (1 << (5 - 1))) dequant[mpeg](&data[1 * 64], &qcoeff[1 * 64], iQuant, pParam->mpeg_quant_matrices);
289                          else          if(cbp & (1 << (5 - 2))) dequant[mpeg](&data[2 * 64], &qcoeff[2 * 64], iQuant, pParam->mpeg_quant_matrices);
290                                  dequant4_inter(&data[i * 64], &qcoeff[i * 64], iQuant);          if(cbp & (1 << (5 - 3))) dequant[mpeg](&data[3 * 64], &qcoeff[3 * 64], iQuant, pParam->mpeg_quant_matrices);
291            if(cbp & (1 << (5 - 4))) dequant[mpeg](&data[4 * 64], &qcoeff[4 * 64], iQuant, pParam->mpeg_quant_matrices);
292            if(cbp & (1 << (5 - 5))) dequant[mpeg](&data[5 * 64], &qcoeff[5 * 64], iQuant, pParam->mpeg_quant_matrices);
293                          stop_iquant_timer();                          stop_iquant_timer();
294                  }                  }
         }  
 }  
295    
296  typedef void (transfer_operation_8to16_t) (int16_t *Dst, const uint8_t *Src, int BpS);  typedef void (transfer_operation_8to16_t) (int16_t *Dst, const uint8_t *Src, int BpS);
297  typedef void (transfer_operation_16to8_t) (uint8_t *Dst, const int16_t *Src, int BpS);  typedef void (transfer_operation_16to8_t) (uint8_t *Dst, const int16_t *Src, int BpS);
# Line 266  Line 309 
309          uint32_t stride2 = stride / 2;          uint32_t stride2 = stride / 2;
310          uint32_t next_block = stride * 8;          uint32_t next_block = stride * 8;
311          int32_t cst;          int32_t cst;
312            int vop_reduced;
313          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
314          const IMAGE * const pCurrent = &frame->image;          const IMAGE * const pCurrent = &frame->image;
315            transfer_operation_8to16_t * const functions[2] =
316                    {
317                            (transfer_operation_8to16_t *)transfer_8to16copy,
318                            (transfer_operation_8to16_t *)filter_18x18_to_8x8
319                    };
320          transfer_operation_8to16_t *transfer_op = NULL;          transfer_operation_8to16_t *transfer_op = NULL;
321    
322          if ((frame->vop_flags & XVID_VOP_REDUCED)) {          vop_reduced = !!(frame->vop_flags & XVID_VOP_REDUCED);
   
                 /* Image pointers */  
                 pY_Cur = pCurrent->y + (y_pos << 5) * stride  + (x_pos << 5);  
                 pU_Cur = pCurrent->u + (y_pos << 4) * stride2 + (x_pos << 4);  
                 pV_Cur = pCurrent->v + (y_pos << 4) * stride2 + (x_pos << 4);  
   
                 /* Block size */  
                 cst = 16;  
   
                 /* Operation function */  
                 transfer_op = (transfer_operation_8to16_t*)filter_18x18_to_8x8;  
         } else {  
323    
324                  /* Image pointers */                  /* Image pointers */
325                  pY_Cur = pCurrent->y + (y_pos << 4) * stride  + (x_pos << 4);          pY_Cur = pCurrent->y + (y_pos << (4+vop_reduced)) * stride  + (x_pos << (4+vop_reduced));
326                  pU_Cur = pCurrent->u + (y_pos << 3) * stride2 + (x_pos << 3);          pU_Cur = pCurrent->u + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
327                  pV_Cur = pCurrent->v + (y_pos << 3) * stride2 + (x_pos << 3);          pV_Cur = pCurrent->v + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
328    
329                  /* Block size */                  /* Block size */
330                  cst = 8;          cst = 8<<vop_reduced;
331    
332                  /* Operation function */                  /* Operation function */
333                  transfer_op = (transfer_operation_8to16_t*)transfer_8to16copy;          transfer_op = functions[vop_reduced];
         }  
334    
335          /* Do the transfer */          /* Do the transfer */
336          start_timer();          start_timer();
# Line 314  Line 350 
350                           const uint32_t x_pos,                           const uint32_t x_pos,
351                           const uint32_t y_pos,                           const uint32_t y_pos,
352                           int16_t data[6 * 64],                           int16_t data[6 * 64],
353                           const uint32_t add,                           const uint32_t add, /* Must be 1 or 0 */
354                           const uint8_t cbp)                           const uint8_t cbp)
355  {  {
356          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;          uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
# Line 322  Line 358 
358          uint32_t stride2 = stride / 2;          uint32_t stride2 = stride / 2;
359          uint32_t next_block = stride * 8;          uint32_t next_block = stride * 8;
360          uint32_t cst;          uint32_t cst;
361            int vop_reduced;
362          const IMAGE * const pCurrent = &frame->image;          const IMAGE * const pCurrent = &frame->image;
363    
364            /* Array of function pointers, indexed by [vop_reduced<<1+add] */
365            transfer_operation_16to8_t  * const functions[4] =
366                    {
367                            (transfer_operation_16to8_t*)transfer_16to8copy,
368                            (transfer_operation_16to8_t*)transfer_16to8add,
369                            (transfer_operation_16to8_t*)copy_upsampled_8x8_16to8,
370                            (transfer_operation_16to8_t*)add_upsampled_8x8_16to8
371                    };
372    
373          transfer_operation_16to8_t *transfer_op = NULL;          transfer_operation_16to8_t *transfer_op = NULL;
374    
375          if (pMB->field_dct) {          if (pMB->field_dct) {
# Line 330  Line 377 
377                  stride *= 2;                  stride *= 2;
378          }          }
379    
380          if ((frame->vop_flags & XVID_VOP_REDUCED)) {          /* Makes this vars booleans */
381            vop_reduced = !!(frame->vop_flags & XVID_VOP_REDUCED);
                 /* Image pointers */  
                 pY_Cur = pCurrent->y + (y_pos << 5) * stride  + (x_pos << 5);  
                 pU_Cur = pCurrent->u + (y_pos << 4) * stride2 + (x_pos << 4);  
                 pV_Cur = pCurrent->v + (y_pos << 4) * stride2 + (x_pos << 4);  
   
                 /* Block size */  
                 cst = 16;  
   
                 /* Operation function */  
                 if(add)  
                         transfer_op = (transfer_operation_16to8_t*)add_upsampled_8x8_16to8;  
                 else  
                         transfer_op = (transfer_operation_16to8_t*)copy_upsampled_8x8_16to8;  
         } else {  
382    
383                  /* Image pointers */                  /* Image pointers */
384                  pY_Cur = pCurrent->y + (y_pos << 4) * stride  + (x_pos << 4);          pY_Cur = pCurrent->y + (y_pos << (4+vop_reduced)) * stride  + (x_pos << (4+vop_reduced));
385                  pU_Cur = pCurrent->u + (y_pos << 3) * stride2 + (x_pos << 3);          pU_Cur = pCurrent->u + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
386                  pV_Cur = pCurrent->v + (y_pos << 3) * stride2 + (x_pos << 3);          pV_Cur = pCurrent->v + (y_pos << (3+vop_reduced)) * stride2 + (x_pos << (3+vop_reduced));
387    
388                  /* Block size */                  /* Block size */
389                  cst = 8;          cst = 8<<vop_reduced;
390    
391                  /* Operation function */                  /* Operation function */
392                  if(add)          transfer_op = functions[(vop_reduced<<1) + add];
                         transfer_op = (transfer_operation_16to8_t*)transfer_16to8add;  
                 else  
                         transfer_op = (transfer_operation_16to8_t*)transfer_16to8copy;  
         }  
393    
394          /* Do the operation */          /* Do the operation */
395          start_timer();          start_timer();
# Line 419  Line 448 
448          uint8_t cbp;          uint8_t cbp;
449          uint32_t limit;          uint32_t limit;
450    
451          /*          /* There is no MBTrans8to16 for Inter block, that's done in motion compensation
452           * There is no MBTrans8to16 for Inter block, that's done in motion compensation           * already */
          * already  
          */  
453    
454          /* Perform DCT (and field decision) */          /* Perform DCT (and field decision) */
455          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
# Line 430  Line 457 
457          /* Set the limit threshold */          /* Set the limit threshold */
458          limit = PVOP_TOOSMALL_LIMIT + ((pMB->quant == 1)? 1 : 0);          limit = PVOP_TOOSMALL_LIMIT + ((pMB->quant == 1)? 1 : 0);
459    
460            if (frame->vop_flags & XVID_VOP_CARTOON)
461                    limit *= 3;
462    
463          /* Quantize the block */          /* Quantize the block */
464          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 0, limit);          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 0, limit);
465    
# Line 457  Line 487 
487          uint8_t cbp;          uint8_t cbp;
488          uint32_t limit;          uint32_t limit;
489    
490          /*          /* There is no MBTrans8to16 for Inter block, that's done in motion compensation
491           * There is no MBTrans8to16 for Inter block, that's done in motion compensation           * already */
          * already  
          */  
492    
493          /* Perform DCT (and field decision) */          /* Perform DCT (and field decision) */
494          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);          MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
# Line 468  Line 496 
496          /* Set the limit threshold */          /* Set the limit threshold */
497          limit = BVOP_TOOSMALL_LIMIT;          limit = BVOP_TOOSMALL_LIMIT;
498    
499            if (frame->vop_flags & XVID_VOP_CARTOON)
500                    limit *= 2;
501    
502          /* Quantize the block */          /* Quantize the block */
503          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 1, limit);          cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 1, limit);
504    
# Line 475  Line 506 
506           * History comment:           * History comment:
507           * We don't have to DeQuant, iDCT and Transfer back data for B-frames.           * We don't have to DeQuant, iDCT and Transfer back data for B-frames.
508           *           *
509           * BUT some plugins require the original frame to be passed so we have           * BUT some plugins require the rebuilt original frame to be passed so we
510           * to take care of that here           * have to take care of that here
511           */           */
512          if((pParam->plugin_flags & XVID_REQORIGINAL)) {          if((pParam->plugin_flags & XVID_REQORIGINAL)) {
513    
# Line 546  Line 577 
577    
578          /* left blocks */          /* left blocks */
579    
580          // 1=2, 2=4, 4=8, 8=1          /* 1=2, 2=4, 4=8, 8=1 */
581          MOVLINE(tmp, LINE(0, 1));          MOVLINE(tmp, LINE(0, 1));
582          MOVLINE(LINE(0, 1), LINE(0, 2));          MOVLINE(LINE(0, 1), LINE(0, 2));
583          MOVLINE(LINE(0, 2), LINE(0, 4));          MOVLINE(LINE(0, 2), LINE(0, 4));
584          MOVLINE(LINE(0, 4), LINE(2, 0));          MOVLINE(LINE(0, 4), LINE(2, 0));
585          MOVLINE(LINE(2, 0), tmp);          MOVLINE(LINE(2, 0), tmp);
586    
587          // 3=6, 6=12, 12=9, 9=3          /* 3=6, 6=12, 12=9, 9=3 */
588          MOVLINE(tmp, LINE(0, 3));          MOVLINE(tmp, LINE(0, 3));
589          MOVLINE(LINE(0, 3), LINE(0, 6));          MOVLINE(LINE(0, 3), LINE(0, 6));
590          MOVLINE(LINE(0, 6), LINE(2, 4));          MOVLINE(LINE(0, 6), LINE(2, 4));
591          MOVLINE(LINE(2, 4), LINE(2, 1));          MOVLINE(LINE(2, 4), LINE(2, 1));
592          MOVLINE(LINE(2, 1), tmp);          MOVLINE(LINE(2, 1), tmp);
593    
594          // 5=10, 10=5          /* 5=10, 10=5 */
595          MOVLINE(tmp, LINE(0, 5));          MOVLINE(tmp, LINE(0, 5));
596          MOVLINE(LINE(0, 5), LINE(2, 2));          MOVLINE(LINE(0, 5), LINE(2, 2));
597          MOVLINE(LINE(2, 2), tmp);          MOVLINE(LINE(2, 2), tmp);
598    
599          // 7=14, 14=13, 13=11, 11=7          /* 7=14, 14=13, 13=11, 11=7 */
600          MOVLINE(tmp, LINE(0, 7));          MOVLINE(tmp, LINE(0, 7));
601          MOVLINE(LINE(0, 7), LINE(2, 6));          MOVLINE(LINE(0, 7), LINE(2, 6));
602          MOVLINE(LINE(2, 6), LINE(2, 5));          MOVLINE(LINE(2, 6), LINE(2, 5));
# Line 574  Line 605 
605    
606          /* right blocks */          /* right blocks */
607    
608          // 1=2, 2=4, 4=8, 8=1          /* 1=2, 2=4, 4=8, 8=1 */
609          MOVLINE(tmp, LINE(1, 1));          MOVLINE(tmp, LINE(1, 1));
610          MOVLINE(LINE(1, 1), LINE(1, 2));          MOVLINE(LINE(1, 1), LINE(1, 2));
611          MOVLINE(LINE(1, 2), LINE(1, 4));          MOVLINE(LINE(1, 2), LINE(1, 4));
612          MOVLINE(LINE(1, 4), LINE(3, 0));          MOVLINE(LINE(1, 4), LINE(3, 0));
613          MOVLINE(LINE(3, 0), tmp);          MOVLINE(LINE(3, 0), tmp);
614    
615          // 3=6, 6=12, 12=9, 9=3          /* 3=6, 6=12, 12=9, 9=3 */
616          MOVLINE(tmp, LINE(1, 3));          MOVLINE(tmp, LINE(1, 3));
617          MOVLINE(LINE(1, 3), LINE(1, 6));          MOVLINE(LINE(1, 3), LINE(1, 6));
618          MOVLINE(LINE(1, 6), LINE(3, 4));          MOVLINE(LINE(1, 6), LINE(3, 4));
619          MOVLINE(LINE(3, 4), LINE(3, 1));          MOVLINE(LINE(3, 4), LINE(3, 1));
620          MOVLINE(LINE(3, 1), tmp);          MOVLINE(LINE(3, 1), tmp);
621    
622          // 5=10, 10=5          /* 5=10, 10=5 */
623          MOVLINE(tmp, LINE(1, 5));          MOVLINE(tmp, LINE(1, 5));
624          MOVLINE(LINE(1, 5), LINE(3, 2));          MOVLINE(LINE(1, 5), LINE(3, 2));
625          MOVLINE(LINE(3, 2), tmp);          MOVLINE(LINE(3, 2), tmp);
626    
627          // 7=14, 14=13, 13=11, 11=7          /* 7=14, 14=13, 13=11, 11=7 */
628          MOVLINE(tmp, LINE(1, 7));          MOVLINE(tmp, LINE(1, 7));
629          MOVLINE(LINE(1, 7), LINE(3, 6));          MOVLINE(LINE(1, 7), LINE(3, 6));
630          MOVLINE(LINE(3, 6), LINE(3, 5));          MOVLINE(LINE(3, 6), LINE(3, 5));
# Line 601  Line 632 
632          MOVLINE(LINE(3, 3), tmp);          MOVLINE(LINE(3, 3), tmp);
633  }  }
634    
635    /*****************************************************************************
636     *               Trellis based R-D optimal quantization
637     *
638     *   Trellis Quant code (C) 2003 Pascal Massimino skal(at)planet-d.net
639     *
640     ****************************************************************************/
641    
642    /*----------------------------------------------------------------------------
643     *
644     *        Trellis-Based quantization
645     *
646     * So far I understand this paper:
647     *
648     *  "Trellis-Based R-D Optimal Quantization in H.263+"
649     *    J.Wen, M.Luttrell, J.Villasenor
650     *    IEEE Transactions on Image Processing, Vol.9, No.8, Aug. 2000.
651     *
652     * we are at stake with a simplified Bellmand-Ford / Dijkstra Single
653     * Source Shortest Path algo. But due to the underlying graph structure
654     * ("Trellis"), it can be turned into a dynamic programming algo,
655     * partially saving the explicit graph's nodes representation. And
656     * without using a heap, since the open frontier of the DAG is always
657     * known, and of fixed size.
658     *--------------------------------------------------------------------------*/
659    
660    
661    
662  /************************************************************************  /* Codes lengths for relevant levels. */
  *               Trellis based R-D optimal quantization                 *  
  *                                                                      *  
  *   Trellis Quant code (C) 2003 Pascal Massimino skal(at)planet-d.net  *  
  *                                                                      *  
  ************************************************************************/  
   
   
 static int  
 dct_quantize_trellis_mpeg_c(int16_t *const Out, const int16_t *const In, int Q,  
                 const uint16_t * const Zigzag, int Non_Zero)  
 { return 63; }  
   
   
 //////////////////////////////////////////////////////////  
 //  
 //        Trellis-Based quantization  
 //  
 // So far I understand this paper:  
 //  
 //  "Trellis-Based R-D Optimal Quantization in H.263+"  
 //    J.Wen, M.Luttrell, J.Villasenor  
 //    IEEE Transactions on Image Processing, Vol.9, No.8, Aug. 2000.  
 //  
 // we are at stake with a simplified Bellmand-Ford / Dijkstra Single  
 // Source Shorted Path algo. But due to the underlying graph structure  
 // ("Trellis"), it can be turned into a dynamic programming algo,  
 // partially saving the explicit graph's nodes representation. And  
 // without using a heap, since the open frontier of the DAG is always  
 // known, and of fixed sized.  
 //  
 //////////////////////////////////////////////////////////  
   
   
 //////////////////////////////////////////////////////////  
 // Codes lengths for relevant levels.  
663    
664    // let's factorize:  /* let's factorize: */
665  static const uint8_t Code_Len0[64] = {  static const uint8_t Code_Len0[64] = {
666    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
667    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
# Line 707  Line 726 
726     3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9,10,10,10,10,10,10,10,10,12,12,13,13,12,13,14,15,15,     3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9,10,10,10,10,10,10,10,10,12,12,13,13,12,13,14,15,15,
727    15,16,16,16,16,17,17,17,18,18,19,19,19,19,19,19,19,19,21,21,22,22,30,30,30,30,30,30,30,30,30,30 };    15,16,16,16,16,17,17,17,18,18,19,19,19,19,19,19,19,19,21,21,22,22,30,30,30,30,30,30,30,30,30,30 };
728    
729    // a few more table for LAST table:  /* a few more table for LAST table: */
730  static const uint8_t Code_Len21[64] = {  static const uint8_t Code_Len21[64] = {
731    13,20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,    13,20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
732    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};    30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};
# Line 722  Line 741 
741    12,13,13,13,13,13,13,13,13,14,16,16,16,16,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19};    12,13,13,13,13,13,13,13,13,14,16,16,16,16,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19};
742    
743    
744  static const uint8_t * const B16_17_Code_Len[24] = { // levels [1..24]  static const uint8_t * const B16_17_Code_Len[24] = { /* levels [1..24] */
745    Code_Len20,Code_Len19,Code_Len18,Code_Len17,    Code_Len20,Code_Len19,Code_Len18,Code_Len17,
746    Code_Len16,Code_Len15,Code_Len14,Code_Len13,    Code_Len16,Code_Len15,Code_Len14,Code_Len13,
747    Code_Len12,Code_Len11,Code_Len10,Code_Len9,    Code_Len12,Code_Len11,Code_Len10,Code_Len9,
# Line 731  Line 750 
750    Code_Len2, Code_Len1, Code_Len1, Code_Len1,    Code_Len2, Code_Len1, Code_Len1, Code_Len1,
751  };  };
752    
753  static const uint8_t * const B16_17_Code_Len_Last[6] = { // levels [1..6]  static const uint8_t * const B16_17_Code_Len_Last[6] = { /* levels [1..6] */
754    Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1,    Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1,
755  };  };
756    
757  #define TL(q) 0xfe00/(q*q)  /* TL_SHIFT controls the precision of the RD optimizations in trellis
758     * valid range is [10..16]. The bigger, the more trellis is vulnerable
759     * to overflows in cost formulas.
760     *  - 10 allows ac values up to 2^11 == 2048
761     *  - 16 allows ac values up to 2^8 == 256
762     */
763    #define TL_SHIFT 11
764    #define TL(q) ((0xfe00>>(16-TL_SHIFT))/(q*q))
765    
766  static const int Trellis_Lambda_Tabs[31] = {  static const int Trellis_Lambda_Tabs[31] = {
767           TL( 1),TL( 2),TL( 3),TL( 4),TL( 5),TL( 6), TL( 7),           TL( 1),TL( 2),TL( 3),TL( 4),TL( 5),TL( 6), TL( 7),
# Line 745  Line 771 
771  };  };
772  #undef TL  #undef TL
773    
774  static __inline int Find_Last(const int16_t *C, const uint16_t *Zigzag, int i)  static int __inline
775    Find_Last(const int16_t *C, const uint16_t *Zigzag, int i)
776  {  {
777    while(i>=0)    while(i>=0)
778      if (C[Zigzag[i]])      if (C[Zigzag[i]])
# Line 754  Line 781 
781    return -1;    return -1;
782  }  }
783    
784  //////////////////////////////////////////////////////////  static int __inline
785  // this routine has been strippen of all debug code  Compute_Sum(const int16_t *C, int last)
786  //////////////////////////////////////////////////////////  {
787            int sum = 0;
788    
789            while(last--)
790                    sum += abs(C[last]);
791    
792            return(sum);
793    }
794    
795    /* this routine has been strippen of all debug code */
796  static int  static int
797  dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero)  dct_quantize_trellis_c(int16_t *const Out,
798                                               const int16_t *const In,
799                                               int Q,
800                                               const uint16_t * const Zigzag,
801                                               const uint16_t * const QuantMatrix,
802                                               int Non_Zero)
803  {  {
804    
805      // Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]),          /* Note: We should search last non-zero coeffs on *real* DCT input coeffs
806      // not quantized one (Out[]). However, it only improves the result *very*           * (In[]), not quantized one (Out[]). However, it only improves the result
807      // slightly (~0.01dB), whereas speed drops to crawling level :)           * *very* slightly (~0.01dB), whereas speed drops to crawling level :)
808      // Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps,           * Well, actually, taking 1 more coeff past Non_Zero into account sometimes
809             * helps. */
810    typedef struct { int16_t Run, Level; } NODE;    typedef struct { int16_t Run, Level; } NODE;
811    
812    NODE Nodes[65], Last;    NODE Nodes[65], Last;
813    uint32_t Run_Costs0[64+1];    uint32_t Run_Costs0[64+1];
814    uint32_t * const Run_Costs = Run_Costs0 + 1;    uint32_t * const Run_Costs = Run_Costs0 + 1;
815    const int Mult = 2*Q;  
816    const int Bias = (Q-1) | 1;          /* it's 1/lambda, actually */
817    const int Lev0 = Mult + Bias;          const int Lambda = Trellis_Lambda_Tabs[Q-1];
   const int Lambda = Trellis_Lambda_Tabs[Q-1];    // it's 1/lambda, actually  
818    
819    int Run_Start = -1;    int Run_Start = -1;
820    uint32_t Min_Cost = 2<<16;          uint32_t Min_Cost = 2<<TL_SHIFT;
821    
822    int Last_Node = -1;    int Last_Node = -1;
823    uint32_t Last_Cost = 0;    uint32_t Last_Cost = 0;
824    
825    int i, j;          int i, j, sum;
826    Run_Costs[-1] = 2<<16;                          // source (w/ CBP penalty)  
827            /* source (w/ CBP penalty) */
828            Run_Costs[-1] = 2<<TL_SHIFT;
829    
830    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);
831    if (Non_Zero<0)    if (Non_Zero<0)
832        return -1;                  return 0; /* Sum is zero if there are only zero coeffs */
833    
834            for(i=0; i<=Non_Zero; i++) {
835                    const int q = ((Q*QuantMatrix[Zigzag[i]])>>4);
836                    const int Mult = 2*q;
837                    const int Bias = (q-1) | 1;
838                    const int Lev0 = Mult + Bias;
839    
   for(i=0; i<=Non_Zero; i++)  
   {  
840      const int AC = In[Zigzag[i]];      const int AC = In[Zigzag[i]];
841      const int Level1 = Out[Zigzag[i]];      const int Level1 = Out[Zigzag[i]];
842      const int Dist0 = Lambda* AC*AC;                  const unsigned int Dist0 = Lambda* AC*AC;
843      uint32_t Best_Cost = 0xf0000000;      uint32_t Best_Cost = 0xf0000000;
844      Last_Cost += Dist0;      Last_Cost += Dist0;
845    
846      if ((uint32_t)(Level1+1)<3)                 // very specialized loop for -1,0,+1                  /* very specialized loop for -1,0,+1 */
847      {                  if ((uint32_t)(Level1+1)<3) {
848          int dQ;          int dQ;
849                  int Run;                  int Run;
850        uint32_t Cost0;        uint32_t Cost0;
# Line 814  Line 859 
859                  Cost0 = Lambda*dQ*dQ;                  Cost0 = Lambda*dQ*dQ;
860    
861        Nodes[i].Run = 1;        Nodes[i].Run = 1;
862        Best_Cost = (Code_Len20[0]<<16) + Run_Costs[i-1]+Cost0;                          Best_Cost = (Code_Len20[0]<<TL_SHIFT) + Run_Costs[i-1]+Cost0;
863        for(Run=i-Run_Start; Run>0; --Run)                          for(Run=i-Run_Start; Run>0; --Run) {
       {  
864          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];
865          const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<16);                                  const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<TL_SHIFT);
866          const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<16);                                  const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<TL_SHIFT);
867    
868            // TODO: what about tie-breaks? Should we favor short runs or                                  /* TODO: what about tie-breaks? Should we favor short runs or
869            // long runs? Although the error is the same, it would not be                                   * long runs? Although the error is the same, it would not be
870            // spread the same way along high and low frequencies...                                   * spread the same way along high and low frequencies... */
871    
872                          // (I'd say: favour short runs => hifreq errors (HVS) -- gruel )                                  /* Gruel: I'd say, favour short runs => hifreq errors (HVS) */
873    
874          if (Cost<Best_Cost) {          if (Cost<Best_Cost) {
875            Best_Cost    = Cost;            Best_Cost    = Cost;
# Line 840  Line 884 
884        }        }
885        if (Last_Node==i)        if (Last_Node==i)
886                          Last.Level = Nodes[i].Level;                          Last.Level = Nodes[i].Level;
887      }                  } else if (51U>(uint32_t)(Level1+25)) {
888      else                      // "big" levels                          /* "big" levels (not less than ESC3, though) */
     {  
889        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;
890        int Level2;        int Level2;
891        int dQ1, dQ2;        int dQ1, dQ2;
# Line 858  Line 901 
901          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;
902          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;
903          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;
904        } else { // Level1<-1                          } else { /* Level1<-1 */
905          dQ1 = Level1*Mult-AC - Bias;          dQ1 = Level1*Mult-AC - Bias;
906          dQ2 = dQ1 + Mult;          dQ2 = dQ1 + Mult;
907          Level2 = Level1 + 1;          Level2 = Level1 + 1;
# Line 867  Line 910 
910          Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0;          Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0;
911          Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0;          Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0;
912        }        }
913    
914        Dist1 = Lambda*dQ1*dQ1;        Dist1 = Lambda*dQ1*dQ1;
915        Dist2 = Lambda*dQ2*dQ2;        Dist2 = Lambda*dQ2*dQ2;
916        dDist21 = Dist2-Dist1;        dDist21 = Dist2-Dist1;
# Line 877  Line 921 
921          uint32_t Cost1, Cost2;          uint32_t Cost1, Cost2;
922          int bLevel;          int bLevel;
923    
924  // for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:                                  /* for sub-optimal (but slightly worth it, speed-wise) search,
925  //        if (Cost_Base>=Best_Cost) continue;                                   * uncomment the following:
926  // (? doesn't seem to have any effect -- gruel )                                   *              if (Cost_Base>=Best_Cost) continue;
927                                     * (? doesn't seem to have any effect -- gruel ) */
928    
929          Cost1 = Cost_Base + (Tbl_L1[Run-1]<<16);                                  Cost1 = Cost_Base + (Tbl_L1[Run-1]<<TL_SHIFT);
930          Cost2 = Cost_Base + (Tbl_L2[Run-1]<<16) + dDist21;                                  Cost2 = Cost_Base + (Tbl_L2[Run-1]<<TL_SHIFT) + dDist21;
931    
932          if (Cost2<Cost1) {          if (Cost2<Cost1) {
933                           Cost1 = Cost2;                           Cost1 = Cost2;
934                           bLevel = Level2;                           bLevel = Level2;
935                    } else                                  } else {
936                           bLevel = Level1;                           bLevel = Level1;
937                                    }
938    
939          if (Cost1<Best_Cost) {          if (Cost1<Best_Cost) {
940            Best_Cost = Cost1;            Best_Cost = Cost1;
# Line 896  Line 942 
942            Nodes[i].Level = bLevel;            Nodes[i].Level = bLevel;
943          }          }
944    
945          Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<16);                                  Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<TL_SHIFT);
946          Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<16) + dDist21;                                  Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<TL_SHIFT) + dDist21;
947    
948          if (Cost2<Cost1) {          if (Cost2<Cost1) {
949                           Cost1 = Cost2;                           Cost1 = Cost2;
950                           bLevel = Level2;                           bLevel = Level2;
951                    } else                                  } else {
952                           bLevel = Level1;                           bLevel = Level1;
953                                    }
954    
955          if (Cost1<Last_Cost) {          if (Cost1<Last_Cost) {
956            Last_Cost  = Cost1;            Last_Cost  = Cost1;
# Line 911  Line 958 
958            Last.Level = bLevel;            Last.Level = bLevel;
959            Last_Node  = i;            Last_Node  = i;
960          }          }
961        } //end of "for Run"                          } /* end of "for Run" */
962                    } else {
963                            /* Very very high levels, with no chance of being optimizable
964                             * => Simply pick best Run. */
965                            int Run;
966                            for(Run=i-Run_Start; Run>0; --Run) {
967                                    /* 30 bits + no distortion */
968                                    const uint32_t Cost = (30<<TL_SHIFT) + Run_Costs[i-Run];
969                                    if (Cost<Best_Cost) {
970                                            Best_Cost = Cost;
971                                            Nodes[i].Run   = Run;
972                                            Nodes[i].Level = Level1;
973                                    }
974    
975                                    if (Cost<Last_Cost) {
976                                            Last_Cost  = Cost;
977                                            Last.Run   = Run;
978                                            Last.Level = Level1;
979                                            Last_Node  = i;
980                                    }
981                            }
982      }      }
983    
984    
985      Run_Costs[i] = Best_Cost;      Run_Costs[i] = Best_Cost;
986    
987      if (Best_Cost < Min_Cost + Dist0) {      if (Best_Cost < Min_Cost + Dist0) {
988        Min_Cost = Best_Cost;        Min_Cost = Best_Cost;
989        Run_Start = i;        Run_Start = i;
990      }                  } else {
991      else                          /* as noticed by Michael Niedermayer (michaelni at gmx.at),
992      {                           * there's a code shorter by 1 bit for a larger run (!), same
993          // as noticed by Michael Niedermayer (michaelni at gmx.at), there's                           * level. We give it a chance by not moving the left barrier too
994          // a code shorter by 1 bit for a larger run (!), same level. We give                           * much. */
995          // it a chance by not moving the left barrier too much.                          while( Run_Costs[Run_Start]>Min_Cost+(1<<TL_SHIFT) )
   
       while( Run_Costs[Run_Start]>Min_Cost+(1<<16) )  
996          Run_Start++;          Run_Start++;
997    
998          // spread on preceding coeffs the cost incurred by skipping this one                          /* spread on preceding coeffs the cost incurred by skipping this
999                             * one */
1000        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;
1001        Min_Cost += Dist0;        Min_Cost += Dist0;
1002      }      }
1003    }    }
1004    
1005            /* It seems trellis doesn't give good results... just compute the Out sum
1006             * and quit */
1007    if (Last_Node<0)    if (Last_Node<0)
1008      return -1;                  return Compute_Sum(Out, Non_Zero);
1009    
1010         // reconstruct optimal sequence backward with surviving paths          /* reconstruct optimal sequence backward with surviving paths */
1011    memset(Out, 0x00, 64*sizeof(*Out));    memset(Out, 0x00, 64*sizeof(*Out));
1012    Out[Zigzag[Last_Node]] = Last.Level;    Out[Zigzag[Last_Node]] = Last.Level;
1013    i = Last_Node - Last.Run;    i = Last_Node - Last.Run;
1014            sum = 0;
1015    while(i>=0) {    while(i>=0) {
1016      Out[Zigzag[i]] = Nodes[i].Level;      Out[Zigzag[i]] = Nodes[i].Level;
1017                    sum += abs(Nodes[i].Level);
1018      i -= Nodes[i].Run;      i -= Nodes[i].Run;
1019    }    }
   return Last_Node;  
 }  
   
   
1020    
1021            return sum;
1022    }
1023    
1024    /* original version including heavy debugging info */
   
   
   
   
   
   
 //////////////////////////////////////////////////////////  
 // original version including heavy debugging info  
 //////////////////////////////////////////////////////////  
   
1025    
1026  #ifdef DBGTRELL  #ifdef DBGTRELL
1027    
# Line 987  Line 1045 
1045      int j=0, j0=0;      int j=0, j0=0;
1046      int Run, Level;      int Run, Level;
1047    
1048      Bits = 2;   // CBP                  Bits = 2;   /* CBP */
1049      while(j<Last) {      while(j<Last) {
1050        while(!C[Zigzag[j]])        while(!C[Zigzag[j]])
1051                          j++;                          j++;
# Line 1019  Line 1077 
1077      V -= Ref[Zigzag[i]];      V -= Ref[Zigzag[i]];
1078      Dist += V*V;      Dist += V*V;
1079    }    }
1080    Cost = Lambda*Dist + (Bits<<16);          Cost = Lambda*Dist + (Bits<<TL_SHIFT);
1081    if (DBG==1)    if (DBG==1)
1082      printf( " Last:%2d/%2d Cost = [(Bits=%5.0d) + Lambda*(Dist=%6.0d) = %d ] >>12= %d ", Last,Max, Bits, Dist, Cost, Cost>>12 );      printf( " Last:%2d/%2d Cost = [(Bits=%5.0d) + Lambda*(Dist=%6.0d) = %d ] >>12= %d ", Last,Max, Bits, Dist, Cost, Cost>>12 );
1083    return Cost;    return Cost;
# Line 1034  Line 1092 
1092  dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero)  dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero)
1093  {  {
1094    
1095      // Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]),      /*
1096      // not quantized one (Out[]). However, it only improves the result *very*           * Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]),
1097      // slightly (~0.01dB), whereas speed drops to crawling level :)           * not quantized one (Out[]). However, it only improves the result *very*
1098      // Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps,           * slightly (~0.01dB), whereas speed drops to crawling level :)
1099             * Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps.
1100             */
1101    typedef struct { int16_t Run, Level; } NODE;    typedef struct { int16_t Run, Level; } NODE;
1102    
1103    NODE Nodes[65], Last;    NODE Nodes[65], Last;
# Line 1047  Line 1106 
1106    const int Mult = 2*Q;    const int Mult = 2*Q;
1107    const int Bias = (Q-1) | 1;    const int Bias = (Q-1) | 1;
1108    const int Lev0 = Mult + Bias;    const int Lev0 = Mult + Bias;
1109    const int Lambda = Trellis_Lambda_Tabs[Q-1];    // it's 1/lambda, actually          const int Lambda = Trellis_Lambda_Tabs[Q-1];    /* it's 1/lambda, actually */
1110    
1111    int Run_Start = -1;    int Run_Start = -1;
1112    Run_Costs[-1] = 2<<16;                          // source (w/ CBP penalty)          Run_Costs[-1] = 2<<TL_SHIFT;                          /* source (w/ CBP penalty) */
1113    uint32_t Min_Cost = 2<<16;          uint32_t Min_Cost = 2<<TL_SHIFT;
1114    
1115    int Last_Node = -1;    int Last_Node = -1;
1116    uint32_t Last_Cost = 0;    uint32_t Last_Cost = 0;
# Line 1059  Line 1118 
1118    int i, j;    int i, j;
1119    
1120  #if (DBG>0)  #if (DBG>0)
1121    Last.Level = 0; Last.Run = -1; // just initialize to smthg          Last.Level = 0; Last.Run = -1; /* just initialize to smthg */
1122  #endif  #endif
1123    
1124    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);    Non_Zero = Find_Last(Out, Zigzag, Non_Zero);
# Line 1074  Line 1133 
1133      uint32_t Best_Cost = 0xf0000000;      uint32_t Best_Cost = 0xf0000000;
1134      Last_Cost += Dist0;      Last_Cost += Dist0;
1135    
1136      if ((uint32_t)(Level1+1)<3)                 // very specialized loop for -1,0,+1                  if ((uint32_t)(Level1+1)<3)                 /* very specialized loop for -1,0,+1 */
1137      {      {
1138          int dQ;          int dQ;
1139                  int Run;                  int Run;
# Line 1090  Line 1149 
1149                  Cost0 = Lambda*dQ*dQ;                  Cost0 = Lambda*dQ*dQ;
1150    
1151        Nodes[i].Run = 1;        Nodes[i].Run = 1;
1152        Best_Cost = (Code_Len20[0]<<16) + Run_Costs[i-1]+Cost0;                          Best_Cost = (Code_Len20[0]<<TL_SHIFT) + Run_Costs[i-1]+Cost0;
1153        for(Run=i-Run_Start; Run>0; --Run)        for(Run=i-Run_Start; Run>0; --Run)
1154        {        {
1155          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];          const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];
1156          const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<16);                                  const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<TL_SHIFT);
1157          const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<16);                                  const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<TL_SHIFT);
1158    
1159            // TODO: what about tie-breaks? Should we favor short runs or                                  /*
1160            // long runs? Although the error is the same, it would not be                                   * TODO: what about tie-breaks? Should we favor short runs or
1161            // spread the same way along high and low frequencies...                                   * long runs? Although the error is the same, it would not be
1162                                     * spread the same way along high and low frequencies...
1163                                     */
1164          if (Cost<Best_Cost) {          if (Cost<Best_Cost) {
1165            Best_Cost    = Cost;            Best_Cost    = Cost;
1166            Nodes[i].Run = Run;            Nodes[i].Run = Run;
# Line 1129  Line 1190 
1190          printf( "\n" );          printf( "\n" );
1191        }        }
1192      }      }
1193      else                      // "big" levels                  else                      /* "big" levels */
1194      {      {
1195        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;        const uint8_t *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last;
1196        int Level2;        int Level2;
# Line 1146  Line 1207 
1207          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;          Tbl_L2      = (Level2<=24) ? B16_17_Code_Len[Level2-1]     : Code_Len0;
1208          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;          Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;
1209          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;          Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;
1210        } else { // Level1<-1                          } else { /* Level1<-1 */
1211          dQ1 = Level1*Mult-AC - Bias;          dQ1 = Level1*Mult-AC - Bias;
1212          dQ2 = dQ1 + Mult;          dQ2 = dQ1 + Mult;
1213          Level2 = Level1 + 1;          Level2 = Level1 + 1;
# Line 1165  Line 1226 
1226          uint32_t Cost1, Cost2;          uint32_t Cost1, Cost2;
1227          int bLevel;          int bLevel;
1228    
1229  // for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:  /*
1230  //        if (Cost_Base>=Best_Cost) continue;   * for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:
1231     *        if (Cost_Base>=Best_Cost) continue;
1232          Cost1 = Cost_Base + (Tbl_L1[Run-1]<<16);   */
1233          Cost2 = Cost_Base + (Tbl_L2[Run-1]<<16) + dDist21;                                  Cost1 = Cost_Base + (Tbl_L1[Run-1]<<TL_SHIFT);
1234                                    Cost2 = Cost_Base + (Tbl_L2[Run-1]<<TL_SHIFT) + dDist21;
1235    
1236          if (Cost2<Cost1) {          if (Cost2<Cost1) {
1237                           Cost1 = Cost2;                           Cost1 = Cost2;
# Line 1183  Line 1245 
1245            Nodes[i].Level = bLevel;            Nodes[i].Level = bLevel;
1246          }          }
1247    
1248          Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<16);                                  Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<TL_SHIFT);
1249          Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<16) + dDist21;                                  Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<TL_SHIFT) + dDist21;
1250    
1251          if (Cost2<Cost1) {          if (Cost2<Cost1) {
1252                           Cost1 = Cost2;                           Cost1 = Cost2;
# Line 1198  Line 1260 
1260            Last.Level = bLevel;            Last.Level = bLevel;
1261            Last_Node  = i;            Last_Node  = i;
1262          }          }
1263        } //end of "for Run"                          } /* end of "for Run" */
1264    
1265        if (DBG==1) {        if (DBG==1) {
1266          Run_Costs[i] = Best_Cost;          Run_Costs[i] = Best_Cost;
# Line 1224  Line 1286 
1286      }      }
1287      else      else
1288      {      {
1289          // as noticed by Michael Niedermayer (michaelni at gmx.at), there's                          /*
1290          // a code shorter by 1 bit for a larger run (!), same level. We give                           * as noticed by Michael Niedermayer (michaelni at gmx.at), there's
1291          // it a chance by not moving the left barrier too much.                           * a code shorter by 1 bit for a larger run (!), same level. We give
1292                             * it a chance by not moving the left barrier too much.
1293                             */
1294    
1295        while( Run_Costs[Run_Start]>Min_Cost+(1<<16) )                          while( Run_Costs[Run_Start]>Min_Cost+(1<<TL_SHIFT) )
1296          Run_Start++;          Run_Start++;
1297    
1298          // spread on preceding coeffs the cost incurred by skipping this one                          /* spread on preceding coeffs the cost incurred by skipping this one */
1299        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;        for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;
1300        Min_Cost += Dist0;        Min_Cost += Dist0;
1301      }      }
# Line 1249  Line 1313 
1313    if (Last_Node<0)    if (Last_Node<0)
1314      return -1;      return -1;
1315    
1316         // reconstruct optimal sequence backward with surviving paths          /* reconstruct optimal sequence backward with surviving paths */
1317    memset(Out, 0x00, 64*sizeof(*Out));    memset(Out, 0x00, 64*sizeof(*Out));
1318    Out[Zigzag[Last_Node]] = Last.Level;    Out[Zigzag[Last_Node]] = Last.Level;
1319    i = Last_Node - Last.Run;    i = Last_Node - Last.Run;

Legend:
Removed from v.1014  
changed lines
  Added in v.1230

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4