Annotation of /branches/dev-api-4/xvidcore/src/utils/mbtransquant.c

Revision 1014 - (view) (download)

1 :	edgomez	965	/*****************************************************************************
2 :			*
3 :			* XVID MPEG-4 VIDEO CODEC
4 :			* - MB Transfer/Quantization functions -
5 :			*
6 :			* Copyright(C) 2001-2003 Peter Ross <pross@xvid.org>
7 :			* 2001-2003 Michael Militzer <isibaar@xvid.org>
8 :			* 2003 Edouard Gomez <ed.gomez@free.fr>
9 :			*
10 :			* This program is free software ; you can redistribute it and/or modify
11 :			* it under the terms of the GNU General Public License as published by
12 :			* the Free Software Foundation ; either version 2 of the License, or
13 :			* (at your option) any later version.
14 :			*
15 :			* This program is distributed in the hope that it will be useful,
16 :			* but WITHOUT ANY WARRANTY ; without even the implied warranty of
17 :			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 :			* GNU General Public License for more details.
19 :			*
20 :			* You should have received a copy of the GNU General Public License
21 :			* along with this program ; if not, write to the Free Software
22 :			* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 :			*
24 :	suxen_drol	1014	* $Id: mbtransquant.c,v 1.21.2.12 2003-05-12 12:33:16 suxen_drol Exp $
25 :	edgomez	965	*
26 :			****************************************************************************/
27 :	Isibaar	3
28 :	chl	1012	#include <stdio.h>
29 :			#include <stdlib.h>
30 :	edgomez	78	#include <string.h>
31 :
32 :	Isibaar	3	#include "../portab.h"
33 :			#include "mbfunctions.h"
34 :
35 :			#include "../global.h"
36 :			#include "mem_transfer.h"
37 :			#include "timer.h"
38 :	chl	995	#include "../bitstream/mbcoding.h"
39 :	chl	1011	#include "../bitstream/zigzag.h"
40 :	Isibaar	3	#include "../dct/fdct.h"
41 :			#include "../dct/idct.h"
42 :			#include "../quant/quant_mpeg4.h"
43 :			#include "../quant/quant_h263.h"
44 :			#include "../encoder.h"
45 :
46 :	edgomez	851	#include "../image/reduced.h"
47 :	Isibaar	3
48 :	edgomez	851	MBFIELDTEST_PTR MBFieldTest;
49 :	Isibaar	3
50 :	edgomez	965	/*
51 :			* Skip blocks having a coefficient sum below this value. This value will be
52 :			* corrected according to the MB quantizer to avoid artifacts for quant==1
53 :			*/
54 :			#define PVOP_TOOSMALL_LIMIT 1
55 :			#define BVOP_TOOSMALL_LIMIT 3
56 :	Isibaar	3
57 :	edgomez	965	/*****************************************************************************
58 :			* Local functions
59 :			****************************************************************************/
60 :
61 :			/*
			 * Decide whether one macroblock is coded with field DCT.
			 *
			 * Runs the test behind the MBFieldTest function pointer on the sample
			 * buffer. When the test returns non-zero, MBFrameToField() permutes the
			 * buffer in place. The test result itself is returned to the caller.
			 */
62 :			static __inline uint32_t
63 :			MBDecideFieldDCT(int16_t data[6 * 64])
64 :	Isibaar	3	{
65 :	edgomez	965	uint32_t field = MBFieldTest(data);
66 :	edgomez	78
67 :	edgomez	965	if (field)
68 :			MBFrameToField(data);
69 :	Isibaar	3
70 :	edgomez	965	return field;
71 :			}
72 :	h	69
73 :	edgomez	965	/*
			 * Performs Forward DCT on all blocks
			 *
			 * First resets pMB->field_dct to 0 and, when allowed, lets
			 * MBDecideFieldDCT() choose field DCT (which may permute data in place).
			 * Then runs fdct() in place on each of the six 64-coefficient blocks.
			 */
74 :			static __inline void
75 :	syskin	984	MBfDCT(const MBParam * const pParam,
76 :			const FRAMEINFO * const frame,
77 :			MACROBLOCK * const pMB,
78 :	edgomez	965	uint32_t x_pos,
79 :			uint32_t y_pos,
80 :			int16_t data[6 * 64])
81 :	syskin	984	{
82 :	edgomez	965	/*
			 * Handles interlacing: the field/frame decision is only made when the
			 * interlacing VOL flag is set and the macroblock is not in the first or
			 * last row/column; otherwise field_dct stays 0.
			 */
83 :	h	69	start_timer();
84 :			pMB->field_dct = 0;
85 :	edgomez	949	if ((frame->vol_flags & XVID_VOL_INTERLACING) &&
86 :	h	390	(x_pos>0) && (x_pos<pParam->mb_width-1) &&
87 :			(y_pos>0) && (y_pos<pParam->mb_height-1)) {
88 :	h	69	pMB->field_dct = MBDecideFieldDCT(data);
89 :			}
90 :			stop_interlacing_timer();
91 :
92 :	edgomez	965	/* Perform DCT on the six blocks, one call per block */
93 :			start_timer();
94 :			fdct(&data[0 * 64]);
95 :			fdct(&data[1 * 64]);
96 :			fdct(&data[2 * 64]);
97 :			fdct(&data[3 * 64]);
98 :			fdct(&data[4 * 64]);
99 :			fdct(&data[5 * 64]);
100 :			stop_dct_timer();
101 :			}
102 :
103 :			/*
			 * Performs Inverse DCT on the blocks selected by cbp
			 *
			 * Block i is transformed in place only when bit (5 - i) of cbp is set,
			 * i.e. block 0 is the most significant of the six bits and block 5 the
			 * least significant. Blocks whose bit is clear are left untouched.
			 */
104 :			static __inline void
105 :			MBiDCT(int16_t data[6 * 64],
106 :			const uint8_t cbp)
107 :			{
108 :			start_timer();
109 :			if(cbp & (1 << (5 - 0))) idct(&data[0 * 64]);
110 :			if(cbp & (1 << (5 - 1))) idct(&data[1 * 64]);
111 :			if(cbp & (1 << (5 - 2))) idct(&data[2 * 64]);
112 :			if(cbp & (1 << (5 - 3))) idct(&data[3 * 64]);
113 :			if(cbp & (1 << (5 - 4))) idct(&data[4 * 64]);
114 :			if(cbp & (1 << (5 - 5))) idct(&data[5 * 64]);
115 :			stop_idct_timer();
116 :			}
117 :
118 :			/*
			 * Quantize all blocks -- Intra mode
			 *
			 * For each of the six blocks a DC scaler is fetched with
			 * get_dc_scaler(pMB->quant, i < 4), so blocks 0..3 and blocks 4..5 use
			 * different scaler classes. The block is then passed to quant_intra()
			 * or, when XVID_VOL_MPEGQUANT is set in pParam->vol_flags, to
			 * quant4_intra(). The frame parameter is not used here.
			 *
			 * NOTE(review): the only visible caller (MBTransQuantIntra) passes its
			 * buffers as (data, qcoeff) while the parameters here are declared as
			 * (qcoeff, data), so inside this function the names appear swapped
			 * relative to the caller -- confirm against quant_intra()'s prototype.
			 */
119 :			static __inline void
120 :			MBQuantIntra(const MBParam * pParam,
121 :	chl	995	const FRAMEINFO * const frame,
122 :	edgomez	965	const MACROBLOCK * pMB,
123 :	syskin	984	int16_t qcoeff[6 * 64],
124 :	edgomez	965	int16_t data[6*64])
125 :			{
126 :			int i;
127 :
128 :	edgomez	195	for (i = 0; i < 6; i++) {
129 :	edgomez	965	uint32_t iDcScaler = get_dc_scaler(pMB->quant, i < 4);
130 :
131 :			/* Quantize the block with the quantizer type selected by the VOL flags */
132 :			start_timer();
133 :	chl	995	if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT)) {
134 :	edgomez	965	quant_intra(&data[i * 64], &qcoeff[i * 64], pMB->quant, iDcScaler);
135 :	chl	995	} else {
136 :	edgomez	965	quant4_intra(&data[i * 64], &qcoeff[i * 64], pMB->quant, iDcScaler);
137 :	chl	995	}
138 :	edgomez	965	stop_quant_timer();
139 :			}
140 :			}
141 :
142 :			/*
			 * DeQuantize all blocks -- Intra mode
			 *
			 * Mirrors MBQuantIntra: per block, fetch the DC scaler with
			 * get_dc_scaler(iQuant, i < 4) and call dequant_intra(), or
			 * dequant4_intra() when XVID_VOL_MPEGQUANT is set in pParam->vol_flags.
			 * All six blocks are always processed.
			 *
			 * NOTE(review): the visible caller passes (data, qcoeff) for the
			 * parameters declared here as (qcoeff, data) -- the names appear swapped
			 * relative to the caller; confirm against dequant_intra()'s prototype.
			 */
143 :			static __inline void
144 :			MBDeQuantIntra(const MBParam * pParam,
145 :			const int iQuant,
146 :			int16_t qcoeff[6 * 64],
147 :			int16_t data[6*64])
148 :			{
149 :			int i;
150 :
151 :			for (i = 0; i < 6; i++) {
152 :	Isibaar	3	uint32_t iDcScaler = get_dc_scaler(iQuant, i < 4);
153 :
154 :			start_timer();
155 :	edgomez	965	if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT))
156 :			dequant_intra(&qcoeff[i * 64], &data[i * 64], iQuant, iDcScaler);
157 :			else
158 :			dequant4_intra(&qcoeff[i * 64], &data[i * 64], iQuant, iDcScaler);
159 :			stop_iquant_timer();
160 :			}
161 :			}
162 :	Isibaar	3
163 :	chl	1011
/*
 * Forward declarations of the trellis quantizers, which are defined after
 * their first use in MBQuantInter().
 *
 * Out and In are pointers to 64-coefficient blocks (callers pass
 * &qcoeff[i * 64] and &data[i * 64]); Zigzag points to a scan table and
 * Non_Zero is the highest scan position to consider.
 */
static int
dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero);

static int
dct_quantize_trellis_mpeg_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero);
169 :
170 :
171 :	edgomez	965	/* Quantize all blocks -- Inter mode */
172 :			static __inline uint8_t
173 :			MBQuantInter(const MBParam * pParam,
174 :	chl	995	const FRAMEINFO * const frame,
175 :	edgomez	965	const MACROBLOCK * pMB,
176 :			int16_t data[6 * 64],
177 :			int16_t qcoeff[6 * 64],
178 :			int bvop,
179 :			int limit)
180 :			{
181 :
182 :			int i;
183 :			uint8_t cbp = 0;
184 :			int sum;
185 :			int code_block;
186 :
187 :			for (i = 0; i < 6; i++) {
188 :	syskin	984
189 :	edgomez	965	/* Quantize the block */
190 :			start_timer();
191 :	chl	995	if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT)) {
192 :			sum = quant_inter(&qcoeff[i64], &data[i64], pMB->quant);
193 :			if ( (sum) && (frame->vop_flags & XVID_VOP_TRELLISQUANT) ) {
194 :	chl	1011	sum = dct_quantize_trellis_h263_c(&qcoeff[i64], &data[i64], pMB->quant, &scan_tables[0][0], 63)+1;
195 :	chl	995	limit = 1;
196 :			}
197 :			} else {
198 :	edgomez	965	sum = quant4_inter(&qcoeff[i * 64], &data[i * 64], pMB->quant);
199 :	chl	995	// if ( (sum) && (frame->vop_flags & XVID_VOP_TRELLISQUANT) )
200 :	chl	1011	// sum = dct_quantize_trellis_mpeg_c (&qcoeff[i64], &data[i64], pMB->quant)+1;
201 :	chl	995	}
202 :	edgomez	965	stop_quant_timer();
203 :
204 :			/*
205 :			* We code the block if the sum is higher than the limit and if the first
206 :			* two AC coefficients in zig zag order are not zero.
207 :			*/
208 :			code_block = 0;
209 :			if ((sum >= limit) \|\| (qcoeff[i64+1] != 0) \|\| (qcoeff[i64+8] != 0)) {
210 :			code_block = 1;
211 :	edgomez	195	} else {
212 :	Isibaar	3
213 :	edgomez	965	if (bvop && (pMB->mode == MODE_DIRECT \|\| pMB->mode == MODE_DIRECT_NO4V)) {
214 :			/* dark blocks prevention for direct mode */
215 :			if ((qcoeff[i64] < -1) \|\| (qcoeff[i64] > 0))
216 :			code_block = 1;
217 :	edgomez	851	} else {
218 :	edgomez	965	/* not direct mode */
219 :			if (qcoeff[i*64] != 0)
220 :			code_block = 1;
221 :	edgomez	851	}
222 :	Isibaar	3	}
223 :
224 :	edgomez	965	/* Set the corresponding cbp bit */
225 :			cbp \|= code_block << (5 - i);
226 :			}
227 :	edgomez	851
228 :	edgomez	965	return(cbp);
229 :			}
230 :	Isibaar	3
231 :	edgomez	965	/*
			 * DeQuantize all blocks -- Inter mode
			 *
			 * Only blocks whose bit (5 - i) is set in cbp are dequantized; the
			 * others are skipped entirely. dequant_inter() is used unless
			 * XVID_VOL_MPEGQUANT is set in pParam->vol_flags, in which case
			 * dequant4_inter() is used.
			 */
232 :	syskin	984	static __inline void
233 :	edgomez	965	MBDeQuantInter(const MBParam * pParam,
234 :			const int iQuant,
235 :			int16_t data[6 * 64],
236 :			int16_t qcoeff[6 * 64],
237 :			const uint8_t cbp)
238 :			{
239 :			int i;
240 :
241 :			for (i = 0; i < 6; i++) {
242 :	syskin	984	if (cbp & (1 << (5 - i))) {
243 :	edgomez	965	start_timer();
244 :			if (!(pParam->vol_flags & XVID_VOL_MPEGQUANT))
245 :			dequant_inter(&data[i * 64], &qcoeff[i * 64], iQuant);
246 :			else
247 :			dequant4_inter(&data[i * 64], &qcoeff[i * 64], iQuant);
248 :			stop_iquant_timer();
249 :	edgomez	851	}
250 :	h	69	}
251 :	Isibaar	3	}
252 :
/*
 * Function types shared by the block transfer routines, so that
 * MBTrans8to16() and MBTrans16to8() can select an implementation once and
 * call it for all six blocks. BpS is the stride passed for the 8-bit image.
 */
typedef void (transfer_operation_8to16_t) (int16_t *Dst, const uint8_t *Src, int BpS);
typedef void (transfer_operation_16to8_t) (uint8_t *Dst, const int16_t *Src, int BpS);
255 :	Isibaar	3
256 :	edgomez	78
257 :	edgomez	965	static __inline void
258 :	syskin	984	MBTrans8to16(const MBParam * const pParam,
259 :			const FRAMEINFO * const frame,
260 :			const MACROBLOCK * const pMB,
261 :	edgomez	965	const uint32_t x_pos,
262 :			const uint32_t y_pos,
263 :			int16_t data[6 * 64])
264 :			{
265 :	h	82	uint32_t stride = pParam->edged_width;
266 :			uint32_t stride2 = stride / 2;
267 :	edgomez	965	uint32_t next_block = stride * 8;
268 :	syskin	984	int32_t cst;
269 :	Isibaar	3	uint8_t pY_Cur, pU_Cur, *pV_Cur;
270 :	syskin	984	const IMAGE * const pCurrent = &frame->image;
271 :	edgomez	965	transfer_operation_8to16_t *transfer_op = NULL;
272 :	edgomez	195
273 :	edgomez	965	if ((frame->vop_flags & XVID_VOP_REDUCED)) {
274 :
275 :			/* Image pointers */
276 :			pY_Cur = pCurrent->y + (y_pos << 5) * stride + (x_pos << 5);
277 :	edgomez	851	pU_Cur = pCurrent->u + (y_pos << 4) * stride2 + (x_pos << 4);
278 :			pV_Cur = pCurrent->v + (y_pos << 4) * stride2 + (x_pos << 4);
279 :	edgomez	965
280 :			/* Block size */
281 :			cst = 16;
282 :
283 :			/* Operation function */
284 :			transfer_op = (transfer_operation_8to16_t*)filter_18x18_to_8x8;
285 :			} else {
286 :
287 :			/* Image pointers */
288 :			pY_Cur = pCurrent->y + (y_pos << 4) * stride + (x_pos << 4);
289 :	edgomez	851	pU_Cur = pCurrent->u + (y_pos << 3) * stride2 + (x_pos << 3);
290 :			pV_Cur = pCurrent->v + (y_pos << 3) * stride2 + (x_pos << 3);
291 :	edgomez	965
292 :			/* Block size */
293 :			cst = 8;
294 :
295 :			/* Operation function */
296 :			transfer_op = (transfer_operation_8to16_t*)transfer_8to16copy;
297 :	edgomez	851	}
298 :	Isibaar	3
299 :	edgomez	965	/* Do the transfer */
300 :	h	69	start_timer();
301 :	edgomez	965	transfer_op(&data[0 * 64], pY_Cur, stride);
302 :			transfer_op(&data[1 * 64], pY_Cur + cst, stride);
303 :			transfer_op(&data[2 * 64], pY_Cur + next_block, stride);
304 :			transfer_op(&data[3 * 64], pY_Cur + next_block + cst, stride);
305 :			transfer_op(&data[4 * 64], pU_Cur, stride2);
306 :			transfer_op(&data[5 * 64], pV_Cur, stride2);
307 :			stop_transfer_timer();
308 :	syskin	984	}
309 :	edgomez	965
310 :			static __inline void
311 :	syskin	984	MBTrans16to8(const MBParam * const pParam,
312 :			const FRAMEINFO * const frame,
313 :			const MACROBLOCK * const pMB,
314 :	edgomez	965	const uint32_t x_pos,
315 :			const uint32_t y_pos,
316 :			int16_t data[6 * 64],
317 :			const uint32_t add,
318 :			const uint8_t cbp)
319 :			{
320 :			uint8_t pY_Cur, pU_Cur, *pV_Cur;
321 :			uint32_t stride = pParam->edged_width;
322 :			uint32_t stride2 = stride / 2;
323 :			uint32_t next_block = stride * 8;
324 :	syskin	984	uint32_t cst;
325 :			const IMAGE * const pCurrent = &frame->image;
326 :	edgomez	965	transfer_operation_16to8_t *transfer_op = NULL;
327 :
328 :			if (pMB->field_dct) {
329 :			next_block = stride;
330 :			stride *= 2;
331 :	h	69	}
332 :
333 :	edgomez	965	if ((frame->vop_flags & XVID_VOP_REDUCED)) {
334 :	edgomez	851
335 :	edgomez	965	/* Image pointers */
336 :			pY_Cur = pCurrent->y + (y_pos << 5) * stride + (x_pos << 5);
337 :			pU_Cur = pCurrent->u + (y_pos << 4) * stride2 + (x_pos << 4);
338 :			pV_Cur = pCurrent->v + (y_pos << 4) * stride2 + (x_pos << 4);
339 :	Isibaar	3
340 :	edgomez	965	/* Block size */
341 :			cst = 16;
342 :	Isibaar	3
343 :	edgomez	965	/* Operation function */
344 :			if(add)
345 :			transfer_op = (transfer_operation_16to8_t*)add_upsampled_8x8_16to8;
346 :			else
347 :			transfer_op = (transfer_operation_16to8_t*)copy_upsampled_8x8_16to8;
348 :			} else {
349 :	Isibaar	3
350 :	edgomez	965	/* Image pointers */
351 :			pY_Cur = pCurrent->y + (y_pos << 4) * stride + (x_pos << 4);
352 :			pU_Cur = pCurrent->u + (y_pos << 3) * stride2 + (x_pos << 3);
353 :			pV_Cur = pCurrent->v + (y_pos << 3) * stride2 + (x_pos << 3);
354 :	Isibaar	3
355 :	edgomez	965	/* Block size */
356 :			cst = 8;
357 :	Isibaar	3
358 :	edgomez	965	/* Operation function */
359 :			if(add)
360 :			transfer_op = (transfer_operation_16to8_t*)transfer_16to8add;
361 :			else
362 :			transfer_op = (transfer_operation_16to8_t*)transfer_16to8copy;
363 :	Isibaar	3	}
364 :	h	69
365 :	edgomez	965	/* Do the operation */
366 :	h	69	start_timer();
367 :	edgomez	965	if (cbp&32) transfer_op(pY_Cur, &data[0 * 64], stride);
368 :			if (cbp&16) transfer_op(pY_Cur + cst, &data[1 * 64], stride);
369 :			if (cbp& 8) transfer_op(pY_Cur + next_block, &data[2 * 64], stride);
370 :			if (cbp& 4) transfer_op(pY_Cur + next_block + cst, &data[3 * 64], stride);
371 :			if (cbp& 2) transfer_op(pU_Cur, &data[4 * 64], stride2);
372 :			if (cbp& 1) transfer_op(pV_Cur, &data[5 * 64], stride2);
373 :	h	69	stop_transfer_timer();
374 :	Isibaar	3	}
375 :	h	69
376 :	edgomez	965	/*****************************************************************************
377 :			* Module functions
378 :			****************************************************************************/
379 :			/*
			 * Intra-code one macroblock and reconstruct it in the current image.
			 *
			 * Pipeline: load the pixels into data[], forward DCT (with field
			 * decision), quantize, dequantize, inverse DCT, and write the
			 * result back over the image. All six blocks are always processed
			 * (cbp mask 0x3F) and the write-back overwrites instead of adding.
			 */
380 :	syskin	984	void
381 :			MBTransQuantIntra(const MBParam * const pParam,
382 :			const FRAMEINFO * const frame,
383 :			MACROBLOCK * const pMB,
384 :	chl	368	const uint32_t x_pos,
385 :			const uint32_t y_pos,
386 :			int16_t data[6 * 64],
387 :			int16_t qcoeff[6 * 64])
388 :			{
389 :	h	69
390 :	edgomez	965	/* Transfer data */
391 :			MBTrans8to16(pParam, frame, pMB, x_pos, y_pos, data);
392 :	chl	368
393 :	edgomez	965	/* Perform DCT (and field decision) */
394 :			MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
395 :	chl	368
396 :	edgomez	965	/*
			 * Quantize the block
			 * NOTE(review): arguments are passed as (data, qcoeff) while
			 * MBQuantIntra declares them as (qcoeff, data) -- confirm intent.
			 */
397 :	chl	995	MBQuantIntra(pParam, frame, pMB, data, qcoeff);
398 :	edgomez	965
399 :			/* DeQuantize the block (same argument order remark as above) */
400 :			MBDeQuantIntra(pParam, pMB->quant, data, qcoeff);
401 :
402 :			/* Perform inverse DCT on all six blocks */
403 :			MBiDCT(data, 0x3F);
404 :
405 :			/* Transfer back the data -- Don't add data */
406 :			MBTrans16to8(pParam, frame, pMB, x_pos, y_pos, data, 0, 0x3F);
407 :	chl	368	}
408 :
409 :	edgomez	965	/*
			 * Inter-code one macroblock (non B-VOP path) and reconstruct it.
			 *
			 * data[] must already hold the values to transform on entry; this
			 * function does not load them. Pipeline: forward DCT (with field
			 * decision), quantize, then dequantize / inverse DCT / add back to the
			 * image for the blocks selected by the resulting cbp.
			 *
			 * Returns the coded block pattern computed by MBQuantInter().
			 */
410 :	chl	368	uint8_t
411 :	syskin	984	MBTransQuantInter(const MBParam * const pParam,
412 :			const FRAMEINFO * const frame,
413 :			MACROBLOCK * const pMB,
414 :	edgomez	914	const uint32_t x_pos,
415 :			const uint32_t y_pos,
416 :	chl	368	int16_t data[6 * 64],
417 :			int16_t qcoeff[6 * 64])
418 :			{
419 :			uint8_t cbp;
420 :	edgomez	965	uint32_t limit;
421 :	chl	368
422 :	edgomez	914	/*
423 :	edgomez	965	* There is no MBTrans8to16 for Inter block, that's done in motion compensation
424 :			* already
425 :	edgomez	914	*/
426 :	chl	368
427 :	edgomez	965	/* Perform DCT (and field decision) */
428 :			MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
429 :	edgomez	914
430 :	edgomez	965	/* Set the limit threshold: one higher than the base limit when quant == 1 */
431 :			limit = PVOP_TOOSMALL_LIMIT + ((pMB->quant == 1)? 1 : 0);
432 :	chl	368
433 :	edgomez	965	/* Quantize the block (bvop argument is 0 on this path) */
434 :	chl	995	cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 0, limit);
435 :	chl	368
436 :	edgomez	965	/* DeQuantize the block */
437 :			MBDeQuantInter(pParam, pMB->quant, data, qcoeff, cbp);
438 :	chl	368
439 :	edgomez	965	/* Perform inverse DCT*/
440 :			MBiDCT(data, cbp);
441 :	chl	368
442 :	edgomez	965	/* Transfer back the data -- Add the data */
443 :			MBTrans16to8(pParam, frame, pMB, x_pos, y_pos, data, 1, cbp);
444 :	syskin	984
445 :	edgomez	965	return(cbp);
446 :	chl	368	}
447 :			/*
			 * Inter-code one macroblock of a B-VOP.
			 *
			 * Same front half as MBTransQuantInter (forward DCT, quantize), but with
			 * the fixed BVOP_TOOSMALL_LIMIT threshold and the bvop argument of
			 * MBQuantInter() set to 1. The reconstruction half (dequantize, inverse
			 * DCT, add back to the image) only runs when XVID_REQORIGINAL is set in
			 * pParam->plugin_flags.
			 *
			 * Returns the coded block pattern computed by MBQuantInter().
			 */
448 :	edgomez	965	uint8_t
449 :			MBTransQuantInterBVOP(const MBParam * pParam,
450 :	chl	368	FRAMEINFO * frame,
451 :			MACROBLOCK * pMB,
452 :			const uint32_t x_pos,
453 :			const uint32_t y_pos,
454 :			int16_t data[6 * 64],
455 :	edgomez	965	int16_t qcoeff[6 * 64])
456 :	chl	368	{
457 :	edgomez	965	uint8_t cbp;
458 :			uint32_t limit;
459 :	syskin	984
460 :	edgomez	965	/*
461 :			* There is no MBTrans8to16 for Inter block, that's done in motion compensation
462 :			* already
463 :			*/
464 :	chl	368
465 :	edgomez	965	/* Perform DCT (and field decision) */
466 :			MBfDCT(pParam, frame, pMB, x_pos, y_pos, data);
467 :	chl	368
468 :	edgomez	965	/* Set the limit threshold */
469 :			limit = BVOP_TOOSMALL_LIMIT;
470 :	chl	368
471 :	edgomez	965	/* Quantize the block */
472 :	chl	995	cbp = MBQuantInter(pParam, frame, pMB, data, qcoeff, 1, limit);
473 :	chl	368
474 :	edgomez	965	/*
475 :			* History comment:
476 :			* We don't have to DeQuant, iDCT and Transfer back data for B-frames.
477 :			*
478 :			* BUT some plugins require the original frame to be passed so we have
479 :			* to take care of that here
480 :			*/
481 :			if((pParam->plugin_flags & XVID_REQORIGINAL)) {
482 :	chl	368
483 :	edgomez	965	/* DeQuantize the block */
484 :			MBDeQuantInter(pParam, pMB->quant, data, qcoeff, cbp);
485 :	chl	368
486 :	edgomez	965	/* Perform inverse DCT*/
487 :			MBiDCT(data, cbp);
488 :	h	69
489 :	edgomez	965	/* Transfer back the data -- Add the data */
490 :			MBTrans16to8(pParam, frame, pMB, x_pos, y_pos, data, 1, cbp);
491 :	edgomez	851	}
492 :
493 :	edgomez	965	return(cbp);
494 :	edgomez	851	}
495 :
496 :			/*
			 * if sum(diff between field lines) < sum(diff between frame lines), use field dct
			 *
			 * Only the four luma blocks (0..3) are examined. "frame" accumulates
			 * the absolute differences between vertically adjacent rows inside
			 * each block. "field" accumulates absolute differences between rows
			 * that are two image lines apart, using the blocks[]/lines[] tables
			 * to step through the left column (blocks 0 and 2); the "+ 64" terms
			 * do the same for the right column (blocks 1 and 3).
			 *
			 * Returns 1 when frame >= field + 350, otherwise 0.
			 */
497 :			uint32_t
498 :			MBFieldTest_c(int16_t data[6 * 64])
499 :			{
			/* offset of the block holding each even image line (0, 2, ..., 14) */
500 :	edgomez	195	const uint8_t blocks[] =
501 :			{ 0 * 64, 0 * 64, 0 * 64, 0 * 64, 2 * 64, 2 * 64, 2 * 64, 2 * 64 };
			/* offset of that line inside its block (rows 0, 2, 4, 6) */
502 :			const uint8_t lines[] = { 0, 16, 32, 48, 0, 16, 32, 48 };
503 :	edgomez	78
504 :	h	69	int frame = 0, field = 0;
505 :			int i, j;
506 :
507 :	edgomez	195	for (i = 0; i < 7; ++i) {
508 :			for (j = 0; j < 8; ++j) {
			/* frame measure: row i+1 vs row i within each luma block */
509 :			frame +=
510 :	edgomez	982	abs(data[0 * 64 + (i + 1) * 8 + j] - data[0 * 64 + i * 8 + j]);
511 :	edgomez	195	frame +=
512 :	edgomez	982	abs(data[1 * 64 + (i + 1) * 8 + j] - data[1 * 64 + i * 8 + j]);
513 :	edgomez	195	frame +=
514 :	edgomez	982	abs(data[2 * 64 + (i + 1) * 8 + j] - data[2 * 64 + i * 8 + j]);
515 :	edgomez	195	frame +=
516 :	edgomez	982	abs(data[3 * 64 + (i + 1) * 8 + j] - data[3 * 64 + i * 8 + j]);
517 :	h	69
			/* field measure: same-parity lines, even ("+ 0") and odd ("+ 8") rows */
518 :	edgomez	195	field +=
519 :	edgomez	982	abs(data[blocks[i + 1] + lines[i + 1] + j] -
520 :	edgomez	195	data[blocks[i] + lines[i] + j]);
521 :			field +=
522 :	edgomez	982	abs(data[blocks[i + 1] + lines[i + 1] + 8 + j] -
523 :	edgomez	195	data[blocks[i] + lines[i] + 8 + j]);
524 :			field +=
525 :	edgomez	982	abs(data[blocks[i + 1] + 64 + lines[i + 1] + j] -
526 :	edgomez	195	data[blocks[i] + 64 + lines[i] + j]);
527 :			field +=
528 :	edgomez	982	abs(data[blocks[i + 1] + 64 + lines[i + 1] + 8 + j] -
529 :	edgomez	195	data[blocks[i] + 64 + lines[i] + 8 + j]);
530 :	h	69	}
531 :			}
532 :
			/* field DCT must win by a margin of 350 before it is chosen */
533 :	edgomez	851	return (frame >= (field + 350));
534 :	h	69	}
535 :
536 :
/*
 * deinterlace Y blocks vertically
 *
 * The 16 luma lines of each macroblock column (blocks 0+2 on the left,
 * blocks 1+3 on the right) are reordered in place so that the even lines
 * end up in the upper block and the odd lines in the lower block.
 * Lines 0 and 15 already sit at their final position. The chroma blocks
 * (4 and 5) are not touched.
 */

/* Copy one 8-coefficient line; relies on a local "int16_t tmp[8]" for the size */
#define MOVLINE(X,Y) memcpy(X, Y, sizeof(tmp))
/* Address of row Y (0..7) of block X inside the local "data" buffer */
#define LINE(X,Y)	&data[(X)*64 + (Y)*8]

void
MBFrameToField(int16_t data[6 * 64])
{
	int16_t tmp[8];

	/*
	 * Each group below is one cycle of the line permutation, written as
	 * "destination=source" using line numbers 0..15 of the column
	 * (lines 8..15 live in the lower block).
	 */

	/* left blocks */

	/* 1=2, 2=4, 4=8, 8=1 */
	MOVLINE(tmp, LINE(0, 1));
	MOVLINE(LINE(0, 1), LINE(0, 2));
	MOVLINE(LINE(0, 2), LINE(0, 4));
	MOVLINE(LINE(0, 4), LINE(2, 0));
	MOVLINE(LINE(2, 0), tmp);

	/* 3=6, 6=12, 12=9, 9=3 */
	MOVLINE(tmp, LINE(0, 3));
	MOVLINE(LINE(0, 3), LINE(0, 6));
	MOVLINE(LINE(0, 6), LINE(2, 4));
	MOVLINE(LINE(2, 4), LINE(2, 1));
	MOVLINE(LINE(2, 1), tmp);

	/* 5=10, 10=5 */
	MOVLINE(tmp, LINE(0, 5));
	MOVLINE(LINE(0, 5), LINE(2, 2));
	MOVLINE(LINE(2, 2), tmp);

	/* 7=14, 14=13, 13=11, 11=7 */
	MOVLINE(tmp, LINE(0, 7));
	MOVLINE(LINE(0, 7), LINE(2, 6));
	MOVLINE(LINE(2, 6), LINE(2, 5));
	MOVLINE(LINE(2, 5), LINE(2, 3));
	MOVLINE(LINE(2, 3), tmp);

	/* right blocks */

	/* 1=2, 2=4, 4=8, 8=1 */
	MOVLINE(tmp, LINE(1, 1));
	MOVLINE(LINE(1, 1), LINE(1, 2));
	MOVLINE(LINE(1, 2), LINE(1, 4));
	MOVLINE(LINE(1, 4), LINE(3, 0));
	MOVLINE(LINE(3, 0), tmp);

	/* 3=6, 6=12, 12=9, 9=3 */
	MOVLINE(tmp, LINE(1, 3));
	MOVLINE(LINE(1, 3), LINE(1, 6));
	MOVLINE(LINE(1, 6), LINE(3, 4));
	MOVLINE(LINE(3, 4), LINE(3, 1));
	MOVLINE(LINE(3, 1), tmp);

	/* 5=10, 10=5 */
	MOVLINE(tmp, LINE(1, 5));
	MOVLINE(LINE(1, 5), LINE(3, 2));
	MOVLINE(LINE(3, 2), tmp);

	/* 7=14, 14=13, 13=11, 11=7 */
	MOVLINE(tmp, LINE(1, 7));
	MOVLINE(LINE(1, 7), LINE(3, 6));
	MOVLINE(LINE(3, 6), LINE(3, 5));
	MOVLINE(LINE(3, 5), LINE(3, 3));
	MOVLINE(LINE(3, 3), tmp);
}
603 :	chl	1011
604 :
605 :
606 :
607 :
608 :			/************************************************************************
609 :			* Trellis based R-D optimal quantization *
610 :			* *
611 :			* Trellis Quant code (C) 2003 Pascal Massimino skal(at)planet-d.net *
612 :			* *
613 :			************************************************************************/
614 :
615 :
/*
 * Placeholder for the MPEG-matrix trellis quantizer.
 *
 * No optimisation is performed: the arguments are ignored, Out is left
 * untouched and the constant 63 is returned.
 */
static int
dct_quantize_trellis_mpeg_c(int16_t *const Out, const int16_t *const In, int Q,
							const uint16_t * const Zigzag, int Non_Zero)
{
	(void)Out;
	(void)In;
	(void)Q;
	(void)Zigzag;
	(void)Non_Zero;

	return 63;
}
620 :
621 :
622 :			//////////////////////////////////////////////////////////
623 :			//
624 :			// Trellis-Based quantization
625 :			//
626 :			// So far I understand this paper:
627 :			//
628 :			// "Trellis-Based R-D Optimal Quantization in H.263+"
629 :			// J.Wen, M.Luttrell, J.Villasenor
630 :			// IEEE Transactions on Image Processing, Vol.9, No.8, Aug. 2000.
631 :			//
632 :			// we are dealing with a simplified Bellman-Ford / Dijkstra Single
633 :			// Source Shortest Path algo. But due to the underlying graph structure
634 :			// ("Trellis"), it can be turned into a dynamic programming algo,
635 :			// partially saving the explicit graph's nodes representation. And
636 :			// without using a heap, since the open frontier of the DAG is always
637 :			// known, and of fixed size.
638 :			//
639 :			//////////////////////////////////////////////////////////
640 :
641 :
642 :			//////////////////////////////////////////////////////////
643 :			// Code lengths for relevant levels.
			//
			// Each Code_LenN table has 64 entries. The trellis quantizer further
			// down indexes Code_Len20 and Code_Len24 with (Run - 1) and shifts the
			// value left by 16 before adding it to a cost.
			// NOTE(review): 30 is the filler value in every table -- presumably the
			// length charged for (run, level) pairs without a short code; confirm.
644 :
645 :			// let's factorize: identical rows are shared through the pointer tables
			// that follow these definitions
646 :			static const uint8_t Code_Len0[64] = {
647 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
648 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
649 :			static const uint8_t Code_Len1[64] = {
650 :			20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
651 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
652 :			static const uint8_t Code_Len2[64] = {
653 :			19,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
654 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
655 :			static const uint8_t Code_Len3[64] = {
656 :			18,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
657 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
658 :			static const uint8_t Code_Len4[64] = {
659 :			17,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
660 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
661 :			static const uint8_t Code_Len5[64] = {
662 :			16,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
663 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
664 :			static const uint8_t Code_Len6[64] = {
665 :			15,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
666 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
667 :			static const uint8_t Code_Len7[64] = {
668 :			13,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
669 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
670 :			static const uint8_t Code_Len8[64] = {
671 :			11,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
672 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
673 :			static const uint8_t Code_Len9[64] = {
674 :			12,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
675 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
676 :			static const uint8_t Code_Len10[64] = {
677 :			12,20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
678 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
679 :			static const uint8_t Code_Len11[64] = {
680 :			12,19,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
681 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
682 :			static const uint8_t Code_Len12[64] = {
683 :			11,17,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
684 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
685 :			static const uint8_t Code_Len13[64] = {
686 :			11,15,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
687 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
688 :			static const uint8_t Code_Len14[64] = {
689 :			10,12,19,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
690 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
691 :			static const uint8_t Code_Len15[64] = {
692 :			10,13,17,19,21,21,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
693 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
694 :			static const uint8_t Code_Len16[64] = {
695 :			9,12,13,18,18,19,19,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
696 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};
697 :			static const uint8_t Code_Len17[64] = {
698 :			8,11,13,14,14,14,15,19,19,19,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
699 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
700 :			static const uint8_t Code_Len18[64] = {
701 :			7, 9,11,11,13,13,13,15,15,15,16,22,22,22,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
702 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
703 :			static const uint8_t Code_Len19[64] = {
704 :			5, 7, 9,10,10,11,11,11,11,11,13,14,16,17,17,18,18,18,18,18,18,18,18,20,20,21,21,30,30,30,30,30,
705 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30 };
706 :			static const uint8_t Code_Len20[64] = {
707 :			3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9,10,10,10,10,10,10,10,10,12,12,13,13,12,13,14,15,15,
708 :			15,16,16,16,16,17,17,17,18,18,19,19,19,19,19,19,19,19,21,21,22,22,30,30,30,30,30,30,30,30,30,30 };
709 :
710 :			// a few more tables for the LAST table (referenced by B16_17_Code_Len_Last):
711 :			static const uint8_t Code_Len21[64] = {
712 :			13,20,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
713 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};
714 :			static const uint8_t Code_Len22[64] = {
715 :			12,15,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,
716 :			30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};
717 :			static const uint8_t Code_Len23[64] = {
718 :			10,12,15,15,15,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,20,20,20,
719 :			20,21,21,21,21,21,21,21,21,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30};
720 :			static const uint8_t Code_Len24[64] = {
721 :			5, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,10,10,10,10,10,10,10,10,11,11,11,11,12,12,12,
722 :			12,13,13,13,13,13,13,13,13,14,16,16,16,16,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19};
723 :
724 :			// Pointer tables selecting one Code_LenN row per level.
			// The first entry of each table is the row for level 1; several rows are
			// reused for more than one level (Code_Len3, Code_Len2, Code_Len1).
			// NOTE(review): the code that indexes these tables is outside this view;
			// presumably [abs(level) - 1][run] -- confirm against the caller.
725 :			static const uint8_t * const B16_17_Code_Len[24] = { // levels [1..24]
726 :			Code_Len20,Code_Len19,Code_Len18,Code_Len17,
727 :			Code_Len16,Code_Len15,Code_Len14,Code_Len13,
728 :			Code_Len12,Code_Len11,Code_Len10,Code_Len9,
729 :			Code_Len8, Code_Len7 ,Code_Len6 ,Code_Len5,
730 :			Code_Len4, Code_Len3, Code_Len3 ,Code_Len2,
731 :			Code_Len2, Code_Len1, Code_Len1, Code_Len1,
732 :			};
733 :
			// Same idea for the LAST table, which only covers six levels.
734 :			static const uint8_t * const B16_17_Code_Len_Last[6] = { // levels [1..6]
735 :			Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1,
736 :			};
737 :
/*
 * Per-quantizer weighting used by the trellis quantizer, which reads it as
 * Trellis_Lambda_Tabs[Q - 1] and multiplies squared errors by it (the
 * trellis code describes it as 1/lambda).
 *
 * Entry q - 1 holds 0xfe00 / (q * q) in integer arithmetic, for q = 1..31.
 * The macro argument and expansion are parenthesized so that TL() stays
 * correct for any expression argument.
 */
#define TL(q) (0xfe00 / ((q) * (q)))

static const int Trellis_Lambda_Tabs[31] = {
	TL( 1),TL( 2),TL( 3),TL( 4),TL( 5),TL( 6), TL( 7),
	TL( 8),TL( 9),TL(10),TL(11),TL(12),TL(13),TL(14), TL(15),
	TL(16),TL(17),TL(18),TL(19),TL(20),TL(21),TL(22), TL(23),
	TL(24),TL(25),TL(26),TL(27),TL(28),TL(29),TL(30), TL(31)
};
#undef TL
747 :
/*
 * Find the last non-zero coefficient in scan order.
 *
 * Starting at scan position i and walking down to 0, returns the first
 * position whose coefficient C[Zigzag[position]] is non-zero, or -1 when
 * every coefficient in [0..i] is zero (or when i is already negative).
 */
static __inline int
Find_Last(const int16_t *C, const uint16_t *Zigzag, int i)
{
	while (i >= 0) {
		if (C[Zigzag[i]])
			return i;
		i--;
	}
	return -1;
}
756 :
757 :			//////////////////////////////////////////////////////////
758 :	chl	1012	// this routine has been stripped of all debug code
759 :			//////////////////////////////////////////////////////////
760 :	chl	1011
761 :	chl	1012	static int
762 :			dct_quantize_trellis_h263_c(int16_t const Out, const int16_t const In, int Q, const uint16_t * const Zigzag, int Non_Zero)
763 :			{
764 :
765 :			// Note: We should search last non-zero coeffs on real DCT input coeffs (In[]),
766 :			// not quantized one (Out[]). However, it only improves the result very
767 :			// slightly (~0.01dB), whereas speed drops to crawling level :)
768 :			// Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps,
769 :
770 :			typedef struct { int16_t Run, Level; } NODE;
771 :
772 :			NODE Nodes[65], Last;
773 :			uint32_t Run_Costs0[64+1];
774 :			uint32_t * const Run_Costs = Run_Costs0 + 1;
775 :			const int Mult = 2*Q;
776 :			const int Bias = (Q-1) \| 1;
777 :			const int Lev0 = Mult + Bias;
778 :			const int Lambda = Trellis_Lambda_Tabs[Q-1]; // it's 1/lambda, actually
779 :
780 :			int Run_Start = -1;
781 :			uint32_t Min_Cost = 2<<16;
782 :
783 :			int Last_Node = -1;
784 :			uint32_t Last_Cost = 0;
785 :
786 :			int i, j;
787 :	suxen_drol	1014	Run_Costs[-1] = 2<<16; // source (w/ CBP penalty)
788 :	chl	1012
789 :			Non_Zero = Find_Last(Out, Zigzag, Non_Zero);
790 :			if (Non_Zero<0)
791 :			return -1;
792 :
793 :			for(i=0; i<=Non_Zero; i++)
794 :			{
795 :			const int AC = In[Zigzag[i]];
796 :			const int Level1 = Out[Zigzag[i]];
797 :			const int Dist0 = Lambda* AC*AC;
798 :			uint32_t Best_Cost = 0xf0000000;
799 :			Last_Cost += Dist0;
800 :
801 :			if ((uint32_t)(Level1+1)<3) // very specialized loop for -1,0,+1
802 :			{
803 :			int dQ;
804 :			int Run;
805 :			uint32_t Cost0;
806 :
807 :			if (AC<0) {
808 :			Nodes[i].Level = -1;
809 :			dQ = Lev0 + AC;
810 :			} else {
811 :			Nodes[i].Level = 1;
812 :			dQ = Lev0 - AC;
813 :			}
814 :			Cost0 = LambdadQdQ;
815 :
816 :			Nodes[i].Run = 1;
817 :			Best_Cost = (Code_Len20[0]<<16) + Run_Costs[i-1]+Cost0;
818 :			for(Run=i-Run_Start; Run>0; --Run)
819 :			{
820 :			const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];
821 :			const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<16);
822 :			const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<16);
823 :
824 :			// TODO: what about tie-breaks? Should we favor short runs or
825 :			// long runs? Although the error is the same, it would not be
826 :			// spread the same way along high and low frequencies...
827 :
828 :			// (I'd say: favour short runs => hifreq errors (HVS) -- gruel )
829 :
830 :			if (Cost<Best_Cost) {
831 :			Best_Cost = Cost;
832 :			Nodes[i].Run = Run;
833 :			}
834 :
835 :			if (lCost<Last_Cost) {
836 :			Last_Cost = lCost;
837 :			Last.Run = Run;
838 :			Last_Node = i;
839 :			}
840 :			}
841 :			if (Last_Node==i)
842 :			Last.Level = Nodes[i].Level;
843 :			}
844 :			else // "big" levels
845 :			{
846 :			const uint8_t Tbl_L1, Tbl_L2, Tbl_L1_Last, Tbl_L2_Last;
847 :			int Level2;
848 :			int dQ1, dQ2;
849 :			int Run;
850 :			uint32_t Dist1,Dist2;
851 :			int dDist21;
852 :
853 :			if (Level1>1) {
854 :			dQ1 = Level1*Mult-AC + Bias;
855 :			dQ2 = dQ1 - Mult;
856 :			Level2 = Level1-1;
857 :			Tbl_L1 = (Level1<=24) ? B16_17_Code_Len[Level1-1] : Code_Len0;
858 :			Tbl_L2 = (Level2<=24) ? B16_17_Code_Len[Level2-1] : Code_Len0;
859 :			Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;
860 :			Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;
861 :			} else { // Level1<-1
862 :			dQ1 = Level1*Mult-AC - Bias;
863 :			dQ2 = dQ1 + Mult;
864 :			Level2 = Level1 + 1;
865 :			Tbl_L1 = (Level1>=-24) ? B16_17_Code_Len[Level1^-1] : Code_Len0;
866 :			Tbl_L2 = (Level2>=-24) ? B16_17_Code_Len[Level2^-1] : Code_Len0;
867 :			Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0;
868 :			Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0;
869 :			}
870 :			Dist1 = LambdadQ1dQ1;
871 :			Dist2 = LambdadQ2dQ2;
872 :			dDist21 = Dist2-Dist1;
873 :
874 :			for(Run=i-Run_Start; Run>0; --Run)
875 :			{
876 :			const uint32_t Cost_Base = Dist1 + Run_Costs[i-Run];
877 :			uint32_t Cost1, Cost2;
878 :			int bLevel;
879 :
880 :			// for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:
881 :			// if (Cost_Base>=Best_Cost) continue;
882 :			// (? doesn't seem to have any effect -- gruel )
883 :
884 :			Cost1 = Cost_Base + (Tbl_L1[Run-1]<<16);
885 :			Cost2 = Cost_Base + (Tbl_L2[Run-1]<<16) + dDist21;
886 :
887 :			if (Cost2<Cost1) {
888 :			Cost1 = Cost2;
889 :			bLevel = Level2;
890 :			} else
891 :			bLevel = Level1;
892 :
893 :			if (Cost1<Best_Cost) {
894 :			Best_Cost = Cost1;
895 :			Nodes[i].Run = Run;
896 :			Nodes[i].Level = bLevel;
897 :			}
898 :
899 :			Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<16);
900 :			Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<16) + dDist21;
901 :
902 :			if (Cost2<Cost1) {
903 :			Cost1 = Cost2;
904 :			bLevel = Level2;
905 :			} else
906 :			bLevel = Level1;
907 :
908 :			if (Cost1<Last_Cost) {
909 :			Last_Cost = Cost1;
910 :			Last.Run = Run;
911 :			Last.Level = bLevel;
912 :			Last_Node = i;
913 :			}
914 :			} //end of "for Run"
915 :
916 :			}
917 :
918 :			Run_Costs[i] = Best_Cost;
919 :
920 :			if (Best_Cost < Min_Cost + Dist0) {
921 :			Min_Cost = Best_Cost;
922 :			Run_Start = i;
923 :			}
924 :			else
925 :			{
926 :			// as noticed by Michael Niedermayer (michaelni at gmx.at), there's
927 :			// a code shorter by 1 bit for a larger run (!), same level. We give
928 :			// it a chance by not moving the left barrier too much.
929 :
930 :			while( Run_Costs[Run_Start]>Min_Cost+(1<<16) )
931 :			Run_Start++;
932 :
933 :			// spread on preceding coeffs the cost incurred by skipping this one
934 :			for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;
935 :			Min_Cost += Dist0;
936 :			}
937 :			}
938 :
939 :			if (Last_Node<0)
940 :			return -1;
941 :
942 :			// reconstruct optimal sequence backward with surviving paths
943 :			memset(Out, 0x00, 64sizeof(Out));
944 :			Out[Zigzag[Last_Node]] = Last.Level;
945 :			i = Last_Node - Last.Run;
946 :			while(i>=0) {
947 :			Out[Zigzag[i]] = Nodes[i].Level;
948 :			i -= Nodes[i].Run;
949 :			}
950 :			return Last_Node;
951 :			}
952 :
953 :
954 :
955 :
956 :
957 :
958 :
959 :
960 :
961 :
962 :
963 :			//////////////////////////////////////////////////////////
964 :			// original version including heavy debugging info
965 :			//////////////////////////////////////////////////////////
966 :
967 :
968 :			#ifdef DBGTRELL
969 :
970 :	chl	1011	#define DBG 0
971 :
// Debug helper: recompute the rate/distortion cost of a coefficient block.
//
//   C      : levels to evaluate; the reference coefficients used for the
//            distortion term are read at C + 6*64
//   Mult   : reconstruction multiplier applied to each level
//   Bias   : reconstruction bias, added away from zero to non-zero levels
//   Zigzag : scan table mapping scan position -> coefficient index
//   Max    : scan position at which the search for the last non-zero starts
//   Lambda : weight applied to the summed squared error
//
// Returns Lambda*Dist + (Bits<<16) when DBG>0, and 0 otherwise.
static __inline uint32_t Evaluate_Cost(const int16_t *C, int Mult, int Bias,
									   const uint16_t * Zigzag, int Max, int Lambda)
{
#if (DBG>0)
	const int16_t * const Reference = C + 6*64;
	int LastPos;
	int Pos;
	int BitCount = 0;
	int SqErr = 0;
	uint32_t Total;

	// locate the last non-zero level at or before Max
	for (LastPos = Max; LastPos >= 0; LastPos--) {
		if (C[Zigzag[LastPos]] != 0)
			break;
	}

	if (LastPos >= 0) {
		int RunBegin = 0;
		int Level, Mag, Run;

		BitCount = 2;	// CBP
		Pos = 0;

		// every non-zero level before the last one
		while (Pos < LastPos) {
			while (C[Zigzag[Pos]] == 0)
				Pos++;
			if (Pos == LastPos)
				break;

			Level = C[Zigzag[Pos]];
			Run = Pos - RunBegin;
			Pos++;
			RunBegin = Pos;

			Mag = (Level < 0) ? -Level : Level;
			if (Mag <= 24)
				BitCount += B16_17_Code_Len[Mag-1][Run];
			else
				BitCount += 30;
		}

		// the last level is priced with the "last" tables
		Level = C[Zigzag[LastPos]];
		Run = Pos - RunBegin;
		Mag = (Level < 0) ? -Level : Level;
		if (Mag <= 6)
			BitCount += B16_17_Code_Len_Last[Mag-1][Run];
		else
			BitCount += 30;
	}

	// squared error between reconstructed levels and the reference
	for (Pos = 0; Pos <= LastPos; ++Pos) {
		int Recon = C[Zigzag[Pos]]*Mult;

		if (Recon > 0) {
			Recon += Bias;
		} else if (Recon < 0) {
			Recon -= Bias;
		}
		Recon -= Reference[Zigzag[Pos]];
		SqErr += Recon*Recon;
	}

	Total = Lambda*SqErr + (BitCount<<16);
	if (DBG==1)
		printf( " Last:%2d/%2d Cost = [(Bits=%5.0d) + Lambda*(Dist=%6.0d) = %d ] >>12= %d ", LastPos,Max, BitCount, SqErr, Total, Total>>12 );
	return Total;

#else
	return 0;
#endif
}
1031 :
1032 :
1033 :			static int
1034 :			dct_quantize_trellis_h263_c(int16_t const Out, const int16_t const In, int Q, const uint16_t * const Zigzag, int Non_Zero)
1035 :			{
1036 :
1037 :			// Note: We should search last non-zero coeffs on real DCT input coeffs (In[]),
1038 :			// not quantized one (Out[]). However, it only improves the result very
1039 :			// slightly (~0.01dB), whereas speed drops to crawling level :)
1040 :			// Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps,
1041 :
1042 :			typedef struct { int16_t Run, Level; } NODE;
1043 :
1044 :			NODE Nodes[65], Last;
1045 :	chl	1012	uint32_t Run_Costs0[64+1];
1046 :			uint32_t * const Run_Costs = Run_Costs0 + 1;
1047 :	chl	1011	const int Mult = 2*Q;
1048 :			const int Bias = (Q-1) \| 1;
1049 :			const int Lev0 = Mult + Bias;
1050 :			const int Lambda = Trellis_Lambda_Tabs[Q-1]; // it's 1/lambda, actually
1051 :
1052 :			int Run_Start = -1;
1053 :			Run_Costs[-1] = 2<<16; // source (w/ CBP penalty)
1054 :			uint32_t Min_Cost = 2<<16;
1055 :
1056 :			int Last_Node = -1;
1057 :			uint32_t Last_Cost = 0;
1058 :
1059 :	chl	1012	int i, j;
1060 :
1061 :	chl	1011	#if (DBG>0)
1062 :			Last.Level = 0; Last.Run = -1; // just initialize to smthg
1063 :			#endif
1064 :
1065 :			Non_Zero = Find_Last(Out, Zigzag, Non_Zero);
1066 :			if (Non_Zero<0)
1067 :			return -1;
1068 :
1069 :			for(i=0; i<=Non_Zero; i++)
1070 :			{
1071 :			const int AC = In[Zigzag[i]];
1072 :			const int Level1 = Out[Zigzag[i]];
1073 :			const int Dist0 = Lambda* AC*AC;
1074 :			uint32_t Best_Cost = 0xf0000000;
1075 :			Last_Cost += Dist0;
1076 :
1077 :			if ((uint32_t)(Level1+1)<3) // very specialized loop for -1,0,+1
1078 :			{
1079 :	chl	1012	int dQ;
1080 :			int Run;
1081 :			uint32_t Cost0;
1082 :	chl	1011
1083 :			if (AC<0) {
1084 :			Nodes[i].Level = -1;
1085 :			dQ = Lev0 + AC;
1086 :			} else {
1087 :			Nodes[i].Level = 1;
1088 :			dQ = Lev0 - AC;
1089 :			}
1090 :	chl	1012	Cost0 = LambdadQdQ;
1091 :
1092 :	chl	1011	Nodes[i].Run = 1;
1093 :			Best_Cost = (Code_Len20[0]<<16) + Run_Costs[i-1]+Cost0;
1094 :			for(Run=i-Run_Start; Run>0; --Run)
1095 :			{
1096 :			const uint32_t Cost_Base = Cost0 + Run_Costs[i-Run];
1097 :			const uint32_t Cost = Cost_Base + (Code_Len20[Run-1]<<16);
1098 :	chl	1012	const uint32_t lCost = Cost_Base + (Code_Len24[Run-1]<<16);
1099 :
1100 :	chl	1011	// TODO: what about tie-breaks? Should we favor short runs or
1101 :			// long runs? Although the error is the same, it would not be
1102 :			// spread the same way along high and low frequencies...
1103 :	chl	1012	if (Cost<Best_Cost) {
1104 :	chl	1011	Best_Cost = Cost;
1105 :			Nodes[i].Run = Run;
1106 :			}
1107 :	chl	1012
1108 :			if (lCost<Last_Cost) {
1109 :	chl	1011	Last_Cost = lCost;
1110 :			Last.Run = Run;
1111 :			Last_Node = i;
1112 :			}
1113 :			}
1114 :	chl	1012	if (Last_Node==i)
1115 :			Last.Level = Nodes[i].Level;
1116 :	chl	1011
1117 :			if (DBG==1) {
1118 :			Run_Costs[i] = Best_Cost;
1119 :			printf( "Costs #%2d: ", i);
1120 :			for(j=-1;j<=Non_Zero;++j) {
1121 :			if (j==Run_Start) printf( " %3.0d\|", Run_Costs[j]>>12 );
1122 :			else if (j>Run_Start && j<i) printf( " %3.0d\|", Run_Costs[j]>>12 );
1123 :			else if (j==i) printf( "(%3.0d)", Run_Costs[j]>>12 );
1124 :			else printf( " - \|" );
1125 :			}
1126 :			printf( "<%3.0d %2d %d>", Min_Cost>>12, Nodes[i].Level, Nodes[i].Run );
1127 :			printf( " Last:#%2d {%3.0d %2d %d}", Last_Node, Last_Cost>>12, Last.Level, Last.Run );
1128 :			printf( " AC:%3.0d Dist0:%3d Dist(%d)=%d", AC, Dist0>>12, Nodes[i].Level, Cost0>>12 );
1129 :			printf( "\n" );
1130 :			}
1131 :			}
1132 :			else // "big" levels
1133 :			{
1134 :			const uint8_t Tbl_L1, Tbl_L2, Tbl_L1_Last, Tbl_L2_Last;
1135 :			int Level2;
1136 :			int dQ1, dQ2;
1137 :			int Run;
1138 :	chl	1012	uint32_t Dist1,Dist2;
1139 :			int dDist21;
1140 :
1141 :	chl	1011	if (Level1>1) {
1142 :			dQ1 = Level1*Mult-AC + Bias;
1143 :			dQ2 = dQ1 - Mult;
1144 :			Level2 = Level1-1;
1145 :			Tbl_L1 = (Level1<=24) ? B16_17_Code_Len[Level1-1] : Code_Len0;
1146 :			Tbl_L2 = (Level2<=24) ? B16_17_Code_Len[Level2-1] : Code_Len0;
1147 :			Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0;
1148 :			Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0;
1149 :	chl	1012	} else { // Level1<-1
1150 :	chl	1011	dQ1 = Level1*Mult-AC - Bias;
1151 :			dQ2 = dQ1 + Mult;
1152 :			Level2 = Level1 + 1;
1153 :			Tbl_L1 = (Level1>=-24) ? B16_17_Code_Len[Level1^-1] : Code_Len0;
1154 :			Tbl_L2 = (Level2>=-24) ? B16_17_Code_Len[Level2^-1] : Code_Len0;
1155 :			Tbl_L1_Last = (Level1>=- 6) ? B16_17_Code_Len_Last[Level1^-1] : Code_Len0;
1156 :			Tbl_L2_Last = (Level2>=- 6) ? B16_17_Code_Len_Last[Level2^-1] : Code_Len0;
1157 :			}
1158 :	chl	1012	Dist1 = LambdadQ1dQ1;
1159 :			Dist2 = LambdadQ2dQ2;
1160 :			dDist21 = Dist2-Dist1;
1161 :	chl	1011
1162 :			for(Run=i-Run_Start; Run>0; --Run)
1163 :			{
1164 :			const uint32_t Cost_Base = Dist1 + Run_Costs[i-Run];
1165 :	chl	1012	uint32_t Cost1, Cost2;
1166 :			int bLevel;
1167 :	chl	1011
1168 :			// for sub-optimal (but slightly worth it, speed-wise) search, uncomment the following:
1169 :			// if (Cost_Base>=Best_Cost) continue;
1170 :
1171 :			Cost1 = Cost_Base + (Tbl_L1[Run-1]<<16);
1172 :			Cost2 = Cost_Base + (Tbl_L2[Run-1]<<16) + dDist21;
1173 :
1174 :	chl	1012	if (Cost2<Cost1) {
1175 :			Cost1 = Cost2;
1176 :			bLevel = Level2;
1177 :			} else
1178 :			bLevel = Level1;
1179 :	chl	1011
1180 :	chl	1012	if (Cost1<Best_Cost) {
1181 :	chl	1011	Best_Cost = Cost1;
1182 :			Nodes[i].Run = Run;
1183 :			Nodes[i].Level = bLevel;
1184 :			}
1185 :
1186 :			Cost1 = Cost_Base + (Tbl_L1_Last[Run-1]<<16);
1187 :			Cost2 = Cost_Base + (Tbl_L2_Last[Run-1]<<16) + dDist21;
1188 :
1189 :	chl	1012	if (Cost2<Cost1) {
1190 :			Cost1 = Cost2;
1191 :			bLevel = Level2;
1192 :			} else
1193 :			bLevel = Level1;
1194 :
1195 :			if (Cost1<Last_Cost) {
1196 :	chl	1011	Last_Cost = Cost1;
1197 :			Last.Run = Run;
1198 :			Last.Level = bLevel;
1199 :			Last_Node = i;
1200 :			}
1201 :	chl	1012	} //end of "for Run"
1202 :	chl	1011
1203 :			if (DBG==1) {
1204 :			Run_Costs[i] = Best_Cost;
1205 :			printf( "Costs #%2d: ", i);
1206 :			for(j=-1;j<=Non_Zero;++j) {
1207 :			if (j==Run_Start) printf( " %3.0d\|", Run_Costs[j]>>12 );
1208 :			else if (j>Run_Start && j<i) printf( " %3.0d\|", Run_Costs[j]>>12 );
1209 :			else if (j==i) printf( "(%3.0d)", Run_Costs[j]>>12 );
1210 :			else printf( " - \|" );
1211 :			}
1212 :			printf( "<%3.0d %2d %d>", Min_Cost>>12, Nodes[i].Level, Nodes[i].Run );
1213 :			printf( " Last:#%2d {%3.0d %2d %d}", Last_Node, Last_Cost>>12, Last.Level, Last.Run );
1214 :			printf( " AC:%3.0d Dist0:%3d Dist(%2d):%3d Dist(%2d):%3d", AC, Dist0>>12, Level1, Dist1>>12, Level2, Dist2>>12 );
1215 :			printf( "\n" );
1216 :			}
1217 :			}
1218 :
1219 :			Run_Costs[i] = Best_Cost;
1220 :
1221 :			if (Best_Cost < Min_Cost + Dist0) {
1222 :			Min_Cost = Best_Cost;
1223 :			Run_Start = i;
1224 :			}
1225 :			else
1226 :			{
1227 :			// as noticed by Michael Niedermayer (michaelni at gmx.at), there's
1228 :			// a code shorter by 1 bit for a larger run (!), same level. We give
1229 :			// it a chance by not moving the left barrier too much.
1230 :	chl	1012
1231 :	chl	1011	while( Run_Costs[Run_Start]>Min_Cost+(1<<16) )
1232 :			Run_Start++;
1233 :
1234 :			// spread on preceding coeffs the cost incurred by skipping this one
1235 :			for(j=Run_Start; j<i; ++j) Run_Costs[j] += Dist0;
1236 :			Min_Cost += Dist0;
1237 :			}
1238 :			}
1239 :
1240 :			if (DBG) {
1241 :			Last_Cost = Evaluate_Cost(Out,Mult,Bias, Zigzag,Non_Zero, Lambda);
1242 :			if (DBG==1) {
1243 :			printf( "=> " );
1244 :			for(i=0; i<=Non_Zero; ++i) printf( "[%3.0d] ", Out[Zigzag[i]] );
1245 :			printf( "\n" );
1246 :			}
1247 :			}
1248 :
1249 :			if (Last_Node<0)
1250 :			return -1;
1251 :
1252 :			// reconstruct optimal sequence backward with surviving paths
1253 :	chl	1012	memset(Out, 0x00, 64sizeof(Out));
1254 :	chl	1011	Out[Zigzag[Last_Node]] = Last.Level;
1255 :			i = Last_Node - Last.Run;
1256 :			while(i>=0) {
1257 :			Out[Zigzag[i]] = Nodes[i].Level;
1258 :			i -= Nodes[i].Run;
1259 :			}
1260 :
1261 :			if (DBG) {
1262 :			uint32_t Cost = Evaluate_Cost(Out,Mult,Bias, Zigzag,Non_Zero, Lambda);
1263 :			if (DBG==1) {
1264 :			printf( "<= " );
1265 :			for(i=0; i<=Last_Node; ++i) printf( "[%3.0d] ", Out[Zigzag[i]] );
1266 :			printf( "\n--------------------------------\n" );
1267 :			}
1268 :			if (Cost>Last_Cost) printf( "!!! %u > %u\n", Cost, Last_Cost );
1269 :			}
1270 :			return Last_Node;
1271 :			}
1272 :
1273 :			#undef DBG
1274 :	chl	1012
1275 :			#endif

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4