20 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
21 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 |
* |
* |
23 |
* $Id: decoder.c,v 1.55 2004-04-15 12:05:19 suxen_drol Exp $ |
* $Id: decoder.c,v 1.63 2004-07-24 11:46:08 edgomez Exp $ |
24 |
* |
* |
25 |
****************************************************************************/ |
****************************************************************************/ |
26 |
|
|
48 |
#include "image/interpolate8x8.h" |
#include "image/interpolate8x8.h" |
49 |
#include "image/reduced.h" |
#include "image/reduced.h" |
50 |
#include "image/font.h" |
#include "image/font.h" |
51 |
|
#include "image/qpel.h" |
52 |
|
|
53 |
#include "bitstream/mbcoding.h" |
#include "bitstream/mbcoding.h" |
54 |
#include "prediction/mbprediction.h" |
#include "prediction/mbprediction.h" |
62 |
#include "image/postprocessing.h" |
#include "image/postprocessing.h" |
63 |
#include "utils/mem_align.h" |
#include "utils/mem_align.h" |
64 |
|
|
65 |
|
#ifdef ARCH_IS_IA32 |
66 |
|
#define interpolate16x16_quarterpel new_interpolate16x16_quarterpel |
67 |
|
#define interpolate8x8_quarterpel new_interpolate8x8_quarterpel |
68 |
|
#endif |
69 |
|
|
70 |
static int |
static int |
71 |
decoder_resize(DECODER * dec) |
decoder_resize(DECODER * dec) |
72 |
{ |
{ |
228 |
dec->time = dec->time_base = dec->last_time_base = 0; |
dec->time = dec->time_base = dec->last_time_base = 0; |
229 |
dec->low_delay = 0; |
dec->low_delay = 0; |
230 |
dec->packed_mode = 0; |
dec->packed_mode = 0; |
231 |
|
dec->time_inc_resolution = 1; /* until VOL header says otherwise */ |
232 |
|
|
233 |
dec->fixed_dimensions = (dec->width > 0 && dec->height > 0); |
dec->fixed_dimensions = (dec->width > 0 && dec->height > 0); |
234 |
|
|
345 |
stop_coding_timer(); |
stop_coding_timer(); |
346 |
|
|
347 |
start_timer(); |
start_timer(); |
348 |
add_acdc(pMB, i, &block[i * 64], iDcScaler, predictors); |
add_acdc(pMB, i, &block[i * 64], iDcScaler, predictors, dec->bs_version); |
349 |
stop_prediction_timer(); |
stop_prediction_timer(); |
350 |
|
|
351 |
start_timer(); |
start_timer(); |
399 |
const int reduced_resolution, |
const int reduced_resolution, |
400 |
const MACROBLOCK * pMB) |
const MACROBLOCK * pMB) |
401 |
{ |
{ |
|
DECLARE_ALIGNED_MATRIX(block, 1, 64, int16_t, CACHE_LINE); |
|
402 |
DECLARE_ALIGNED_MATRIX(data, 6, 64, int16_t, CACHE_LINE); |
DECLARE_ALIGNED_MATRIX(data, 6, 64, int16_t, CACHE_LINE); |
403 |
|
|
404 |
int stride = dec->edged_width; |
int stride = dec->edged_width; |
407 |
int i; |
int i; |
408 |
const uint32_t iQuant = pMB->quant; |
const uint32_t iQuant = pMB->quant; |
409 |
const int direction = dec->alternate_vertical_scan ? 2 : 0; |
const int direction = dec->alternate_vertical_scan ? 2 : 0; |
410 |
const quant_interFuncPtr dequant = dec->quant_type == 0 ? dequant_h263_inter : dequant_mpeg_inter; |
typedef void (*get_inter_block_function_t)( |
411 |
|
Bitstream * bs, |
412 |
|
int16_t * block, |
413 |
|
int direction, |
414 |
|
const int quant, |
415 |
|
const uint16_t *matrix); |
416 |
|
|
417 |
|
const get_inter_block_function_t get_inter_block = (dec->quant_type == 0) |
418 |
|
? get_inter_block_h263 |
419 |
|
: get_inter_block_mpeg; |
420 |
|
|
421 |
|
memset(&data[0], 0, 6*64*sizeof(int16_t)); /* clear */ |
422 |
|
|
423 |
for (i = 0; i < 6; i++) { |
for (i = 0; i < 6; i++) { |
424 |
|
|
425 |
if (cbp & (1 << (5 - i))) { /* coded */ |
if (cbp & (1 << (5 - i))) { /* coded */ |
426 |
|
|
|
memset(block, 0, 64 * sizeof(int16_t)); /* clear */ |
|
427 |
|
|
428 |
|
/* Decode coeffs and dequantize on the fly */ |
429 |
start_timer(); |
start_timer(); |
430 |
get_inter_block(bs, block, direction); |
get_inter_block(bs, &data[i*64], direction, iQuant, get_inter_matrix(dec->mpeg_quant_matrices)); |
431 |
stop_coding_timer(); |
stop_coding_timer(); |
432 |
|
|
433 |
start_timer(); |
start_timer(); |
|
dequant(&data[i * 64], block, iQuant, dec->mpeg_quant_matrices); |
|
|
stop_iquant_timer(); |
|
|
|
|
|
start_timer(); |
|
434 |
idct(&data[i * 64]); |
idct(&data[i * 64]); |
435 |
stop_idct_timer(); |
stop_idct_timer(); |
436 |
} |
} |
827 |
mb_height = (dec->height + 31) / 32; |
mb_height = (dec->height + 31) / 32; |
828 |
} |
} |
829 |
|
|
830 |
|
if (!dec->is_edged[0]) { |
831 |
start_timer(); |
start_timer(); |
832 |
image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height, |
image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height, |
833 |
dec->width, dec->height, dec->bs_version); |
dec->width, dec->height, dec->bs_version); |
834 |
|
dec->is_edged[0] = 1; |
835 |
stop_edges_timer(); |
stop_edges_timer(); |
836 |
|
} |
837 |
|
|
838 |
if (gmc_warp) { |
if (gmc_warp) { |
839 |
/* accuracy: 0==1/2, 1=1/4, 2=1/8, 3=1/16 */ |
/* accuracy: 0==1/2, 1=1/4, 2=1/8, 3=1/16 */ |
1044 |
if (!direct) { |
if (!direct) { |
1045 |
uv_dx = pMB->mvs[0].x; |
uv_dx = pMB->mvs[0].x; |
1046 |
uv_dy = pMB->mvs[0].y; |
uv_dy = pMB->mvs[0].y; |
|
|
|
1047 |
b_uv_dx = pMB->b_mvs[0].x; |
b_uv_dx = pMB->b_mvs[0].x; |
1048 |
b_uv_dy = pMB->b_mvs[0].y; |
b_uv_dy = pMB->b_mvs[0].y; |
1049 |
|
|
1056 |
|
|
1057 |
uv_dx = (uv_dx >> 1) + roundtab_79[uv_dx & 0x3]; |
uv_dx = (uv_dx >> 1) + roundtab_79[uv_dx & 0x3]; |
1058 |
uv_dy = (uv_dy >> 1) + roundtab_79[uv_dy & 0x3]; |
uv_dy = (uv_dy >> 1) + roundtab_79[uv_dy & 0x3]; |
|
|
|
1059 |
b_uv_dx = (b_uv_dx >> 1) + roundtab_79[b_uv_dx & 0x3]; |
b_uv_dx = (b_uv_dx >> 1) + roundtab_79[b_uv_dx & 0x3]; |
1060 |
b_uv_dy = (b_uv_dy >> 1) + roundtab_79[b_uv_dy & 0x3]; |
b_uv_dy = (b_uv_dy >> 1) + roundtab_79[b_uv_dy & 0x3]; |
1061 |
|
|
1062 |
} else { |
} else { |
|
if(dec->quarterpel) { |
|
|
uv_dx = (pMB->mvs[0].x / 2) + (pMB->mvs[1].x / 2) + (pMB->mvs[2].x / 2) + (pMB->mvs[3].x / 2); |
|
|
uv_dy = (pMB->mvs[0].y / 2) + (pMB->mvs[1].y / 2) + (pMB->mvs[2].y / 2) + (pMB->mvs[3].y / 2); |
|
|
b_uv_dx = (pMB->b_mvs[0].x / 2) + (pMB->b_mvs[1].x / 2) + (pMB->b_mvs[2].x / 2) + (pMB->b_mvs[3].x / 2); |
|
|
b_uv_dy = (pMB->b_mvs[0].y / 2) + (pMB->b_mvs[1].y / 2) + (pMB->b_mvs[2].y / 2) + (pMB->b_mvs[3].y / 2); |
|
|
} else { |
|
1063 |
uv_dx = pMB->mvs[0].x + pMB->mvs[1].x + pMB->mvs[2].x + pMB->mvs[3].x; |
uv_dx = pMB->mvs[0].x + pMB->mvs[1].x + pMB->mvs[2].x + pMB->mvs[3].x; |
1064 |
uv_dy = pMB->mvs[0].y + pMB->mvs[1].y + pMB->mvs[2].y + pMB->mvs[3].y; |
uv_dy = pMB->mvs[0].y + pMB->mvs[1].y + pMB->mvs[2].y + pMB->mvs[3].y; |
1065 |
b_uv_dx = pMB->b_mvs[0].x + pMB->b_mvs[1].x + pMB->b_mvs[2].x + pMB->b_mvs[3].x; |
b_uv_dx = pMB->b_mvs[0].x + pMB->b_mvs[1].x + pMB->b_mvs[2].x + pMB->b_mvs[3].x; |
1066 |
b_uv_dy = pMB->b_mvs[0].y + pMB->b_mvs[1].y + pMB->b_mvs[2].y + pMB->b_mvs[3].y; |
b_uv_dy = pMB->b_mvs[0].y + pMB->b_mvs[1].y + pMB->b_mvs[2].y + pMB->b_mvs[3].y; |
1067 |
|
|
1068 |
|
if (dec->quarterpel) { |
1069 |
|
uv_dx /= 2; |
1070 |
|
uv_dy /= 2; |
1071 |
|
b_uv_dx /= 2; |
1072 |
|
b_uv_dy /= 2; |
1073 |
} |
} |
1074 |
|
|
1075 |
uv_dx = (uv_dx >> 3) + roundtab_76[uv_dx & 0xf]; |
uv_dx = (uv_dx >> 3) + roundtab_76[uv_dx & 0xf]; |
1153 |
interpolate8x8_avg2(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos, |
interpolate8x8_avg2(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos, |
1154 |
dec->cur.y + (16 * y_pos * stride) + 16 * x_pos, |
dec->cur.y + (16 * y_pos * stride) + 16 * x_pos, |
1155 |
dec->tmp.y + (16 * y_pos * stride) + 16 * x_pos, |
dec->tmp.y + (16 * y_pos * stride) + 16 * x_pos, |
1156 |
stride, 1, 8); |
stride, 0, 8); |
1157 |
|
|
1158 |
interpolate8x8_avg2(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
interpolate8x8_avg2(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
1159 |
dec->cur.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
dec->cur.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
1160 |
dec->tmp.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
dec->tmp.y + (16 * y_pos * stride) + 16 * x_pos + 8, |
1161 |
stride, 1, 8); |
stride, 0, 8); |
1162 |
|
|
1163 |
interpolate8x8_avg2(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
interpolate8x8_avg2(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
1164 |
dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
1165 |
dec->tmp.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
dec->tmp.y + ((16 * y_pos + 8) * stride) + 16 * x_pos, |
1166 |
stride, 1, 8); |
stride, 0, 8); |
1167 |
|
|
1168 |
interpolate8x8_avg2(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
interpolate8x8_avg2(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
1169 |
dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
1170 |
dec->tmp.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
dec->tmp.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8, |
1171 |
stride, 1, 8); |
stride, 0, 8); |
1172 |
|
|
1173 |
interpolate8x8_avg2(dec->cur.u + (8 * y_pos * stride2) + 8 * x_pos, |
interpolate8x8_avg2(dec->cur.u + (8 * y_pos * stride2) + 8 * x_pos, |
1174 |
dec->cur.u + (8 * y_pos * stride2) + 8 * x_pos, |
dec->cur.u + (8 * y_pos * stride2) + 8 * x_pos, |
1175 |
dec->tmp.u + (8 * y_pos * stride2) + 8 * x_pos, |
dec->tmp.u + (8 * y_pos * stride2) + 8 * x_pos, |
1176 |
stride2, 1, 8); |
stride2, 0, 8); |
1177 |
|
|
1178 |
interpolate8x8_avg2(dec->cur.v + (8 * y_pos * stride2) + 8 * x_pos, |
interpolate8x8_avg2(dec->cur.v + (8 * y_pos * stride2) + 8 * x_pos, |
1179 |
dec->cur.v + (8 * y_pos * stride2) + 8 * x_pos, |
dec->cur.v + (8 * y_pos * stride2) + 8 * x_pos, |
1180 |
dec->tmp.v + (8 * y_pos * stride2) + 8 * x_pos, |
dec->tmp.v + (8 * y_pos * stride2) + 8 * x_pos, |
1181 |
stride2, 1, 8); |
stride2, 0, 8); |
1182 |
|
|
1183 |
stop_comp_timer(); |
stop_comp_timer(); |
1184 |
|
|
1228 |
uint32_t x, y; |
uint32_t x, y; |
1229 |
VECTOR mv; |
VECTOR mv; |
1230 |
const VECTOR zeromv = {0,0}; |
const VECTOR zeromv = {0,0}; |
|
const int64_t TRB = dec->time_pp - dec->time_bp, TRD = dec->time_pp; |
|
1231 |
int i; |
int i; |
1232 |
|
|
1233 |
|
if (!dec->is_edged[0]) { |
1234 |
start_timer(); |
start_timer(); |
1235 |
image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height, |
image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height, |
1236 |
dec->width, dec->height, dec->bs_version); |
dec->width, dec->height, dec->bs_version); |
1237 |
|
dec->is_edged[0] = 1; |
1238 |
|
stop_edges_timer(); |
1239 |
|
} |
1240 |
|
|
1241 |
|
if (!dec->is_edged[1]) { |
1242 |
|
start_timer(); |
1243 |
image_setedges(&dec->refn[1], dec->edged_width, dec->edged_height, |
image_setedges(&dec->refn[1], dec->edged_width, dec->edged_height, |
1244 |
dec->width, dec->height, dec->bs_version); |
dec->width, dec->height, dec->bs_version); |
1245 |
|
dec->is_edged[1] = 1; |
1246 |
stop_edges_timer(); |
stop_edges_timer(); |
1247 |
|
} |
1248 |
|
|
1249 |
for (y = 0; y < dec->mb_height; y++) { |
for (y = 0; y < dec->mb_height; y++) { |
1250 |
/* Initialize Pred Motion Vector */ |
/* Initialize Pred Motion Vector */ |
1331 |
|
|
1332 |
case MODE_DIRECT_NONE_MV: |
case MODE_DIRECT_NONE_MV: |
1333 |
for (i = 0; i < 4; i++) { |
for (i = 0; i < 4; i++) { |
1334 |
mb->mvs[i].x = (int32_t) ((TRB * last_mb->mvs[i].x) / TRD + mv.x); |
mb->mvs[i].x = last_mb->mvs[i].x*dec->time_bp/dec->time_pp + mv.x; |
1335 |
mb->b_mvs[i].x = (int32_t) ((mv.x == 0) |
mb->mvs[i].y = last_mb->mvs[i].y*dec->time_bp/dec->time_pp + mv.y; |
1336 |
? ((TRB - TRD) * last_mb->mvs[i].x) / TRD |
|
1337 |
: mb->mvs[i].x - last_mb->mvs[i].x); |
mb->b_mvs[i].x = (mv.x) |
1338 |
mb->mvs[i].y = (int32_t) ((TRB * last_mb->mvs[i].y) / TRD + mv.y); |
? mb->mvs[i].x - last_mb->mvs[i].x |
1339 |
mb->b_mvs[i].y = (int32_t) ((mv.y == 0) |
: last_mb->mvs[i].x*(dec->time_bp - dec->time_pp)/dec->time_pp; |
1340 |
? ((TRB - TRD) * last_mb->mvs[i].y) / TRD |
mb->b_mvs[i].y = (mv.y) |
1341 |
: mb->mvs[i].y - last_mb->mvs[i].y); |
? mb->mvs[i].y - last_mb->mvs[i].y |
1342 |
|
: last_mb->mvs[i].y*(dec->time_bp - dec->time_pp)/dec->time_pp; |
1343 |
} |
} |
1344 |
|
|
1345 |
decoder_bf_interpolate_mbinter(dec, dec->refn[1], dec->refn[0], |
decoder_bf_interpolate_mbinter(dec, dec->refn[1], dec->refn[0], |
1515 |
goto repeat; |
goto repeat; |
1516 |
} |
} |
1517 |
|
|
1518 |
|
if(dec->frames == 0 && coding_type != I_VOP) { |
1519 |
|
/* 1st frame is not an i-vop */ |
1520 |
|
goto repeat; |
1521 |
|
} |
1522 |
|
|
1523 |
dec->p_bmv.x = dec->p_bmv.y = dec->p_fmv.x = dec->p_fmv.y = 0;	/* init both backward and forward pred vectors (x and y) to 0 */
dec->p_bmv.x = dec->p_bmv.y = dec->p_fmv.x = dec->p_fmv.y = 0;	/* init both backward and forward pred vectors (x and y) to 0 */
1524 |
|
|
1525 |
/* packed_mode: special-N_VOP treatment */
/* packed_mode: special-N_VOP treatment */
1569 |
} |
} |
1570 |
|
|
1571 |
image_swap(&dec->refn[0], &dec->refn[1]); |
image_swap(&dec->refn[0], &dec->refn[1]); |
1572 |
|
dec->is_edged[1] = dec->is_edged[0]; |
1573 |
image_swap(&dec->cur, &dec->refn[0]); |
image_swap(&dec->cur, &dec->refn[0]); |
1574 |
|
dec->is_edged[0] = 0; |
1575 |
SWAP(MACROBLOCK *, dec->mbs, dec->last_mbs); |
SWAP(MACROBLOCK *, dec->mbs, dec->last_mbs); |
1576 |
dec->last_reduced_resolution = reduced_resolution; |
dec->last_reduced_resolution = reduced_resolution; |
1577 |
dec->last_coding_type = coding_type; |
dec->last_coding_type = coding_type; |
1583 |
|
|
1584 |
if (dec->low_delay) { |
if (dec->low_delay) { |
1585 |
DPRINTF(XVID_DEBUG_ERROR, "warning: bvop found in low_delay==1 stream\n"); |
DPRINTF(XVID_DEBUG_ERROR, "warning: bvop found in low_delay==1 stream\n"); |
1586 |
dec->low_delay = 1; |
dec->low_delay = 0; |
1587 |
} |
} |
1588 |
|
|
1589 |
if (dec->frames < 2) { |
if (dec->frames < 2) { |