--- trunk/xvidcore/src/encoder.c 2004/04/04 11:47:21 1406 +++ trunk/xvidcore/src/encoder.c 2010/03/09 10:00:30 1883 @@ -3,9 +3,9 @@ * XVID MPEG-4 VIDEO CODEC * - Encoder main module - * - * Copyright(C) 2002 Michael Militzer - * 2002-2003 Peter Ross - * 2002 Daniel Smith + * Copyright(C) 2002-2010 Michael Militzer + * 2002-2003 Peter Ross + * 2002 Daniel Smith * * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: encoder.c,v 1.105 2004-04-04 11:47:21 syskin Exp $ + * $Id: encoder.c,v 1.131 2010-03-09 10:00:14 Isibaar Exp $ * ****************************************************************************/ @@ -49,6 +49,9 @@ #include "quant/quant_matrix.h" #include "utils/mem_align.h" +# include "motion/motion_smp.h" + + /***************************************************************************** * Local function prototypes ****************************************************************************/ @@ -85,26 +88,48 @@ /* * Simplify the "fincr/fbase" fraction */ +static int +gcd(int a, int b) +{ + int r ; + + if (b > a) { + r = a; + a = b; + b = r; + } + + while ((r = a % b)) { + a = b; + b = r; + } + return b; +} + static void simplify_time(int *inc, int *base) { /* common factor */ - int i = *inc; - while (i > 1) { - if (*inc % i == 0 && *base % i == 0) { - *inc /= i; - *base /= i; - i = *inc; - continue; - } - i--; - } - - /* if neccessary, round to 65535 accuracy */ - if (*base > 65535) { - float div = (float) *base / 65535; - *base = (int) (*base / div); - *inc = (int) (*inc / div); + const int s = gcd(*inc, *base); + *inc /= s; + *base /= s; + + if (*base > 65535 || *inc > 65535) { + int *biggest; + int *other; + float div; + + if (*base > *inc) { + biggest = base; + other = inc; + } else { + biggest = inc; + other = base; + } + + div = ((float)*biggest)/((float)65535); + *biggest = (unsigned int)(((float)*biggest)/div); + *other = (unsigned int)(((float)*other)/div); } } @@ -113,7 +138,7 @@ enc_create(xvid_enc_create_t * create) { Encoder *pEnc; - int n; + int n; if (XVID_VERSION_MAJOR(create->version) != 1) /* v1.x.x */ return XVID_ERR_VERSION; @@ -135,6 +160,8 @@ /* global flags */ pEnc->mbParam.global_flags = create->global; + if ((pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED)) + pEnc->mbParam.global_flags |= XVID_GLOBAL_DIVX5_USERDATA; /* width, height */ pEnc->mbParam.width = create->width; @@ -148,7 +175,7 @@ pEnc->mbParam.fincr = MAX(create->fincr, 0); pEnc->mbParam.fbase = create->fincr <= 0 ? 25 : create->fbase; if (pEnc->mbParam.fincr>0) - simplify_time(&pEnc->mbParam.fincr, &pEnc->mbParam.fbase); + simplify_time((int*)&pEnc->mbParam.fincr, (int*)&pEnc->mbParam.fbase); /* zones */ if(create->num_zones > 0) { @@ -179,7 +206,7 @@ memset(&pinfo, 0, sizeof(xvid_plg_info_t)); pinfo.version = XVID_VERSION; - if (create->plugins[n].func(0, XVID_PLG_INFO, &pinfo, 0) >= 0) { + if (create->plugins[n].func(NULL, XVID_PLG_INFO, &pinfo, NULL) >= 0) { pEnc->mbParam.plugin_flags |= pinfo.flags; } @@ -196,7 +223,7 @@ pcreate.param = create->plugins[n].param; pEnc->plugins[n].func = NULL; /* disable plugins that fail */ - if (create->plugins[n].func(0, XVID_PLG_CREATE, &pcreate, &pEnc->plugins[n].param) >= 0) { + if (create->plugins[n].func(NULL, XVID_PLG_CREATE, &pcreate, &pEnc->plugins[n].param) >= 0) { pEnc->plugins[n].func = create->plugins[n].func; } } @@ -214,6 +241,14 @@ goto xvid_err_memory1a; } + /* temp lambdas */ + if (pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + pEnc->temp_lambda = (float *) xvid_malloc(pEnc->mbParam.mb_width * + pEnc->mbParam.mb_height * 6 * sizeof(float), CACHE_LINE); + if (pEnc->temp_lambda == NULL) + goto xvid_err_memory1a; + } + /* bframes */ pEnc->mbParam.max_bframes = MAX(create->max_bframes, 0); pEnc->mbParam.bquant_ratio = MAX(create->bquant_ratio, 0); @@ -400,7 +435,7 @@ /* timestamp stuff */ pEnc->mbParam.m_stamp = 0; - pEnc->m_framenum = 0; + pEnc->m_framenum = create->start_frame_num; pEnc->current->stamp = 0; pEnc->reference->stamp = 0; @@ -409,6 +444,36 @@ pEnc->iFrameNum = 0; pEnc->fMvPrevSigma = -1; + /* multithreaded stuff */ + if (create->num_threads > 0) { + int t = create->num_threads; + int rows_per_thread = (pEnc->mbParam.mb_height+t-1)/t; + pEnc->num_threads = t; + pEnc->motionData = xvid_malloc(t*sizeof(SMPmotionData), CACHE_LINE); + if (!pEnc->motionData) + goto xvid_err_nosmp; + + for (n = 0; n < t; n++) { + pEnc->motionData[n].complete_count_self = + xvid_malloc(rows_per_thread * sizeof(int), CACHE_LINE); + + if (!pEnc->motionData[n].complete_count_self) + goto xvid_err_nosmp; + + if (n != 0) + pEnc->motionData[n].complete_count_above = + pEnc->motionData[n-1].complete_count_self; + } + pEnc->motionData[0].complete_count_above = + pEnc->motionData[t-1].complete_count_self - 1; + + } else { + xvid_err_nosmp: + /* no SMP */ + create->num_threads = 0; + pEnc->motionData = NULL; + } + create->handle = (void *) pEnc; init_timer(); @@ -495,10 +560,14 @@ xvid_free(pEnc->temp_dquants); } + if(pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + xvid_free(pEnc->temp_lambda); + } + xvid_err_memory0: for (n=0; nnum_plugins;n++) { if (pEnc->plugins[n].func) { - pEnc->plugins[n].func(pEnc->plugins[n].param, XVID_PLG_DESTROY, 0, 0); + pEnc->plugins[n].func(pEnc->plugins[n].param, XVID_PLG_DESTROY, NULL, NULL); } } xvid_free(pEnc->plugins); @@ -593,6 +662,9 @@ xvid_free(pEnc->temp_dquants); } + if ((pEnc->mbParam.plugin_flags & XVID_REQLAMBDA)) { + xvid_free(pEnc->temp_lambda); + } if (pEnc->num_plugins>0) { xvid_plg_destroy_t pdestroy; @@ -603,7 +675,7 @@ for (i=0; inum_plugins;i++) { if (pEnc->plugins[i].func) { - pEnc->plugins[i].func(pEnc->plugins[i].param, XVID_PLG_DESTROY, &pdestroy, 0); + pEnc->plugins[i].func(pEnc->plugins[i].param, XVID_PLG_DESTROY, &pdestroy, NULL); } } xvid_free(pEnc->plugins); @@ -611,9 +683,16 @@ xvid_free(pEnc->mbParam.mpeg_quant_matrices); - if (pEnc->num_plugins>0) + if (pEnc->num_zones > 0) xvid_free(pEnc->zones); + if (pEnc->num_threads > 0) { + for (i = 0; i < pEnc->num_threads; i++) + xvid_free(pEnc->motionData[i].complete_count_self); + + xvid_free(pEnc->motionData); + } + xvid_free(pEnc); return 0; /* ok */ @@ -627,7 +706,7 @@ static void call_plugins(Encoder * pEnc, FRAMEINFO * frame, IMAGE * original, int opt, int * type, int * quant, xvid_enc_stats_t * stats) { - unsigned int i, j; + unsigned int i, j, k; xvid_plg_data_t data; /* set data struct */ @@ -686,9 +765,19 @@ if ((pEnc->mbParam.plugin_flags & XVID_REQDQUANTS)) { data.dquant = pEnc->temp_dquants; data.dquant_stride = pEnc->mbParam.mb_width; - memset(data.dquant, 0, data.mb_width*data.mb_height); + memset(data.dquant, 0, data.mb_width*data.mb_height*sizeof(int)); } - + + if(pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + int block = 0; + emms(); + data.lambda = pEnc->temp_lambda; + for(i = 0;i < pEnc->mbParam.mb_height; i++) + for(j = 0;j < pEnc->mbParam.mb_width; j++) + for (k = 0; k < 6; k++) + data.lambda[block++] = 1.0f; + } + } else { /* XVID_PLG_AFTER */ if ((pEnc->mbParam.plugin_flags & XVID_REQORIGINAL)) { data.original.csp = XVID_CSP_PLANAR; @@ -763,7 +852,7 @@ for (i=0; i<(unsigned int)pEnc->num_plugins;i++) { emms(); if (pEnc->plugins[i].func) { - if (pEnc->plugins[i].func(pEnc->plugins[i].param, opt, &data, 0) < 0) { + if (pEnc->plugins[i].func(pEnc->plugins[i].param, opt, &data, NULL) < 0) { continue; } } @@ -792,6 +881,23 @@ frame->mbs[j*pEnc->mbParam.mb_width + i].dquant = 0; } } + + if (pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + for (j = 0; j < pEnc->mbParam.mb_height; j++) + for (i = 0; i < pEnc->mbParam.mb_width; i++) + for (k = 0; k < 6; k++) { + frame->mbs[j*pEnc->mbParam.mb_width + i].lambda[k] = + (int) ((float)(1<mbParam.mb_height; j++) + for (i = 0; imbParam.mb_width; i++) + for (k = 0; k < 6; k++) { + frame->mbs[j*pEnc->mbParam.mb_width + i].lambda[k] = 1<mbs[0].quant = data.quant; /* FRAME will not affect the quant in stats */ } @@ -861,24 +967,6 @@ #endif } -static int -gcd(int a, int b) -{ - int r ; - - if (b > a) { - r = a; - a = b; - b = r; - } - - while ((r = a % b)) { - a = b; - b = r; - } - return b; -} - static void simplify_par(int *par_width, int *par_height) { @@ -1004,7 +1092,7 @@ } FrameCodeB(pEnc, pEnc->bframes[pEnc->bframenum_head], &bs); - call_plugins(pEnc, pEnc->bframes[pEnc->bframenum_head], &pEnc->sOriginal2, XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->bframes[pEnc->bframenum_head], &pEnc->sOriginal2, XVID_PLG_AFTER, NULL, NULL, stats); pEnc->bframenum_head++; goto done; @@ -1036,7 +1124,7 @@ /* add the not-coded length to the reference frame size */ pEnc->current->length += (BitstreamPos(&bs) - bits) / 8; - call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); /* flush complete: reset counters */ pEnc->flush_bframes = 0; @@ -1064,7 +1152,7 @@ pEnc->queue_head, pEnc->queue_tail, pEnc->queue_size); if (!(pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) && pEnc->mbParam.max_bframes > 0) { - call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); } /* if the very last frame is to be b-vop, we must change it to a p-vop */ @@ -1093,7 +1181,7 @@ if ((pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) && pEnc->bframenum_tail==0) { - call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); }else{ pEnc->flush_bframes = 1; goto done; @@ -1142,7 +1230,7 @@ type = frame->type; pEnc->current->quant = frame->quant; - call_plugins(pEnc, pEnc->current, NULL, XVID_PLG_BEFORE, &type, &pEnc->current->quant, stats); + call_plugins(pEnc, pEnc->current, NULL, XVID_PLG_BEFORE, &type, (int*)&pEnc->current->quant, stats); if (type > 0){ /* XVID_TYPE_?VOP */ type = type2coding(type); /* convert XVID_TYPE_?VOP to bitstream coding type */ @@ -1217,10 +1305,11 @@ if (!(pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) && pEnc->mbParam.max_bframes > 0) { if (pEnc->current->stamp > 0) { - call_plugins(pEnc, pEnc->reference, &pEnc->sOriginal, XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->reference, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); } - else - stats->type = XVID_TYPE_NOTHING; + else if (stats) { + stats->type = XVID_TYPE_NOTHING; + } } /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1245,7 +1334,7 @@ SWAP(FRAMEINFO*, pEnc->current, pEnc->bframes[pEnc->bframenum_tail]); if ((pEnc->current->vop_flags & XVID_VOP_DEBUG)) { - image_printf(&pEnc->current->image, pEnc->mbParam.edged_width, pEnc->mbParam.height, 5, 100, "DX50 BVOP->PVOP"); + image_printf(&pEnc->current->image, pEnc->mbParam.edged_width, pEnc->mbParam.height, 5, 100, "CLOSED GOP BVOP->PVOP"); } /* convert B-VOP quant to P-VOP */ @@ -1306,9 +1395,6 @@ /* prevent vol/vop misuse */ - if (!(pEnc->current->vol_flags & XVID_VOL_REDUCED_ENABLE)) - pEnc->current->vop_flags &= ~XVID_VOP_REDUCED; - if (!(pEnc->current->vol_flags & XVID_VOL_INTERLACING)) pEnc->current->vop_flags &= ~(XVID_VOP_TOPFIELDFIRST|XVID_VOP_ALTERNATESCAN); @@ -1343,7 +1429,9 @@ if ( FrameCodeP(pEnc, &bs) == 0 ) { /* N-VOP, we mustn't code b-frames yet */ - call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, 0, 0, stats); + if ((pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) || + pEnc->mbParam.max_bframes == 0) + call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); goto done; } } @@ -1364,7 +1452,7 @@ /* packed or no-bframes or no-bframes-queued: output stats */ if ((pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) || pEnc->mbParam.max_bframes == 0 ) { - call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); } /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1436,20 +1524,6 @@ uint16_t x, y; - if ((pEnc->current->vol_flags & XVID_VOL_REDUCED_ENABLE)) - { - mb_width = (pEnc->mbParam.width + 31) / 32; - mb_height = (pEnc->mbParam.height + 31) / 32; - - /* 16x16->8x8 downsample requires 1 additional edge pixel*/ - /* XXX: setedges is overkill */ - start_timer(); - image_setedges(&pEnc->current->image, - pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, - pEnc->mbParam.width, pEnc->mbParam.height, 0); - stop_edges_timer(); - } - pEnc->mbParam.m_rounding_type = 1; pEnc->current->rounding_type = pEnc->mbParam.m_rounding_type; pEnc->current->coding_type = I_VOP; @@ -1467,6 +1541,7 @@ BitstreamWriteVopHeader(bs, &pEnc->mbParam, pEnc->current, 1, pEnc->current->mbs[0].quant); pEnc->current->sStat.iTextBits = 0; + pEnc->current->sStat.iMVBits = 0; pEnc->current->sStat.kblks = mb_width * mb_height; pEnc->current->sStat.mblks = pEnc->current->sStat.ublks = 0; @@ -1485,21 +1560,10 @@ stop_prediction_timer(); start_timer(); - if (pEnc->current->vop_flags & XVID_VOP_GREYSCALE) - { pMB->cbp &= 0x3C; /* keep only bits 5-2 */ - qcoeff[4*64+0]=0; /* zero, because for INTRA MBs DC value is saved */ - qcoeff[5*64+0]=0; - } MBCoding(pEnc->current, pMB, qcoeff, bs, &pEnc->current->sStat); stop_coding_timer(); } - if ((pEnc->current->vop_flags & XVID_VOP_REDUCED)) - { - image_deblock_rrv(&pEnc->current->image, pEnc->mbParam.edged_width, - pEnc->current->mbs, mb_width, mb_height, pEnc->mbParam.mb_width, - 16, 0); - } emms(); BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */ @@ -1515,25 +1579,43 @@ return 1; /* intra */ } +static __inline void +updateFcode(Statistics * sStat, Encoder * pEnc) +{ + float fSigma; + int iSearchRange; -#define INTRA_THRESHOLD 0.5 -#define BFRAME_SKIP_THRESHHOLD 30 + if (sStat->iMvCount == 0) + sStat->iMvCount = 1; + + fSigma = (float) sqrt((float) sStat->iMvSum / sStat->iMvCount); + + iSearchRange = 16 << pEnc->mbParam.m_fcode; + + if ((3.0 * fSigma > iSearchRange) && (pEnc->mbParam.m_fcode <= 5) ) + pEnc->mbParam.m_fcode++; + + else if ((5.0 * fSigma < iSearchRange) + && (4.0 * pEnc->fMvPrevSigma < iSearchRange) + && (pEnc->mbParam.m_fcode >= 2) ) + pEnc->mbParam.m_fcode--; + + pEnc->fMvPrevSigma = fSigma; +} +#define BFRAME_SKIP_THRESHHOLD 30 /* FrameCodeP also handles S(GMC)-VOPs */ static int FrameCodeP(Encoder * pEnc, Bitstream * bs) { - float fSigma; int bits = BitstreamPos(bs); DECLARE_ALIGNED_MATRIX(dct_codes, 6, 64, int16_t, CACHE_LINE); DECLARE_ALIGNED_MATRIX(qcoeff, 6, 64, int16_t, CACHE_LINE); int x, y, k; - int iSearchRange; - int skip_possible; FRAMEINFO *const current = pEnc->current; FRAMEINFO *const reference = pEnc->reference; MBParam * const pParam = &pEnc->mbParam; @@ -1541,17 +1623,8 @@ int mb_height = pParam->mb_height; int coded = 1; - - /* IMAGE *pCurrent = ¤t->image; */ IMAGE *pRef = &reference->image; - if ((current->vop_flags & XVID_VOP_REDUCED)) - { - mb_width = (pParam->width + 31) / 32; - mb_height = (pParam->height + 31) / 32; - } - - if (!reference->is_edged) { start_timer(); image_setedges(pRef, pParam->edged_width, pParam->edged_height, @@ -1567,8 +1640,8 @@ if ((current->vop_flags & XVID_VOP_HALFPEL)) { if (reference->is_interpolated != current->rounding_type) { start_timer(); - image_interpolate(pRef, &pEnc->vInterH, &pEnc->vInterV, - &pEnc->vInterHV, pParam->edged_width, + image_interpolate(pRef->y, pEnc->vInterH.y, pEnc->vInterV.y, + pEnc->vInterHV.y, pParam->edged_width, pParam->edged_height, (pParam->vol_flags & XVID_VOL_QUARTERPEL), current->rounding_type); @@ -1577,6 +1650,10 @@ } } + current->sStat.iTextBits = current->sStat.iMvSum = current->sStat.iMvCount = + current->sStat.kblks = current->sStat.mblks = current->sStat.ublks = + current->sStat.iMVBits = 0; + current->coding_type = P_VOP; call_plugins(pEnc, pEnc->current, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); @@ -1634,10 +1711,54 @@ } } - MotionEstimation(&pEnc->mbParam, current, reference, - &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, - &pEnc->vGMC, 256*4096); + if (pEnc->num_threads > 0) { + /* multithreaded motion estimation - dispatch threads */ + + void * status; + int rows_per_thread = (pParam->mb_height + pEnc->num_threads - 1)/pEnc->num_threads; + + for (k = 0; k < pEnc->num_threads; k++) { + memset(pEnc->motionData[k].complete_count_self, 0, rows_per_thread * sizeof(int)); + pEnc->motionData[k].pParam = &pEnc->mbParam; + pEnc->motionData[k].current = current; + pEnc->motionData[k].reference = reference; + pEnc->motionData[k].pRefH = &pEnc->vInterH; + pEnc->motionData[k].pRefV = &pEnc->vInterV; + pEnc->motionData[k].pRefHV = &pEnc->vInterHV; + pEnc->motionData[k].pGMC = &pEnc->vGMC; + pEnc->motionData[k].y_step = pEnc->num_threads; + pEnc->motionData[k].start_y = k; + /* todo: sort out temp space once and for all */ + pEnc->motionData[k].RefQ = pEnc->vInterH.u + 16*k*pParam->edged_width; + } + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_create(&pEnc->motionData[k].handle, NULL, + (void*)MotionEstimateSMP, (void*)&pEnc->motionData[k]); + } + + MotionEstimateSMP(&pEnc->motionData[0]); + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_join(pEnc->motionData[k].handle, &status); + } + + current->fcode = 0; + for (k = 0; k < pEnc->num_threads; k++) { + current->sStat.iMvSum += pEnc->motionData[k].mvSum; + current->sStat.iMvCount += pEnc->motionData[k].mvCount; + if (pEnc->motionData[k].minfcode > current->fcode) + current->fcode = pEnc->motionData[k].minfcode; + } + + } else { + /* regular ME */ + + MotionEstimation(&pEnc->mbParam, current, reference, + &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, + &pEnc->vGMC, 256*4096); + } stop_motion_timer(); @@ -1645,18 +1766,12 @@ BitstreamWriteVopHeader(bs, &pEnc->mbParam, current, 1, current->mbs[0].quant); - current->sStat.iTextBits = current->sStat.iMvSum = current->sStat.iMvCount = - current->sStat.kblks = current->sStat.mblks = current->sStat.ublks = 0; - - for (y = 0; y < mb_height; y++) { for (x = 0; x < mb_width; x++) { - MACROBLOCK *pMB = - ¤t->mbs[x + y * pParam->mb_width]; - - int bIntra = (pMB->mode == MODE_INTRA) || (pMB->mode == MODE_INTRA_Q); + MACROBLOCK *pMB = ¤t->mbs[x + y * pParam->mb_width]; + int skip_possible; - if (bIntra) { + if (pMB->mode == MODE_INTRA || pMB->mode == MODE_INTRA_Q) { CodeIntraMB(pEnc, pMB); MBTransQuantIntra(&pEnc->mbParam, current, pMB, x, y, dct_codes, qcoeff); @@ -1667,11 +1782,6 @@ current->sStat.kblks++; - if (pEnc->current->vop_flags & XVID_VOP_GREYSCALE) - { pMB->cbp &= 0x3C; /* keep only bits 5-2 */ - qcoeff[4*64+0]=0; /* zero, because for INTRA MBs DC value is saved */ - qcoeff[5*64+0]=0; - } MBCoding(current, pMB, qcoeff, bs, ¤t->sStat); stop_coding_timer(); continue; @@ -1686,16 +1796,14 @@ pParam->height, pParam->edged_width, (current->vol_flags & XVID_VOL_QUARTERPEL), - (current->vop_flags & XVID_VOP_REDUCED), current->rounding_type); stop_comp_timer(); pMB->field_pred = 0; - if (pMB->mode != MODE_NOT_CODED) - { pMB->cbp = - MBTransQuantInter(&pEnc->mbParam, current, pMB, x, y, + if (pMB->cbp != 0) { + pMB->cbp = MBTransQuantInter(&pEnc->mbParam, current, pMB, x, y, dct_codes, qcoeff); } @@ -1715,141 +1823,62 @@ /* Finished processing the MB, now check if to CODE or SKIP */ - skip_possible = (pMB->cbp == 0) && (pMB->mode == MODE_INTER) && - (pMB->dquant == 0); + skip_possible = (pMB->cbp == 0) && (pMB->mode == MODE_INTER); if (current->coding_type == S_VOP) skip_possible &= (pMB->mcsel == 1); - else if (current->coding_type == P_VOP) { - if ((pParam->vol_flags & XVID_VOL_QUARTERPEL)) - skip_possible &= ( (pMB->qmvs[0].x == 0) && (pMB->qmvs[0].y == 0) ); - else - skip_possible &= ( (pMB->mvs[0].x == 0) && (pMB->mvs[0].y == 0) ); + else { /* PVOP */ + const VECTOR * const mv = (pParam->vol_flags & XVID_VOL_QUARTERPEL) ? + pMB->qmvs : pMB->mvs; + skip_possible &= ((mv->x|mv->y) == 0); } - if ( (pMB->mode == MODE_NOT_CODED) || (skip_possible)) { - -/* This is a candidate for SKIPping, but for P-VOPs check intermediate B-frames first */ + if ((pMB->mode == MODE_NOT_CODED) || (skip_possible)) { + /* This is a candidate for SKIPping, but for P-VOPs check intermediate B-frames first */ + int bSkip = 1; - if (current->coding_type == P_VOP) /* special rule for P-VOP's SKIP */ - { - int bSkip = 1; + if (current->coding_type == P_VOP) { /* special rule for P-VOP's SKIP */ - for (k=pEnc->bframenum_head; k< pEnc->bframenum_tail; k++) - { + for (k = pEnc->bframenum_head; k < pEnc->bframenum_tail; k++) { int iSAD; iSAD = sad16(reference->image.y + 16*y*pParam->edged_width + 16*x, - pEnc->bframes[k]->image.y + 16*y*pParam->edged_width + 16*x, - pParam->edged_width,BFRAME_SKIP_THRESHHOLD); - if (iSAD >= BFRAME_SKIP_THRESHHOLD * pMB->quant) - { bSkip = 0; + pEnc->bframes[k]->image.y + 16*y*pParam->edged_width + 16*x, + pParam->edged_width, BFRAME_SKIP_THRESHHOLD * pMB->quant); + if (iSAD >= BFRAME_SKIP_THRESHHOLD * pMB->quant) { + bSkip = 0; /* could not SKIP */ + if (pParam->vol_flags & XVID_VOL_QUARTERPEL) { + VECTOR predMV = get_qpmv2(current->mbs, pParam->mb_width, 0, x, y, 0); + pMB->pmvs[0].x = - predMV.x; + pMB->pmvs[0].y = - predMV.y; + } else { + VECTOR predMV = get_pmv2(current->mbs, pParam->mb_width, 0, x, y, 0); + pMB->pmvs[0].x = - predMV.x; + pMB->pmvs[0].y = - predMV.y; + } + pMB->mode = MODE_INTER; + pMB->cbp = 0; break; } } - - if (!bSkip) { /* no SKIP, but trivial block */ - if((pParam->vol_flags & XVID_VOL_QUARTERPEL)) { - VECTOR predMV = get_qpmv2(current->mbs, pParam->mb_width, 0, x, y, 0); - pMB->pmvs[0].x = - predMV.x; - pMB->pmvs[0].y = - predMV.y; - } - else { - VECTOR predMV = get_pmv2(current->mbs, pParam->mb_width, 0, x, y, 0); - pMB->pmvs[0].x = - predMV.x; - pMB->pmvs[0].y = - predMV.y; - } - pMB->mode = MODE_INTER; - pMB->cbp = 0; - MBCoding(current, pMB, qcoeff, bs, ¤t->sStat); - stop_coding_timer(); - - continue; /* next MB */ - } } - /* do SKIP */ - - pMB->mode = MODE_NOT_CODED; - MBSkip(bs); - stop_coding_timer(); - continue; /* next MB */ - } - /* ordinary case: normal coded INTER/INTER4V block */ - - if ((current->vop_flags & XVID_VOP_GREYSCALE)) - { pMB->cbp &= 0x3C; /* keep only bits 5-2 */ - qcoeff[4*64+0]=0; /* zero, because DC for INTRA MBs DC value is saved */ - qcoeff[5*64+0]=0; - } - - if((pParam->vol_flags & XVID_VOL_QUARTERPEL)) { - VECTOR predMV = get_qpmv2(current->mbs, pParam->mb_width, 0, x, y, 0); - pMB->pmvs[0].x = pMB->qmvs[0].x - predMV.x; - pMB->pmvs[0].y = pMB->qmvs[0].y - predMV.y; - DPRINTF(XVID_DEBUG_MV,"mv_diff (%i,%i) pred (%i,%i) result (%i,%i)\n", pMB->pmvs[0].x, pMB->pmvs[0].y, predMV.x, predMV.y, pMB->mvs[0].x, pMB->mvs[0].y); - } else { - VECTOR predMV = get_pmv2(current->mbs, pParam->mb_width, 0, x, y, 0); - pMB->pmvs[0].x = pMB->mvs[0].x - predMV.x; - pMB->pmvs[0].y = pMB->mvs[0].y - predMV.y; - DPRINTF(XVID_DEBUG_MV,"mv_diff (%i,%i) pred (%i,%i) result (%i,%i)\n", pMB->pmvs[0].x, pMB->pmvs[0].y, predMV.x, predMV.y, pMB->mvs[0].x, pMB->mvs[0].y); - } - - - if (pMB->mode == MODE_INTER4V) - { int k; - for (k=1;k<4;k++) - { - if((pParam->vol_flags & XVID_VOL_QUARTERPEL)) { - VECTOR predMV = get_qpmv2(current->mbs, pParam->mb_width, 0, x, y, k); - pMB->pmvs[k].x = pMB->qmvs[k].x - predMV.x; - pMB->pmvs[k].y = pMB->qmvs[k].y - predMV.y; - DPRINTF(XVID_DEBUG_MV,"mv_diff (%i,%i) pred (%i,%i) result (%i,%i)\n", pMB->pmvs[k].x, pMB->pmvs[k].y, predMV.x, predMV.y, pMB->mvs[k].x, pMB->mvs[k].y); - } else { - VECTOR predMV = get_pmv2(current->mbs, pParam->mb_width, 0, x, y, k); - pMB->pmvs[k].x = pMB->mvs[k].x - predMV.x; - pMB->pmvs[k].y = pMB->mvs[k].y - predMV.y; - DPRINTF(XVID_DEBUG_MV,"mv_diff (%i,%i) pred (%i,%i) result (%i,%i)\n", pMB->pmvs[k].x, pMB->pmvs[k].y, predMV.x, predMV.y, pMB->mvs[k].x, pMB->mvs[k].y); - } + if (bSkip) { + /* do SKIP */ + pMB->mode = MODE_NOT_CODED; + MBSkip(bs); + stop_coding_timer(); + continue; /* next MB */ } } + /* ordinary case: normal coded INTER/INTER4V block */ MBCoding(current, pMB, qcoeff, bs, &pEnc->current->sStat); stop_coding_timer(); - } } - if ((current->vop_flags & XVID_VOP_REDUCED)) - { - image_deblock_rrv(¤t->image, pParam->edged_width, - current->mbs, mb_width, mb_height, pParam->mb_width, - 16, 0); - } - emms(); - - if (current->sStat.iMvCount == 0) - current->sStat.iMvCount = 1; - - fSigma = (float) sqrt((float) current->sStat.iMvSum / current->sStat.iMvCount); - - iSearchRange = 1 << (3 + pParam->m_fcode); - - if ((fSigma > iSearchRange / 3) - && (pParam->m_fcode <= (3 + (pParam->vol_flags & XVID_VOL_QUARTERPEL?1:0) ))) /* maximum search range 128 */ - { - pParam->m_fcode++; - iSearchRange *= 2; - } else if ((fSigma < iSearchRange / 6) - && (pEnc->fMvPrevSigma >= 0) - && (pEnc->fMvPrevSigma < iSearchRange / 6) - && (pParam->m_fcode >= (2 + (pParam->vol_flags & XVID_VOL_QUARTERPEL?1:0) ))) /* minimum search range 16 */ - { - pParam->m_fcode--; - iSearchRange /= 2; - } - - pEnc->fMvPrevSigma = fSigma; + updateFcode(¤t->sStat, pEnc); /* frame drop code */ #if 0 @@ -1859,7 +1888,7 @@ (pParam->frame_drop_ratio * mb_width * mb_height) / 100 && ( (pEnc->bframenum_head >= pEnc->bframenum_tail) || !(pEnc->mbParam.global_flags & XVID_GLOBAL_CLOSED_GOP)) ) { - current->sStat.kblks = current->sStat.mblks = 0; + current->sStat.kblks = current->sStat.mblks = current->sStat.iTextBits = 0; current->sStat.ublks = mb_width * mb_height; BitstreamReset(bs); @@ -1934,8 +1963,6 @@ fprintf(fp,"Y=%3d X=%3d MB=%2d CBP=%02X\n",y,x,mb->mode,mb->cbp); \ } - /* XXX: pEnc->current->global_flags &= ~XVID_VOP_REDUCED; reduced resoltion not yet supported */ - if (!first){ fp=fopen("C:\\XVIDDBGE.TXT","w"); } @@ -1951,7 +1978,7 @@ if (pEnc->reference->is_interpolated != 0) { start_timer(); - image_interpolate(f_ref, &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, + image_interpolate(f_ref->y, pEnc->f_refh.y, pEnc->f_refv.y, pEnc->f_refhv.y, pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, (pEnc->mbParam.vol_flags & XVID_VOL_QUARTERPEL), 0); stop_inter_timer(); @@ -1968,7 +1995,7 @@ if (pEnc->current->is_interpolated != 0) { start_timer(); - image_interpolate(b_ref, &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, + image_interpolate(b_ref->y, pEnc->vInterH.y, pEnc->vInterV.y, pEnc->vInterHV.y, pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, (pEnc->mbParam.vol_flags & XVID_VOL_QUARTERPEL), 0); stop_inter_timer(); @@ -1976,22 +2003,72 @@ } frame->coding_type = B_VOP; - call_plugins(pEnc, pEnc->current, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); + call_plugins(pEnc, frame, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); + + frame->fcode = frame->bcode = pEnc->current->fcode; start_timer(); - MotionEstimationBVOP(&pEnc->mbParam, frame, - ((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ - ((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ - pEnc->reference->mbs, f_ref, - &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, - pEnc->current, b_ref, &pEnc->vInterH, - &pEnc->vInterV, &pEnc->vInterHV); + if (pEnc->num_threads > 0) { + void * status; + int k; + /* multithreaded motion estimation - dispatch threads */ + int rows_per_thread = (pEnc->mbParam.mb_height + pEnc->num_threads - 1)/pEnc->num_threads; + + for (k = 0; k < pEnc->num_threads; k++) { + memset(pEnc->motionData[k].complete_count_self, 0, rows_per_thread * sizeof(int)); + pEnc->motionData[k].pParam = &pEnc->mbParam; + pEnc->motionData[k].current = frame; + pEnc->motionData[k].reference = pEnc->current; + pEnc->motionData[k].fRef = f_ref; + pEnc->motionData[k].fRefH = &pEnc->f_refh; + pEnc->motionData[k].fRefV = &pEnc->f_refv; + pEnc->motionData[k].fRefHV = &pEnc->f_refhv; + pEnc->motionData[k].pRef = b_ref; + pEnc->motionData[k].pRefH = &pEnc->vInterH; + pEnc->motionData[k].pRefV = &pEnc->vInterV; + pEnc->motionData[k].pRefHV = &pEnc->vInterHV; + pEnc->motionData[k].time_bp = (int32_t)(pEnc->current->stamp - frame->stamp); + pEnc->motionData[k].time_pp = (int32_t)(pEnc->current->stamp - pEnc->reference->stamp); + pEnc->motionData[k].y_step = pEnc->num_threads; + pEnc->motionData[k].start_y = k; + /* todo: sort out temp space once and for all */ + pEnc->motionData[k].RefQ = pEnc->vInterH.u + 16*k*pEnc->mbParam.edged_width; + } + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_create(&pEnc->motionData[k].handle, NULL, + (void*)SMPMotionEstimationBVOP, (void*)&pEnc->motionData[k]); + } + + SMPMotionEstimationBVOP(&pEnc->motionData[0]); + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_join(pEnc->motionData[k].handle, &status); + } + + frame->fcode = frame->bcode = 0; + for (k = 0; k < pEnc->num_threads; k++) { + if (pEnc->motionData[k].minfcode > frame->fcode) + frame->fcode = pEnc->motionData[k].minfcode; + if (pEnc->motionData[k].minbcode > frame->bcode) + frame->bcode = pEnc->motionData[k].minbcode; + } + } else { + MotionEstimationBVOP(&pEnc->mbParam, frame, + ((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ + ((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ + pEnc->reference->mbs, f_ref, + &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, + pEnc->current, b_ref, &pEnc->vInterH, + &pEnc->vInterV, &pEnc->vInterHV); + } stop_motion_timer(); set_timecodes(frame, pEnc->reference,pEnc->mbParam.fbase); BitstreamWriteVopHeader(bs, &pEnc->mbParam, frame, 1, frame->quant); frame->sStat.iTextBits = 0; + frame->sStat.iMVBits = 0; frame->sStat.iMvSum = 0; frame->sStat.iMvCount = 0; frame->sStat.kblks = frame->sStat.mblks = frame->sStat.ublks = 0; @@ -2006,36 +2083,35 @@ if (mb->mode == MODE_NOT_CODED) { if (pEnc->mbParam.plugin_flags & XVID_REQORIGINAL) { MBMotionCompensation(mb, x, y, f_ref, NULL, f_ref, NULL, NULL, &frame->image, - NULL, 0, 0, pEnc->mbParam.edged_width, 0, 0, 0); + NULL, 0, 0, pEnc->mbParam.edged_width, 0, 0); } - continue; } - if (mb->mode != MODE_DIRECT_NONE_MV || pEnc->mbParam.plugin_flags & XVID_REQORIGINAL) { + mb->quant = frame->quant; + + if (mb->cbp != 0 || pEnc->mbParam.plugin_flags & XVID_REQORIGINAL) { + /* we have to motion-compensate, transfer etc, + because there might be blocks to code */ + MBMotionCompensationBVOP(&pEnc->mbParam, mb, x, y, &frame->image, - f_ref, &pEnc->f_refh, &pEnc->f_refv, - &pEnc->f_refhv, b_ref, &pEnc->vInterH, - &pEnc->vInterV, &pEnc->vInterHV, - dct_codes); - - if (mb->mode == MODE_DIRECT_NO4V) mb->mode = MODE_DIRECT; - mb->quant = frame->quant; - - if (mb->mode != MODE_DIRECT_NONE_MV) - mb->cbp = MBTransQuantInterBVOP(&pEnc->mbParam, frame, mb, x, y, dct_codes, qcoeff); - - if ( (mb->mode == MODE_DIRECT) && (mb->cbp == 0) - && (mb->pmvs[3].x == 0) && (mb->pmvs[3].y == 0) ) { - mb->mode = MODE_DIRECT_NONE_MV; /* skipped */ - } + f_ref, &pEnc->f_refh, &pEnc->f_refv, + &pEnc->f_refhv, b_ref, &pEnc->vInterH, + &pEnc->vInterV, &pEnc->vInterHV, + dct_codes); + + mb->cbp = MBTransQuantInterBVOP(&pEnc->mbParam, frame, mb, x, y, dct_codes, qcoeff); } + + if (mb->mode == MODE_DIRECT_NO4V) + mb->mode = MODE_DIRECT; - /* keep only bits 5-2 -- Chroma blocks will just be skipped by the - * coding function for BFrames, that's why we don't zero teh DC - * coeffs */ - if ((frame->vop_flags & XVID_VOP_GREYSCALE)) - mb->cbp &= 0x3C; + if (mb->mode == MODE_DIRECT && (mb->cbp | mb->pmvs[3].x | mb->pmvs[3].y) == 0) + mb->mode = MODE_DIRECT_NONE_MV; /* skipped */ + else + if (frame->vop_flags & XVID_VOP_GREYSCALE) + /* keep only bits 5-2 -- Chroma blocks will just be skipped by MBCodingBVOP */ + mb->cbp &= 0x3C; start_timer(); MBCodingBVOP(frame, mb, qcoeff, frame->fcode, frame->bcode, bs, @@ -2043,11 +2119,8 @@ stop_coding_timer(); } } - emms(); - /* TODO: dynamic fcode/bcode ??? */ - BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */ frame->length = (BitstreamPos(bs) - bits) / 8;