--- trunk/xvidcore/src/encoder.c 2005/03/27 03:59:42 1607 +++ trunk/xvidcore/src/encoder.c 2006/12/14 13:09:00 1766 @@ -21,7 +21,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: encoder.c,v 1.117 2005-03-27 03:59:42 suxen_drol Exp $ + * $Id: encoder.c,v 1.129 2006-12-14 13:09:00 Isibaar Exp $ * ****************************************************************************/ @@ -49,6 +49,9 @@ #include "quant/quant_matrix.h" #include "utils/mem_align.h" +# include "motion/motion_smp.h" + + /***************************************************************************** * Local function prototypes ****************************************************************************/ @@ -85,20 +88,31 @@ /* * Simplify the "fincr/fbase" fraction */ +static int +gcd(int a, int b) +{ + int r ; + + if (b > a) { + r = a; + a = b; + b = r; + } + + while ((r = a % b)) { + a = b; + b = r; + } + return b; +} + static void simplify_time(int *inc, int *base) { /* common factor */ - int i = *inc; - while (i > 1) { - if (*inc % i == 0 && *base % i == 0) { - *inc /= i; - *base /= i; - i = *inc; - continue; - } - i--; - } + const int s = gcd(*inc, *base); + *inc /= s; + *base /= s; if (*base > 65535 || *inc > 65535) { int *biggest; @@ -114,8 +128,8 @@ } div = ((float)*biggest)/((float)65535); - *biggest = (int)(((float)*biggest)/div); - *other = (int)(((float)*other)/div); + *biggest = (unsigned int)(((float)*biggest)/div); + *other = (unsigned int)(((float)*other)/div); } } @@ -124,7 +138,7 @@ enc_create(xvid_enc_create_t * create) { Encoder *pEnc; - int n; + int n; if (XVID_VERSION_MAJOR(create->version) != 1) /* v1.x.x */ return XVID_ERR_VERSION; @@ -161,7 +175,7 @@ pEnc->mbParam.fincr = MAX(create->fincr, 0); pEnc->mbParam.fbase = create->fincr <= 0 ? 
25 : create->fbase; if (pEnc->mbParam.fincr>0) - simplify_time(&pEnc->mbParam.fincr, &pEnc->mbParam.fbase); + simplify_time((int*)&pEnc->mbParam.fincr, (int*)&pEnc->mbParam.fbase); /* zones */ if(create->num_zones > 0) { @@ -192,7 +206,7 @@ memset(&pinfo, 0, sizeof(xvid_plg_info_t)); pinfo.version = XVID_VERSION; - if (create->plugins[n].func(0, XVID_PLG_INFO, &pinfo, 0) >= 0) { + if (create->plugins[n].func(NULL, XVID_PLG_INFO, &pinfo, NULL) >= 0) { pEnc->mbParam.plugin_flags |= pinfo.flags; } @@ -209,7 +223,7 @@ pcreate.param = create->plugins[n].param; pEnc->plugins[n].func = NULL; /* disable plugins that fail */ - if (create->plugins[n].func(0, XVID_PLG_CREATE, &pcreate, &pEnc->plugins[n].param) >= 0) { + if (create->plugins[n].func(NULL, XVID_PLG_CREATE, &pcreate, &pEnc->plugins[n].param) >= 0) { pEnc->plugins[n].func = create->plugins[n].func; } } @@ -227,6 +241,14 @@ goto xvid_err_memory1a; } + /* temp lambdas */ + if (pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + pEnc->temp_lambda = (float *) xvid_malloc(pEnc->mbParam.mb_width * + pEnc->mbParam.mb_height * 6 * sizeof(float), CACHE_LINE); + if (pEnc->temp_lambda == NULL) + goto xvid_err_memory1a; + } + /* bframes */ pEnc->mbParam.max_bframes = MAX(create->max_bframes, 0); pEnc->mbParam.bquant_ratio = MAX(create->bquant_ratio, 0); @@ -422,6 +444,36 @@ pEnc->iFrameNum = 0; pEnc->fMvPrevSigma = -1; + /* multithreaded stuff */ + if (create->num_threads > 0) { + int t = create->num_threads; + int rows_per_thread = (pEnc->mbParam.mb_height+t-1)/t; + pEnc->num_threads = t; + pEnc->motionData = xvid_malloc(t*sizeof(SMPmotionData), CACHE_LINE); + if (!pEnc->motionData) + goto xvid_err_nosmp; + + for (n = 0; n < t; n++) { + pEnc->motionData[n].complete_count_self = + xvid_malloc(rows_per_thread * sizeof(int), CACHE_LINE); + + if (!pEnc->motionData[n].complete_count_self) + goto xvid_err_nosmp; + + if (n != 0) + pEnc->motionData[n].complete_count_above = + pEnc->motionData[n-1].complete_count_self; + } + 
pEnc->motionData[0].complete_count_above = + pEnc->motionData[t-1].complete_count_self - 1; + + } else { + xvid_err_nosmp: + /* no SMP */ + create->num_threads = 0; + pEnc->motionData = NULL; + } + create->handle = (void *) pEnc; init_timer(); @@ -508,10 +560,14 @@ xvid_free(pEnc->temp_dquants); } + if(pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + xvid_free(pEnc->temp_lambda); + } + xvid_err_memory0: for (n=0; n<pEnc->num_plugins;n++) { if (pEnc->plugins[n].func) { - pEnc->plugins[n].func(pEnc->plugins[n].param, XVID_PLG_DESTROY, 0, 0); + pEnc->plugins[n].func(pEnc->plugins[n].param, XVID_PLG_DESTROY, NULL, NULL); } } xvid_free(pEnc->plugins); @@ -606,6 +662,9 @@ xvid_free(pEnc->temp_dquants); } + if ((pEnc->mbParam.plugin_flags & XVID_REQLAMBDA)) { + xvid_free(pEnc->temp_lambda); + } if (pEnc->num_plugins>0) { xvid_plg_destroy_t pdestroy; @@ -616,7 +675,7 @@ for (i=0; i<pEnc->num_plugins;i++) { if (pEnc->plugins[i].func) { - pEnc->plugins[i].func(pEnc->plugins[i].param, XVID_PLG_DESTROY, &pdestroy, 0); + pEnc->plugins[i].func(pEnc->plugins[i].param, XVID_PLG_DESTROY, &pdestroy, NULL); } } xvid_free(pEnc->plugins); @@ -624,9 +683,16 @@ xvid_free(pEnc->mbParam.mpeg_quant_matrices); - if (pEnc->num_plugins>0) + if (pEnc->num_zones > 0) xvid_free(pEnc->zones); + if (pEnc->num_threads > 0) { + for (i = 0; i < pEnc->num_threads; i++) + xvid_free(pEnc->motionData[i].complete_count_self); + + xvid_free(pEnc->motionData); + } + xvid_free(pEnc); return 0; /* ok */ @@ -640,7 +706,7 @@ static void call_plugins(Encoder * pEnc, FRAMEINFO * frame, IMAGE * original, int opt, int * type, int * quant, xvid_enc_stats_t * stats) { - unsigned int i, j; + unsigned int i, j, k; xvid_plg_data_t data; /* set data struct */ @@ -699,9 +765,19 @@ if ((pEnc->mbParam.plugin_flags & XVID_REQDQUANTS)) { data.dquant = pEnc->temp_dquants; data.dquant_stride = pEnc->mbParam.mb_width; - memset(data.dquant, 0, data.mb_width*data.mb_height); + memset(data.dquant, 0, data.mb_width*data.mb_height*sizeof(int)); } - 
+ + if(pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + int block = 0; + emms(); + data.lambda = pEnc->temp_lambda; + for(i = 0;i < pEnc->mbParam.mb_height; i++) + for(j = 0;j < pEnc->mbParam.mb_width; j++) + for (k = 0; k < 6; k++) + data.lambda[block++] = 1.0f; + } + } else { /* XVID_PLG_AFTER */ if ((pEnc->mbParam.plugin_flags & XVID_REQORIGINAL)) { data.original.csp = XVID_CSP_PLANAR; @@ -776,7 +852,7 @@ for (i=0; i<(unsigned int)pEnc->num_plugins;i++) { emms(); if (pEnc->plugins[i].func) { - if (pEnc->plugins[i].func(pEnc->plugins[i].param, opt, &data, 0) < 0) { + if (pEnc->plugins[i].func(pEnc->plugins[i].param, opt, &data, NULL) < 0) { continue; } } @@ -805,6 +881,23 @@ frame->mbs[j*pEnc->mbParam.mb_width + i].dquant = 0; } } + + if (pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + for (j = 0; j < pEnc->mbParam.mb_height; j++) + for (i = 0; i < pEnc->mbParam.mb_width; i++) + for (k = 0; k < 6; k++) { + frame->mbs[j*pEnc->mbParam.mb_width + i].lambda[k] = + (int) ((float)(1<<LAMBDA_EXP) * data.lambda[6 * (j * data.mb_width + i) + k]); + } + } else { + for (j = 0; j<pEnc->mbParam.mb_height; j++) + for (i = 0; i<pEnc->mbParam.mb_width; i++) + for (k = 0; k < 6; k++) { + frame->mbs[j*pEnc->mbParam.mb_width + i].lambda[k] = 1<<LAMBDA_EXP; + } + } + + frame->mbs[0].quant = data.quant; /* FRAME will not affect the quant in stats */ } @@ -874,24 +967,6 @@ #endif } -static int -gcd(int a, int b) -{ - int r ; - - if (b > a) { - r = a; - a = b; - b = r; - } - - while ((r = a % b)) { - a = b; - b = r; - } - return b; -} - static void simplify_par(int *par_width, int *par_height) { @@ -1017,7 +1092,7 @@ } FrameCodeB(pEnc, pEnc->bframes[pEnc->bframenum_head], &bs); - call_plugins(pEnc, pEnc->bframes[pEnc->bframenum_head], &pEnc->sOriginal2, XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->bframes[pEnc->bframenum_head], &pEnc->sOriginal2, XVID_PLG_AFTER, NULL, NULL, stats); pEnc->bframenum_head++; goto done; @@ -1049,7 +1124,7 @@ /* add the not-coded length to the reference frame size */ pEnc->current->length += (BitstreamPos(&bs) - bits) / 8; - call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, 
XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); /* flush complete: reset counters */ pEnc->flush_bframes = 0; @@ -1077,7 +1152,7 @@ pEnc->queue_head, pEnc->queue_tail, pEnc->queue_size); if (!(pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) && pEnc->mbParam.max_bframes > 0) { - call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); } /* if the very last frame is to be b-vop, we must change it to a p-vop */ @@ -1106,7 +1181,7 @@ if ((pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) && pEnc->bframenum_tail==0) { - call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); }else{ pEnc->flush_bframes = 1; goto done; @@ -1155,7 +1230,7 @@ type = frame->type; pEnc->current->quant = frame->quant; - call_plugins(pEnc, pEnc->current, NULL, XVID_PLG_BEFORE, &type, &pEnc->current->quant, stats); + call_plugins(pEnc, pEnc->current, NULL, XVID_PLG_BEFORE, &type, (int*)&pEnc->current->quant, stats); if (type > 0){ /* XVID_TYPE_?VOP */ type = type2coding(type); /* convert XVID_TYPE_?VOP to bitstream coding type */ @@ -1230,7 +1305,7 @@ if (!(pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) && pEnc->mbParam.max_bframes > 0) { if (pEnc->current->stamp > 0) { - call_plugins(pEnc, pEnc->reference, &pEnc->sOriginal, XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->reference, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); } else stats->type = XVID_TYPE_NOTHING; @@ -1353,7 +1428,9 @@ if ( FrameCodeP(pEnc, &bs) == 0 ) { /* N-VOP, we mustn't code b-frames yet */ - call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, 0, 0, stats); + if ((pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) || + pEnc->mbParam.max_bframes == 0) + call_plugins(pEnc, pEnc->current, 
&pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); goto done; } } @@ -1374,7 +1451,7 @@ /* packed or no-bframes or no-bframes-queued: output stats */ if ((pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) || pEnc->mbParam.max_bframes == 0 ) { - call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, 0, 0, stats); + call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); } /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1463,6 +1540,7 @@ BitstreamWriteVopHeader(bs, &pEnc->mbParam, pEnc->current, 1, pEnc->current->mbs[0].quant); pEnc->current->sStat.iTextBits = 0; + pEnc->current->sStat.iMVBits = 0; pEnc->current->sStat.kblks = mb_width * mb_height; pEnc->current->sStat.mblks = pEnc->current->sStat.ublks = 0; @@ -1561,8 +1639,8 @@ if ((current->vop_flags & XVID_VOP_HALFPEL)) { if (reference->is_interpolated != current->rounding_type) { start_timer(); - image_interpolate(pRef, &pEnc->vInterH, &pEnc->vInterV, - &pEnc->vInterHV, pParam->edged_width, + image_interpolate(pRef->y, pEnc->vInterH.y, pEnc->vInterV.y, + pEnc->vInterHV.y, pParam->edged_width, pParam->edged_height, (pParam->vol_flags & XVID_VOL_QUARTERPEL), current->rounding_type); @@ -1572,7 +1650,8 @@ } current->sStat.iTextBits = current->sStat.iMvSum = current->sStat.iMvCount = - current->sStat.kblks = current->sStat.mblks = current->sStat.ublks = 0; + current->sStat.kblks = current->sStat.mblks = current->sStat.ublks = + current->sStat.iMVBits = 0; current->coding_type = P_VOP; @@ -1631,10 +1710,54 @@ } } - MotionEstimation(&pEnc->mbParam, current, reference, - &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, - &pEnc->vGMC, 256*4096); + if (pEnc->num_threads > 0) { + /* multithreaded motion estimation - dispatch threads */ + + void * status; + int rows_per_thread = (pParam->mb_height + pEnc->num_threads - 1)/pEnc->num_threads; + + for (k = 0; k < pEnc->num_threads; k++) { + memset(pEnc->motionData[k].complete_count_self, 0, 
rows_per_thread * sizeof(int)); + pEnc->motionData[k].pParam = &pEnc->mbParam; + pEnc->motionData[k].current = current; + pEnc->motionData[k].reference = reference; + pEnc->motionData[k].pRefH = &pEnc->vInterH; + pEnc->motionData[k].pRefV = &pEnc->vInterV; + pEnc->motionData[k].pRefHV = &pEnc->vInterHV; + pEnc->motionData[k].pGMC = &pEnc->vGMC; + pEnc->motionData[k].y_step = pEnc->num_threads; + pEnc->motionData[k].start_y = k; + /* todo: sort out temp space once and for all */ + pEnc->motionData[k].RefQ = pEnc->vInterH.u + 16*k*pParam->edged_width; + } + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_create(&pEnc->motionData[k].handle, NULL, + (void*)MotionEstimateSMP, (void*)&pEnc->motionData[k]); + } + + MotionEstimateSMP(&pEnc->motionData[0]); + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_join(pEnc->motionData[k].handle, &status); + } + + current->fcode = 0; + for (k = 0; k < pEnc->num_threads; k++) { + current->sStat.iMvSum += pEnc->motionData[k].mvSum; + current->sStat.iMvCount += pEnc->motionData[k].mvCount; + if (pEnc->motionData[k].minfcode > current->fcode) + current->fcode = pEnc->motionData[k].minfcode; + } + + } else { + /* regular ME */ + + MotionEstimation(&pEnc->mbParam, current, reference, + &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, + &pEnc->vGMC, 256*4096); + } stop_motion_timer(); @@ -1764,7 +1887,7 @@ (pParam->frame_drop_ratio * mb_width * mb_height) / 100 && ( (pEnc->bframenum_head >= pEnc->bframenum_tail) || !(pEnc->mbParam.global_flags & XVID_GLOBAL_CLOSED_GOP)) ) { - current->sStat.kblks = current->sStat.mblks = 0; + current->sStat.kblks = current->sStat.mblks = current->sStat.iTextBits = 0; current->sStat.ublks = mb_width * mb_height; BitstreamReset(bs); @@ -1854,7 +1977,7 @@ if (pEnc->reference->is_interpolated != 0) { start_timer(); - image_interpolate(f_ref, &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, + image_interpolate(f_ref->y, pEnc->f_refh.y, pEnc->f_refv.y, pEnc->f_refhv.y, pEnc->mbParam.edged_width, 
pEnc->mbParam.edged_height, (pEnc->mbParam.vol_flags & XVID_VOL_QUARTERPEL), 0); stop_inter_timer(); @@ -1871,7 +1994,7 @@ if (pEnc->current->is_interpolated != 0) { start_timer(); - image_interpolate(b_ref, &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, + image_interpolate(b_ref->y, pEnc->vInterH.y, pEnc->vInterV.y, pEnc->vInterHV.y, pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, (pEnc->mbParam.vol_flags & XVID_VOL_QUARTERPEL), 0); stop_inter_timer(); @@ -1879,22 +2002,72 @@ } frame->coding_type = B_VOP; - call_plugins(pEnc, pEnc->current, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); + call_plugins(pEnc, frame, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); + + frame->fcode = frame->bcode = pEnc->current->fcode; start_timer(); - MotionEstimationBVOP(&pEnc->mbParam, frame, - ((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ - ((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ - pEnc->reference->mbs, f_ref, - &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, - pEnc->current, b_ref, &pEnc->vInterH, - &pEnc->vInterV, &pEnc->vInterHV); + if (pEnc->num_threads > 0) { + void * status; + int k; + /* multithreaded motion estimation - dispatch threads */ + int rows_per_thread = (pEnc->mbParam.mb_height + pEnc->num_threads - 1)/pEnc->num_threads; + + for (k = 0; k < pEnc->num_threads; k++) { + memset(pEnc->motionData[k].complete_count_self, 0, rows_per_thread * sizeof(int)); + pEnc->motionData[k].pParam = &pEnc->mbParam; + pEnc->motionData[k].current = frame; + pEnc->motionData[k].reference = pEnc->current; + pEnc->motionData[k].fRef = f_ref; + pEnc->motionData[k].fRefH = &pEnc->f_refh; + pEnc->motionData[k].fRefV = &pEnc->f_refv; + pEnc->motionData[k].fRefHV = &pEnc->f_refhv; + pEnc->motionData[k].pRef = b_ref; + pEnc->motionData[k].pRefH = &pEnc->vInterH; + pEnc->motionData[k].pRefV = &pEnc->vInterV; + pEnc->motionData[k].pRefHV = &pEnc->vInterHV; + pEnc->motionData[k].time_bp = (int32_t)(pEnc->current->stamp - frame->stamp); + 
pEnc->motionData[k].time_pp = (int32_t)(pEnc->current->stamp - pEnc->reference->stamp); + pEnc->motionData[k].y_step = pEnc->num_threads; + pEnc->motionData[k].start_y = k; + /* todo: sort out temp space once and for all */ + pEnc->motionData[k].RefQ = pEnc->vInterH.u + 16*k*pEnc->mbParam.edged_width; + } + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_create(&pEnc->motionData[k].handle, NULL, + (void*)SMPMotionEstimationBVOP, (void*)&pEnc->motionData[k]); + } + + SMPMotionEstimationBVOP(&pEnc->motionData[0]); + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_join(pEnc->motionData[k].handle, &status); + } + + frame->fcode = frame->bcode = 0; + for (k = 0; k < pEnc->num_threads; k++) { + if (pEnc->motionData[k].minfcode > frame->fcode) + frame->fcode = pEnc->motionData[k].minfcode; + if (pEnc->motionData[k].minbcode > frame->bcode) + frame->bcode = pEnc->motionData[k].minbcode; + } + } else { + MotionEstimationBVOP(&pEnc->mbParam, frame, + ((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ + ((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ + pEnc->reference->mbs, f_ref, + &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, + pEnc->current, b_ref, &pEnc->vInterH, + &pEnc->vInterV, &pEnc->vInterHV); + } stop_motion_timer(); set_timecodes(frame, pEnc->reference,pEnc->mbParam.fbase); BitstreamWriteVopHeader(bs, &pEnc->mbParam, frame, 1, frame->quant); frame->sStat.iTextBits = 0; + frame->sStat.iMVBits = 0; frame->sStat.iMvSum = 0; frame->sStat.iMvCount = 0; frame->sStat.kblks = frame->sStat.mblks = frame->sStat.ublks = 0; @@ -1945,7 +2118,6 @@ stop_coding_timer(); } } - emms(); BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */