--- trunk/xvidcore/src/encoder.c 2005/12/17 12:04:52 1665 +++ trunk/xvidcore/src/encoder.c 2006/12/14 13:09:00 1766 @@ -21,7 +21,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: encoder.c,v 1.122 2005-12-17 12:04:52 syskin Exp $ + * $Id: encoder.c,v 1.129 2006-12-14 13:09:00 Isibaar Exp $ * ****************************************************************************/ @@ -49,6 +49,9 @@ #include "quant/quant_matrix.h" #include "utils/mem_align.h" +# include "motion/motion_smp.h" + + /***************************************************************************** * Local function prototypes ****************************************************************************/ @@ -135,7 +138,7 @@ enc_create(xvid_enc_create_t * create) { Encoder *pEnc; - int n; + int n; if (XVID_VERSION_MAJOR(create->version) != 1) /* v1.x.x */ return XVID_ERR_VERSION; @@ -441,6 +444,36 @@ pEnc->iFrameNum = 0; pEnc->fMvPrevSigma = -1; + /* multithreaded stuff */ + if (create->num_threads > 0) { + int t = create->num_threads; + int rows_per_thread = (pEnc->mbParam.mb_height+t-1)/t; + pEnc->num_threads = t; + pEnc->motionData = xvid_malloc(t*sizeof(SMPmotionData), CACHE_LINE); + if (!pEnc->motionData) + goto xvid_err_nosmp; + + for (n = 0; n < t; n++) { + pEnc->motionData[n].complete_count_self = + xvid_malloc(rows_per_thread * sizeof(int), CACHE_LINE); + + if (!pEnc->motionData[n].complete_count_self) + goto xvid_err_nosmp; + + if (n != 0) + pEnc->motionData[n].complete_count_above = + pEnc->motionData[n-1].complete_count_self; + } + pEnc->motionData[0].complete_count_above = + pEnc->motionData[t-1].complete_count_self - 1; + + } else { + xvid_err_nosmp: + /* no SMP */ + create->num_threads = 0; + pEnc->motionData = NULL; + } + create->handle = (void *) pEnc; init_timer(); @@ -629,6 +662,9 @@ xvid_free(pEnc->temp_dquants); } + if ((pEnc->mbParam.plugin_flags & XVID_REQLAMBDA)) { + xvid_free(pEnc->temp_lambda); + } if (pEnc->num_plugins>0) { xvid_plg_destroy_t pdestroy; @@ -647,9 +683,16 @@ xvid_free(pEnc->mbParam.mpeg_quant_matrices); - if (pEnc->num_plugins>0) + if (pEnc->num_zones > 0) xvid_free(pEnc->zones); + if (pEnc->num_threads > 0) { + for (i = 0; i < pEnc->num_threads; i++) + xvid_free(pEnc->motionData[i].complete_count_self); + + xvid_free(pEnc->motionData); + } + xvid_free(pEnc); return 0; /* ok */ @@ -722,11 +765,12 @@ if ((pEnc->mbParam.plugin_flags & XVID_REQDQUANTS)) { data.dquant = pEnc->temp_dquants; data.dquant_stride = pEnc->mbParam.mb_width; - memset(data.dquant, 0, data.mb_width*data.mb_height); + memset(data.dquant, 0, data.mb_width*data.mb_height*sizeof(int)); } if(pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { int block = 0; + emms(); data.lambda = pEnc->temp_lambda; for(i = 0;i < pEnc->mbParam.mb_height; i++) for(j = 0;j < pEnc->mbParam.mb_width; j++) @@ -1384,7 +1428,9 @@ if ( FrameCodeP(pEnc, &bs) == 0 ) { /* N-VOP, we mustn't code b-frames yet */ - call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); + if ((pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) || + pEnc->mbParam.max_bframes == 0) + call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); goto done; } } @@ -1494,6 +1540,7 @@ BitstreamWriteVopHeader(bs, &pEnc->mbParam, pEnc->current, 1, pEnc->current->mbs[0].quant); pEnc->current->sStat.iTextBits = 0; + pEnc->current->sStat.iMVBits = 0; pEnc->current->sStat.kblks = mb_width * mb_height; pEnc->current->sStat.mblks = pEnc->current->sStat.ublks = 0; @@ -1603,7 +1650,8 @@ } current->sStat.iTextBits = current->sStat.iMvSum = current->sStat.iMvCount = - current->sStat.kblks = current->sStat.mblks = current->sStat.ublks = 0; + current->sStat.kblks = current->sStat.mblks = current->sStat.ublks = + current->sStat.iMVBits = 0; current->coding_type = P_VOP; @@ -1662,10 +1710,54 @@ } } - MotionEstimation(&pEnc->mbParam, current, reference, - &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, - &pEnc->vGMC, 256*4096); + if (pEnc->num_threads > 0) { + /* multithreaded motion estimation - dispatch threads */ + + void * status; + int rows_per_thread = (pParam->mb_height + pEnc->num_threads - 1)/pEnc->num_threads; + + for (k = 0; k < pEnc->num_threads; k++) { + memset(pEnc->motionData[k].complete_count_self, 0, rows_per_thread * sizeof(int)); + pEnc->motionData[k].pParam = &pEnc->mbParam; + pEnc->motionData[k].current = current; + pEnc->motionData[k].reference = reference; + pEnc->motionData[k].pRefH = &pEnc->vInterH; + pEnc->motionData[k].pRefV = &pEnc->vInterV; + pEnc->motionData[k].pRefHV = &pEnc->vInterHV; + pEnc->motionData[k].pGMC = &pEnc->vGMC; + pEnc->motionData[k].y_step = pEnc->num_threads; + pEnc->motionData[k].start_y = k; + /* todo: sort out temp space once and for all */ + pEnc->motionData[k].RefQ = pEnc->vInterH.u + 16*k*pParam->edged_width; + } + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_create(&pEnc->motionData[k].handle, NULL, + (void*)MotionEstimateSMP, (void*)&pEnc->motionData[k]); + } + + MotionEstimateSMP(&pEnc->motionData[0]); + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_join(pEnc->motionData[k].handle, &status); + } + + current->fcode = 0; + for (k = 0; k < pEnc->num_threads; k++) { + current->sStat.iMvSum += pEnc->motionData[k].mvSum; + current->sStat.iMvCount += pEnc->motionData[k].mvCount; + if (pEnc->motionData[k].minfcode > current->fcode) + current->fcode = pEnc->motionData[k].minfcode; + } + + } else { + /* regular ME */ + + MotionEstimation(&pEnc->mbParam, current, reference, + &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, + &pEnc->vGMC, 256*4096); + } stop_motion_timer(); @@ -1795,7 +1887,7 @@ (pParam->frame_drop_ratio * mb_width * mb_height) / 100 && ( (pEnc->bframenum_head >= pEnc->bframenum_tail) || !(pEnc->mbParam.global_flags & XVID_GLOBAL_CLOSED_GOP)) ) { - current->sStat.kblks = current->sStat.mblks = 0; + current->sStat.kblks = current->sStat.mblks = current->sStat.iTextBits = 0; current->sStat.ublks = mb_width * mb_height; BitstreamReset(bs); @@ -1912,20 +2004,70 @@ frame->coding_type = B_VOP; call_plugins(pEnc, frame, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); + frame->fcode = frame->bcode = pEnc->current->fcode; + start_timer(); - MotionEstimationBVOP(&pEnc->mbParam, frame, - ((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ - ((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ - pEnc->reference->mbs, f_ref, - &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, - pEnc->current, b_ref, &pEnc->vInterH, - &pEnc->vInterV, &pEnc->vInterHV); + if (pEnc->num_threads > 0) { + void * status; + int k; + /* multithreaded motion estimation - dispatch threads */ + int rows_per_thread = (pEnc->mbParam.mb_height + pEnc->num_threads - 1)/pEnc->num_threads; + + for (k = 0; k < pEnc->num_threads; k++) { + memset(pEnc->motionData[k].complete_count_self, 0, rows_per_thread * sizeof(int)); + pEnc->motionData[k].pParam = &pEnc->mbParam; + pEnc->motionData[k].current = frame; + pEnc->motionData[k].reference = pEnc->current; + pEnc->motionData[k].fRef = f_ref; + pEnc->motionData[k].fRefH = &pEnc->f_refh; + pEnc->motionData[k].fRefV = &pEnc->f_refv; + pEnc->motionData[k].fRefHV = &pEnc->f_refhv; + pEnc->motionData[k].pRef = b_ref; + pEnc->motionData[k].pRefH = &pEnc->vInterH; + pEnc->motionData[k].pRefV = &pEnc->vInterV; + pEnc->motionData[k].pRefHV = &pEnc->vInterHV; + pEnc->motionData[k].time_bp = (int32_t)(pEnc->current->stamp - frame->stamp); + pEnc->motionData[k].time_pp = (int32_t)(pEnc->current->stamp - pEnc->reference->stamp); + pEnc->motionData[k].y_step = pEnc->num_threads; + pEnc->motionData[k].start_y = k; + /* todo: sort out temp space once and for all */ + pEnc->motionData[k].RefQ = pEnc->vInterH.u + 16*k*pEnc->mbParam.edged_width; + } + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_create(&pEnc->motionData[k].handle, NULL, + (void*)SMPMotionEstimationBVOP, (void*)&pEnc->motionData[k]); + } + + SMPMotionEstimationBVOP(&pEnc->motionData[0]); + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_join(pEnc->motionData[k].handle, &status); + } + + frame->fcode = frame->bcode = 0; + for (k = 0; k < pEnc->num_threads; k++) { + if (pEnc->motionData[k].minfcode > frame->fcode) + frame->fcode = pEnc->motionData[k].minfcode; + if (pEnc->motionData[k].minbcode > frame->bcode) + frame->bcode = pEnc->motionData[k].minbcode; + } + } else { + MotionEstimationBVOP(&pEnc->mbParam, frame, + ((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ + ((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ + pEnc->reference->mbs, f_ref, + &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, + pEnc->current, b_ref, &pEnc->vInterH, + &pEnc->vInterV, &pEnc->vInterHV); + } stop_motion_timer(); set_timecodes(frame, pEnc->reference,pEnc->mbParam.fbase); BitstreamWriteVopHeader(bs, &pEnc->mbParam, frame, 1, frame->quant); frame->sStat.iTextBits = 0; + frame->sStat.iMVBits = 0; frame->sStat.iMvSum = 0; frame->sStat.iMvCount = 0; frame->sStat.kblks = frame->sStat.mblks = frame->sStat.ublks = 0; @@ -1976,7 +2118,6 @@ stop_coding_timer(); } } - emms(); BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */