--- trunk/xvidcore/src/motion/gmc.c 2006/06/14 21:44:07 1709 +++ trunk/xvidcore/src/motion/gmc.c 2008/11/14 15:43:28 1794 @@ -19,7 +19,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: gmc.c,v 1.5 2006-06-14 21:44:07 Skal Exp $ + * $Id: gmc.c,v 1.8 2008-11-14 15:43:27 Isibaar Exp $ * ****************************************************************************/ @@ -27,6 +27,7 @@ #include "../global.h" #include "../encoder.h" #include "gmc.h" +#include "../utils/emms.h" #include @@ -399,6 +400,9 @@ extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets, const uint8_t * const Src0, const int BpS, const int Rounder); +extern void xvid_GMC_Core_Lin_8_sse41(uint8_t *Dst, const uint16_t * Offsets, + const uint8_t * const Src0, const int BpS, const int Rounder); + /* *************************************************************/ static void GMC_Core_Non_Lin_8(uint8_t *Dst, @@ -459,6 +463,7 @@ if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) { + uint32_t UV1, UV2; for(i=0; i<16; ++i) { uint32_t u = ( U >> 16 ) << rho; @@ -468,19 +473,19 @@ Offsets[16+i] = v; } // batch 8 input pixels when linearity says it's ok - uint32_t UV1, UV2; + UV1 = (Offsets[0] | (Offsets[16]<<16)) & 0xfff0fff0U; UV2 = (Offsets[7] | (Offsets[23]<<16)) & 0xfff0fff0U; if (UV1+7*16==UV2) - GMC_Core_Lin_8(dst, Offsets, src + (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride, srcstride, Rounder); + GMC_Core_Lin_8(dst, Offsets, src + (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride, srcstride, Rounder); else - GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder); + GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder); UV1 = (Offsets[ 8] | (Offsets[24]<<16)) & 0xfff0fff0U; UV2 = (Offsets[15] | (Offsets[31]<<16)) & 0xfff0fff0U; if (UV1+7*16==UV2) - GMC_Core_Lin_8(dst+8, Offsets+8, src + (Offsets[8]>>4) + (Offsets[24]>>4)*srcstride, srcstride, Rounder); + GMC_Core_Lin_8(dst+8, Offsets+8, src + (Offsets[8]>>4) + (Offsets[24]>>4)*srcstride, srcstride, Rounder); else - GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder); + GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder); } else { @@ -532,6 +537,7 @@ if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) { + uint32_t UV1, UV2; for(i=0; i<8; ++i) { int32_t u = ( U >> 16 ) << rho; @@ -540,14 +546,15 @@ Offsets[ i] = u; Offsets[16+i] = v; } - // batch 8 input pixels when linearity says it's ok - const uint32_t UV1 = (Offsets[ 0] | (Offsets[16]<<16)) & 0xfff0fff0U; - const uint32_t UV2 = (Offsets[ 7] | (Offsets[23]<<16)) & 0xfff0fff0U; - if (UV1+7*16==UV2) + + // batch 8 input pixels when linearity says it's ok + UV1 = (Offsets[ 0] | (Offsets[16]<<16)) & 0xfff0fff0U; + UV2 = (Offsets[ 7] | (Offsets[23]<<16)) & 0xfff0fff0U; + if (UV1+7*16==UV2) { - const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride; - GMC_Core_Lin_8(uDst, Offsets, uSrc+Off, srcstride, Rounder); - GMC_Core_Lin_8(vDst, Offsets, vSrc+Off, srcstride, Rounder); + const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride; + GMC_Core_Lin_8(uDst, Offsets, uSrc+Off, srcstride, Rounder); + GMC_Core_Lin_8(vDst, Offsets, vSrc+Off, srcstride, Rounder); } else { GMC_Core_Non_Lin_8(uDst, Offsets, uSrc, srcstride, Rounder); @@ -583,17 +590,23 @@ Predict_16x16_func = Predict_16x16_C; Predict_8x8_func = Predict_8x8_C; -#if 0 // #if defined(ARCH_IS_IA32) +#if defined(ARCH_IS_IA32) if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) || (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) || - (cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2)) + (cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2) || + (cpu_flags & XVID_CPU_SSE3) || (cpu_flags & XVID_CPU_SSE41)) { Predict_16x16_func = Predict_16x16_mmx; Predict_8x8_func = Predict_8x8_mmx; +#if 0 + if (cpu_flags & XVID_CPU_SSE41) + GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse41; + else +#endif if (cpu_flags & XVID_CPU_SSE2) GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2; else - GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_mmx; + GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_mmx; } #endif } @@ -734,4 +747,5 @@ pMBs[mbnum].mcsel = 0; /* until mode decision */ } + emms(); }