19 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
20 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 |
* |
* |
22 |
* $Id: gmc.c,v 1.5 2006-06-14 21:44:07 Skal Exp $ |
* $Id: gmc.c,v 1.8 2008-11-14 15:43:27 Isibaar Exp $ |
23 |
* |
* |
24 |
****************************************************************************/ |
****************************************************************************/ |
25 |
|
|
27 |
#include "../global.h" |
#include "../global.h" |
28 |
#include "../encoder.h" |
#include "../encoder.h" |
29 |
#include "gmc.h" |
#include "gmc.h" |
30 |
|
#include "../utils/emms.h" |
31 |
|
|
32 |
#include <stdio.h> |
#include <stdio.h> |
33 |
|
|
400 |
extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets, |
extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets, |
401 |
const uint8_t * const Src0, const int BpS, const int Rounder); |
const uint8_t * const Src0, const int BpS, const int Rounder); |
402 |
|
|
403 |
|
extern void xvid_GMC_Core_Lin_8_sse41(uint8_t *Dst, const uint16_t * Offsets, |
404 |
|
const uint8_t * const Src0, const int BpS, const int Rounder); |
405 |
|
|
406 |
/* *************************************************************/ |
/* *************************************************************/ |
407 |
|
|
408 |
static void GMC_Core_Non_Lin_8(uint8_t *Dst, |
static void GMC_Core_Non_Lin_8(uint8_t *Dst, |
463 |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
464 |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
465 |
{ |
{ |
466 |
|
uint32_t UV1, UV2; |
467 |
for(i=0; i<16; ++i) |
for(i=0; i<16; ++i) |
468 |
{ |
{ |
469 |
uint32_t u = ( U >> 16 ) << rho; |
uint32_t u = ( U >> 16 ) << rho; |
473 |
Offsets[16+i] = v; |
Offsets[16+i] = v; |
474 |
} |
} |
475 |
// batch 8 input pixels when linearity says it's ok |
// batch 8 input pixels when linearity says it's ok |
476 |
uint32_t UV1, UV2; |
|
477 |
UV1 = (Offsets[0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
UV1 = (Offsets[0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
478 |
UV2 = (Offsets[7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
UV2 = (Offsets[7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
479 |
if (UV1+7*16==UV2) |
if (UV1+7*16==UV2) |
537 |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
538 |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
539 |
{ |
{ |
540 |
|
uint32_t UV1, UV2; |
541 |
for(i=0; i<8; ++i) |
for(i=0; i<8; ++i) |
542 |
{ |
{ |
543 |
int32_t u = ( U >> 16 ) << rho; |
int32_t u = ( U >> 16 ) << rho; |
546 |
Offsets[ i] = u; |
Offsets[ i] = u; |
547 |
Offsets[16+i] = v; |
Offsets[16+i] = v; |
548 |
} |
} |
549 |
|
|
550 |
// batch 8 input pixels when linearity says it's ok |
// batch 8 input pixels when linearity says it's ok |
551 |
const uint32_t UV1 = (Offsets[ 0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
UV1 = (Offsets[ 0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
552 |
const uint32_t UV2 = (Offsets[ 7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
UV2 = (Offsets[ 7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
553 |
if (UV1+7*16==UV2) |
if (UV1+7*16==UV2) |
554 |
{ |
{ |
555 |
const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride; |
const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride; |
590 |
Predict_16x16_func = Predict_16x16_C; |
Predict_16x16_func = Predict_16x16_C; |
591 |
Predict_8x8_func = Predict_8x8_C; |
Predict_8x8_func = Predict_8x8_C; |
592 |
|
|
593 |
#if 0 // #if defined(ARCH_IS_IA32) |
#if defined(ARCH_IS_IA32) |
594 |
if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) || |
if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) || |
595 |
(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) || |
(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) || |
596 |
(cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2)) |
(cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2) || |
597 |
|
(cpu_flags & XVID_CPU_SSE3) || (cpu_flags & XVID_CPU_SSE41)) |
598 |
{ |
{ |
599 |
Predict_16x16_func = Predict_16x16_mmx; |
Predict_16x16_func = Predict_16x16_mmx; |
600 |
Predict_8x8_func = Predict_8x8_mmx; |
Predict_8x8_func = Predict_8x8_mmx; |
601 |
|
#if 0 |
602 |
|
if (cpu_flags & XVID_CPU_SSE41) |
603 |
|
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse41; |
604 |
|
else |
605 |
|
#endif |
606 |
if (cpu_flags & XVID_CPU_SSE2) |
if (cpu_flags & XVID_CPU_SSE2) |
607 |
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2; |
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2; |
608 |
else |
else |
747 |
|
|
748 |
pMBs[mbnum].mcsel = 0; /* until mode decision */ |
pMBs[mbnum].mcsel = 0; /* until mode decision */ |
749 |
} |
} |
750 |
|
emms(); |
751 |
} |
} |