19 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
20 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 |
* |
* |
22 |
* $Id: gmc.c,v 1.6 2006-06-17 13:07:55 Isibaar Exp $ |
* $Id$ |
23 |
* |
* |
24 |
****************************************************************************/ |
****************************************************************************/ |
25 |
|
|
27 |
#include "../global.h" |
#include "../global.h" |
28 |
#include "../encoder.h" |
#include "../encoder.h" |
29 |
#include "gmc.h" |
#include "gmc.h" |
30 |
|
#include "../utils/emms.h" |
31 |
|
|
32 |
#include <stdio.h> |
#include <stdio.h> |
33 |
|
|
48 |
log2bin(uint32_t value) |
log2bin(uint32_t value) |
49 |
{ |
{ |
50 |
/* Changed by Chenm001 */ |
/* Changed by Chenm001 */ |
51 |
#if !defined(_MSC_VER) |
#if !defined(_MSC_VER) || defined(ARCH_IS_X86_64) |
52 |
int n = 0; |
int n = 0; |
53 |
|
|
54 |
while (value) { |
while (value) { |
269 |
{ |
{ |
270 |
const int W = This->sW; |
const int W = This->sW; |
271 |
const int H = This->sH; |
const int H = This->sH; |
272 |
const int rho = 3-This->accuracy; |
const int rho = 3-MIN(This->accuracy, 3); |
273 |
const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; |
const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; |
274 |
|
|
275 |
|
|
385 |
mv->y = RSHIFT(Dsp->Vo<<qpel, 3); |
mv->y = RSHIFT(Dsp->Vo<<qpel, 3); |
386 |
} |
} |
387 |
|
|
388 |
#if defined(ARCH_IS_IA32) |
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
389 |
/* ************************************************************* |
/* ************************************************************* |
390 |
* MMX core function |
* MMX core function |
391 |
*/ |
*/ |
400 |
extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets, |
extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets, |
401 |
const uint8_t * const Src0, const int BpS, const int Rounder); |
const uint8_t * const Src0, const int BpS, const int Rounder); |
402 |
|
|
403 |
|
extern void xvid_GMC_Core_Lin_8_sse41(uint8_t *Dst, const uint16_t * Offsets, |
404 |
|
const uint8_t * const Src0, const int BpS, const int Rounder); |
405 |
|
|
406 |
/* *************************************************************/ |
/* *************************************************************/ |
407 |
|
|
408 |
static void GMC_Core_Non_Lin_8(uint8_t *Dst, |
static void GMC_Core_Non_Lin_8(uint8_t *Dst, |
463 |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
464 |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
465 |
{ |
{ |
466 |
|
uint32_t UV1, UV2; |
467 |
for(i=0; i<16; ++i) |
for(i=0; i<16; ++i) |
468 |
{ |
{ |
469 |
uint32_t u = ( U >> 16 ) << rho; |
uint32_t u = ( U >> 16 ) << rho; |
472 |
Offsets[ i] = u; |
Offsets[ i] = u; |
473 |
Offsets[16+i] = v; |
Offsets[16+i] = v; |
474 |
} |
} |
|
|
|
|
{ |
|
475 |
// batch 8 input pixels when linearity says it's ok |
// batch 8 input pixels when linearity says it's ok |
476 |
uint32_t UV1, UV2; |
|
477 |
UV1 = (Offsets[0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
UV1 = (Offsets[0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
478 |
UV2 = (Offsets[7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
UV2 = (Offsets[7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
479 |
if (UV1+7*16==UV2) |
if (UV1+7*16==UV2) |
487 |
else |
else |
488 |
GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder); |
GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder); |
489 |
} |
} |
|
} |
|
490 |
else |
else |
491 |
{ |
{ |
492 |
for(i=0; i<16; ++i) |
for(i=0; i<16; ++i) |
537 |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && |
538 |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) |
539 |
{ |
{ |
540 |
|
uint32_t UV1, UV2; |
541 |
for(i=0; i<8; ++i) |
for(i=0; i<8; ++i) |
542 |
{ |
{ |
543 |
int32_t u = ( U >> 16 ) << rho; |
int32_t u = ( U >> 16 ) << rho; |
547 |
Offsets[16+i] = v; |
Offsets[16+i] = v; |
548 |
} |
} |
549 |
|
|
|
{ |
|
550 |
// batch 8 input pixels when linearity says it's ok |
// batch 8 input pixels when linearity says it's ok |
551 |
const uint32_t UV1 = (Offsets[ 0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
UV1 = (Offsets[ 0] | (Offsets[16]<<16)) & 0xfff0fff0U; |
552 |
const uint32_t UV2 = (Offsets[ 7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
UV2 = (Offsets[ 7] | (Offsets[23]<<16)) & 0xfff0fff0U; |
553 |
if (UV1+7*16==UV2) |
if (UV1+7*16==UV2) |
554 |
{ |
{ |
555 |
const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride; |
const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride; |
561 |
GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder); |
GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder); |
562 |
} |
} |
563 |
} |
} |
|
} |
|
564 |
else |
else |
565 |
{ |
{ |
566 |
for(i=0; i<8; ++i) |
for(i=0; i<8; ++i) |
590 |
Predict_16x16_func = Predict_16x16_C; |
Predict_16x16_func = Predict_16x16_C; |
591 |
Predict_8x8_func = Predict_8x8_C; |
Predict_8x8_func = Predict_8x8_C; |
592 |
|
|
593 |
#if defined(ARCH_IS_IA32) |
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) |
594 |
if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) || |
if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) || |
595 |
(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) || |
(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) || |
596 |
(cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2)) |
(cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2) || |
597 |
|
(cpu_flags & XVID_CPU_SSE3) || (cpu_flags & XVID_CPU_SSE41)) |
598 |
{ |
{ |
599 |
Predict_16x16_func = Predict_16x16_mmx; |
Predict_16x16_func = Predict_16x16_mmx; |
600 |
Predict_8x8_func = Predict_8x8_mmx; |
Predict_8x8_func = Predict_8x8_mmx; |
601 |
if (cpu_flags & XVID_CPU_SSE2) |
|
602 |
|
if (cpu_flags & XVID_CPU_SSE41) |
603 |
|
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse41; |
604 |
|
else if (cpu_flags & XVID_CPU_SSE2) |
605 |
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2; |
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2; |
606 |
else |
else |
607 |
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_mmx; |
GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_mmx; |
745 |
|
|
746 |
pMBs[mbnum].mcsel = 0; /* until mode decision */ |
pMBs[mbnum].mcsel = 0; /* until mode decision */ |
747 |
} |
} |
748 |
|
emms(); |
749 |
} |
} |