[svn] / branches / dev-api-4 / xvidcore / examples / xvid_bench.c Repository:
ViewVC logotype

View of /branches/dev-api-4/xvidcore/examples/xvid_bench.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1220 - (download) (annotate)
Wed Nov 19 21:27:25 2003 UTC (20 years, 4 months ago) by edgomez
File size: 43432 byte(s)
updated crc for transfer_8to16_sub2
/*****************************************************************************
 *
 *  XVID MPEG-4 VIDEO CODEC
 *  - Unit tests and benches -
 *
 *  Copyright(C) 2002 Pascal Massimino <skal@planet-d.net>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 * $Id: xvid_bench.c,v 1.9.2.8 2003-11-19 21:27:25 edgomez Exp $
 *
 ****************************************************************************/

/*****************************************************************************
 *                            
 *  'Reference' output is at the end of file.
 *  Don't take the checksums and crc too seriously, they aren't
 *  bullet-proof (should plug some .md5 here)...
 *
 *   compiles with something like:
 *   gcc -o xvid_bench xvid_bench.c  -I../src/ -lxvidcore -lm
 *
 ****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>    /* for memset */
#include <assert.h>

#ifndef WIN32
#include <sys/time.h>	/* for gettimeofday */
#else
#include <time.h>
#endif


#include "xvid.h"

// inner guts
#include "dct/idct.h"
#include "dct/fdct.h"
#include "image/colorspace.h"
#include "image/interpolate8x8.h"
#include "utils/mem_transfer.h"
#include "quant/quant.h"
#include "motion/sad.h"
#include "utils/emms.h"
#include "utils/timer.h"
#include "quant/quant_matrix.c"
#include "bitstream/cbp.h"

#include <math.h>

#ifndef M_PI
#define M_PI		3.14159265358979323846
#endif

/* Common multiplier applied to the iteration count (nb_tests) of the timed tests below. */
const int speed_ref = 100;  /* on slow machines, decrease this value */

/*********************************************************************
 * misc
 *********************************************************************/

/* returns time in micro-s*/
/*
 * Returns a timestamp expressed in microseconds, as a double.
 *
 * Non-WIN32 builds read gettimeofday(); WIN32 builds convert clock() ticks.
 * Only differences between two returned values are meaningful to the
 * benchmarks in this file.
 */
double gettime_usec()
{
#ifndef WIN32
	struct timeval  tv;
	gettimeofday(&tv, 0);
	return tv.tv_sec*1.0e6 + tv.tv_usec;
#else
	/*
	 * Scale in floating point: clock_t can be a 32-bit integer type, in
	 * which case the integer product clk*1000000 overflows after a few
	 * thousand ticks and the returned time becomes garbage.
	 */
	return (double)clock() * 1.0e6 / (double)CLOCKS_PER_SEC;
#endif
}

/* returns squared deviates (mean(v*v)-mean(v)^2) of a 8x8 block */
/*
 * Variance of the 64 samples of an 8x8 block, computed as
 * mean(v*v) - mean(v)^2.
 */
double sqr_dev(uint8_t v[8*8])
{
	const int count = 8*8;
	const uint8_t *p;
	double total = 0.;
	double total_sq = 0.;
	double mean, mean_sq;

	for (p = v; p != v + count; ++p) {
		total    += *p;
		total_sq += (*p) * (*p);
	}

	mean_sq = total_sq / count;
	mean    = total / count;
	return mean_sq - mean*mean;
}

/*********************************************************************
 * cpu init
 *********************************************************************/

/* Associates a printable label with the CPU flag set passed to init_cpu(). */
typedef struct {
	const char *name;   /* label printed in the reports; NULL marks the end of a list */
	unsigned int cpu;   /* XVID_CPU_* flags requested for this entry */
} CPU;

/* Full list of implementations, walked by the tests in this file. */
CPU cpu_list[] = {
	{ "PLAINC", 0 },
#ifdef ARCH_IS_IA32
	{ "MMX   ", XVID_CPU_MMX },
	{ "MMXEXT", XVID_CPU_MMXEXT | XVID_CPU_MMX },
	{ "SSE2  ", XVID_CPU_SSE2 | XVID_CPU_MMX },
	{ "3DNOW ", XVID_CPU_3DNOW },
	{ "3DNOWE", XVID_CPU_3DNOWEXT },
#endif
	/* disabled entry: { "IA64  ", XVID_CPU_IA64 } */
	/* disabled entry: { "TSC   ", XVID_CPU_TSC } */
	{ NULL, 0 }
};

/* Reduced list: plain C and MMX only. */
CPU cpu_short_list[] = {
	{ "PLAINC", 0 },
#ifdef ARCH_IS_IA32
	{ "MMX   ", XVID_CPU_MMX },
	/* disabled entry: { "MMXEXT", XVID_CPU_MMXEXT | XVID_CPU_MMX } */
#endif
	/* disabled entry: { "IA64  ", XVID_CPU_IA64 } */
	{ NULL, 0 }
};

/* Reduced list: plain C, MMX and SSE2. */
CPU cpu_short_list2[] = {
	{ "PLAINC", 0 },
#ifdef ARCH_IS_IA32
	{ "MMX   ", XVID_CPU_MMX },
	{ "SSE2  ", XVID_CPU_SSE2 | XVID_CPU_MMX },
#endif
	{ NULL, 0 }
};


/*
 * Re-initialises libxvidcore so that it uses exactly the CPU flags of `cpu`.
 *
 * Returns 1 when the library was initialised with the forced flags.
 * Returns 0 when the host does not report every flag requested by `cpu`,
 * or when XVID_GBL_INIT returns XVID_ERR_FAIL.
 */
int init_cpu(CPU *cpu)
{
	xvid_gbl_info_t host;
	xvid_gbl_init_t request;

	/* Query the CPU flags available on the host */
	memset(&host, 0, sizeof(host));
	host.version = XVID_VERSION;
	xvid_global(NULL, XVID_GBL_INFO, &host, NULL);

	/* The requested flags must be a subset of the host's flags */
	if ((host.cpu_flags & cpu->cpu) != cpu->cpu)
		return 0;

	memset(&request, 0, sizeof(request));
	request.cpu_flags = cpu->cpu | XVID_CPU_FORCE;
	request.version = XVID_VERSION;

	/* libxvidcore failed to init */
	if (xvid_global(NULL, XVID_GBL_INIT, &request, NULL) == XVID_ERR_FAIL)
		return 0;

	return 1;
}

/*********************************************************************
 * test DCT
 *********************************************************************/

#define ABS(X)  ((X)<0 ? -(X) : (X))

void test_dct()
{
	const int nb_tests = 300*speed_ref;
	int tst;
	CPU *cpu;
	int i;
	DECLARE_ALIGNED_MATRIX(iDst0, 8, 8, short, 16);
	DECLARE_ALIGNED_MATRIX(iDst,  8, 8, short, 16);
	DECLARE_ALIGNED_MATRIX(fDst,  8, 8, short, 16);
	double overhead;

	printf( "\n ===== test fdct/idct =====\n" );

	for(i=0; i<8*8; ++i) iDst0[i] = (i*7-i*i) & 0x7f;
	overhead = gettime_usec();
	for(tst=0; tst<nb_tests; ++tst)
	{
		for(i=0; i<8*8; ++i) fDst[i] = iDst0[i];
		for(i=0; i<8*8; ++i) iDst[i] = fDst[i];
	}
	overhead = gettime_usec() - overhead;

	for(cpu = cpu_list; cpu->name!=0; ++cpu)
	{
		double t, PSNR, MSE;

		if (!init_cpu(cpu))
			continue;

		t = gettime_usec();
		emms();
		for(tst=0; tst<nb_tests; ++tst)
		{
			for(i=0; i<8*8; ++i) fDst[i] = iDst0[i];
			fdct(fDst);
			for(i=0; i<8*8; ++i) iDst[i] = fDst[i];
			idct(iDst);
		}
		emms();
		t = (gettime_usec() - t - overhead) / nb_tests;
		MSE = 0.;
		for(i=0; i<8*8; ++i) {
			double delta = 1.0*(iDst[i] - iDst0[i]);
			MSE += delta*delta;
		}
		PSNR = (MSE==0.) ? 1.e6 : -4.3429448*log( MSE/64. );
		printf( "%s -  %.3f usec       PSNR=%.3f  MSE=%.3f\n",
				cpu->name, t, PSNR, MSE );
		if (ABS(MSE)>=64) printf( "*** CRC ERROR! ***\n" );
	}
}

/*********************************************************************
 * test SAD
 *********************************************************************/

void test_sad()
{
	const int nb_tests = 2000*speed_ref;
	int tst;
	CPU *cpu;
	int i;
	uint8_t Cur[16*16], Ref1[16*16], Ref2[16*16];

	printf( "\n ======  test SAD ======\n" );
	for(i=0; i<16*16;++i) {
		Cur[i] = (i/5) ^ 0x05;
		Ref1[i] = (i + 0x0b) & 0xff;
		Ref2[i] = i ^ 0x76;
	}

	for(cpu = cpu_list; cpu->name!=0; ++cpu)
	{
		double t;
		uint32_t s;
		if (!init_cpu(cpu))
			continue;

		t = gettime_usec();
		emms();
		for(tst=0; tst<nb_tests; ++tst) s = sad8(Cur, Ref1, 16);
		emms();
		t = (gettime_usec() - t) / nb_tests;
		printf( "%s - sad8    %.3f usec       sad=%d\n", cpu->name, t, s );
		if (s!=3776) printf( "*** CRC ERROR! ***\n" );

		t = gettime_usec();
		emms();
		for(tst=0; tst<nb_tests; ++tst) s = sad16(Cur, Ref1, 16, -1);
		emms();
		t = (gettime_usec() - t) / nb_tests;
		printf( "%s - sad16   %.3f usec       sad=%d\n", cpu->name, t, s );
		if (s!=27214) printf( "*** CRC ERROR! ***\n" );

		t = gettime_usec();
		emms();
		for(tst=0; tst<nb_tests; ++tst) s = sad16bi(Cur, Ref1, Ref2, 16);
		emms();
		t = (gettime_usec() - t) / nb_tests;
		printf( "%s - sad16bi %.3f usec       sad=%d\n", cpu->name, t, s );
		if (s!=26274) printf( "*** CRC ERROR! ***\n" );

		t = gettime_usec();
		emms();
		for(tst=0; tst<nb_tests; ++tst) s = dev16(Cur, 16);
		emms();
		t = (gettime_usec() - t) / nb_tests;
		printf( "%s - dev16   %.3f usec       sad=%d\n", cpu->name, t, s );
		if (s!=3344) printf( "*** CRC ERROR! ***\n" );

		printf( " --- \n" );
	}
}

/*********************************************************************
 * test interpolation
 *********************************************************************/

/*
 * Timing helpers for test_mb(). They are not hygienic: they read and write
 * the caller's locals i, t, tst, iCrc, nb_tests, Dst and Src0 by name.
 */

/* Clears Dst, then starts the timer (t) and calls emms(). */
#define ENTER \
for(i=0; i<16*8; ++i) Dst[i] = 0;   \
t = gettime_usec();                   \
emms();

/* Stops the timer, turns t into a per-iteration time and folds Dst into iCrc. */
#define LEAVE \
emms();                             \
t = (gettime_usec() - t) / nb_tests;  \
iCrc = 0;                           \
for(i=0; i<16*8; ++i) { iCrc += Dst[i]^i; }

/* Times nb_tests calls of FUNC(Dst, Src0, 16, R). */
#define TEST_MB(FUNC, R)                \
ENTER                               \
for(tst=0; tst<nb_tests; ++tst) (FUNC)(Dst, Src0, 16, (R)); \
LEAVE

/* Same as TEST_MB for functions called as FUNC(Dst, Src0, 16). */
#define TEST_MB2(FUNC)                  \
ENTER                               \
for(tst=0; tst<nb_tests; ++tst) (FUNC)(Dst, Src0, 16); \
LEAVE


/*
 * Times the three interpolate8x8_halfpel_{h,v,hv} functions, each with the
 * last argument set to 0 and to 1, for every usable entry of cpu_list, and
 * compares the checksum of the destination block with its expected value.
 *
 * The locals t, tst, i, iCrc, Dst, Src0 and nb_tests are used by name inside
 * the TEST_MB macro, so they must keep these names.
 */
void test_mb()
{
	const int nb_tests = 2000*speed_ref;
	CPU *cpu;
	/* 9 rows with a stride of 16; only the first 9 columns carry data */
	const uint8_t Src0[16*9] = {
		/* try to have every possible combination of rounding... */
		0, 0, 1, 0, 2, 0, 3, 0, 4             ,0,0,0, 0,0,0,0
		, 0, 1, 1, 1, 2, 1, 3, 1, 3             ,0,0,0, 0,0,0,0
		, 0, 2, 1, 2, 2, 2, 3, 2, 2             ,0,0,0, 0,0,0,0
		, 0, 3, 1, 3, 2, 3, 3, 3, 1             ,0,0,0, 0,0,0,0
		, 1, 3, 0, 2, 1, 0, 2, 3, 4             ,0,0,0, 0,0,0,0
		, 2, 2, 1, 2, 0, 1, 3, 5, 3             ,0,0,0, 0,0,0,0
		, 3, 1, 2, 3, 1, 2, 2, 6, 2             ,0,0,0, 0,0,0,0
		, 1, 0, 1, 3, 0, 3, 1, 6, 1             ,0,0,0, 0,0,0,0
		, 4, 3, 2, 1, 2, 3, 4, 0, 3             ,0,0,0, 0,0,0,0
	};
	/* NOTE(review): Src0/Dst are plain arrays, not DECLARE_ALIGNED_MATRIX; confirm no SIMD variant needs alignment */
	uint8_t Dst[16*8] = {0};

	printf( "\n ===  test block motion ===\n" );

	for(cpu = cpu_list; cpu->name!=0; ++cpu)
	{
		double t;
		int tst, i, iCrc;

		if (!init_cpu(cpu))
			continue;

		/* horizontal half-pel */
		TEST_MB(interpolate8x8_halfpel_h, 0);
		printf( "%s - interp- h-round0 %.3f usec       iCrc=%d\n", cpu->name, t, iCrc );
		if (iCrc!=8107) printf( "*** CRC ERROR! ***\n" );

		TEST_MB(interpolate8x8_halfpel_h, 1);
		printf( "%s -           round1 %.3f usec       iCrc=%d\n", cpu->name, t, iCrc );
		if (iCrc!=8100) printf( "*** CRC ERROR! ***\n" );


		/* vertical half-pel */
		TEST_MB(interpolate8x8_halfpel_v, 0);
		printf( "%s - interp- v-round0 %.3f usec       iCrc=%d\n", cpu->name, t, iCrc );
		if (iCrc!=8108) printf( "*** CRC ERROR! ***\n" );

		TEST_MB(interpolate8x8_halfpel_v, 1);
		printf( "%s -           round1 %.3f usec       iCrc=%d\n", cpu->name, t, iCrc );
		if (iCrc!=8105) printf( "*** CRC ERROR! ***\n" );


		/* horizontal+vertical half-pel */
		TEST_MB(interpolate8x8_halfpel_hv, 0);
		printf( "%s - interp-hv-round0 %.3f usec       iCrc=%d\n", cpu->name, t, iCrc );
		if (iCrc!=8112) printf( "*** CRC ERROR! ***\n" );

		TEST_MB(interpolate8x8_halfpel_hv, 1);
		printf( "%s -           round1 %.3f usec       iCrc=%d\n", cpu->name, t, iCrc );
		if (iCrc!=8103) printf( "*** CRC ERROR! ***\n" );


		/* this is a new function, as of 06.06.2002 */
		/* (test compiled out) */
#if 0
		TEST_MB2(interpolate8x8_avrg);
		printf( "%s - interpolate8x8_c %.3f usec       iCrc=%d\n", cpu->name, t, iCrc );
		if (iCrc!=8107) printf( "*** CRC ERROR! ***\n" );
#endif

		printf( " --- \n" );
	}
}

/*********************************************************************
 * test transfer
 *********************************************************************/

/*
 * Timing helpers for test_transfer(). They are not hygienic: they use the
 * caller's locals i, tst, t, s, overhead, nb_tests, Src8, Src16, Dst8, Dst16,
 * Ref1 and Ref2 by name.
 */

/* Resets every source, destination and reference buffer to a fixed pattern. */
#define INIT_TRANSFER \
for(i=0; i<8*32; ++i) {             \
Src8[i] = i; Src16[i] = i;        \
Dst8[i] = 0; Dst16[i] = 0;        \
Ref1[i] = i^0x27;                 \
Ref2[i] = i^0x51;                 \
}

/*
 * Measures the cost of refilling DST alone (overhead), then starts the timer
 * and OPENS the timed loop: the loop's closing brace is supplied by
 * TEST_TRANSFER_END.
 */
#define TEST_TRANSFER_BEGIN(DST)              \
INIT_TRANSFER                             \
overhead = -gettime_usec();               \
for(tst=0; tst<nb_tests; ++tst) {         \
  for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a;\
}                                         \
overhead += gettime_usec();               \
t = gettime_usec();                       \
emms();                                   \
for(tst=0; tst<nb_tests; ++tst) {         \
  for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a;


/* Closes the timed loop, computes the per-iteration time and s = sum(DST[i]^i). */
#define TEST_TRANSFER_END(DST)                \
}                                         \
emms();                                   \
t = (gettime_usec()-t -overhead) / nb_tests;\
s = 0; for(i=0; i<8*32; ++i) { s += (DST)[i]^i; }

/* Times FUNC(DST, SRC, 32). */
#define TEST_TRANSFER(FUNC, DST, SRC)         \
TEST_TRANSFER_BEGIN(DST);                 \
  (FUNC)((DST), (SRC), 32);               \
TEST_TRANSFER_END(DST)


/* Like TEST_TRANSFER_BEGIN, but SRC is refilled on every iteration as well. */
#define TEST_TRANSFER2_BEGIN(DST, SRC)        \
INIT_TRANSFER                             \
overhead = -gettime_usec();               \
for(tst=0; tst<nb_tests; ++tst) {         \
  for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a;\
  for(i=0; i<8*32; ++i) (SRC)[i] = i^0x3e;\
}                                         \
overhead += gettime_usec();               \
t = gettime_usec();                       \
emms();                                   \
for(tst=0; tst<nb_tests; ++tst) {         \
  for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a;\
  for(i=0; i<8*32; ++i) (SRC)[i] = i^0x3e;

/* Closes the loop opened by TEST_TRANSFER2_BEGIN; here s is the plain sum of DST. */
#define TEST_TRANSFER2_END(DST)               \
}                                         \
emms();                                   \
t = (gettime_usec()-t -overhead) / nb_tests;\
s = 0; for(i=0; i<8*32; ++i) { s += (DST)[i]; }

/* Times FUNC(DST, SRC, R1, 32). */
#define TEST_TRANSFER2(FUNC, DST, SRC, R1)    \
TEST_TRANSFER2_BEGIN(DST,SRC);            \
  (FUNC)((DST), (SRC), (R1), 32);         \
TEST_TRANSFER2_END(DST)

/* Times FUNC(DST, SRC, R1, R2, 32). */
#define TEST_TRANSFER3(FUNC, DST, SRC, R1, R2)\
TEST_TRANSFER_BEGIN(DST);                 \
  (FUNC)((DST), (SRC), (R1), (R2), 32);   \
TEST_TRANSFER_END(DST)

/*
 * Times the transfer_* / transfer8x8_copy functions for every usable entry
 * of cpu_list and compares a checksum of the destination buffer with its
 * expected value.
 *
 * The buffer names and the locals i, tst, t, s and overhead are used by name
 * inside the TEST_TRANSFER* macros, so they must keep these names.
 */
void test_transfer()
{
	const int nb_tests = 4000*speed_ref;
	int i;
	CPU *cpu;
	/* NOTE(review): plain arrays, not DECLARE_ALIGNED_MATRIX; confirm no SIMD variant needs alignment */
	uint8_t  Src8[8*32], Dst8[8*32], Ref1[8*32], Ref2[8*32];
	int16_t Src16[8*32], Dst16[8*32];

	printf( "\n ===  test transfer ===\n" );

	for(cpu = cpu_list; cpu->name!=0; ++cpu)
	{
		double t, overhead;
		int tst, s;

		if (!init_cpu(cpu))
			continue;

		TEST_TRANSFER(transfer_8to16copy, Dst16, Src8);
		printf( "%s - 8to16     %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=28288) printf( "*** CRC ERROR! ***\n" );

		TEST_TRANSFER(transfer_16to8copy, Dst8, Src16);
		printf( "%s - 16to8     %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=28288) printf( "*** CRC ERROR! ***\n" );

		TEST_TRANSFER(transfer8x8_copy, Dst8, Src8);
		printf( "%s - 8to8      %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=20352) printf( "*** CRC ERROR! ***\n" );

		TEST_TRANSFER(transfer_16to8add, Dst8, Src16);
		printf( "%s - 16to8add  %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=25536) printf( "*** CRC ERROR! ***\n" );

		/* two checksums here: crc1 over Dst16, crc2 over Src8 and Ref1 */
		TEST_TRANSFER2(transfer_8to16sub, Dst16, Src8, Ref1);
		printf( "%s - 8to16sub  %.3f usec       crc1=%d ", cpu->name, t, s );
		if (s!=28064) printf( "*** CRC ERROR! ***\n" );
		s = 0; for(i=0; i<8*32; ++i) { s += (Src8[i]-Ref1[i])&i; }
		printf( "crc2=%d\n", s);
		if (s!=16256) printf( "*** CRC ERROR! ***\n" );

		TEST_TRANSFER3(transfer_8to16sub2, Dst16, Src8, Ref1, Ref2);
		printf( "%s - 8to16sub2 %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=22368) printf( "*** CRC ERROR! ***\n" );

		printf( " --- \n" );
	}
}

/*********************************************************************
 * test quantization
 *********************************************************************/

/*
 * Timing helpers for test_quant(). They are not hygienic: they use the
 * caller's locals t, s, qm, q, i, tst, Quant, max_Q, nb_tests and overhead
 * by name.
 */

/*
 * For every matrix value qm in 1..255 (installed with set_inter_matrix) and
 * every q in 1..max_Q, calls FUNC(DST, SRC, q) nb_tests times and folds DST
 * into s. Afterwards t is divided by nb_tests and by qm (256 once the loop
 * has ended), and s is folded down to 16 bits.
 */
#define TEST_QUANT(FUNC, DST, SRC)              \
t = gettime_usec();                         \
for(s=0,qm=1; qm<=255; ++qm) {              \
  for(i=0; i<8*8; ++i) Quant[i] = qm;       \
  set_inter_matrix( Quant );                \
  emms();                                   \
  for(q=1; q<=max_Q; ++q) {                 \
	for(tst=0; tst<nb_tests; ++tst)         \
	  (FUNC)((DST), (SRC), q);              \
	for(i=0; i<64; ++i) s+=(DST)[i]^i^qm;   \
  }                                         \
  emms();                                   \
}                                           \
t = (gettime_usec()-t-overhead)/nb_tests/qm;\
s = (s&0xffff)^(s>>16)

/*
 * Same scheme as TEST_QUANT, but installs the matrix with set_intra_matrix
 * and calls FUNC(DST, SRC, q, q).
 */
#define TEST_QUANT2(FUNC, DST, SRC)             \
t = gettime_usec();                         \
for(s=0,qm=1; qm<=255; ++qm) {              \
  for(i=0; i<8*8; ++i) Quant[i] = qm;       \
  set_intra_matrix( Quant );                \
  emms();                                   \
  for(q=1; q<=max_Q; ++q) {                 \
	for(tst=0; tst<nb_tests; ++tst)         \
	  (FUNC)((DST), (SRC), q, q);           \
	for(i=0; i<64; ++i) s+=(DST)[i]^i^qm;   \
  }                                         \
  emms();                                   \
}                                           \
t = (gettime_usec()-t-overhead)/nb_tests/qm;\
s = (s&0xffff)^(s>>16)

/*
 * Times the eight (de)quant_{mpeg,h263}_{intra,inter} functions for every
 * usable entry of cpu_list and compares a 16-bit checksum of the outputs
 * with its expected value.
 *
 * The locals and buffers declared here are used by name inside the
 * TEST_QUANT / TEST_QUANT2 macros, so they must keep these names.
 */
void test_quant()
{
	const int nb_tests = 1*speed_ref;
	const int max_Q = 31;
	int i, qm;
	CPU *cpu;
	/* NOTE(review): plain arrays, not DECLARE_ALIGNED_MATRIX; confirm no SIMD variant needs alignment */
	int16_t  Src[8*8], Dst[8*8];
	uint8_t Quant[8*8];

	printf( "\n =====  test quant =====\n" );

/* we deliberately infringe the norm's specified range [-127,127], */
/* to test the robustness of the iquant module */
	for(i=0; i<64; ++i) {
		Src[i] = 1 + (i-32) * (i&6);
		Dst[i] = 0;
	}

	for(cpu = cpu_list; cpu->name!=0; ++cpu)
	{
		double t, overhead;
		int tst, q;
		uint32_t s;

		if (!init_cpu(cpu))
		continue;

		/* time the matrix set-up and checksum loops alone; the macros subtract this */
		overhead = -gettime_usec();
		for(s=0,qm=1; qm<=255; ++qm) {
			for(i=0; i<8*8; ++i) Quant[i] = qm;
			set_inter_matrix( Quant );
			for(q=1; q<=max_Q; ++q)
				for(i=0; i<64; ++i) s+=Dst[i]^i^qm;
		}
		overhead += gettime_usec();

#if 1
		TEST_QUANT2(quant_mpeg_intra, Dst, Src);
		printf( "%s -   quant_mpeg_intra %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=29809) printf( "*** CRC ERROR! ***\n" );

		TEST_QUANT(quant_mpeg_inter, Dst, Src);
		printf( "%s -   quant_mpeg_inter %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=12574) printf( "*** CRC ERROR! ***\n" );
#endif
#if 1
		TEST_QUANT2(dequant_mpeg_intra, Dst, Src);
		printf( "%s - dequant_mpeg_intra %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=24052) printf( "*** CRC ERROR! ***\n" );

		TEST_QUANT(dequant_mpeg_inter, Dst, Src);
		printf( "%s - dequant_mpeg_inter %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=63847) printf( "*** CRC ERROR! ***\n" );
#endif
#if 1
		TEST_QUANT2(quant_h263_intra, Dst, Src);
		printf( "%s -   quant_h263_intra %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=25662) printf( "*** CRC ERROR! ***\n" );

		TEST_QUANT(quant_h263_inter, Dst, Src);
		printf( "%s -   quant_h263_inter %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=23972) printf( "*** CRC ERROR! ***\n" );
#endif
#if 1
		TEST_QUANT2(dequant_h263_intra, Dst, Src);
		printf( "%s - dequant_h263_intra %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=49900) printf( "*** CRC ERROR! ***\n" );

		TEST_QUANT(dequant_h263_inter, Dst, Src);
		printf( "%s - dequant_h263_inter %.3f usec       crc=%d\n", cpu->name, t, s );
		if (s!=48899) printf( "*** CRC ERROR! ***\n" );
#endif
		printf( " --- \n" );
	}
}

/*********************************************************************
 * test non-zero AC counting
 *********************************************************************/

/*
 * Times nb_tests calls of FUNC(SRC); the last result is left in cbp and the
 * per-call time in t. Uses the caller's locals t, tst, cbp and nb_tests by
 * name.
 */
#define TEST_CBP(FUNC, SRC)                   \
t = gettime_usec();                       \
emms();                                   \
for(tst=0; tst<nb_tests; ++tst) {         \
  cbp = (FUNC)((SRC));                    \
}                                         \
emms();                                   \
t = (gettime_usec()-t ) / nb_tests;

/*
 * Times calc_cbp() on four fixed 6x64 coefficient patterns for every usable
 * entry of cpu_list and compares the returned value with its expected value.
 *
 * The locals t, tst, cbp and nb_tests are used by name inside TEST_CBP.
 */
void test_cbp()
{
	const int nb_tests = 10000*speed_ref;
	int i;
	CPU *cpu;
	/* NOTE(review): plain arrays, not DECLARE_ALIGNED_MATRIX; confirm no SIMD variant needs alignment */
	int16_t  Src1[6*64], Src2[6*64], Src3[6*64], Src4[6*64];

	printf( "\n =====  test cbp =====\n" );

	for(i=0; i<6*64; ++i) {
		Src1[i] = (i*i*3/8192)&(i/64)&1;  /* 'random' */
		Src2[i] = (i<3*64);               /* half-full */
		Src3[i] = ((i+32)>3*64);          /* 1 from index 161 onwards */
		Src4[i] = (i==(3*64+2) || i==(5*64+9));  /* exactly two non-zero entries */
	}

	for(cpu = cpu_list; cpu->name!=0; ++cpu)
	{
		double t;
		int tst, cbp;

		if (!init_cpu(cpu))
			continue;

		TEST_CBP(calc_cbp, Src1);
		printf( "%s -   calc_cbp#1 %.3f usec       cbp=0x%x\n", cpu->name, t, cbp );
		if (cbp!=0x15) printf( "*** CRC ERROR! ***\n" );
		TEST_CBP(calc_cbp, Src2);
		printf( "%s -   calc_cbp#2 %.3f usec       cbp=0x%x\n", cpu->name, t, cbp );
		if (cbp!=0x38) printf( "*** CRC ERROR! ***\n" );
		TEST_CBP(calc_cbp, Src3);
		printf( "%s -   calc_cbp#3 %.3f usec       cbp=0x%x\n", cpu->name, t, cbp );
		if (cbp!=0x0f) printf( "*** CRC ERROR! ***\n" );
		TEST_CBP(calc_cbp, Src4);
		printf( "%s -   calc_cbp#4 %.3f usec       cbp=0x%x\n", cpu->name, t, cbp );
		if (cbp!=0x05) printf( "*** CRC ERROR! ***\n" );
		printf( " --- \n" );
	}
}

/*********************************************************************
 * fdct/idct IEEE1180 compliance
 *********************************************************************/

/* Per-coefficient error accumulators for a series of 8x8 block comparisons. */
typedef struct {
	long Errors[64];      /* sum of the signed errors */
	long Sqr_Errors[64];  /* sum of the squared errors */
	long Max_Errors[64];  /* largest absolute error recorded */
	long Nb;              /* number of blocks recorded */
} STATS_8x8;

/* Resets every accumulator of S, and the block count, to zero. */
void init_stats(STATS_8x8 *S)
{
	memset(S, 0, sizeof(*S));
}

/*
 * Records one block: for each of the 64 positions, accumulates the error
 * Blk[i]-Ref[i] (held in a short), its square, and updates the maximum
 * absolute error; then increments the block count.
 */
void store_stats(STATS_8x8 *S, short Blk[64], short Ref[64])
{
	int k = 0;

	while (k < 64) {
		short diff = Blk[k] - Ref[k];

		S->Errors[k]     += diff;
		S->Sqr_Errors[k] += diff * diff;

		if (diff < 0) {
			diff = -diff;
		}
		if (diff > S->Max_Errors[k]) {
			S->Max_Errors[k] = diff;
		}
		++k;
	}
	S->Nb += 1;
}

/*
 * Prints three 8x8 tables from S: the maximum absolute errors, the mean
 * square errors and the mean errors. S must hold at least one block.
 */
void print_stats(STATS_8x8 *S)
{
	double inv_count;
	int row, col;

	assert(S->Nb>0);
	inv_count = 1. / (double)S->Nb;

	printf("\n== Max absolute values of errors ==\n");
	for(row=0; row<8; row++) {
		for(col=0; col<8; col++) {
			printf("  %4ld", S->Max_Errors[8*row+col]);
		}
		printf("\n");
	}

	printf("\n== Mean square errors ==\n");
	for(row=0; row<8; row++) {
		for(col=0; col<8; col++) {
			const double mean_sq = inv_count * (double)S->Sqr_Errors[8*row+col];
			printf(" %.3f", mean_sq);
		}
		printf("\n");
	}

	printf("\n== Mean errors ==\n");
	for(row=0; row<8; row++) {
		for(col=0; col<8; col++) {
			const double mean = inv_count * (double)S->Errors[8*row+col];
			printf(" %.3f", mean);
		}
		printf("\n");
	}
	printf("\n");
}

/* Returns "ok" when |v| does not exceed the limit l, "FAIL!" otherwise. */
static const char *CHECK(double v, double l) {
	return (fabs(v)<=l) ? "ok" : "FAIL!";
}

/*
 * Prints the summary figures of S: peak error, peak and overall mean square
 * error, peak and overall mean error. When Limits is non-null, its five
 * entries are used, in that same order, as bounds and each figure is tagged
 * with the result of CHECK(). S must hold at least one block.
 */
void report_stats(STATS_8x8 *S, const double *Limits)
{
	int i;
	double Norm;
	double PE = 0.;
	double PMSE = 0., OMSE = 0.;
	double PME = 0., OME = 0.;

	assert(S->Nb>0);
	Norm = 1. / (double)S->Nb;

	/* one pass over the 64 positions gathers all five figures */
	for(i=0; i<64; i++) {
		const double sq_err = Norm * (double)S->Sqr_Errors[i];
		double mean_err     = Norm * (double)S->Errors[i];

		if (PE<S->Max_Errors[i]) {
			PE = S->Max_Errors[i];
		}

		OMSE += sq_err;
		if (PMSE < sq_err) {
			PMSE = sq_err;
		}

		OME += mean_err;
		mean_err = fabs(mean_err);
		if (PME < mean_err) {
			PME = mean_err;
		}
	}
	OMSE /= 64.;
	OME /= 64.;

	printf( "Peak error:   %4.4f\n", PE );
	printf( "Peak MSE:     %4.4f\n", PMSE );
	printf( "Overall MSE:  %4.4f\n", OMSE );
	printf( "Peak ME:      %4.4f\n", PME );
	printf( "Overall ME:   %4.4f\n", OME );

	if (Limits==0) {
		return;
	}

	printf( "[PE<=%.4f %s]  ", Limits[0], CHECK(PE,   Limits[0]) );
	printf( "\n" );
	printf( "[PMSE<=%.4f %s]", Limits[1], CHECK(PMSE, Limits[1]) );
	printf( "[OMSE<=%.4f %s]", Limits[2], CHECK(OMSE, Limits[2]) );
	printf( "\n" );
	printf( "[PME<=%.4f %s] ", Limits[3], CHECK(PME , Limits[3]) );
	printf( "[OME<=%.4f %s] ", Limits[4], CHECK(OME , Limits[4]) );
	printf( "\n" );
}

///* ////////////////////////////////////////////////////// */
/* Pseudo-random generator specified by IEEE 1180 */

/* State of the generator below; only its low 31 bits influence the output. */
static long ieee_seed = 1;

/* Restarts the pseudo-random sequence from seed s. */
static void ieee_reseed(long s) {
	ieee_seed = s;
}

/*
 * Returns the next pseudo-random integer in [Min, Max].
 *
 * The linear congruential step is done in unsigned arithmetic and reduced to
 * 31 bits. The previous code multiplied the signed `long` seed directly,
 * which overflows (undefined behaviour) within a few calls. Because only
 * bits 1..30 of the state are used to build the result, and the low 31 bits
 * of the product depend only on the low 31 bits of the operands, the
 * sequence is the same as the one produced by a wrapping 32-bit `long`.
 */
static long ieee_rand(int Min, int Max)
{
	static double z = (double) 0x7fffffff;

	unsigned long state;
	long i,j;
	double x;

	state = (unsigned long)ieee_seed * 1103515245UL + 12345UL;
	ieee_seed = (long)(state & 0x7fffffffUL);  /* always fits in a long */
	i = ieee_seed & 0x7ffffffe;
	x = ((double) i) / z;      /* 0 <= x < 1 */
	x *= (Max-Min+1);
	j = (long)x;
	j = j + Min;
	assert(j>=Min && j<=Max);
	return (short)j;
}

/*
 * Saturates the lvalue x, in place, to the range [-M, M-1].
 * Both arguments are evaluated more than once: pass expressions without
 * side effects.
 */
#define CLAMP(x, M)   (x) = ((x)<-(M)) ? (-(M)) : ((x)>=(M) ? ((M)-1) : (x))

static double Cos[8][8];
static void init_ref_dct()
{
	int i, j;
	for(i=0; i<8; i++)
	{
		double scale = (i == 0) ? sqrt(0.125) : 0.5;
		for (j=0; j<8; j++)
			Cos[i][j] = scale*cos( (M_PI/8.0)*i*(j + 0.5) );
	}
}

/*
 * Floating-point reference inverse DCT of the 8x8 block M, done in place.
 * Uses the Cos table (init_ref_dct() must have been called). Each result is
 * rounded with floor(x + 0.5) and stored back as a short.
 */
void ref_idct(short *M)
{
	double Tmp[8][8];
	int r, c, k;

	/* first pass: along each row of M */
	for(r=0; r<8; r++) {
		const short *row = M + 8*r;
		for(c=0; c<8; c++) {
			double acc = 0.0;
			for (k=0; k<8; k++) {
				acc += Cos[k][c]*row[k];
			}
			Tmp[r][c] = acc;
		}
	}

	/* second pass: along each column of the intermediate result */
	for(r=0; r<8; r++) {
		short *out = M + 8*r;
		for(c=0; c<8; c++) {
			double acc = 0.0;
			for (k=0; k<8; k++) {
				acc += Cos[k][r]*Tmp[k][c];
			}
			out[c] = (short)floor(acc + .5);
		}
	}
}

/*
 * Floating-point reference forward DCT of the 8x8 block M, done in place.
 * Uses the Cos table (init_ref_dct() must have been called). Each result is
 * rounded with floor(x + 0.5) and stored back as a short.
 */
void ref_fdct(short *M)
{
	double Tmp[8][8];
	int r, c, k;

	/* first pass: along each row of M */
	for(r=0; r<8; r++) {
		const short *row = M + 8*r;
		for(c=0; c<8; c++) {
			double acc = 0.0;
			for (k=0; k<8; k++) {
				acc += Cos[c][k]*row[k];
			}
			Tmp[r][c] = acc;
		}
	}

	/* second pass: along each column of the intermediate result */
	for(r=0; r<8; r++) {
		short *out = M + 8*r;
		for(c=0; c<8; c++) {
			double acc = 0.0;
			for (k=0; k<8; k++) {
				acc += Cos[r][k]*Tmp[k][c];
			}
			out[c] = (short)floor(acc + 0.5);
		}
	}
}

/*
 * Compares fdct() and idct() against the floating-point reference
 * transforms on `Loops` pseudo-random blocks, for every usable entry of
 * cpu_list.
 *
 * Each input sample is ieee_rand(Min, Max) * Sign. The error statistics of
 * both transforms are printed; only the IDCT figures are checked against
 * ILimits. Finally an all-zero block is run through each transform and the
 * function reports whether the output is still all-zero.
 */
void test_IEEE1180_compliance(int Min, int Max, int Sign)
{
	static const double ILimits[5] = { 1., 0.06, 0.02, 0.015, 0.0015 };
	int Loops = 10000;
	int k, nonzero, iter;
	DECLARE_ALIGNED_MATRIX(Blk0, 8, 8, short, 16); /* reference */
	DECLARE_ALIGNED_MATRIX(Blk,  8, 8, short, 16);
	DECLARE_ALIGNED_MATRIX(iBlk, 8, 8, short, 16);
	DECLARE_ALIGNED_MATRIX(Ref_FDCT, 8, 8, short, 16);
	DECLARE_ALIGNED_MATRIX(Ref_IDCT, 8, 8, short, 16);

	STATS_8x8 FStats; /* forward dct stats */
	STATS_8x8 IStats; /* inverse dct stats */

	CPU *cpu;

	init_ref_dct();

	for(cpu = cpu_list; cpu->name!=0; ++cpu) {
		if (!init_cpu(cpu)) {
			continue;
		}

		printf( "\n===== IEEE test for %s ==== (Min=%d Max=%d Sign=%d Loops=%d)\n",
				cpu->name, Min, Max, Sign, Loops);

		init_stats(&IStats);
		init_stats(&FStats);

		/* same input sequence for every implementation */
		ieee_reseed(1);
		for(iter=0; iter<Loops; ++iter) {
			for(k=0; k<64; ++k) {
				Blk0[k] = (short)ieee_rand(Min,Max) * Sign;
			}

			/* hmm, I'm not quite sure this is exactly */
			/* the tests described in the norm. check... */

			/* forward transform: reference vs. tested, both clamped to [-2048, 2047] */
			memcpy(Ref_FDCT, Blk0, 64*sizeof(short));
			ref_fdct(Ref_FDCT);
			for(k=0; k<64; k++) {
				CLAMP( Ref_FDCT[k], 2048 );
			}

			memcpy(Blk, Blk0, 64*sizeof(short));
			emms();
			fdct(Blk);
			emms();
			for(k=0; k<64; k++) {
				CLAMP( Blk[k], 2048 );
			}

			store_stats(&FStats, Blk, Ref_FDCT);

			/* inverse transform of the reference coefficients, clamped to [-256, 255] */
			memcpy(Ref_IDCT, Ref_FDCT, 64*sizeof(short));
			ref_idct(Ref_IDCT);
			for(k=0; k<64; k++) {
				CLAMP( Ref_IDCT[k], 256 );
			}

			memcpy(iBlk, Ref_FDCT, 64*sizeof(short));
			emms();
			idct(iBlk);
			emms();
			for(k=0; k<64; k++) {
				CLAMP( iBlk[k], 256 );
			}

			store_stats(&IStats, iBlk, Ref_IDCT);
		}

		printf( "\n  -- FDCT report --\n" );
		/* print_stats(&FStats) is left disabled here */
		report_stats(&FStats, 0); /* so far I know, IEEE1180 says nothing for fdct */

		for(k=0; k<64; k++) {
			Blk[k] = 0;
		}
		emms();
		fdct(Blk);
		emms();
		nonzero = 0;
		for(k=0; k<64; k++) {
			if (Blk[k]!=0) {
				nonzero++;
			}
		}
		printf( "FDCT(0) == 0 ?  %s\n", (nonzero!=0) ? "NOPE!" : "yup." );

		printf( "\n  -- IDCT report --\n" );
		/* print_stats(&IStats) is left disabled here */
		report_stats(&IStats, ILimits);

		for(k=0; k<64; k++) {
			Blk[k] = 0;
		}
		emms();
		idct(Blk);
		emms();
		nonzero = 0;
		for(k=0; k<64; k++) {
			if (Blk[k]!=0) {
				nonzero++;
			}
		}
		printf( "IDCT(0) == 0 ?  %s\n", (nonzero!=0) ? "NOPE!" : "yup." );
	}
}


void test_dct_saturation(int Min, int Max)
{
/* test behaviour on input range fringe */

	int i, n, p;
	CPU *cpu;
//  const short IDCT_MAX =  2047;  /* 12bits input */
//  const short IDCT_MIN = -2048;
//  const short IDCT_OUT =   256;  /* 9bits ouput */
	const int Partitions = 4;
	const int Loops = 10000 / Partitions;

	init_ref_dct();

	for(cpu = cpu_list; cpu->name!=0; ++cpu)
	{
		short Blk0[64], Blk[64];
		STATS_8x8 Stats;

		if (!init_cpu(cpu))
			continue;

		printf( "\n===== IEEE test for %s Min=%d Max=%d =====\n",
				cpu->name, Min, Max );

		/* FDCT tests // */

		init_stats(&Stats);

		/* test each computation channels separately */
		for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? Max : 0;
		ref_fdct(Blk0);
		emms(); fdct(Blk); emms();
		store_stats(&Stats, Blk, Blk0);

		for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? Min : 0;
		ref_fdct(Blk0);
		emms(); fdct(Blk); emms();
		store_stats(&Stats, Blk, Blk0);

		/* randomly saturated inputs */
		for(p=0; p<Partitions; ++p)
		{
			for(n=0; n<Loops; ++n)
			{
				for(i=0; i<64; ++i)
					Blk0[i] = Blk[i] = (ieee_rand(0,Partitions)>=p)? Max : Min;
				ref_fdct(Blk0);
				emms(); fdct(Blk); emms();
				store_stats(&Stats, Blk, Blk0);
			}
		}
		printf( "\n  -- FDCT saturation report --\n" );
		report_stats(&Stats, 0);


		/* IDCT tests // */
#if 0
		/* no finished yet */

		init_stats(&Stats);

/* test each computation channel separately */
		for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? IDCT_MAX : 0;
		ref_idct(Blk0);
		emms(); idct(Blk); emms();
		for(i=0; i<64; i++) { CLAMP(Blk0[i], IDCT_OUT); CLAMP(Blk[i], IDCT_OUT); }
		store_stats(&Stats, Blk, Blk0);

		for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? IDCT_MIN : 0;
		ref_idct(Blk0);
		emms(); idct(Blk); emms();
		for(i=0; i<64; i++) { CLAMP(Blk0[i], IDCT_OUT); CLAMP(Blk[i], IDCT_OUT); }
		store_stats(&Stats, Blk, Blk0);

		/* randomly saturated inputs */
		for(p=0; p<Partitions; ++p)
		{
			for(n=0; n<Loops; ++n)
			{
				for(i=0; i<64; ++i)
					Blk0[i] = Blk[i] = (ieee_rand(0,Partitions)>=p)? IDCT_MAX : IDCT_MIN;
				ref_idct(Blk0);
				emms(); idct(Blk); emms();
				for(i=0; i<64; i++) { CLAMP(Blk0[i],IDCT_OUT); CLAMP(Blk[i],IDCT_OUT); }
				store_stats(&Stats, Blk, Blk0);
			}
		}

		printf( "\n  -- IDCT saturation report --\n" );
		print_stats(&Stats);
		report_stats(&Stats, 0);
#endif
	}
}

/*********************************************************************
 * measure raw decoding speed
 *********************************************************************/

/*
 * Measures raw decoding speed on a whole file.
 *
 * name        - path of the bitstream file; it is read entirely into memory
 * width       - frame width passed to the decoder
 * height      - frame height passed to the decoder
 * with_chksum - when non-zero, the output buffer is summed as 32-bit words
 *               after every decode call and the total is printed at the end
 *
 * Prints the number of decode calls, the elapsed time and the resulting
 * frame rate. Every exit path taken after the decoder has been created goes
 * through the End label, so the decoder instance, the buffers and the file
 * are always released (the "can't open file" path previously returned
 * directly and leaked the decoder instance).
 */
void test_dec(const char *name, int width, int height, int with_chksum)
{
	FILE *f = 0;
	void *dechandle = 0;
	int xerr;
	xvid_gbl_init_t xinit;
	xvid_dec_create_t xparam;
	xvid_dec_frame_t xframe;
	double t = 0.;
	int nb = 0;
	uint8_t *buf = 0;
	uint8_t *rgb_out = 0;
	int buf_size, pos;
	uint32_t chksum = 0;

	memset(&xinit, 0, sizeof(xinit));
	xinit.cpu_flags = XVID_CPU_MMX | XVID_CPU_FORCE;
	xinit.version = XVID_VERSION;
	xvid_global(NULL, 0, &xinit, NULL);

	memset(&xparam, 0, sizeof(xparam));
	xparam.width = width;
	xparam.height = height;
	xparam.version = XVID_VERSION;
	xerr = xvid_decore(NULL, XVID_DEC_CREATE, &xparam, NULL);
	if (xerr==XVID_ERR_FAIL) {
		printf("can't init decoder (err=%d)\n", xerr);
		return;
	}
	dechandle = xparam.handle;


	f = fopen(name, "rb");
	if (f==0) {
		printf( "can't open file '%s'\n", name);
		goto End;  /* the decoder created above still has to be destroyed */
	}
	/* file size = offset of the end of the file */
	fseek(f, 0, SEEK_END);
	buf_size = ftell(f);
	fseek(f, 0, SEEK_SET);
	if (buf_size<=0) {
		printf("error while stating file\n");
		goto End;
	}
	else printf( "Input size: %d\n", buf_size);

	buf = malloc(buf_size); /* should be enuf' */
	rgb_out = calloc(4, width*height);  /* <-room for _RGB24 */
	if (buf==0 || rgb_out==0) {
		printf( "malloc failed!\n" );
		goto End;
	}

	if (fread(buf, buf_size, 1, f)!=1) {
		printf( "file-read failed\n" );
		goto End;
	}

	nb = 0;
	pos = 0;
	t = -gettime_usec();
	while(1) {
		memset(&xframe, 0, sizeof(xframe));
		xframe.version = XVID_VERSION;
		xframe.bitstream = buf + pos;
		xframe.length = buf_size - pos;
		xframe.output.plane[0] = rgb_out;
		xframe.output.stride[0] = width;
		xframe.output.csp = XVID_CSP_BGR;
		xerr = xvid_decore(dechandle, XVID_DEC_DECODE, &xframe, 0);
		nb++;
		pos += xframe.length;
		if (with_chksum) {
			/* width*height 32-bit words: exactly the 4*width*height bytes allocated */
			int k = width*height;
			uint32_t *ptr = (uint32_t *)rgb_out;
			while(k-->0) chksum += *ptr++;
		}
		if (pos==buf_size)
			break;
		if (xerr==XVID_ERR_FAIL) {
			printf("decoding failed for frame #%d (err=%d)!\n", nb, xerr);
			break;
		}
	}
	t += gettime_usec();
	if (t>0.)
		printf( "%d frames decoded in %.3f s -> %.1f FPS\n", nb, t*1.e-6f, (float)(nb*1.e6f/t) );
	if (with_chksum)
		printf("checksum: 0x%.8x\n", chksum);

 End:
	/* free(NULL) is a no-op, so no guards are needed */
	free(rgb_out);
	free(buf);
	if (dechandle!=0) {
		xerr= xvid_decore(dechandle, XVID_DEC_DESTROY, NULL, NULL);
		if (xerr==XVID_ERR_FAIL)
			printf("destroy-decoder failed (err=%d)!\n", xerr);
	}
	if (f!=0) fclose(f);
}

/*********************************************************************
 * non-regression tests
 *********************************************************************/

/*
 * Non-regression dump for a suspected saturation problem in the MPEG
 * dequantizers.
 *
 * For every entry of cpu_list accepted by init_cpu(), a ramp of 64
 * coefficients (-32 .. 31) is run through dequant_mpeg_intra (default
 * intra matrix, trailing arguments 31 and 5) and, in a second pass,
 * through dequant_mpeg_inter (default inter matrix, trailing argument 31).
 * The 64 output values are printed; nothing is checked automatically.
 */
void test_bugs1()
{
	CPU *entry;
	int k;

	printf( "\n =====  (de)quant4_intra saturation bug? =====\n" );

	entry = cpu_list;
	while (entry->name != 0)
	{
		/* entries rejected by init_cpu() are silently skipped */
		if (init_cpu(entry))
		{
			int16_t  coef_in[8*8], coef_out[8*8];

			/* input ramp: -32, -31, ..., 31 */
			for (k = -32; k < 32; k++)
				coef_in[k + 32] = k;

			set_intra_matrix( get_default_intra_matrix() );
			dequant_mpeg_intra(coef_out, coef_in, 31, 5);

			printf( "dequant_mpeg_intra with CPU=%s:  ", entry->name);
			printf( "  Out[]= " );
			for (k = 0; k < 64; k++)
				printf( "[%d]", coef_out[k]);
			printf( "\n" );
		}
		entry++;
	}

	printf( "\n =====  (de)quant4_inter saturation bug? =====\n" );

	entry = cpu_list;
	while (entry->name != 0)
	{
		if (init_cpu(entry))
		{
			int16_t  coef_in[8*8], coef_out[8*8];

			/* same input ramp as for the intra pass */
			for (k = -32; k < 32; k++)
				coef_in[k + 32] = k;

			set_inter_matrix( get_default_inter_matrix() );
			dequant_mpeg_inter(coef_out, coef_in, 31);

			printf( "dequant_mpeg_inter with CPU=%s:  ", entry->name);
			printf( "  Out[]= " );
			for (k = 0; k < 64; k++)
				printf( "[%d]", coef_out[k]);
			printf( "\n" );
		}
		entry++;
	}
}

/*
 * Prints, for every entry of cpu_list accepted by init_cpu(), the
 * per-coefficient difference between an 8x8 block and the same block
 * after an fdct() followed by an idct().
 *
 * The differences are printed as an 8x8 grid (one row per line); nothing
 * is checked automatically.
 */
void test_dct_precision_diffs()
{
	short work[8*8], ref[8*8];
	CPU *entry = cpu_list;

	printf( "\n =====  fdct/idct precision diffs =====\n" );

	for ( ; entry->name != 0; entry++)
	{
		int k;

		/* entries rejected by init_cpu() are silently skipped */
		if (init_cpu(entry))
		{
			/* deterministic input pattern, each value kept in 0..127 */
			for (k = 0; k < 8*8; k++) {
				ref[k] = (k*(7 - k)) & 0x7f;
				work[k] = ref[k];
			}

			/* forward then inverse transform, in place */
			fdct(work);
			idct(work);

			printf( " fdct+idct diffs with CPU=%s: \n", entry->name );
			for (k = 0; k < 8*8; k++) {
				printf( " %d ", work[k] - ref[k]);
				if ((k % 8) == 7)	/* end of a row of 8 */
					printf("\n");
			}
			printf("\n");
		}
	}
}

/*
 * Non-regression test for the MPEG-4 quantizers.
 *
 * For every flat quantization matrix (all 64 entries equal to qm,
 * qm = 1..255) and every quantizer q = 1..max_Q, the same source block is
 * quantized with each CPU of cpu_bug_list ("PLAINC" and "MMX") that
 * init_cpu() accepts.  A 16-bit checksum of the output is stored per CPU,
 * and a "Discrepancy" line is printed whenever two consecutive tested
 * CPUs disagree.  The inter quantizer is exercised first, then the intra
 * one.
 *
 * Only CPUs that were actually initialised get a checksum row, so a CPU
 * rejected by init_cpu() can never be compared through uninitialised data.
 */
void test_quant_bug()
{
	const int max_Q = 31;
	int i, n, qm, q;
	CPU *cpu;
	int16_t  Src[8*8], Dst[8*8];
	uint8_t Quant[8*8];
	CPU cpu_bug_list[] = { { "PLAINC", 0 }, { "MMX   ", XVID_CPU_MMX }, {0,0} };
	/* one row per tested CPU, indexed by quantizer (index 0 is unused) */
	uint16_t Crcs_Inter[2][32];
	uint16_t Crcs_Intra[2][32];
	printf( "\n =====  test MPEG4-quantize bug =====\n" );

	/* source ramp: -2048, -1984, ..., 1984 */
	for(i=0; i<64; ++i) Src[i] = 2048*(i-32)/32;

#if 1
	for(qm=1; qm<=255; ++qm)
	{
		for(i=0; i<8*8; ++i) Quant[i] = qm;
		set_inter_matrix( Quant );

		/* n counts the CPUs that were really tested, not the list entries */
		n = 0;
		for(cpu = cpu_bug_list; cpu->name!=0; ++cpu)
		{
			uint16_t s;

			if (!init_cpu(cpu))
				continue;

			for(q=1; q<=max_Q; ++q) {
				emms();
				quant_mpeg_inter( Dst, Src, q );
				emms();
				/* position-dependent 16-bit checksum of the output block */
				for(s=0, i=0; i<64; ++i) s+=((uint16_t)Dst[i])^i;
				Crcs_Inter[n][q] = s;
			}
			++n;
		}

		for(q=1; q<=max_Q; ++q)
			for(i=0; i<n-1; ++i)
				if (Crcs_Inter[i][q]!=Crcs_Inter[i+1][q])
					printf( "Discrepancy Inter: qm=%d, q=%d  -> %d/%d !\n",
							qm, q, Crcs_Inter[i][q], Crcs_Inter[i+1][q]);
	}
#endif

#if 1
	for(qm=1; qm<=255; ++qm)
	{
		for(i=0; i<8*8; ++i) Quant[i] = qm;
		set_intra_matrix( Quant );

		/* n counts the CPUs that were really tested, not the list entries */
		n = 0;
		for(cpu = cpu_bug_list; cpu->name!=0; ++cpu)
		{
			uint16_t s;

			if (!init_cpu(cpu))
				continue;

			for(q=1; q<=max_Q; ++q) {
				emms();
				quant_mpeg_intra( Dst, Src, q, q);
				emms();
				/* position-dependent 16-bit checksum of the output block */
				for(s=0, i=0; i<64; ++i) s+=((uint16_t)Dst[i])^i;
				Crcs_Intra[n][q] = s;
			}
			++n;
		}

		/* report the intra checksums (not the inter ones) on mismatch */
		for(q=1; q<=max_Q; ++q)
			for(i=0; i<n-1; ++i)
				if (Crcs_Intra[i][q]!=Crcs_Intra[i+1][q])
					printf( "Discrepancy Intra: qm=%d, q=%d  -> %d/%d!\n",
							qm, q, Crcs_Intra[i][q], Crcs_Intra[i+1][q]);
	}
#endif
}

/*********************************************************************
 * main
 *********************************************************************/

/*
 * Entry point: runs the tests selected by the first command-line argument
 * (parsed with atoi(), so a missing or non-numeric argument selects 0).
 *
 *    0      : tests 1 through 6
 *    1..6   : test_dct, test_mb, test_sad, test_transfer, test_quant,
 *             test_cbp (in that order)
 *    7      : test_IEEE1180_compliance with several argument sets
 *    8      : test_dct_saturation(-256, 255)
 *    9      : test_dec; requires <bitstream> <width> <height>, and any
 *             further argument sets test_dec's last parameter to 1
 *             (presumably enabling its checksum output -- confirm against
 *             test_dec's signature)
 *   -1      : test_dct_precision_diffs and test_bugs1
 *   -2      : test_quant_bug
 *
 * Returns 1 when selector 9 is given too few arguments or a non-positive
 * frame size, 0 otherwise.
 */
int main(int argc, char *argv[])
{
	int what = 0;
	if (argc>1) what = atoi(argv[1]);
	if (what==0 || what==1) test_dct();
	if (what==0 || what==2) test_mb();
	if (what==0 || what==3) test_sad();
	if (what==0 || what==4) test_transfer();
	if (what==0 || what==5) test_quant();
	if (what==0 || what==6) test_cbp();

	if (what==7) {
		test_IEEE1180_compliance(-256, 255, 1);
		test_IEEE1180_compliance(-256, 255,-1);
		test_IEEE1180_compliance(  -5,   5, 1);
		test_IEEE1180_compliance(  -5,   5,-1);
		test_IEEE1180_compliance(-300, 300, 1);
		test_IEEE1180_compliance(-300, 300,-1);
	}
	if (what==8) test_dct_saturation(-256, 255);

	if (what==9) {
		int width, height;
		if (argc<5) {
			printf("usage: %s %d [bitstream] [width] [height]\n", argv[0], what);
			return 1;
		}
		width = atoi(argv[3]);
		height = atoi(argv[4]);
		/* atoi() yields 0 for non-numeric text; a zero or negative frame
		 * size must not reach the decoder's output-buffer allocation */
		if (width<=0 || height<=0) {
			printf("invalid frame size '%s' x '%s'\n", argv[3], argv[4]);
			printf("usage: %s %d [bitstream] [width] [height]\n", argv[0], what);
			return 1;
		}
		test_dec(argv[2], width, height, (argc>5));
	}

	if (what==-1) {
		test_dct_precision_diffs();
		test_bugs1();
	}
	if (what==-2)
		test_quant_bug();

	return 0;
}

/*********************************************************************
 * 'Reference' output (except for timing) on an Athlon XP 2200+
 *********************************************************************/

/* as of 07/01/2002, there's a problem with MMX mpeg4-quantization */
/*

 ===== test fdct/idct =====
PLAINC -  1.446 usec       PSNR=13.291  MSE=3.000
MMX    -  -0.260 usec       PSNR=9.611  MSE=7.000
MMXEXT -  -0.293 usec       PSNR=9.611  MSE=7.000
3DNOW  -  1.535 usec       PSNR=13.291  MSE=3.000
3DNOWE -  0.376 usec       PSNR=13.291  MSE=3.000

 ===  test block motion ===
PLAINC - interp- h-round0 0.126 usec       iCrc=8107
PLAINC -           round1 0.136 usec       iCrc=8100
PLAINC - interp- v-round0 0.121 usec       iCrc=8108
PLAINC -           round1 0.127 usec       iCrc=8105
PLAINC - interp-hv-round0 0.192 usec       iCrc=8112
PLAINC -           round1 0.213 usec       iCrc=8103
 --- 
MMX    - interp- h-round0 0.048 usec       iCrc=8107
MMX    -           round1 0.048 usec       iCrc=8100
MMX    - interp- v-round0 0.046 usec       iCrc=8108
MMX    -           round1 0.047 usec       iCrc=8105
MMX    - interp-hv-round0 0.074 usec       iCrc=8112
MMX    -           round1 0.074 usec       iCrc=8103
 --- 
MMXEXT - interp- h-round0 0.019 usec       iCrc=8107
MMXEXT -           round1 0.025 usec       iCrc=8100
MMXEXT - interp- v-round0 0.015 usec       iCrc=8108
MMXEXT -           round1 0.024 usec       iCrc=8105
MMXEXT - interp-hv-round0 0.039 usec       iCrc=8112
MMXEXT -           round1 0.037 usec       iCrc=8103
 --- 
3DNOW  - interp- h-round0 0.019 usec       iCrc=8107
3DNOW  -           round1 0.023 usec       iCrc=8100
3DNOW  - interp- v-round0 0.015 usec       iCrc=8108
3DNOW  -           round1 0.024 usec       iCrc=8105
3DNOW  - interp-hv-round0 0.037 usec       iCrc=8112
3DNOW  -           round1 0.038 usec       iCrc=8103
 --- 
3DNOWE - interp- h-round0 0.022 usec       iCrc=8107
3DNOWE -           round1 0.023 usec       iCrc=8100
3DNOWE - interp- v-round0 0.016 usec       iCrc=8108
3DNOWE -           round1 0.021 usec       iCrc=8105
3DNOWE - interp-hv-round0 0.036 usec       iCrc=8112
3DNOWE -           round1 0.036 usec       iCrc=8103
 --- 

 ======  test SAD ======
PLAINC - sad8    0.165 usec       sad=3776
PLAINC - sad16   0.587 usec       sad=27214
PLAINC - sad16bi 1.290 usec       sad=26274
PLAINC - dev16   1.535 usec       sad=3344
 --- 
MMX    - sad8    0.036 usec       sad=3776
MMX    - sad16   0.113 usec       sad=27214
MMX    - sad16bi 0.250 usec       sad=26274
MMX    - dev16   0.187 usec       sad=3344
 --- 
MMXEXT - sad8    0.015 usec       sad=3776
MMXEXT - sad16   0.046 usec       sad=27214
MMXEXT - sad16bi 0.059 usec       sad=26274
MMXEXT - dev16   0.088 usec       sad=3344
 --- 
3DNOW  - sad8    0.165 usec       sad=3776
3DNOW  - sad16   0.589 usec       sad=27214
3DNOW  - sad16bi 0.119 usec       sad=26274
3DNOW  - dev16   1.541 usec       sad=3344
 --- 
3DNOWE - sad8    0.018 usec       sad=3776
3DNOWE - sad16   0.039 usec       sad=27214
3DNOWE - sad16bi 0.051 usec       sad=26274
3DNOWE - dev16   0.070 usec       sad=3344
 --- 

 ===  test transfer ===
PLAINC - 8to16     0.207 usec       crc=28288
PLAINC - 16to8     0.357 usec       crc=28288
PLAINC - 8to8      0.154 usec       crc=20352
PLAINC - 16to8add  0.391 usec       crc=25536
PLAINC - 8to16sub  0.562 usec       crc1=28064 crc2=16256
PLAINC - 8to16sub2 0.519 usec       crc=22368
 --- 
MMX    - 8to16     0.048 usec       crc=28288
MMX    - 16to8     0.205 usec       crc=28288
MMX    - 8to8      -0.158 usec       crc=20352
MMX    - 16to8add  0.015 usec       crc=25536
MMX    - 8to16sub  0.203 usec       crc1=28064 crc2=16256
MMX    - 8to16sub2 0.087 usec       crc=22368
 --- 
MMXEXT - 8to16     0.013 usec       crc=28288
MMXEXT - 16to8     0.011 usec       crc=28288
MMXEXT - 8to8      -0.023 usec       crc=20352
MMXEXT - 16to8add  0.023 usec       crc=25536
MMXEXT - 8to16sub  0.072 usec       crc1=28064 crc2=16256
MMXEXT - 8to16sub2 0.093 usec       crc=22368
 --- 
3DNOW  - 8to16     0.192 usec       crc=28288
3DNOW  - 16to8     0.367 usec       crc=28288
3DNOW  - 8to8      0.132 usec       crc=20352
3DNOW  - 16to8add  0.440 usec       crc=25536
3DNOW  - 8to16sub  0.557 usec       crc1=28064 crc2=16256
3DNOW  - 8to16sub2 0.691 usec       crc=22368
 --- 
3DNOWE - 8to16     0.004 usec       crc=28288
3DNOWE - 16to8     0.019 usec       crc=28288
3DNOWE - 8to8      -0.294 usec       crc=20352
3DNOWE - 16to8add  0.028 usec       crc=25536
3DNOWE - 8to16sub  0.065 usec       crc1=28064 crc2=16256
3DNOWE - 8to16sub2 0.027 usec       crc=22368
 --- 

 =====  test quant =====
PLAINC -   quant_mpeg_intra 67.757 usec       crc=29809
PLAINC -   quant_mpeg_inter 68.482 usec       crc=12574
PLAINC - dequant_mpeg_intra 20.764 usec       crc=24052
PLAINC - dequant_mpeg_inter 24.413 usec       crc=63847
PLAINC -   quant_h263_intra 16.446 usec       crc=25662
PLAINC -   quant_h263_inter 14.499 usec       crc=23972
PLAINC - dequant_h263_intra 16.419 usec       crc=49900
PLAINC - dequant_h263_inter 18.679 usec       crc=48899
 --- 
MMX    -   quant_mpeg_intra 8.299 usec       crc=3459
*** CRC ERROR! ***
MMX    -   quant_mpeg_inter 7.078 usec       crc=13247
*** CRC ERROR! ***
MMX    - dequant_mpeg_intra 3.455 usec       crc=24052
MMX    - dequant_mpeg_inter 4.483 usec       crc=63847
MMX    -   quant_h263_intra 2.597 usec       crc=25662
MMX    -   quant_h263_inter 2.151 usec       crc=23972
MMX    - dequant_h263_intra 2.684 usec       crc=49900
MMX    - dequant_h263_inter 2.647 usec       crc=48899
 --- 
MMXEXT -   quant_mpeg_intra 3.878 usec       crc=29809
MMXEXT -   quant_mpeg_inter 4.112 usec       crc=12574
MMXEXT - dequant_mpeg_intra 3.452 usec       crc=24052
MMXEXT - dequant_mpeg_inter 4.473 usec       crc=63847
MMXEXT -   quant_h263_intra 2.608 usec       crc=25662
MMXEXT -   quant_h263_inter 2.145 usec       crc=23972
MMXEXT - dequant_h263_intra 2.478 usec       crc=49900
MMXEXT - dequant_h263_inter 2.450 usec       crc=48899
 --- 
3DNOW  -   quant_mpeg_intra 66.051 usec       crc=29809
3DNOW  -   quant_mpeg_inter 73.494 usec       crc=12574
3DNOW  - dequant_mpeg_intra 20.374 usec       crc=24052
3DNOW  - dequant_mpeg_inter 23.645 usec       crc=63847
3DNOW  -   quant_h263_intra 16.292 usec       crc=25662
3DNOW  -   quant_h263_inter 14.322 usec       crc=23972
3DNOW  - dequant_h263_intra 16.613 usec       crc=49900
3DNOW  - dequant_h263_inter 18.382 usec       crc=48899
 --- 
3DNOWE -   quant_mpeg_intra 66.140 usec       crc=29809
3DNOWE -   quant_mpeg_inter 68.454 usec       crc=12574
3DNOWE - dequant_mpeg_intra 2.881 usec       crc=24052
3DNOWE - dequant_mpeg_inter 4.155 usec       crc=63847
3DNOWE -   quant_h263_intra 1.451 usec       crc=25662
3DNOWE -   quant_h263_inter 1.849 usec       crc=23972
3DNOWE - dequant_h263_intra 2.101 usec       crc=49900
3DNOWE - dequant_h263_inter 2.109 usec       crc=48899
 --- 

 =====  test cbp =====
PLAINC -   calc_cbp#1 0.090 usec       cbp=0x15
PLAINC -   calc_cbp#2 0.086 usec       cbp=0x38
PLAINC -   calc_cbp#3 0.087 usec       cbp=0xf
PLAINC -   calc_cbp#4 0.114 usec       cbp=0x5
 --- 
MMX    -   calc_cbp#1 0.061 usec       cbp=0x15
MMX    -   calc_cbp#2 0.063 usec       cbp=0x38
MMX    -   calc_cbp#3 0.061 usec       cbp=0xf
MMX    -   calc_cbp#4 0.060 usec       cbp=0x5
 --- 
MMXEXT -   calc_cbp#1 0.062 usec       cbp=0x15
MMXEXT -   calc_cbp#2 0.060 usec       cbp=0x38
MMXEXT -   calc_cbp#3 0.062 usec       cbp=0xf
MMXEXT -   calc_cbp#4 0.061 usec       cbp=0x5
 --- 
3DNOW  -   calc_cbp#1 0.089 usec       cbp=0x15
3DNOW  -   calc_cbp#2 0.087 usec       cbp=0x38
3DNOW  -   calc_cbp#3 0.087 usec       cbp=0xf
3DNOW  -   calc_cbp#4 0.116 usec       cbp=0x5
 --- 
3DNOWE -   calc_cbp#1 0.050 usec       cbp=0x15
3DNOWE -   calc_cbp#2 0.051 usec       cbp=0x38
3DNOWE -   calc_cbp#3 0.050 usec       cbp=0xf
3DNOWE -   calc_cbp#4 0.049 usec       cbp=0x5
 --- 

*/

/*
 * ViewVC page footer (web-export residue, not part of xvid_bench.c):
 *   No admin address has been configured
 *   ViewVC Help
 *   Powered by ViewVC 1.0.4
 */