--- trunk/xvidcore/examples/xvid_bench.c 2002/07/10 13:23:29 279 +++ trunk/xvidcore/examples/xvid_bench.c 2005/08/05 20:49:23 1629 @@ -1,44 +1,49 @@ -/************************************************************************** +/***************************************************************************** * - * XVID MPEG-4 VIDEO CODEC - Unit tests and benches + * XVID MPEG-4 VIDEO CODEC + * - Unit tests and benches - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Copyright(C) 2002 Pascal Massimino * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - *************************************************************************/ + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * $Id: xvid_bench.c,v 1.23 2005-08-05 20:49:23 Skal Exp $ + * + ****************************************************************************/ -/************************************************************************ +/***************************************************************************** * * 'Reference' output is at the end of file. - * Don't take the checksums and crc too seriouly, they aren't - * bullet-proof (should plug some .md5 here)... * * compiles with something like: * gcc -o xvid_bench xvid_bench.c -I../src/ -lxvidcore -lm * - * History: - * - * 06.06.2002 initial coding -Skal- - * - *************************************************************************/ + ****************************************************************************/ #include #include -#include // for gettimeofday -#include // for memset +#include /* for memset */ #include +#ifndef WIN32 +#include /* for gettimeofday */ +#else +#include +#endif + + #include "xvid.h" // inner guts @@ -47,8 +52,7 @@ #include "image/colorspace.h" #include "image/interpolate8x8.h" #include "utils/mem_transfer.h" -#include "quant/quant_h263.h" -#include "quant/quant_mpeg4.h" +#include "quant/quant.h" #include "motion/sad.h" #include "utils/emms.h" #include "utils/timer.h" @@ -56,34 +60,47 @@ #include "bitstream/cbp.h" #include -const int speed_ref = 100; // on slow machines, decrease this value + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +int speed_ref = 100; /* on slow machines, decrease this value */ +int verbose = 0; +unsigned int cpu_mask; /********************************************************************* * misc *********************************************************************/ - /* returns time in micro-s*/ +/* returns time in micro-s*/ double gettime_usec() { - struct timeval tv; - gettimeofday(&tv, 0); - return tv.tv_sec*1.0e6 + tv.tv_usec; +#ifndef WIN32 + struct timeval tv; + gettimeofday(&tv, 0); + return tv.tv_sec*1.0e6 + tv.tv_usec; +#else + clock_t clk; + clk = clock(); + return clk * 1000. / CLOCKS_PER_SEC; /* clock() returns time in Milliseconds */ +#endif } - /* returns squared deviates (mean(v*v)-mean(v)^2) of a 8x8 block */ +/* returns squared deviates (mean(v*v)-mean(v)^2) of a 8x8 block */ double sqr_dev(uint8_t v[8*8]) { - double sum=0.; - double sum2=0.; - int n; - for (n=0;n<8*8;n++) - { - sum += v[n]; - sum2 += v[n]*v[n]; - } - sum2 /= n; - sum /= n; - return sum2-sum*sum; + double sum=0.; + double sum2=0.; + int n; + for (n=0;n<8*8;n++) + { + sum += v[n]; + sum2 += v[n]*v[n]; + } + sum2 /= n; + sum /= n; + return sum2-sum*sum; } /********************************************************************* @@ -91,49 +108,194 @@ *********************************************************************/ typedef struct { - const char *name; - unsigned int cpu; + const char *name; + unsigned int cpu; } CPU; -CPU cpu_list[] = -{ { "PLAINC", 0 } -, { "MMX ", XVID_CPU_MMX } -, { "MMXEXT", XVID_CPU_MMXEXT | XVID_CPU_MMX } -, { "SSE2 ", XVID_CPU_SSE2 | XVID_CPU_MMX } -, { "3DNOW ", XVID_CPU_3DNOW } -, { "3DNOWE", XVID_CPU_3DNOWEXT } -, { "IA64 ", XVID_CPU_IA64 } -//, { "TSC ", XVID_CPU_TSC } -, { 0, 0 } } - -, cpu_short_list[] = -{ { "PLAINC", 0 } -, { "MMX ", XVID_CPU_MMX } -//, { "MMXEXT", XVID_CPU_MMXEXT | XVID_CPU_MMX } -, { "IA64 ", XVID_CPU_IA64 } -, { 0, 0 } } - -, cpu_short_list2[] = -{ { "PLAINC", 0 } -, { "MMX ", XVID_CPU_MMX } -, { "SSE2 ", XVID_CPU_SSE2 | XVID_CPU_MMX } -, { 0, 0 } }; +CPU cpu_list[] = { + { "PLAINC ", 0 }, +#ifdef ARCH_IS_IA32 + { "MMX ", XVID_CPU_MMX }, + { "MMXEXT ", XVID_CPU_MMXEXT | XVID_CPU_MMX }, + { "SSE2 ", XVID_CPU_SSE2 | XVID_CPU_MMX }, + { "3DNOW ", XVID_CPU_3DNOW }, + { "3DNOWE ", XVID_CPU_3DNOW | XVID_CPU_3DNOWEXT }, +#endif +#ifdef ARCH_IS_PPC + { "ALTIVEC", XVID_CPU_ALTIVEC }, +#endif +#ifdef ARCH_IS_X86_64 + { "X86_64 ", XVID_CPU_ASM}, +#endif +#ifdef ARCH_IS_IA64 +// { "IA64 ", XVID_CPU_IA64 }, +#endif +// { "TSC ", XVID_CPU_TSC }, + { 0, 0 } +}; int init_cpu(CPU *cpu) { - int xerr, cpu_type; - XVID_INIT_PARAM xinit; + xvid_gbl_info_t xinfo; - cpu_type = check_cpu_features() & cpu->cpu; - xinit.cpu_flags = cpu_type | XVID_CPU_FORCE; - // xinit.cpu_flags = XVID_CPU_MMX | XVID_CPU_FORCE; - xerr = xvid_init(NULL, 0, &xinit, NULL); - if (cpu->cpu>0 && (cpu_type==0 || xerr!=XVID_ERR_OK)) { - printf( "%s - skipped...\n", cpu->name ); - return 0; - } - return 1; + /* Get the available CPU flags */ + memset(&xinfo, 0, sizeof(xinfo)); + xinfo.version = XVID_VERSION; + xvid_global(NULL, XVID_GBL_INFO, &xinfo, NULL); + + /* Are we trying to test a subset of the host CPU features */ + if ((xinfo.cpu_flags & cpu->cpu) == cpu->cpu) { + int xerr; + xvid_gbl_init_t xinit; + memset(&xinit, 0, sizeof(xinit)); + xinit.cpu_flags = cpu->cpu | XVID_CPU_FORCE; + xinit.version = XVID_VERSION; + xerr = xvid_global(NULL, XVID_GBL_INIT, &xinit, NULL); + if (xerr==XVID_ERR_FAIL) { + /* libxvidcore failed to init */ + return 0; + } + } else { + /* The host CPU doesn't support some required feature for this test */ + return(0); + } + return 1; +} + +#define CRC32_REMAINDER 0xCBF43926 +#define CRC32_INITIAL 0xffffffff + +#define DO1(c, crc) ((crc) = crc32tab[((unsigned int)((crc)>>24) ^ (*c++)) & 0xff] ^ ((crc) << 8)) +#define DO2(c, crc) DO1(c, crc); DO1(c, crc); +#define DO4(c, crc) DO2(c, crc); DO2(c, crc); +#define DO8(c, crc) DO4(c, crc); DO4(c, crc); + +/****************************************************************************** +* Precomputed AAL5 CRC32 lookup table +******************************************************************************/ + +static unsigned long crc32tab[256] = { + + 0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L, + 0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L, + 0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L, + 0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL, + 0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L, + 0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L, + 0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L, + 0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL, + 0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L, + 0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L, + 0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L, + 0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL, + 0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L, + 0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L, + 0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L, + 0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL, + 0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL, + 0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L, + 0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L, + 0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL, + 0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL, + 0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L, + 0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L, + 0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL, + 0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL, + 0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L, + 0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L, + 0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL, + 0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL, + 0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L, + 0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L, + 0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL, + 0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L, + 0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL, + 0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL, + 0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L, + 0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L, + 0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL, + 0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL, + 0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L, + 0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L, + 0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL, + 0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL, + 0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L, + 0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L, + 0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL, + 0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL, + 0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L, + 0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L, + 0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL, + 0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L, + 0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L, + 0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L, + 0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL, + 0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L, + 0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L, + 0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L, + 0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL, + 0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L, + 0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L, + 0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L, + 0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL, + 0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L, + 0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L + +}; + +uint32_t +calc_crc(uint8_t *mem, int len, uint32_t crc) +{ + while( len >= 8) { + DO8(mem, crc); + len -= 8; + } + + while( len ) { + DO1(mem, crc); + len--; + } + + return crc; +} + +void byte_swap(uint8_t *mem, int len, int element_size) { +#ifdef ARCH_IS_BIG_ENDIAN + int i; + + if(element_size == 1) { + /* No need to swap */ + } else if(element_size == 2) { + uint8_t temp[2]; + + for(i=0; i < (len/2); i++ ) { + temp[0] = mem[0]; + temp[1] = mem[1]; + mem[0] = temp[1]; + mem[1] = temp[0]; + + mem += 2; + } + } else if(element_size == 4) { + uint8_t temp[4]; + + for(i=0; i < (len/4); i++ ) { + temp[0] = mem[0]; + temp[1] = mem[1]; + temp[2] = mem[2]; + temp[3] = mem[3]; + mem[0] = temp[3]; + mem[1] = temp[2]; + mem[2] = temp[1]; + mem[3] = temp[0]; + + mem += 4; + } + } else { + printf("ERROR: byte_swap unsupported element_size(%u)\n", element_size); + } +#endif } /********************************************************************* @@ -144,52 +306,54 @@ void test_dct() { - const int nb_tests = 300*speed_ref; - int tst; - CPU *cpu; - int i; - short iDst0[8*8], iDst[8*8], fDst[8*8]; - double overhead; - - printf( "\n ===== test fdct/idct =====\n" ); - - for(i=0; i<8*8; ++i) iDst0[i] = (i*7-i*i) & 0x7f; - overhead = gettime_usec(); - for(tst=0; tstname!=0; ++cpu) - { - double t, PSNR, MSE; - - if (!init_cpu(cpu)) - continue; + const int nb_tests = 300*speed_ref; + int tst; + CPU *cpu; + int i; + DECLARE_ALIGNED_MATRIX(iDst0, 8, 8, short, 16); + DECLARE_ALIGNED_MATRIX(iDst, 8, 8, short, 16); + DECLARE_ALIGNED_MATRIX(fDst, 8, 8, short, 16); + double overhead; + + printf( "\n ===== test fdct/idct =====\n" ); + + for(i=0; i<8*8; ++i) iDst0[i] = (i*7-i*i) & 0x7f; + overhead = gettime_usec(); + for(tst=0; tstname, t, PSNR, MSE ); - if (ABS(MSE)>=64) printf( "*** CRC ERROR! ***\n" ); - } + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + double t, PSNR, MSE; + + if (!init_cpu(cpu)) + continue; + + t = gettime_usec(); + emms(); + for(tst=0; tstname, t, PSNR, MSE, + (ABS(MSE)>=64)? "| ERROR" :""); + } } /********************************************************************* @@ -198,60 +362,66 @@ void test_sad() { - const int nb_tests = 2000*speed_ref; - int tst; - CPU *cpu; - int i; - uint8_t Cur[16*16], Ref1[16*16], Ref2[16*16]; - - printf( "\n ====== test SAD ======\n" ); - for(i=0; i<16*16;++i) { - Cur[i] = (i/5) ^ 0x05; - Ref1[i] = (i + 0x0b) & 0xff; - Ref2[i] = i ^ 0x76; - } - - for(cpu = cpu_list; cpu->name!=0; ++cpu) - { - double t; - uint32_t s; - if (!init_cpu(cpu)) - continue; + const int nb_tests = 2000*speed_ref; + int tst; + CPU *cpu; + int i; + DECLARE_ALIGNED_MATRIX(Cur, 16, 16, uint8_t, 16); + DECLARE_ALIGNED_MATRIX(Ref1, 16, 16, uint8_t, 16); + DECLARE_ALIGNED_MATRIX(Ref2, 16, 16, uint8_t, 16); + + printf( "\n ====== test SAD ======\n" ); + for(i=0; i<16*16;++i) { + Cur[i] = (i/5) ^ 0x05; + Ref1[i] = (i + 0x0b) & 0xff; + Ref2[i] = i ^ 0x76; + } - t = gettime_usec(); - emms(); - for(tst=0; tstname, t, s ); - if (s!=3776) printf( "*** CRC ERROR! ***\n" ); - - t = gettime_usec(); - emms(); - for(tst=0; tstname, t, s ); - if (s!=27214) printf( "*** CRC ERROR! ***\n" ); - - t = gettime_usec(); - emms(); - for(tst=0; tstname, t, s ); - if (s!=26274) printf( "*** CRC ERROR! ***\n" ); - - t = gettime_usec(); - emms(); - for(tst=0; tstname, t, s ); - if (s!=3344) printf( "*** CRC ERROR! ***\n" ); + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + double t; + uint32_t s; + if (!init_cpu(cpu)) + continue; + + t = gettime_usec(); + emms(); + for(tst=0; tstname, t, s, + (s!=3776)?"| ERROR": "" ); + + t = gettime_usec(); + emms(); + for(tst=0; tstname, t, s, + (s!=27214)?"| ERROR": "" ); + + t = gettime_usec(); + emms(); + for(tst=0; tstname, t, s, + (s!=26274)?"| ERROR": "" ); + + t = gettime_usec(); + emms(); + for(tst=0; tstname, t, s, + (s!=3344)?"| ERROR": "" ); - printf( " --- \n" ); - } + printf( " --- \n" ); + } } /********************************************************************* @@ -259,91 +429,97 @@ *********************************************************************/ #define ENTER \ - for(i=0; i<16*8; ++i) Dst[i] = 0; \ - t = gettime_usec(); \ - emms(); +for(i=0; i<16*8; ++i) Dst[i] = 0; \ +t = gettime_usec(); \ +emms(); #define LEAVE \ - emms(); \ - t = (gettime_usec() - t) / nb_tests; \ - iCrc = 0; \ - for(i=0; i<16*8; ++i) { iCrc += Dst[i]^i; } +emms(); \ +t = (gettime_usec() - t) / nb_tests; \ + iCrc = calc_crc((uint8_t*)Dst, sizeof(Dst), CRC32_INITIAL) #define TEST_MB(FUNC, R) \ - ENTER \ - for(tst=0; tstname!=0; ++cpu) - { - double t; - int tst, i, iCrc; + const int nb_tests = 2000*speed_ref; + CPU *cpu; + const uint8_t Src0[16*9] = { + /* try to have every possible combinaison of rounding... */ + 0, 0, 1, 0, 2, 0, 3, 0, 4 ,0,0,0, 0,0,0,0, + 0, 1, 1, 1, 2, 1, 3, 1, 3 ,0,0,0, 0,0,0,0, + 0, 2, 1, 2, 2, 2, 3, 2, 2 ,0,0,0, 0,0,0,0, + 0, 3, 1, 3, 2, 3, 3, 3, 1 ,0,0,0, 0,0,0,0, + 1, 3, 0, 2, 1, 0, 2, 3, 4 ,0,0,0, 0,0,0,0, + 2, 2, 1, 2, 0, 1, 3, 5, 3 ,0,0,0, 0,0,0,0, + 3, 1, 2, 3, 1, 2, 2, 6, 2 ,0,0,0, 0,0,0,0, + 1, 0, 1, 3, 0, 3, 1, 6, 1 ,0,0,0, 0,0,0,0, + 4, 3, 2, 1, 2, 3, 4, 0, 3 ,0,0,0, 0,0,0,0 + }; + uint8_t Dst[16*8] = {0}; + + printf( "\n === test block motion ===\n" ); + + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + double t; + int tst, i, iCrc; + + if (!init_cpu(cpu)) + continue; + + TEST_MB(interpolate8x8_halfpel_h, 0); + printf("%s - interp- h-round0 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0x115381ba)?"| ERROR": "" ); + + TEST_MB(interpolate8x8_halfpel_h, 1); + printf("%s - round1 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0x2b1f528f)?"| ERROR": "" ); + + + TEST_MB(interpolate8x8_halfpel_v, 0); + printf("%s - interp- v-round0 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0x423cdcc7)?"| ERROR": "" ); + + TEST_MB(interpolate8x8_halfpel_v, 1); + printf("%s - round1 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0x42202efe)?"| ERROR": "" ); + + + TEST_MB(interpolate8x8_halfpel_hv, 0); + printf("%s - interp-hv-round0 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0xd198d387)?"| ERROR": "" ); + + TEST_MB(interpolate8x8_halfpel_hv, 1); + printf("%s - round1 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0x9ecfd921)?"| ERROR": "" ); - if (!init_cpu(cpu)) - continue; - - TEST_MB(interpolate8x8_halfpel_h, 0); - printf( "%s - interp- h-round0 %.3f usec iCrc=%d\n", cpu->name, t, iCrc ); - if (iCrc!=8107) printf( "*** CRC ERROR! ***\n" ); - - TEST_MB(interpolate8x8_halfpel_h, 1); - printf( "%s - round1 %.3f usec iCrc=%d\n", cpu->name, t, iCrc ); - if (iCrc!=8100) printf( "*** CRC ERROR! ***\n" ); - - - TEST_MB(interpolate8x8_halfpel_v, 0); - printf( "%s - interp- v-round0 %.3f usec iCrc=%d\n", cpu->name, t, iCrc ); - if (iCrc!=8108) printf( "*** CRC ERROR! ***\n" ); - - TEST_MB(interpolate8x8_halfpel_v, 1); - printf( "%s - round1 %.3f usec iCrc=%d\n", cpu->name, t, iCrc ); - if (iCrc!=8105) printf( "*** CRC ERROR! ***\n" ); - - - TEST_MB(interpolate8x8_halfpel_hv, 0); - printf( "%s - interp-hv-round0 %.3f usec iCrc=%d\n", cpu->name, t, iCrc ); - if (iCrc!=8112) printf( "*** CRC ERROR! ***\n" ); - TEST_MB(interpolate8x8_halfpel_hv, 1); - printf( "%s - round1 %.3f usec iCrc=%d\n", cpu->name, t, iCrc ); - if (iCrc!=8103) printf( "*** CRC ERROR! ***\n" ); - - - // this is a new function, as of 06.06.2002 + /* this is a new function, as of 06.06.2002 */ #if 0 - TEST_MB2(interpolate8x8_avrg); - printf( "%s - interpolate8x8_c %.3f usec iCrc=%d\n", cpu->name, t, iCrc ); - if (iCrc!=8107) printf( "*** CRC ERROR! ***\n" ); + TEST_MB2(interpolate8x8_avrg); + printf("%s - interpolate8x8_c %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=8107)?"| ERROR": "" ); #endif - printf( " --- \n" ); - } + printf( " --- \n" ); + } } /********************************************************************* @@ -351,117 +527,132 @@ *********************************************************************/ #define INIT_TRANSFER \ - for(i=0; i<8*32; ++i) { \ - Src8[i] = i; Src16[i] = i; \ - Dst8[i] = 0; Dst16[i] = 0; \ - Ref1[i] = i^0x27; \ - Ref2[i] = i^0x51; \ - } +for(i=0; i<8*32; ++i) { \ +Src8[i] = i; Src16[i] = i; \ +Dst8[i] = 0; Dst16[i] = 0; \ +Ref1[i] = i^0x27; \ +Ref2[i] = i^0x51; \ +} #define TEST_TRANSFER_BEGIN(DST) \ - INIT_TRANSFER \ - overhead = -gettime_usec(); \ - for(tst=0; tstname!=0; ++cpu) - { - double t, overhead; - int tst, s; - - if (!init_cpu(cpu)) - continue; + const int nb_tests = 4000*speed_ref; + int i; + CPU *cpu; +// uint8_t Src8[8*32], Dst8[8*32], Ref1[8*32], Ref2[8*32]; +// int16_t Src16[8*32], Dst16[8*32]; + DECLARE_ALIGNED_MATRIX(Src8, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Dst8, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Ref1, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Ref2, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Src16, 8, 32, uint16_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Dst16, 8, 32, uint16_t, CACHE_LINE); + + printf( "\n === test transfer ===\n" ); + + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + double t, overhead; + int tst, s; + + if (!init_cpu(cpu)) + continue; + + TEST_TRANSFER(transfer_8to16copy, Dst16, Src8); + printf("%s - 8to16 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0x115814bb)?"| ERROR": ""); + + TEST_TRANSFER(transfer_16to8copy, Dst8, Src16); + printf( "%s - 16to8 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0xee7ccbb4)?"| ERROR": ""); + + TEST_TRANSFER(transfer8x8_copy, Dst8, Src8); + printf("%s - 8to8 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0xd37b3295)?"| ERROR": ""); + + TEST_TRANSFER(transfer_16to8add, Dst8, Src16); + printf("%s - 16to8add %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0xdd817bf4)?"| ERROR": "" ); + + TEST_TRANSFER2(transfer_8to16sub, Dst16, Src8, Ref1); + { + int s1, s2; + s1 = calc_crc((uint8_t*)Dst16, 8*32*sizeof(Dst16[0]), CRC32_INITIAL); + s2 = calc_crc((uint8_t*)Src8, 8*32*sizeof(Src8[0]), CRC32_INITIAL); + printf("%s - 8to16sub %.3f usec crc32(1)=0x%08x crc32(2)=0x%08x %s %s\n", + cpu->name, t, s1, s2, + (s1!=0xa1e07163)?"| ERROR1": "", + (s2!=0xd86c5d23)?"| ERROR2": "" ); + } + + TEST_TRANSFER3(transfer_8to16sub2, Dst16, Src8, Ref1, Ref2); + printf("%s - 8to16sub2 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0x99b6c4c7)?"| ERROR": "" ); - TEST_TRANSFER(transfer_8to16copy, Dst16, Src8); - printf( "%s - 8to16 %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=28288) printf( "*** CRC ERROR! ***\n" ); - - TEST_TRANSFER(transfer_16to8copy, Dst8, Src16); - printf( "%s - 16to8 %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=28288) printf( "*** CRC ERROR! ***\n" ); - - TEST_TRANSFER(transfer8x8_copy, Dst8, Src8); - printf( "%s - 8to8 %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=20352) printf( "*** CRC ERROR! ***\n" ); - - TEST_TRANSFER(transfer_16to8add, Dst8, Src16); - printf( "%s - 16to8add %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=25536) printf( "*** CRC ERROR! ***\n" ); - - TEST_TRANSFER2(transfer_8to16sub, Dst16, Src8, Ref1); - printf( "%s - 8to16sub %.3f usec crc1=%d ", cpu->name, t, s ); - if (s!=28064) printf( "*** CRC ERROR! ***\n" ); - s = 0; for(i=0; i<8*32; ++i) { s += (Src8[i]-Ref1[i])&i; } - printf( "crc2=%d\n", s); - if (s!=16256) printf( "*** CRC ERROR! ***\n" ); -#if 1 - TEST_TRANSFER3(transfer_8to16sub2, Dst16, Src8, Ref1, Ref2); - printf( "%s - 8to16sub2 %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=20384) printf( "*** CRC ERROR! ***\n" ); -// for(i=0; i<64; ++i) printf( "[%d]", Dst16[i]); -// printf("\n"); -#endif - printf( " --- \n" ); - } + printf( " --- \n" ); + } } /********************************************************************* @@ -469,164 +660,331 @@ *********************************************************************/ #define TEST_QUANT(FUNC, DST, SRC) \ - t = gettime_usec(); \ - for(s=0,qm=1; qm<=255; ++qm) { \ - for(i=0; i<8*8; ++i) Quant[i] = qm; \ - set_inter_matrix( Quant ); \ - emms(); \ - for(q=1; q<=max_Q; ++q) { \ - for(tst=0; tst>16) +t = gettime_usec(); \ +for(s=CRC32_INITIAL,qm=1; qm<=255; ++qm) { \ + for(i=0; i<8*8; ++i) Quant[i] = qm; \ + set_inter_matrix( mpeg_quant_matrices, Quant ); \ + emms(); \ + for(q=1; q<=max_Q; ++q) { \ + for(tst=0; tst>16) +t = gettime_usec(); \ +for(s=CRC32_INITIAL,qm=1; qm<=255; ++qm) { \ + for(i=0; i<8*8; ++i) Quant[i] = qm; \ + set_intra_matrix( mpeg_quant_matrices, Quant ); \ + emms(); \ + for(q=1; q<=max_Q; ++q) { \ + for(tst=0; tstname!=0; ++cpu) - { - double t, overhead; - int tst, q; - uint32_t s; + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + double t, overhead; + int tst, q; + uint32_t s; + + if (!init_cpu(cpu)) + continue; + + // exhaustive tests to compare against the (ref) C-version + TEST_INTRA(quant_h263_intra_c, quant_h263_intra, 2048); + TEST_INTRA(dequant_h263_intra_c, dequant_h263_intra , 512 ); + TEST_INTER(quant_h263_inter_c, quant_h263_inter , 2048); + TEST_INTER(dequant_h263_inter_c, dequant_h263_inter , 512 ); + + overhead = -gettime_usec(); + for(s=0,qm=1; qm<=255; ++qm) { + for(i=0; i<8*8; ++i) Quant[i] = qm; + set_inter_matrix(mpeg_quant_matrices, Quant ); + for(q=1; q<=max_Q; ++q) + for(i=0; i<64; ++i) s+=Dst[i]^i^qm; + } + overhead += gettime_usec(); + + TEST_QUANT2(quant_mpeg_intra, Dst, Src); + printf("%s - quant_mpeg_intra %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0xfd6a21a4)? "| ERROR": ""); + + TEST_QUANT(quant_mpeg_inter, Dst, Src); + printf("%s - quant_mpeg_inter %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0xf6de7757)?"| ERROR": ""); + + TEST_QUANT2(dequant_mpeg_intra, Dst, Src); + printf("%s - dequant_mpeg_intra %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0x2def7bc7)?"| ERROR": ""); + + TEST_QUANT(dequant_mpeg_inter, Dst, Src); + printf("%s - dequant_mpeg_inter %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0xd878c722)?"| ERROR": ""); + + TEST_QUANT2(quant_h263_intra, Dst, Src); + printf("%s - quant_h263_intra %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0x2eba9d43)?"| ERROR": ""); + + TEST_QUANT(quant_h263_inter, Dst, Src); + printf("%s - quant_h263_inter %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0xbd315a7e)?"| ERROR": ""); + + TEST_QUANT2(dequant_h263_intra, Dst, Src); + printf("%s - dequant_h263_intra %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0x9841212a)?"| ERROR": ""); + + TEST_QUANT(dequant_h263_inter, Dst, Src); + printf("%s - dequant_h263_inter %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0xe7df8fba)?"| ERROR": ""); - if (!init_cpu(cpu)) - continue; + printf( " --- \n" ); + } +} - overhead = -gettime_usec(); - for(s=0,qm=1; qm<=255; ++qm) { - for(i=0; i<8*8; ++i) Quant[i] = qm; - set_inter_matrix( Quant ); - for(q=1; q<=max_Q; ++q) - for(i=0; i<64; ++i) s+=Dst[i]^i^qm; - } - overhead += gettime_usec(); +/********************************************************************* + * test distortion operators + *********************************************************************/ -#if 1 - TEST_QUANT2(quant4_intra, Dst, Src); - printf( "%s - quant4_intra %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=29809) printf( "*** CRC ERROR! ***\n" ); - - TEST_QUANT(quant4_inter, Dst, Src); - printf( "%s - quant4_inter %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=12574) printf( "*** CRC ERROR! ***\n" ); -#endif -#if 1 - TEST_QUANT2(dequant4_intra, Dst, Src); - printf( "%s - dequant4_intra %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=24052) printf( "*** CRC ERROR! ***\n" ); - - TEST_QUANT(dequant4_inter, Dst, Src); - printf( "%s - dequant4_inter %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=63847) printf( "*** CRC ERROR! ***\n" ); -#endif -#if 1 - TEST_QUANT2(quant_intra, Dst, Src); - printf( "%s - quant_intra %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=25662) printf( "*** CRC ERROR! ***\n" ); - - TEST_QUANT(quant_inter, Dst, Src); - printf( "%s - quant_inter %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=23972) printf( "*** CRC ERROR! ***\n" ); -#endif -#if 1 - TEST_QUANT2(dequant_intra, Dst, Src); - printf( "%s - dequant_intra %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=49900) printf( "*** CRC ERROR! ***\n" ); - - TEST_QUANT(dequant_inter, Dst, Src); - printf( "%s - dequant_inter %.3f usec crc=%d\n", cpu->name, t, s ); - if (s!=48899) printf( "*** CRC ERROR! ***\n" ); -#endif - printf( " --- \n" ); - } +static void ieee_reseed(long s); +static long ieee_rand(int Min, int Max); + +#define TEST_SSE(FUNCTION, SRC1, SRC2, STRIDE) \ + do { \ + t = gettime_usec(); \ + tst = nb_tests; \ + while((tst--)>0) sse = (FUNCTION)((SRC1), (SRC2), (STRIDE)); \ + emms(); \ + t = (gettime_usec() - t)/(double)nb_tests; \ + } while(0) + + +void test_sse() +{ + const int nb_tests = 100000*speed_ref; + int i; + CPU *cpu; + DECLARE_ALIGNED_MATRIX(Src1, 8, 8, int16_t, 16); + DECLARE_ALIGNED_MATRIX(Src2, 8, 8, int16_t, 16); + DECLARE_ALIGNED_MATRIX(Src3, 8, 8, int16_t, 16); + DECLARE_ALIGNED_MATRIX(Src4, 8, 8, int16_t, 16); + + printf( "\n ===== test sse =====\n" ); + + ieee_reseed(1); + for(i=0; i<64; ++i) { + Src1[i] = ieee_rand(-2048, 2047); + Src2[i] = ieee_rand(-2048, 2047); + Src3[i] = ieee_rand(-2048, 2047); + Src4[i] = ieee_rand(-2048, 2047); + } + + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + double t; + int tst, sse; + + if (!init_cpu(cpu)) + continue; + + /* 16 bit element blocks */ + TEST_SSE(sse8_16bit, Src1, Src2, 16); + printf("%s - sse8_16bit#1 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=182013834)?"| ERROR": ""); + TEST_SSE(sse8_16bit, Src1, Src3, 16); + printf("%s - sse8_16bit#2 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=142545203)?"| ERROR": ""); + TEST_SSE(sse8_16bit, Src1, Src4, 16); + printf("%s - sse8_16bit#3 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=146340935)?"| ERROR": ""); + TEST_SSE(sse8_16bit, Src2, Src3, 16); + printf("%s - sse8_16bit#4 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=130136661)?"| ERROR": ""); + TEST_SSE(sse8_16bit, Src2, Src4, 16); + printf("%s - sse8_16bit#5 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=136870353)?"| ERROR": ""); + TEST_SSE(sse8_16bit, Src3, Src4, 16); + printf("%s - sse8_16bit#6 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=164107772)?"| ERROR": ""); + + /* 8 bit element blocks */ + TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src2, 8); + printf("%s - sse8_8bit#1 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1356423)?"| ERROR": ""); + TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src3, 8); + printf("%s - sse8_8bit#2 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1173074)?"| ERROR": ""); + TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src4, 8); + printf("%s - sse8_8bit#3 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1092357)?"| ERROR": ""); + TEST_SSE(sse8_8bit, (int8_t*)Src2, (int8_t*)Src3, 8); + printf("%s - sse8_8bit#4 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1360239)?"| ERROR": ""); + TEST_SSE(sse8_8bit, (int8_t*)Src2, (int8_t*)Src4, 8); + printf("%s - sse8_8bit#5 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1208414)?"| ERROR": ""); + TEST_SSE(sse8_8bit, (int8_t*)Src3, (int8_t*)Src4, 8); + printf("%s - sse8_8bit#6 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1099285)?"| ERROR": ""); + + printf(" ---\n"); + } } /********************************************************************* * test non-zero AC counting *********************************************************************/ -#define TEST_CBP(FUNC, SRC) \ - t = gettime_usec(); \ - emms(); \ - for(tst=0; tst3*64); + Src4[i] = (i==(3*64+2) || i==(5*64+9)); + } - printf( "\n ===== test cbp =====\n" ); + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + double t; + int tst, cbp; + + if (!init_cpu(cpu)) + continue; + + TEST_CBP(calc_cbp, Src1, nb_tests); + printf("%s - calc_cbp#1 %.3f usec cbp=0x%02x %s\n", + cpu->name, t, cbp, (cbp!=0x15)?"| ERROR": ""); + TEST_CBP(calc_cbp, Src2, nb_tests); + printf("%s - calc_cbp#2 %.3f usec cbp=0x%02x %s\n", + cpu->name, t, cbp, (cbp!=0x38)?"| ERROR": ""); + TEST_CBP(calc_cbp, Src3, nb_tests); + printf("%s - calc_cbp#3 %.3f usec cbp=0x%02x %s\n", + cpu->name, t, cbp, (cbp!=0x0f)?"| ERROR": "" ); + TEST_CBP(calc_cbp, Src4, nb_tests); + printf("%s - calc_cbp#4 %.3f usec cbp=0x%02x %s\n", + cpu->name, t, cbp, (cbp!=0x05)?"| ERROR": "" ); + printf( " --- \n" ); + } - for(i=0; i<6*64; ++i) { - Src1[i] = (i*i*3/8192)&(i/64)&1; // 'random' - Src2[i] = (i<3*64); // half-full - Src3[i] = ((i+32)>3*64); - Src4[i] = (i==(3*64+2) || i==(5*64+9)); - } - - for(cpu = cpu_short_list2; cpu->name!=0; ++cpu) - { - double t; - int tst, cbp; + for(cpu = cpu_list; cpu->name!=0; ++cpu) /* bench suggested by Carlo (carlo dot bramix at libero dot it) */ + { + double t; + int tst, cbp, err; - if (!init_cpu(cpu)) - continue; + if (!init_cpu(cpu)) + continue; - TEST_CBP(calc_cbp, Src1); - printf( "%s - calc_cbp#1 %.3f usec cbp=0x%x\n", cpu->name, t, cbp ); - if (cbp!=0x15) printf( "*** CRC ERROR! ***\n" ); - TEST_CBP(calc_cbp, Src2); - printf( "%s - calc_cbp#2 %.3f usec cbp=0x%x\n", cpu->name, t, cbp ); - if (cbp!=0x38) printf( "*** CRC ERROR! ***\n" ); - TEST_CBP(calc_cbp, Src3); - printf( "%s - calc_cbp#3 %.3f usec cbp=0x%x\n", cpu->name, t, cbp ); - if (cbp!=0x0f) printf( "*** CRC ERROR! ***\n" ); - TEST_CBP(calc_cbp, Src4); - printf( "%s - calc_cbp#4 %.3f usec cbp=0x%x\n", cpu->name, t, cbp ); - if (cbp!=0x05) printf( "*** CRC ERROR! ***\n" ); - printf( " --- \n" ); - } + err = 0; + for(n=0; n<6; ++n) + { + for(m=0; m<64; ++m) + { + for(i=0; i<6*64; ++i) + Src1[i] = (i== (m + n*64)); + + TEST_CBP(calc_cbp, Src1, 1); + if (cbp!= (((m!=0)<<(5-n)))) + { + printf( "%s - calc_cbp#5: ERROR at pos %d / %d!\n", cpu->name, n, m); + err = 1; + break; + } + } + } + if (!err) + printf( " %s - calc_cbp#5 : OK\n", cpu->name ); + + } } /********************************************************************* @@ -634,149 +992,149 @@ *********************************************************************/ typedef struct { - long Errors[64]; - long Sqr_Errors[64]; - long Max_Errors[64]; - long Nb; + long Errors[64]; + long Sqr_Errors[64]; + long Max_Errors[64]; + long Nb; } STATS_8x8; void init_stats(STATS_8x8 *S) { - int i; - for(i=0; i<64; ++i) { - S->Errors[i] = 0; - S->Sqr_Errors[i] = 0; - S->Max_Errors[i] = 0; - } - S->Nb = 0; + int i; + for(i=0; i<64; ++i) { + S->Errors[i] = 0; + S->Sqr_Errors[i] = 0; + S->Max_Errors[i] = 0; + } + S->Nb = 0; } void store_stats(STATS_8x8 *S, short Blk[64], short Ref[64]) { - int i; - for(i=0; i<64; ++i) - { - short Err = Blk[i] - Ref[i]; - S->Errors[i] += Err; - S->Sqr_Errors[i] += Err * Err; - if (Err<0) Err = -Err; - if (S->Max_Errors[i]Max_Errors[i] = Err; - } - S->Nb++; + int i; + for(i=0; i<64; ++i) + { + short Err = Blk[i] - Ref[i]; + S->Errors[i] += Err; + S->Sqr_Errors[i] += Err * Err; + if (Err<0) Err = -Err; + if (S->Max_Errors[i]Max_Errors[i] = Err; + } + S->Nb++; } void print_stats(STATS_8x8 *S) { - int i; - double Norm; + int i; + double Norm; - assert(S->Nb>0); - Norm = 1. / (double)S->Nb; - printf("\n== Max absolute values of errors ==\n"); - for(i=0; i<64; i++) { - printf(" %4ld", S->Max_Errors[i]); - if ((i&7)==7) printf("\n"); - } + assert(S->Nb>0); + Norm = 1. / (double)S->Nb; + printf("\n== Max absolute values of errors ==\n"); + for(i=0; i<64; i++) { + printf(" %4ld", S->Max_Errors[i]); + if ((i&7)==7) printf("\n"); + } - printf("\n== Mean square errors ==\n"); - for(i=0; i<64; i++) - { - double Err = Norm * (double)S->Sqr_Errors[i]; - printf(" %.3f", Err); - if ((i&7)==7) printf("\n"); - } + printf("\n== Mean square errors ==\n"); + for(i=0; i<64; i++) + { + double Err = Norm * (double)S->Sqr_Errors[i]; + printf(" %.3f", Err); + if ((i&7)==7) printf("\n"); + } - printf("\n== Mean errors ==\n"); - for(i=0; i<64; i++) - { - double Err = Norm * (double)S->Errors[i]; - printf(" %.3f", Err); - if ((i&7)==7) printf("\n"); - } - printf("\n"); + printf("\n== Mean errors ==\n"); + for(i=0; i<64; i++) + { + double Err = Norm * (double)S->Errors[i]; + printf(" %.3f", Err); + if ((i&7)==7) printf("\n"); + } + printf("\n"); } static const char *CHECK(double v, double l) { - if (fabs(v)<=l) return "ok"; - else return "FAIL!"; + if (fabs(v)<=l) return "ok"; + else return "FAIL!"; } void report_stats(STATS_8x8 *S, const double *Limits) { - int i; - double Norm, PE, PMSE, OMSE, PME, OME; + int i; + double Norm, PE, PMSE, OMSE, PME, OME; - assert(S->Nb>0); - Norm = 1. / (double)S->Nb; - PE = 0.; - for(i=0; i<64; i++) { - if (PEMax_Errors[i]) - PE = S->Max_Errors[i]; - } - - PMSE = 0.; - OMSE = 0.; - for(i=0; i<64; i++) - { - double Err = Norm * (double)S->Sqr_Errors[i]; - OMSE += Err; - if (PMSE < Err) PMSE = Err; - } - OMSE /= 64.; + assert(S->Nb>0); + Norm = 1. / (double)S->Nb; + PE = 0.; + for(i=0; i<64; i++) { + if (PEMax_Errors[i]) + PE = S->Max_Errors[i]; + } - PME = 0.; - OME = 0.; - for(i=0; i<64; i++) - { - double Err = Norm * (double)S->Errors[i]; - OME += Err; - Err = fabs(Err); - if (PME < Err) PME = Err; - } - OME /= 64.; + PMSE = 0.; + OMSE = 0.; + for(i=0; i<64; i++) + { + double Err = Norm * (double)S->Sqr_Errors[i]; + OMSE += Err; + if (PMSE < Err) PMSE = Err; + } + OMSE /= 64.; - printf( "Peak error: %4.4f\n", PE ); - printf( "Peak MSE: %4.4f\n", PMSE ); - printf( "Overall MSE: %4.4f\n", OMSE ); - printf( "Peak ME: %4.4f\n", PME ); - printf( "Overall ME: %4.4f\n", OME ); - - if (Limits!=0) - { - printf( "[PE<=%.4f %s] ", Limits[0], CHECK(PE, Limits[0]) ); - printf( "\n" ); - printf( "[PMSE<=%.4f %s]", Limits[1], CHECK(PMSE, Limits[1]) ); - printf( "[OMSE<=%.4f %s]", Limits[2], CHECK(OMSE, Limits[2]) ); - printf( "\n" ); - printf( "[PME<=%.4f %s] ", Limits[3], CHECK(PME , Limits[3]) ); - printf( "[OME<=%.4f %s] ", Limits[4], CHECK(OME , Limits[4]) ); - printf( "\n" ); - } + PME = 0.; + OME = 0.; + for(i=0; i<64; i++) + { + double Err = Norm * (double)S->Errors[i]; + OME += Err; + Err = fabs(Err); + if (PME < Err) PME = Err; + } + OME /= 64.; + + printf( "Peak error: %4.4f\n", PE ); + printf( "Peak MSE: %4.4f\n", PMSE ); + printf( "Overall MSE: %4.4f\n", OMSE ); + printf( "Peak ME: %4.4f\n", PME ); + printf( "Overall ME: %4.4f\n", OME ); + + if (Limits!=0) + { + printf( "[PE<=%.4f %s] ", Limits[0], CHECK(PE, Limits[0]) ); + printf( "\n" ); + printf( "[PMSE<=%.4f %s]", Limits[1], CHECK(PMSE, Limits[1]) ); + printf( "[OMSE<=%.4f %s]", Limits[2], CHECK(OMSE, Limits[2]) ); + printf( "\n" ); + printf( "[PME<=%.4f %s] ", Limits[3], CHECK(PME , Limits[3]) ); + printf( "[OME<=%.4f %s] ", Limits[4], CHECK(OME , Limits[4]) ); + printf( "\n" ); + } } -////////////////////////////////////////////////////////// +///* ////////////////////////////////////////////////////// */ /* Pseudo-random generator specified by IEEE 1180 */ static long ieee_seed = 1; static void ieee_reseed(long s) { - ieee_seed = s; + ieee_seed = s; } static long ieee_rand(int Min, int Max) { - static double z = (double) 0x7fffffff; + static double z = (double) 0x7fffffff; - long i,j; - double x; + long i,j; + double x; - ieee_seed = (ieee_seed * 1103515245) + 12345; - i = ieee_seed & 0x7ffffffe; - x = ((double) i) / z; - x *= (Max-Min+1); - j = (long)x; - j = j + Min; - assert(j>=Min && j<=Max); - return (short)j; + ieee_seed = (ieee_seed * 1103515245) + 12345; + i = ieee_seed & 0x7ffffffe; + x = ((double) i) / z; + x *= (Max-Min+1); + j = (long)x; + j = j + Min; + assert(j>=Min && j<=Max); + return (short)j; } #define CLAMP(x, M) (x) = ((x)<-(M)) ? (-(M)) : ((x)>=(M) ? ((M)-1) : (x)) @@ -784,336 +1142,357 @@ static double Cos[8][8]; static void init_ref_dct() { - int i, j; - for(i=0; i<8; i++) - { - double scale = (i == 0) ? sqrt(0.125) : 0.5; - for (j=0; j<8; j++) - Cos[i][j] = scale*cos( (M_PI/8.0)*i*(j + 0.5) ); - } + int i, j; + for(i=0; i<8; i++) + { + double scale = (i == 0) ? sqrt(0.125) : 0.5; + for (j=0; j<8; j++) + Cos[i][j] = scale*cos( (M_PI/8.0)*i*(j + 0.5) ); + } } void ref_idct(short *M) { - int i, j, k; - double Tmp[8][8]; + int i, j, k; + double Tmp[8][8]; - for(i=0; i<8; i++) { - for(j=0; j<8; j++) - { - double Sum = 0.0; - for (k=0; k<8; k++) Sum += Cos[k][j]*M[8*i+k]; - Tmp[i][j] = Sum; - } - } - for(i=0; i<8; i++) { - for(j=0; j<8; j++) { - double Sum = 0.0; - for (k=0; k<8; k++) Sum += Cos[k][i]*Tmp[k][j]; - M[8*i+j] = (short)floor(Sum + .5); - } - } + for(i=0; i<8; i++) { + for(j=0; j<8; j++) + { + double Sum = 0.0; + for (k=0; k<8; k++) Sum += Cos[k][j]*M[8*i+k]; + Tmp[i][j] = Sum; + } + } + for(i=0; i<8; i++) { + for(j=0; j<8; j++) { + double Sum = 0.0; + for (k=0; k<8; k++) Sum += Cos[k][i]*Tmp[k][j]; + M[8*i+j] = (short)floor(Sum + .5); + } + } } void ref_fdct(short *M) { - int i, j, k; - double Tmp[8][8]; + int i, j, k; + double Tmp[8][8]; - for(i=0; i<8; i++) { - for(j=0; j<8; j++) - { - double Sum = 0.0; - for (k=0; k<8; k++) Sum += Cos[j][k]*M[8*i+k]; - Tmp[i][j] = Sum; - } - } - for(i=0; i<8; i++) { - for(j=0; j<8; j++) { - double Sum = 0.0; - for (k=0; k<8; k++) Sum += Cos[i][k]*Tmp[k][j]; - M[8*i+j] = (short)floor(Sum + 0.5); - } - } + for(i=0; i<8; i++) { + for(j=0; j<8; j++) + { + double Sum = 0.0; + for (k=0; k<8; k++) Sum += Cos[j][k]*M[8*i+k]; + Tmp[i][j] = Sum; + } + } + for(i=0; i<8; i++) { + for(j=0; j<8; j++) { + double Sum = 0.0; + for (k=0; k<8; k++) Sum += Cos[i][k]*Tmp[k][j]; + M[8*i+j] = (short)floor(Sum + 0.5); + } + } } void test_IEEE1180_compliance(int Min, int Max, int Sign) { - static const double ILimits[5] = { 1., 0.06, 0.02, 0.015, 0.0015 }; - int Loops = 10000; - int i, m, n; - short Blk0[64]; // reference - short Blk[64], iBlk[64]; - short Ref_FDCT[64]; - short Ref_IDCT[64]; - - STATS_8x8 FStats; // forward dct stats - STATS_8x8 IStats; // inverse dct stats - - CPU *cpu; - - init_ref_dct(); - - for(cpu = cpu_list; cpu->name!=0; ++cpu) - { - if (!init_cpu(cpu)) - continue; + static const double ILimits[5] = { 1., 0.06, 0.02, 0.015, 0.0015 }; + int Loops = 10000; + int i, m, n; + DECLARE_ALIGNED_MATRIX(Blk0, 8, 8, short, 16); /* reference */ + DECLARE_ALIGNED_MATRIX(Blk, 8, 8, short, 16); + DECLARE_ALIGNED_MATRIX(iBlk, 8, 8, short, 16); + DECLARE_ALIGNED_MATRIX(Ref_FDCT, 8, 8, short, 16); + DECLARE_ALIGNED_MATRIX(Ref_IDCT, 8, 8, short, 16); - printf( "\n===== IEEE test for %s ==== (Min=%d Max=%d Sign=%d Loops=%d)\n", - cpu->name, Min, Max, Sign, Loops); + STATS_8x8 FStats; /* forward dct stats */ + STATS_8x8 IStats; /* inverse dct stats */ - init_stats(&IStats); - init_stats(&FStats); + CPU *cpu; - ieee_reseed(1); - for(n=0; nname!=0; ++cpu) + { + if (!init_cpu(cpu)) + continue; - memcpy(Ref_FDCT, Blk0, 64*sizeof(short)); - ref_fdct(Ref_FDCT); - for(i=0; i<64; i++) CLAMP( Ref_FDCT[i], 2048 ); + printf( "\n===== IEEE test for %s ==== (Min=%d Max=%d Sign=%d Loops=%d)\n", + cpu->name, Min, Max, Sign, Loops); - memcpy(Blk, Blk0, 64*sizeof(short)); - emms(); fdct(Blk); emms(); - for(i=0; i<64; i++) CLAMP( Blk[i], 2048 ); + init_stats(&IStats); + init_stats(&FStats); - store_stats(&FStats, Blk, Ref_FDCT); + ieee_reseed(1); + for(n=0; nname!=0; ++cpu) - { - short Blk0[64], Blk[64]; - STATS_8x8 Stats; - - if (!init_cpu(cpu)) - continue; - - printf( "\n===== IEEE test for %s Min=%d Max=%d =====\n", - cpu->name, Min, Max ); - - // FDCT tests // +// const short IDCT_OUT = 256; /* 9bits ouput */ + const int Partitions = 4; + const int Loops = 10000 / Partitions; + + init_ref_dct(); + + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + short Blk0[64], Blk[64]; + STATS_8x8 Stats; + + if (!init_cpu(cpu)) + continue; + + printf( "\n===== IEEE test for %s Min=%d Max=%d =====\n", + cpu->name, Min, Max ); + + /* FDCT tests // */ + + init_stats(&Stats); + + /* test each computation channels separately */ + for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? Max : 0; + ref_fdct(Blk0); + emms(); fdct(Blk); emms(); + store_stats(&Stats, Blk, Blk0); + + for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? Min : 0; + ref_fdct(Blk0); + emms(); fdct(Blk); emms(); + store_stats(&Stats, Blk, Blk0); + + /* randomly saturated inputs */ + for(p=0; p=p)? Max : Min; + ref_fdct(Blk0); + emms(); fdct(Blk); emms(); + store_stats(&Stats, Blk, Blk0); + } + } + printf( "\n -- FDCT saturation report --\n" ); + report_stats(&Stats, 0); - init_stats(&Stats); - // test each computation channels separately - for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? Max : 0; - ref_fdct(Blk0); - emms(); fdct(Blk); emms(); - store_stats(&Stats, Blk, Blk0); - - for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? Min : 0; - ref_fdct(Blk0); - emms(); fdct(Blk); emms(); - store_stats(&Stats, Blk, Blk0); - - // randomly saturated inputs - for(p=0; p=p)? Max : Min; - ref_fdct(Blk0); - emms(); fdct(Blk); emms(); - store_stats(&Stats, Blk, Blk0); - } - } - printf( "\n -- FDCT saturation report --\n" ); - report_stats(&Stats, 0); - - - // IDCT tests // + /* IDCT tests // */ #if 0 - // no finished yet - - init_stats(&Stats); + /* no finished yet */ - // test each computation channel separately - for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? IDCT_MAX : 0; - ref_idct(Blk0); - emms(); idct(Blk); emms(); - for(i=0; i<64; i++) { CLAMP(Blk0[i], IDCT_OUT); CLAMP(Blk[i], IDCT_OUT); } - store_stats(&Stats, Blk, Blk0); - - for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? IDCT_MIN : 0; - ref_idct(Blk0); - emms(); idct(Blk); emms(); - for(i=0; i<64; i++) { CLAMP(Blk0[i], IDCT_OUT); CLAMP(Blk[i], IDCT_OUT); } - store_stats(&Stats, Blk, Blk0); + init_stats(&Stats); - // randomly saturated inputs - for(p=0; p=p)? IDCT_MAX : IDCT_MIN; - ref_idct(Blk0); - emms(); idct(Blk); emms(); - for(i=0; i<64; i++) { CLAMP(Blk0[i],IDCT_OUT); CLAMP(Blk[i],IDCT_OUT); } - store_stats(&Stats, Blk, Blk0); - } - } - - printf( "\n -- IDCT saturation report --\n" ); - print_stats(&Stats); - report_stats(&Stats, 0); +/* test each computation channel separately */ + for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? IDCT_MAX : 0; + ref_idct(Blk0); + emms(); idct(Blk); emms(); + for(i=0; i<64; i++) { CLAMP(Blk0[i], IDCT_OUT); CLAMP(Blk[i], IDCT_OUT); } + store_stats(&Stats, Blk, Blk0); + + for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? IDCT_MIN : 0; + ref_idct(Blk0); + emms(); idct(Blk); emms(); + for(i=0; i<64; i++) { CLAMP(Blk0[i], IDCT_OUT); CLAMP(Blk[i], IDCT_OUT); } + store_stats(&Stats, Blk, Blk0); + + /* randomly saturated inputs */ + for(p=0; p=p)? IDCT_MAX : IDCT_MIN; + ref_idct(Blk0); + emms(); idct(Blk); emms(); + for(i=0; i<64; i++) { CLAMP(Blk0[i],IDCT_OUT); CLAMP(Blk[i],IDCT_OUT); } + store_stats(&Stats, Blk, Blk0); + } + } + + printf( "\n -- IDCT saturation report --\n" ); + print_stats(&Stats); + report_stats(&Stats, 0); #endif - } + } } /********************************************************************* * measure raw decoding speed *********************************************************************/ -void test_dec(const char *name, int width, int height, int with_chksum) +void test_dec(const char *name, int width, int height, int ref_chksum) { - FILE *f = 0; - void *dechandle = 0; - int xerr; - XVID_INIT_PARAM xinit; - XVID_DEC_PARAM xparam; - XVID_DEC_FRAME xframe; + FILE *f = 0; + void *dechandle = 0; + int xerr; + xvid_gbl_init_t xinit; + xvid_dec_create_t xparam; + xvid_dec_frame_t xframe; double t = 0.; int nb = 0; - uint8_t *buf = 0; - uint8_t *rgb_out = 0; - int buf_size, pos; - uint32_t chksum = 0; - - xinit.cpu_flags = XVID_CPU_MMX | XVID_CPU_FORCE; - xvid_init(NULL, 0, &xinit, NULL); - printf( "API version: %d, core build:%d\n", xinit.api_version, xinit.core_build); - + uint8_t *buf = 0; + uint8_t *yuv_out = 0; + int buf_size, pos; + uint32_t chksum = 0; + int bps = (width+31) & ~31; + + memset(&xinit, 0, sizeof(xinit)); + xinit.cpu_flags = cpu_mask; + xinit.version = XVID_VERSION; + xvid_global(NULL, 0, &xinit, NULL); - xparam.width = width; + memset(&xparam, 0, sizeof(xparam)); + xparam.width = width; xparam.height = height; + xparam.version = XVID_VERSION; xerr = xvid_decore(NULL, XVID_DEC_CREATE, &xparam, NULL); - if (xerr!=XVID_ERR_OK) { - printf("can't init decoder (err=%d)\n", xerr); - return; + if (xerr==XVID_ERR_FAIL) { + printf("ERROR: can't init decoder (err=%d)\n", xerr); + return; } dechandle = xparam.handle; f = fopen(name, "rb"); - if (f==0) { - printf( "can't open file '%s'\n", name); - return; - } - fseek(f, 0, SEEK_END); - buf_size = ftell(f); - fseek(f, 0, SEEK_SET); - if (buf_size<=0) { - printf("error while stating file\n"); - goto End; - } - else printf( "Input size: %d\n", buf_size); + if (f==0) { + printf( "ERROR: can't open file '%s'\n", name); + return; + } + fseek(f, 0, SEEK_END); + buf_size = ftell(f); + fseek(f, 0, SEEK_SET); + if (buf_size<=0) { + printf("ERROR: error while stating file\n"); + goto End; + } - buf = malloc(buf_size); // should be enuf' - rgb_out = calloc(4, width*height); // <-room for _RGB24 - if (buf==0 || rgb_out==0) { - printf( "malloc failed!\n" ); - goto End; - } + buf = malloc(buf_size); + yuv_out = calloc(1, bps*height*3/2 + 15); + if (buf==0 || yuv_out==0) { + printf( "ERROR: malloc failed!\n" ); + goto End; + } - if (fread(buf, buf_size, 1, f)!=1) { - printf( "file-read failed\n" ); - goto End; - } + if (fread(buf, buf_size, 1, f)!=1) { + printf( "ERROR: file-read failed\n" ); + goto End; + } - nb = 0; - pos = 0; - t = -gettime_usec(); - while(1) { - xframe.bitstream = buf + pos; - xframe.length = buf_size - pos; - xframe.image = rgb_out; - xframe.stride = width; - xframe.colorspace = XVID_CSP_RGB24; - xerr = xvid_decore(dechandle, XVID_DEC_DECODE, &xframe, 0); - nb++; - pos += xframe.length; - if (with_chksum) { - int k = width*height; - uint32_t *ptr = (uint32_t *)rgb_out; - while(k-->0) chksum += *ptr++; - } - if (pos==buf_size) - break; - if (xerr!=XVID_ERR_OK) { - printf("decoding failed for frame #%d (err=%d)!\n", nb, xerr); - break; - } - } - t += gettime_usec(); - if (t>0.) - printf( "%d frames decoded in %.3f s -> %.1f FPS\n", nb, t*1.e-6f, (float)(nb*1.e6f/t) ); - if (with_chksum) - printf("checksum: 0x%.8x\n", chksum); - -End: - if (rgb_out!=0) free(rgb_out); - if (buf!=0) free(buf); - if (dechandle!=0) { - xerr= xvid_decore(dechandle, XVID_DEC_DESTROY, NULL, NULL); - if (xerr!=XVID_ERR_OK) - printf("destroy-decoder failed (err=%d)!\n", xerr); - } - if (f!=0) fclose(f); + nb = 0; + pos = 0; + t = -gettime_usec(); + while(1) { + int y; + + memset(&xframe, 0, sizeof(xframe)); + xframe.version = XVID_VERSION; + xframe.bitstream = buf + pos; + xframe.length = buf_size - pos; + xframe.output.plane[0] = (uint8_t*)(((size_t)yuv_out + 15) & ~15); + xframe.output.plane[1] = xframe.output.plane[0] + bps*height; + xframe.output.plane[2] = xframe.output.plane[1] + bps/2; + xframe.output.stride[0] = bps; + xframe.output.stride[1] = bps; + xframe.output.stride[2] = bps; + xframe.output.csp = XVID_CSP_I420; + xerr = xvid_decore(dechandle, XVID_DEC_DECODE, &xframe, 0); + if (xerr<0) { + printf("ERROR: decoding failed for frame #%d (err=%d)!\n", nb, xerr); + break; + } + else if (xerr==0) + break; + else if (verbose>0) printf("#%d %d\n", nb, xerr ); + + pos += xerr; + nb++; + + for(y=0; y0.) + printf( "%d frames decoded in %.3f s -> %.1f FPS Checksum:0x%.8x\n", nb, t*1.e-6f, (float)(nb*1.e6f/t), chksum ); + } + else { + printf("FPS:%.1f Checksum: 0x%.8x Expected:0x%.8x | %s\n", + t>0. ? (float)(nb*1.e6f/t) : 0.f, chksum, ref_chksum, (chksum==ref_chksum) ? "OK" : "ERROR"); + } + + End: + if (yuv_out!=0) free(yuv_out); + if (buf!=0) free(buf); + if (dechandle!=0) { + xerr= xvid_decore(dechandle, XVID_DEC_DESTROY, NULL, NULL); + if (xerr==XVID_ERR_FAIL) + printf("ERROR: destroy-decoder failed (err=%d)!\n", xerr); + } + if (f!=0) fclose(f); } /********************************************************************* @@ -1122,325 +1501,385 @@ void test_bugs1() { - CPU *cpu; - - printf( "\n ===== (de)quant4_intra saturation bug? =====\n" ); - - for(cpu = cpu_short_list; cpu->name!=0; ++cpu) - { - int i; - int16_t Src[8*8], Dst[8*8]; - - if (!init_cpu(cpu)) - continue; - - for(i=0; i<64; ++i) Src[i] = i-32; - set_intra_matrix( get_default_intra_matrix() ); - dequant4_intra(Dst, Src, 31, 5); - printf( "dequant4_intra with CPU=%s: ", cpu->name); - printf( " Out[]= " ); - for(i=0; i<64; ++i) printf( "[%d]", Dst[i]); - printf( "\n" ); - } + CPU *cpu; + uint16_t mpeg_quant_matrices[64*8]; - printf( "\n ===== (de)quant4_inter saturation bug? =====\n" ); + printf( "\n ===== (de)quant4_intra saturation bug? =====\n" ); - for(cpu = cpu_short_list; cpu->name!=0; ++cpu) - { - int i; - int16_t Src[8*8], Dst[8*8]; + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + int i; + int16_t Src[8*8], Dst[8*8]; + + if (!init_cpu(cpu)) + continue; + + for(i=0; i<64; ++i) Src[i] = i-32; + set_intra_matrix( mpeg_quant_matrices, get_default_intra_matrix() ); + dequant_mpeg_intra(Dst, Src, 31, 5, mpeg_quant_matrices); + printf( "dequant_mpeg_intra with CPU=%s: ", cpu->name); + printf( " Out[]= " ); + for(i=0; i<64; ++i) printf( "[%d]", Dst[i]); + printf( "\n" ); + } - if (!init_cpu(cpu)) - continue; + printf( "\n ===== (de)quant4_inter saturation bug? =====\n" ); - for(i=0; i<64; ++i) Src[i] = i-32; - set_inter_matrix( get_default_inter_matrix() ); - dequant4_inter(Dst, Src, 31); - printf( "dequant4_inter with CPU=%s: ", cpu->name); - printf( " Out[]= " ); - for(i=0; i<64; ++i) printf( "[%d]", Dst[i]); - printf( "\n" ); - } + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + int i; + int16_t Src[8*8], Dst[8*8]; + + if (!init_cpu(cpu)) + continue; + + for(i=0; i<64; ++i) Src[i] = i-32; + set_inter_matrix( mpeg_quant_matrices, get_default_inter_matrix() ); + dequant_mpeg_inter(Dst, Src, 31, mpeg_quant_matrices); + printf( "dequant_mpeg_inter with CPU=%s: ", cpu->name); + printf( " Out[]= " ); + for(i=0; i<64; ++i) printf( "[%d]", Dst[i]); + printf( "\n" ); + } } void test_dct_precision_diffs() { - CPU *cpu; - short Blk[8*8], Blk0[8*8]; - - printf( "\n ===== fdct/idct precision diffs =====\n" ); - - for(cpu = cpu_short_list; cpu->name!=0; ++cpu) - { - int i; - - if (!init_cpu(cpu)) - continue; - - for(i=0; i<8*8; ++i) { - Blk0[i] = (i*7-i*i) & 0x7f; - Blk[i] = Blk0[i]; - } - - fdct(Blk); - idct(Blk); - printf( " fdct+idct diffs with CPU=%s: \n", cpu->name ); - for(i=0; i<8; ++i) { - int j; - for(j=0; j<8; ++j) printf( " %d ", Blk[i*8+j]-Blk0[i*8+j]); - printf("\n"); - } - printf("\n"); - } + CPU *cpu; + DECLARE_ALIGNED_MATRIX(Blk, 8, 8, int16_t, 16); + DECLARE_ALIGNED_MATRIX(Blk0, 8, 8, int16_t, 16); + + printf( "\n ===== fdct/idct precision diffs =====\n" ); + + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + int i; + + if (!init_cpu(cpu)) + continue; + + for(i=0; i<8*8; ++i) { + Blk0[i] = (i*7-i*i) & 0x7f; + Blk[i] = Blk0[i]; + } + + fdct(Blk); + idct(Blk); + printf( " fdct+idct diffs with CPU=%s: \n", cpu->name ); + for(i=0; i<8; ++i) { + int j; + for(j=0; j<8; ++j) printf( " %d ", Blk[i*8+j]-Blk0[i*8+j]); + printf("\n"); + } + printf("\n"); + } } void test_quant_bug() { - const int max_Q = 31; - int i, n, qm, q; - CPU *cpu; - int16_t Src[8*8], Dst[8*8]; - uint8_t Quant[8*8]; - CPU cpu_bug_list[] = { { "PLAINC", 0 }, { "MMX ", XVID_CPU_MMX }, {0,0} }; - uint16_t Crcs_Inter[2][32]; - uint16_t Crcs_Intra[2][32]; - printf( "\n ===== test MPEG4-quantize bug =====\n" ); + const int max_Q = 31; + int i, n, qm, q; + CPU *cpu; + DECLARE_ALIGNED_MATRIX(Src, 8, 8, int16_t, 16); + DECLARE_ALIGNED_MATRIX(Dst, 8, 8, int16_t, 16); + uint8_t Quant[8*8]; + CPU cpu_bug_list[] = { { "PLAINC", 0 }, { "MMX ", XVID_CPU_MMX }, {0,0} }; + uint16_t Crcs_Inter[2][32]; + uint16_t Crcs_Intra[2][32]; + DECLARE_ALIGNED_MATRIX(mpeg_quant_matrices, 8, 64, uint16_t, 16); - for(i=0; i<64; ++i) Src[i] = 2048*(i-32)/32; + printf( "\n ===== test MPEG4-quantize bug =====\n" ); + + for(i=0; i<64; ++i) Src[i] = 2048*(i-32)/32; #if 1 - for(qm=1; qm<=255; ++qm) - { - for(i=0; i<8*8; ++i) Quant[i] = qm; - set_inter_matrix( Quant ); + for(qm=1; qm<=255; ++qm) + { + for(i=0; i<8*8; ++i) Quant[i] = qm; + set_inter_matrix( mpeg_quant_matrices, Quant ); + + for(n=0, cpu = cpu_bug_list; cpu->name!=0; ++cpu, ++n) + { + uint16_t s; + + if (!init_cpu(cpu)) + continue; + + for(q=1; q<=max_Q; ++q) { + emms(); + quant_mpeg_inter( Dst, Src, q, mpeg_quant_matrices ); + emms(); + for(s=0, i=0; i<64; ++i) s+=((uint16_t)Dst[i])^i; + Crcs_Inter[n][q] = s; + } + } + + for(q=1; q<=max_Q; ++q) + for(i=0; i %d/%d !\n", + qm, q, Crcs_Inter[i][q], Crcs_Inter[i+1][q]); + } +#endif - for(n=0, cpu = cpu_bug_list; cpu->name!=0; ++cpu, ++n) - { - uint16_t s; +#if 1 + for(qm=1; qm<=255; ++qm) + { + for(i=0; i<8*8; ++i) Quant[i] = qm; + set_intra_matrix( mpeg_quant_matrices, Quant ); + + for(n=0, cpu = cpu_bug_list; cpu->name!=0; ++cpu, ++n) + { + uint16_t s; + + if (!init_cpu(cpu)) + continue; + + for(q=1; q<=max_Q; ++q) { + emms(); + quant_mpeg_intra( Dst, Src, q, q, mpeg_quant_matrices); + emms(); + for(s=0, i=0; i<64; ++i) s+=((uint16_t)Dst[i])^i; + Crcs_Intra[n][q] = s; + } + } + + for(q=1; q<=max_Q; ++q) + for(i=0; i %d/%d!\n", + qm, q, Crcs_Inter[i][q], Crcs_Inter[i+1][q]); + } +#endif +} +/*********************************************************************/ - if (!init_cpu(cpu)) - continue; +static uint32_t __inline log2bin_v1(uint32_t value) +{ + int n = 0; + while (value) { + value >>= 1; + n++; + } + return n; +} - for(q=1; q<=max_Q; ++q) { - emms(); - quant4_inter( Dst, Src, q ); - emms(); - for(s=0, i=0; i<64; ++i) s+=((uint16_t)Dst[i])^i; - Crcs_Inter[n][q] = s; - } - } +static const uint8_t log2_tab_16[16] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4 }; - for(q=1; q<=max_Q; ++q) - for(i=0; i %d/%d !\n", - qm, q, Crcs_Inter[i][q], Crcs_Inter[i+1][q]); +static uint32_t __inline log2bin_v2(uint32_t value) +{ + int n = 0; + if (value & 0xffff0000) { + value >>= 16; + n += 16; } -#endif + if (value & 0xff00) { + value >>= 8; + n += 8; + } + if (value & 0xf0) { + value >>= 4; + n += 4; + } + return n + log2_tab_16[value]; +} -#if 1 - for(qm=1; qm<=255; ++qm) - { - for(i=0; i<8*8; ++i) Quant[i] = qm; - set_intra_matrix( Quant ); +void test_log2bin() +{ + const int nb_tests = 3000*speed_ref; + int n, crc1=0, crc2=0; + uint32_t s, s0; + double t1, t2; - for(n=0, cpu = cpu_bug_list; cpu->name!=0; ++cpu, ++n) - { - uint16_t s; + t1 = gettime_usec(); + s0 = (int)(t1*31.241); + for(s=s0, n=0; n 1) { + if (*num % i == 0 && *den % i == 0) { + *num /= i; + *den /= i; + i = *num; + continue; } + i--; + } +} - for(q=1; q<=max_Q; ++q) - for(i=0; i %d/%d!\n", - qm, q, Crcs_Inter[i][q], Crcs_Inter[i+1][q]); +static uint32_t gcd(int num, int den) +{ + int tmp; + while( (tmp=num%den) ) { num = den; den = tmp; } + return den; +} +static void __inline new_gcd(int *num, int *den) +{ + const int div = gcd(*num, *den); + if (num) { + *num /= div; + *den /= div; } -#endif +} + +void test_gcd() +{ + const int nb_tests = 10*speed_ref; + int i; + uint32_t crc1=0, crc2=0; + uint32_t n0, n, d0, d; + double t1, t2; + + t1 = gettime_usec(); + n0 = 0xfffff & (int)(t1*31.241); + d0 = 0xfffff & (int)( ((n0*4123)%17) | 1 ); + for(n=n0, d=d0, i=0; i>4)^d) + ((crc1<<2)^n) ) & 0xffffff; + n = d; + d = (d*12363+31) & 0xffff; + d |= !d; + } + t1 = (gettime_usec()-t1) / nb_tests; + + t2 = gettime_usec(); + for(n=n0, d=d0, i=0; i>4)^d) + ((crc2<<2)^n) ) & 0xffffff; + n = d; + d = (d*12363+31) & 0xffff; + d |= !d; + } + t2 = (gettime_usec() - t2) / nb_tests; + + printf( "old_gcd: %.3f sec crc=%d\n", t1, crc1 ); + printf( "new_gcd: %.3f sec crc=%d\n", t2, crc2 ); + if (crc1!=crc2) printf( " CRC ERROR !\n" ); } /********************************************************************* * main *********************************************************************/ -int main(int argc, char *argv[]) +static void arg_missing(const char *opt) { - int what = 0; - if (argc>1) what = atoi(argv[1]); - if (what==0 || what==1) test_dct(); - if (what==0 || what==2) test_mb(); - if (what==0 || what==3) test_sad(); - if (what==0 || what==4) test_transfer(); - if (what==0 || what==5) test_quant(); - if (what==0 || what==6) test_cbp(); - - if (what==7) { - test_IEEE1180_compliance(-256, 255, 1); -#if 0 - test_IEEE1180_compliance(-256, 255,-1); - test_IEEE1180_compliance( -5, 5, 1); - test_IEEE1180_compliance( -5, 5,-1); - test_IEEE1180_compliance(-300, 300, 1); - test_IEEE1180_compliance(-300, 300,-1); -#endif - } - if (what==8) test_dct_saturation(-256, 255); + printf( "missing argument after option '%s'\n", opt); + exit(-1); +} - if (what==9) { - int width, height; - if (argc<5) { - printf("usage: %s %d [bitstream] [width] [height]\n", argv[0], what); - return 1; +int main(int argc, const char *argv[]) +{ + int c, what = 0; + int width, height; + uint32_t chksum = 0; + const char * test_bitstream = 0; + + cpu_mask = 0; // default => will use autodectect + for(c=1; cargc) { + printf("usage: %s %d bitstream width height (checksum)\n", argv[0], what); + exit(-1); + } + test_bitstream = argv[++c]; + width = atoi(argv[++c]); + height = atoi(argv[++c]); + if (c+15)); } - if (what==-1) { - test_dct_precision_diffs(); - test_bugs1(); - } - if (what==-2) - test_quant_bug(); - return 0; -} + if (what==0 || what==1) test_dct(); + if (what==0 || what==2) test_mb(); + if (what==0 || what==3) test_sad(); + if (what==0 || what==4) test_transfer(); + if (what==0 || what==5) test_quant(); + if (what==0 || what==6) test_cbp(); + if (what==0 || what==10) test_sse(); + if (what==0 || what==11) test_log2bin(); + if (what==0 || what==12) test_gcd(); + + + if (what==7) { + test_IEEE1180_compliance(-256, 255, 1); + test_IEEE1180_compliance(-256, 255,-1); + test_IEEE1180_compliance( -5, 5, 1); + test_IEEE1180_compliance( -5, 5,-1); + test_IEEE1180_compliance(-300, 300, 1); + test_IEEE1180_compliance(-300, 300,-1); + } + if (what==8) test_dct_saturation(-256, 255); -/********************************************************************* - * 'Reference' output (except for timing) on a PIII 1.13Ghz/linux - *********************************************************************/ + if (test_bitstream) + test_dec(test_bitstream, width, height, chksum); + if (what==-1) { + test_dct_precision_diffs(); + test_bugs1(); + } + if (what==-2) + test_quant_bug(); + + if ((what >= 0 && what <= 6) || what == 10) { + printf("\n\n" + "NB: If a function isn't optimised for a specific set of intructions,\n" + " a C function is used instead. So don't panic if some functions\n" + " may appear to be slow.\n"); + } - /* as of 07/01/2002, there's a problem with mpeg4-quantization */ -/* +#ifdef ARCH_IS_IA32 + if (what == 0 || what == 5) { + printf("\n" + "NB: MMX mpeg4 quantization is known to have very small errors (+/-1 magnitude)\n" + " for 1 or 2 coefficients a block. This is mainly caused by the fact the unit\n" + " test goes far behind the usual limits of real encoding. Please do not report\n" + " this error to the developers.\n"); + } +#endif - ===== test fdct/idct ===== -PLAINC - 3.312 usec PSNR=13.291 MSE=3.000 -MMX - 0.591 usec PSNR=13.291 MSE=3.000 -MMXEXT - 0.577 usec PSNR=13.291 MSE=3.000 -SSE2 - 0.588 usec PSNR=13.291 MSE=3.000 -3DNOW - skipped... -3DNOWE - skipped... - - === test block motion === -PLAINC - interp- h-round0 0.911 usec iCrc=8107 -PLAINC - round1 0.863 usec iCrc=8100 -PLAINC - interp- v-round0 0.860 usec iCrc=8108 -PLAINC - round1 0.857 usec iCrc=8105 -PLAINC - interp-hv-round0 2.103 usec iCrc=8112 -PLAINC - round1 2.050 usec iCrc=8103 - --- -MMX - interp- h-round0 0.105 usec iCrc=8107 -MMX - round1 0.106 usec iCrc=8100 -MMX - interp- v-round0 0.106 usec iCrc=8108 -MMX - round1 0.106 usec iCrc=8105 -MMX - interp-hv-round0 0.145 usec iCrc=8112 -MMX - round1 0.145 usec iCrc=8103 - --- -MMXEXT - interp- h-round0 0.028 usec iCrc=8107 -MMXEXT - round1 0.041 usec iCrc=8100 -MMXEXT - interp- v-round0 0.027 usec iCrc=8108 -MMXEXT - round1 0.041 usec iCrc=8105 -MMXEXT - interp-hv-round0 0.066 usec iCrc=8112 -MMXEXT - round1 0.065 usec iCrc=8103 - --- -SSE2 - interp- h-round0 0.109 usec iCrc=8107 -SSE2 - round1 0.105 usec iCrc=8100 -SSE2 - interp- v-round0 0.106 usec iCrc=8108 -SSE2 - round1 0.109 usec iCrc=8105 -SSE2 - interp-hv-round0 0.145 usec iCrc=8112 -SSE2 - round1 0.145 usec iCrc=8103 - --- -3DNOW - skipped... -3DNOWE - skipped... - - ====== test SAD ====== -PLAINC - sad8 0.251 usec sad=3776 -PLAINC - sad16 1.601 usec sad=27214 -PLAINC - sad16bi 2.371 usec sad=26274 -PLAINC - dev16 1.564 usec sad=3344 - --- -MMX - sad8 0.057 usec sad=3776 -MMX - sad16 0.182 usec sad=27214 -MMX - sad16bi 2.462 usec sad=26274 -MMX - dev16 0.311 usec sad=3344 - --- -MMXEXT - sad8 0.036 usec sad=3776 -MMXEXT - sad16 0.109 usec sad=27214 -MMXEXT - sad16bi 0.143 usec sad=26274 -MMXEXT - dev16 0.192 usec sad=3344 - --- -SSE2 - sad8 0.057 usec sad=3776 -SSE2 - sad16 0.179 usec sad=27214 -SSE2 - sad16bi 2.456 usec sad=26274 -SSE2 - dev16 0.321 usec sad=3344 - --- -3DNOW - skipped... -3DNOWE - skipped... - - === test transfer === -PLAINC - 8to16 0.151 usec crc=28288 -PLAINC - 16to8 1.113 usec crc=28288 -PLAINC - 8to8 0.043 usec crc=20352 -PLAINC - 16to8add 1.069 usec crc=25536 -PLAINC - 8to16sub 0.631 usec crc1=28064 crc2=16256 -PLAINC - 8to16sub2 0.597 usec crc=20384 - --- -MMX - 8to16 0.032 usec crc=28288 -MMX - 16to8 0.024 usec crc=28288 -MMX - 8to8 0.020 usec crc=20352 -MMX - 16to8add 0.043 usec crc=25536 -MMX - 8to16sub 0.066 usec crc1=28064 crc2=16256 -MMX - 8to16sub2 0.111 usec crc=20384 - --- - - ===== test quant ===== -PLAINC - quant4_intra 74.248 usec crc=29809 -PLAINC - quant4_inter 70.850 usec crc=12574 -PLAINC - dequant4_intra 40.628 usec crc=24052 -PLAINC - dequant4_inter 45.691 usec crc=63847 -PLAINC - quant_intra 43.357 usec crc=25662 -PLAINC - quant_inter 33.410 usec crc=23972 -PLAINC - dequant_intra 36.384 usec crc=49900 -PLAINC - dequant_inter 48.930 usec crc=48899 - --- -MMX - quant4_intra 7.445 usec crc=3459 -*** CRC ERROR! *** -MMX - quant4_inter 5.384 usec crc=51072 -*** CRC ERROR! *** -MMX - dequant4_intra 5.515 usec crc=24052 -MMX - dequant4_inter 7.745 usec crc=63847 -MMX - quant_intra 4.661 usec crc=25662 -MMX - quant_inter 4.406 usec crc=23972 -MMX - dequant_intra 4.928 usec crc=49900 -MMX - dequant_inter 4.532 usec crc=48899 - --- - - ===== test cbp ===== -PLAINC - calc_cbp#1 0.371 usec cbp=0x15 -PLAINC - calc_cbp#2 0.432 usec cbp=0x38 -PLAINC - calc_cbp#3 0.339 usec cbp=0xf -PLAINC - calc_cbp#4 0.506 usec cbp=0x5 - --- -MMX - calc_cbp#1 0.136 usec cbp=0x15 -MMX - calc_cbp#2 0.134 usec cbp=0x38 -MMX - calc_cbp#3 0.138 usec cbp=0xf -MMX - calc_cbp#4 0.135 usec cbp=0x5 - --- -SSE2 - calc_cbp#1 0.136 usec cbp=0x15 -SSE2 - calc_cbp#2 0.133 usec cbp=0x38 -SSE2 - calc_cbp#3 0.133 usec cbp=0xf -SSE2 - calc_cbp#4 0.141 usec cbp=0x5 - --- + return 0; +} -*/ +/*********************************************************************/