Parent Directory
|
Revision Log
Removed obsolete test for yv12_to_bgra_altivec_c function. Thanks go to Hanspeter Niederstrasser for reporting.
/***************************************************************************** * * XVID MPEG-4 VIDEO CODEC * - Unit tests and benches - * * Copyright(C) 2002 Pascal Massimino <skal@planet-d.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * $Id$ * ****************************************************************************/ /***************************************************************************** * * 'Reference' output is at the end of file. * * compiles with something like: * gcc -o xvid_bench xvid_bench.c -I../src/ -lxvidcore -lm * ****************************************************************************/ #include <stdio.h> #include <stdlib.h> #include <string.h> /* for memset */ #include <assert.h> #ifndef WIN32 #include <sys/time.h> /* for gettimeofday */ #else #include <time.h> #endif #include "xvid.h" // inner guts #include "portab.h" #include "dct/idct.h" #include "dct/fdct.h" #include "image/colorspace.h" #include "image/interpolate8x8.h" #include "utils/mem_transfer.h" #include "quant/quant.h" #include "motion/sad.h" #include "utils/emms.h" #include "utils/timer.h" #include "quant/quant_matrix.c" #include "bitstream/cbp.h" #include "bitstream/bitstream.h" #include <math.h> #ifndef M_PI #define M_PI 3.14159265358979323846 #endif int speed_ref = 100; /* on slow machines, decrease this value */ int verbose = 0; unsigned int cpu_mask; /********************************************************************* * misc *********************************************************************/ /* returns time in micro-s*/ double gettime_usec() { #ifndef WIN32 struct timeval tv; gettimeofday(&tv, 0); return tv.tv_sec*1.0e6 + tv.tv_usec; #else clock_t clk; clk = clock(); return clk * 1000. / CLOCKS_PER_SEC; /* clock() returns time in Milliseconds */ #endif } /* returns squared deviates (mean(v*v)-mean(v)^2) of a 8x8 block */ double sqr_dev(uint8_t v[8*8]) { double sum=0.; double sum2=0.; int n; for (n=0;n<8*8;n++) { sum += v[n]; sum2 += v[n]*v[n]; } sum2 /= n; sum /= n; return sum2-sum*sum; } /********************************************************************* * cpu init *********************************************************************/ typedef struct { const char *name; unsigned int cpu; } CPU; CPU cpu_list[] = { { "PLAINC ", 0 }, #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) { "MMX ", XVID_CPU_MMX }, { "MMXEXT ", XVID_CPU_MMXEXT | XVID_CPU_MMX }, { "SSE2 ", XVID_CPU_SSE2 | XVID_CPU_MMX }, { "SSE3 ", XVID_CPU_SSE3 | XVID_CPU_SSE2 | XVID_CPU_MMX }, { "SSE41 ", XVID_CPU_SSE41| XVID_CPU_SSE3 | XVID_CPU_SSE2 | XVID_CPU_MMX }, { "3DNOW ", XVID_CPU_3DNOW }, { "3DNOWE ", XVID_CPU_3DNOW | XVID_CPU_3DNOWEXT }, #endif #ifdef ARCH_IS_PPC { "ALTIVEC", XVID_CPU_ALTIVEC }, #endif #ifdef ARCH_IS_IA64 // { "IA64 ", XVID_CPU_IA64 }, #endif // { "TSC ", XVID_CPU_TSC }, { 0, 0 } }; int init_cpu(CPU *cpu) { xvid_gbl_info_t xinfo; /* Get the available CPU flags */ memset(&xinfo, 0, sizeof(xinfo)); xinfo.version = XVID_VERSION; xvid_global(NULL, XVID_GBL_INFO, &xinfo, NULL); /* Are we trying to test a subset of the host CPU features */ if ((xinfo.cpu_flags & cpu->cpu) == cpu->cpu) { int xerr; xvid_gbl_init_t xinit; memset(&xinit, 0, sizeof(xinit)); xinit.cpu_flags = cpu->cpu | XVID_CPU_FORCE; xinit.version = XVID_VERSION; xerr = xvid_global(NULL, XVID_GBL_INIT, &xinit, NULL); if (xerr==XVID_ERR_FAIL) { /* libxvidcore failed to init */ return 0; } } else { /* The host CPU doesn't support some required feature for this test */ return(0); } return 1; } #define CRC32_REMAINDER 0xCBF43926 #define CRC32_INITIAL 0xffffffff #define DO1(c, crc) ((crc) = crc32tab[((unsigned int)((crc)>>24) ^ (*c++)) & 0xff] ^ ((crc) << 8)) #define DO2(c, crc) DO1(c, crc); DO1(c, crc); #define DO4(c, crc) DO2(c, crc); DO2(c, crc); #define DO8(c, crc) DO4(c, crc); DO4(c, crc); /****************************************************************************** * Precomputed AAL5 CRC32 lookup table ******************************************************************************/ static unsigned long crc32tab[256] = { 0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L, 0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L, 0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L, 0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL, 0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L, 0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L, 0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L, 0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL, 0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L, 0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L, 0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L, 0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL, 0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L, 0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L, 0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L, 0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL, 0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL, 0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L, 0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L, 0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL, 0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL, 0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L, 0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L, 0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL, 0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL, 0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L, 0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L, 0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL, 0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL, 0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L, 0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L, 0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL, 0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L, 0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL, 0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL, 0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L, 0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L, 0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL, 0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL, 0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L, 0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L, 0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL, 0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL, 0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L, 0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L, 0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL, 0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL, 0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L, 0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L, 0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL, 0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L, 0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L, 0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L, 0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL, 0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L, 0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L, 0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L, 0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL, 0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L, 0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L, 0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L, 0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL, 0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L, 0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L }; uint32_t calc_crc(uint8_t *mem, int len, uint32_t crc) { while( len >= 8) { DO8(mem, crc); len -= 8; } while( len ) { DO1(mem, crc); len--; } return crc; } void byte_swap(uint8_t *mem, int len, int element_size) { #ifdef ARCH_IS_BIG_ENDIAN int i; if(element_size == 1) { /* No need to swap */ } else if(element_size == 2) { uint8_t temp[2]; for(i=0; i < (len/2); i++ ) { temp[0] = mem[0]; temp[1] = mem[1]; mem[0] = temp[1]; mem[1] = temp[0]; mem += 2; } } else if(element_size == 4) { uint8_t temp[4]; for(i=0; i < (len/4); i++ ) { temp[0] = mem[0]; temp[1] = mem[1]; temp[2] = mem[2]; temp[3] = mem[3]; mem[0] = temp[3]; mem[1] = temp[2]; mem[2] = temp[1]; mem[3] = temp[0]; mem += 4; } } else { printf("ERROR: byte_swap unsupported element_size(%u)\n", element_size); } #endif } /********************************************************************* * test DCT *********************************************************************/ #define ABS(X) ((X)<0 ? -(X) : (X)) void test_dct() { const int nb_tests = 300*speed_ref; int tst; CPU *cpu; int i; DECLARE_ALIGNED_MATRIX(iDst0, 8, 8, short, 16); DECLARE_ALIGNED_MATRIX(iDst, 8, 8, short, 16); DECLARE_ALIGNED_MATRIX(fDst, 8, 8, short, 16); double overhead; printf( "\n ===== test fdct/idct =====\n" ); for(i=0; i<8*8; ++i) iDst0[i] = (i*7-i*i) & 0x7f; overhead = gettime_usec(); for(tst=0; tst<nb_tests; ++tst) { for(i=0; i<8*8; ++i) fDst[i] = iDst0[i]; for(i=0; i<8*8; ++i) iDst[i] = fDst[i]; } overhead = gettime_usec() - overhead; for(cpu = cpu_list; cpu->name!=0; ++cpu) { double t, PSNR, MSE; if (!init_cpu(cpu)) continue; t = gettime_usec(); emms(); for(tst=0; tst<nb_tests; ++tst) { for(i=0; i<8*8; ++i) fDst[i] = iDst0[i]; fdct(fDst); for(i=0; i<8*8; ++i) iDst[i] = fDst[i]; idct(iDst); } emms(); t = (gettime_usec() - t - overhead) / nb_tests; MSE = 0.; for(i=0; i<8*8; ++i) { double delta = 1.0*(iDst[i] - iDst0[i]); MSE += delta*delta; } PSNR = (MSE==0.) ? 1.e6 : -4.3429448*log( MSE/64. ); printf( "%s - %.3f usec PSNR=%.3f MSE=%.3f %s\n", cpu->name, t, PSNR, MSE, (ABS(MSE)>=64)? "| ERROR" :""); } } /********************************************************************* * test SAD *********************************************************************/ void test_sad() { const int nb_tests = 2000*speed_ref; int tst; CPU *cpu; int i; DECLARE_ALIGNED_MATRIX(Cur, 16, 16, uint8_t, 16); DECLARE_ALIGNED_MATRIX(Ref1, 16, 16, uint8_t, 16); DECLARE_ALIGNED_MATRIX(Ref2, 16, 16, uint8_t, 16); printf( "\n ====== test SAD ======\n" ); for(i=0; i<16*16;++i) { Cur[i] = (i/5) ^ 0x05; Ref1[i] = (i + 0x0b) & 0xff; Ref2[i] = i ^ 0x76; } for(cpu = cpu_list; cpu->name!=0; ++cpu) { double t; uint32_t s; if (!init_cpu(cpu)) continue; t = gettime_usec(); emms(); for(tst=0; tst<nb_tests; ++tst) s = sad8(Cur, Ref1, 16); emms(); t = (gettime_usec() - t) / nb_tests; printf("%s - sad8 %.3f usec sad=%d %s\n", cpu->name, t, s, (s!=3776)?"| ERROR": "" ); t = gettime_usec(); emms(); for(tst=0; tst<nb_tests; ++tst) s = sad16(Cur, Ref1, 16, -1); emms(); t = (gettime_usec() - t) / nb_tests; printf("%s - sad16 %.3f usec sad=%d %s\n", cpu->name, t, s, (s!=27214)?"| ERROR": "" ); t = gettime_usec(); emms(); for(tst=0; tst<nb_tests; ++tst) s = sad16bi(Cur, Ref1, Ref2, 16); emms(); t = (gettime_usec() - t) / nb_tests; printf( "%s - sad16bi %.3f usec sad=%d %s\n", cpu->name, t, s, (s!=26274)?"| ERROR": "" ); t = gettime_usec(); emms(); for(tst=0; tst<nb_tests; ++tst) s = sad8bi(Cur, Ref1, Ref2, 8); emms(); t = (gettime_usec() - t) / nb_tests; printf( "%s - sad8bi %.3f usec sad=%d %s\n", cpu->name, t, s, (s!=4002)?"| ERROR": "" ); t = gettime_usec(); emms(); for(tst=0; tst<nb_tests; ++tst) s = dev16(Cur, 16); emms(); t = (gettime_usec() - t) / nb_tests; printf( "%s - dev16 %.3f usec sad=%d %s\n", cpu->name, t, s, (s!=3344)?"| ERROR": "" ); printf( " --- \n" ); } } /********************************************************************* * test interpolation *********************************************************************/ #define ENTER \ for(i=0; i<16*8; ++i) Dst[i] = 0; \ t = gettime_usec(); \ emms(); #define LEAVE \ emms(); \ t = (gettime_usec() - t) / nb_tests; \ iCrc = calc_crc((uint8_t*)Dst, sizeof(Dst), CRC32_INITIAL) #define TEST_MB(FUNC, R) \ ENTER \ for(tst=0; tst<nb_tests; ++tst) (FUNC)(Dst, Src0, 16, (R)); \ LEAVE #define TEST_MB2(FUNC) \ ENTER \ for(tst=0; tst<nb_tests; ++tst) (FUNC)(Dst, Src0, 16); \ LEAVE void test_mb() { const int nb_tests = 2000*speed_ref; CPU *cpu; const uint8_t Src0[16*9] = { /* try to have every possible combinaison of rounding... */ 0, 0, 1, 0, 2, 0, 3, 0, 4 ,0,0,0, 0,0,0,0, 0, 1, 1, 1, 2, 1, 3, 1, 3 ,0,0,0, 0,0,0,0, 0, 2, 1, 2, 2, 2, 3, 2, 2 ,0,0,0, 0,0,0,0, 0, 3, 1, 3, 2, 3, 3, 3, 1 ,0,0,0, 0,0,0,0, 1, 3, 0, 2, 1, 0, 2, 3, 4 ,0,0,0, 0,0,0,0, 2, 2, 1, 2, 0, 1, 3, 5, 3 ,0,0,0, 0,0,0,0, 3, 1, 2, 3, 1, 2, 2, 6, 2 ,0,0,0, 0,0,0,0, 1, 0, 1, 3, 0, 3, 1, 6, 1 ,0,0,0, 0,0,0,0, 4, 3, 2, 1, 2, 3, 4, 0, 3 ,0,0,0, 0,0,0,0 }; uint8_t Dst[16*8] = {0}; printf( "\n === test block motion ===\n" ); for(cpu = cpu_list; cpu->name!=0; ++cpu) { double t; int tst, i, iCrc; if (!init_cpu(cpu)) continue; TEST_MB(interpolate8x8_halfpel_h, 0); printf("%s - interp- h-round0 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0x115381ba)?"| ERROR": "" ); TEST_MB(interpolate8x8_halfpel_h, 1); printf("%s - round1 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0x2b1f528f)?"| ERROR": "" ); TEST_MB(interpolate8x8_halfpel_v, 0); printf("%s - interp- v-round0 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0x423cdcc7)?"| ERROR": "" ); TEST_MB(interpolate8x8_halfpel_v, 1); printf("%s - round1 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0x42202efe)?"| ERROR": "" ); TEST_MB(interpolate8x8_halfpel_hv, 0); printf("%s - interp-hv-round0 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0xd198d387)?"| ERROR": "" ); TEST_MB(interpolate8x8_halfpel_hv, 1); printf("%s - round1 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0x9ecfd921)?"| ERROR": "" ); /* this is a new function, as of 06.06.2002 */ #if 0 TEST_MB2(interpolate8x8_avrg); printf("%s - interpolate8x8_c %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=8107)?"| ERROR": "" ); #endif /* New functions for field prediction by CK 1.10.2005 */ #pragma NEW8X4 TEST_MB(interpolate8x4_halfpel_h, 0); printf("%s - interpfield-h -round0 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0x9538d6df)?"| ERROR": "" ); TEST_MB(interpolate8x4_halfpel_h, 1); printf("%s - round1 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0xde5f1db4)?"| ERROR": "" ); TEST_MB(interpolate8x4_halfpel_v, 0); printf("%s - interpfield- v-round0 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0xea5a69ef)?"| ERROR": "" ); TEST_MB(interpolate8x4_halfpel_v, 1); printf("%s - round1 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0x4f10ec0f)?"| ERROR": "" ); TEST_MB(interpolate8x4_halfpel_hv, 0); printf("%s - interpfield-hv-round0 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0xf97ee367)?"| ERROR": "" ); TEST_MB(interpolate8x4_halfpel_hv, 1); printf("%s - round1 %.3f usec crc32=0x%08x %s\n", cpu->name, t, iCrc, (iCrc!=0xb6a9f581)?"| ERROR": "" ); /* End of 8x4 functions */ printf( " --- \n" ); } } #undef ENTER #undef LEAVE #undef TEST_MB #undef TEST_MB2 /********************************************************************* * test transfer *********************************************************************/ #define INIT_TRANSFER \ for(i=0; i<8*32; ++i) { \ Src8[i] = i; Src16[i] = i; \ Dst8[i] = 0; Dst16[i] = 0; \ Ref1[i] = i^0x27; \ Ref2[i] = i^0x51; \ } #define TEST_TRANSFER_BEGIN(DST) \ INIT_TRANSFER \ overhead = -gettime_usec(); \ for(tst=0; tst<nb_tests; ++tst) { \ for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a;\ } \ overhead += gettime_usec(); \ t = gettime_usec(); \ emms(); \ for(tst=0; tst<nb_tests; ++tst) { \ for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a; #define TEST_TRANSFER_END(DST) \ } \ emms(); \ t = (gettime_usec()-t -overhead) / nb_tests;\ byte_swap((uint8_t*)(DST), 8*32*sizeof((DST)[0]), sizeof((DST)[0])); \ s = calc_crc((uint8_t*)(DST), 8*32*sizeof((DST)[0]), CRC32_INITIAL) #define TEST_TRANSFER(FUNC, DST, SRC) \ TEST_TRANSFER_BEGIN(DST); \ (FUNC)((DST), (SRC), 32); \ TEST_TRANSFER_END(DST) #define TEST_TRANSFER2_BEGIN(DST, SRC) \ INIT_TRANSFER \ overhead = -gettime_usec(); \ for(tst=0; tst<nb_tests; ++tst) { \ for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a;\ for(i=0; i<8*32; ++i) (SRC)[i] = i^0x3e;\ } \ overhead += gettime_usec(); \ t = gettime_usec(); \ emms(); \ for(tst=0; tst<nb_tests; ++tst) { \ for(i=0; i<8*32; ++i) (DST)[i] = i^0x6a;\ for(i=0; i<8*32; ++i) (SRC)[i] = i^0x3e; #define TEST_TRANSFER2_END(DST) \ } \ emms(); \ t = (gettime_usec()-t -overhead) / nb_tests;\ byte_swap((uint8_t*)(DST), 8*32*sizeof((DST)[0]), sizeof((DST)[0])); \ s = calc_crc((uint8_t*)(DST), 8*32*sizeof((DST)[0]), CRC32_INITIAL) #define TEST_TRANSFER2(FUNC, DST, SRC, R1) \ TEST_TRANSFER2_BEGIN(DST,SRC); \ (FUNC)((DST), (SRC), (R1), 32); \ TEST_TRANSFER2_END(DST) #define TEST_TRANSFER3(FUNC, DST, SRC, R1, R2)\ TEST_TRANSFER_BEGIN(DST); \ (FUNC)((DST), (SRC), (R1), (R2), 32); \ TEST_TRANSFER_END(DST) void test_transfer() { const int nb_tests = 4000*speed_ref; int i; CPU *cpu; // uint8_t Src8[8*32], Dst8[8*32], Ref1[8*32], Ref2[8*32]; // int16_t Src16[8*32], Dst16[8*32]; DECLARE_ALIGNED_MATRIX(Src8, 8, 32, uint8_t, CACHE_LINE); DECLARE_ALIGNED_MATRIX(Dst8, 8, 32, uint8_t, CACHE_LINE); DECLARE_ALIGNED_MATRIX(Ref1, 8, 32, uint8_t, CACHE_LINE); DECLARE_ALIGNED_MATRIX(Ref2, 8, 32, uint8_t, CACHE_LINE); DECLARE_ALIGNED_MATRIX(Src16, 8, 32, uint16_t, CACHE_LINE); DECLARE_ALIGNED_MATRIX(Dst16, 8, 32, uint16_t, CACHE_LINE); printf( "\n === test transfer ===\n" ); for(cpu = cpu_list; cpu->name!=0; ++cpu) { double t, overhead; int tst, s; if (!init_cpu(cpu)) continue; TEST_TRANSFER(transfer_8to16copy, Dst16, Src8); printf("%s - 8to16 %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0x115814bb)?"| ERROR": ""); TEST_TRANSFER(transfer_16to8copy, Dst8, Src16); printf( "%s - 16to8 %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0xee7ccbb4)?"| ERROR": ""); /* New functions for field prediction by CK 1.10.2005 */ #pragma NEW8X4 TEST_TRANSFER(transfer8x4_copy, Dst8, Src8); printf("%s - 8to4 %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0xbb9c3db5)?"| ERROR": ""); /* End of new functions */ TEST_TRANSFER(transfer8x8_copy, Dst8, Src8); printf("%s - 8to8 %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0xd37b3295)?"| ERROR": ""); TEST_TRANSFER(transfer_16to8add, Dst8, Src16); printf("%s - 16to8add %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0xdd817bf4)?"| ERROR": "" ); TEST_TRANSFER2(transfer_8to16sub, Dst16, Src8, Ref1); { int s1, s2; s1 = calc_crc((uint8_t*)Dst16, 8*32*sizeof(Dst16[0]), CRC32_INITIAL); s2 = calc_crc((uint8_t*)Src8, 8*32*sizeof(Src8[0]), CRC32_INITIAL); printf("%s - 8to16sub %.3f usec crc32(1)=0x%08x crc32(2)=0x%08x %s %s\n", cpu->name, t, s1, s2, (s1!=0xa1e07163)?"| ERROR1": "", (s2!=0xd86c5d23)?"| ERROR2": "" ); } TEST_TRANSFER3(transfer_8to16sub2, Dst16, Src8, Ref1, Ref2); printf("%s - 8to16sub2 %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0x99b6c4c7)?"| ERROR": "" ); printf( " --- \n" ); } } /********************************************************************* * test quantization *********************************************************************/ #define TEST_QUANT(FUNC, DST, SRC) \ t = gettime_usec(); \ for(s=CRC32_INITIAL,qm=1; qm<=255; ++qm) { \ for(i=0; i<8*8; ++i) Quant[i] = qm; \ set_inter_matrix( mpeg_quant_matrices, Quant ); \ emms(); \ for(q=1; q<=max_Q; ++q) { \ for(tst=0; tst<nb_tests; ++tst) \ (FUNC)((DST), (SRC), q, mpeg_quant_matrices); \ byte_swap((uint8_t*)(DST), 64*sizeof((DST)[0]), sizeof((DST)[0])); \ s = calc_crc((uint8_t*)(DST), 64*sizeof((DST)[0]), s); \ } \ emms(); \ } \ t = (gettime_usec()-t-overhead)/nb_tests/qm #define TEST_QUANT2(FUNC, DST, SRC) \ t = gettime_usec(); \ for(s=CRC32_INITIAL,qm=1; qm<=255; ++qm) { \ for(i=0; i<8*8; ++i) Quant[i] = qm; \ set_intra_matrix( mpeg_quant_matrices, Quant ); \ emms(); \ for(q=1; q<=max_Q; ++q) { \ init_intra_matrix( mpeg_quant_matrices, q ); \ for(tst=0; tst<nb_tests; ++tst) \ (FUNC)((DST), (SRC), q, q, mpeg_quant_matrices); \ byte_swap((uint8_t*)(DST), 64*sizeof((DST)[0]), sizeof((DST)[0])); \ s = calc_crc((uint8_t*)(DST), 64*sizeof((DST)[0]), s); \ } \ emms(); \ } \ t = (gettime_usec()-t-overhead)/nb_tests/qm #define TEST_INTRA(REFFUNC, NEWFUNC, RANGE) \ { int32_t i,q,s;\ DECLARE_ALIGNED_MATRIX(Src, 8, 8, int16_t, 16); \ DECLARE_ALIGNED_MATRIX(Dst, 8, 8, int16_t, 16); \ DECLARE_ALIGNED_MATRIX(Dst2,8, 8, int16_t, 16); \ for(q=1;q<=max_Q;q++) \ for(s=-RANGE;s<RANGE;s++) { \ for(i=0;i<64;i++) Src[i]=s; \ (REFFUNC)((Dst),(Src),q,q,mpeg_quant_matrices); \ (NEWFUNC)((Dst2),(Src),q,q,mpeg_quant_matrices); \ for(i=0;i<64;i++) \ if(Dst[i]!=Dst2[i]) printf("ERROR : " #NEWFUNC " i%d quant:%d input:%d C_result:%d ASM_result:%d\n",i,q,s,Dst[i],Dst2[i]); \ } \ } #define TEST_INTER(REFFUNC, NEWFUNC, RANGE) \ { int i,q,s; \ DECLARE_ALIGNED_MATRIX(Src, 8, 8, int16_t, 16); \ DECLARE_ALIGNED_MATRIX(Dst, 8, 8, int16_t, 16); \ DECLARE_ALIGNED_MATRIX(Dst2,8, 8, int16_t, 16); \ for(q=1;q<=max_Q;q++) \ for(s=-RANGE;s<RANGE;s++) { \ for(i=0;i<64;i++) Src[i]=s; \ (REFFUNC)((Dst),(Src),q,mpeg_quant_matrices); \ (NEWFUNC)((Dst2),(Src),q,mpeg_quant_matrices); \ emms(); \ for(i=0;i<64;i++) \ if(Dst[i]!=Dst2[i]) printf("ERROR : " #NEWFUNC " i%d quant:%d input:%d C_result:%d ASM_result:%d\n",i,q,s,Dst[i],Dst2[i]); \ } \ } void test_quant() { const int32_t nb_tests = 1*speed_ref; const int32_t max_Q = 31; DECLARE_ALIGNED_MATRIX(mpeg_quant_matrices, 8, 64, uint16_t, 16); int32_t i, qm; CPU *cpu; DECLARE_ALIGNED_MATRIX(Src, 8, 8, int16_t, 16); DECLARE_ALIGNED_MATRIX(Dst, 8, 8, int16_t, 16); DECLARE_ALIGNED_MATRIX(Dst2,8, 8, int16_t, 16); uint8_t Quant[8*8]; printf( "\n ===== test quant =====\n" ); /* we deliberately enfringe the norm's specified range [-127,127], */ /* to test the robustness of the iquant module */ for(i=0; i<64; ++i) { Src[i] = 1 + (i-32) * (i&6); Dst[i] = 0; } for(cpu = cpu_list; cpu->name!=0; ++cpu) { double t, overhead; int32_t tst, q; uint32_t s; if (!init_cpu(cpu)) continue; // exhaustive tests to compare against the (ref) C-version TEST_INTRA(quant_h263_intra_c, quant_h263_intra, 2048); TEST_INTRA(dequant_h263_intra_c, dequant_h263_intra , 512 ); TEST_INTER(quant_h263_inter_c, quant_h263_inter , 2048); TEST_INTER(dequant_h263_inter_c, dequant_h263_inter , 512 ); overhead = -gettime_usec(); for(s=0,qm=1; qm<=255; ++qm) { for(i=0; i<8*8; ++i) Quant[i] = qm; set_inter_matrix(mpeg_quant_matrices, Quant ); for(q=1; q<=max_Q; ++q) for(i=0; i<64; ++i) s+=Dst[i]^i^qm; } overhead += gettime_usec(); TEST_QUANT2(quant_mpeg_intra, Dst, Src); printf("%s - quant_mpeg_intra %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0x3b999af6)? "| ERROR": ""); TEST_QUANT(quant_mpeg_inter, Dst, Src); printf("%s - quant_mpeg_inter %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0xf6de7757)?"| ERROR": ""); TEST_QUANT2(dequant_mpeg_intra, Dst, Src); printf("%s - dequant_mpeg_intra %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0x2def7bc7)?"| ERROR": ""); TEST_QUANT(dequant_mpeg_inter, Dst, Src); printf("%s - dequant_mpeg_inter %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0xd878c722)?"| ERROR": ""); TEST_QUANT2(quant_h263_intra, Dst, Src); printf("%s - quant_h263_intra %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0x2eba9d43)?"| ERROR": ""); TEST_QUANT(quant_h263_inter, Dst, Src); printf("%s - quant_h263_inter %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0xbd315a7e)?"| ERROR": ""); TEST_QUANT2(dequant_h263_intra, Dst, Src); printf("%s - dequant_h263_intra %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0x9841212a)?"| ERROR": ""); TEST_QUANT(dequant_h263_inter, Dst, Src); printf("%s - dequant_h263_inter %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, (s!=0xe7df8fba)?"| ERROR": ""); printf( " --- \n" ); } } /********************************************************************* * test distortion operators *********************************************************************/ static void ieee_reseed(long s); static long ieee_rand(int Min, int Max); #define TEST_SSE(FUNCTION, SRC1, SRC2, STRIDE) \ do { \ t = gettime_usec(); \ tst = nb_tests; \ while((tst--)>0) sse = (FUNCTION)((SRC1), (SRC2), (STRIDE)); \ emms(); \ t = (gettime_usec() - t)/(double)nb_tests; \ } while(0) void test_sse() { const int nb_tests = 100000*speed_ref; int i; CPU *cpu; DECLARE_ALIGNED_MATRIX(Src1, 8, 8, int16_t, 16); DECLARE_ALIGNED_MATRIX(Src2, 8, 8, int16_t, 16); DECLARE_ALIGNED_MATRIX(Src3, 8, 8, int16_t, 16); DECLARE_ALIGNED_MATRIX(Src4, 8, 8, int16_t, 16); printf( "\n ===== test sse =====\n" ); ieee_reseed(1); for(i=0; i<64; ++i) { Src1[i] = ieee_rand(-2048, 2047); Src2[i] = ieee_rand(-2048, 2047); Src3[i] = ieee_rand(-2048, 2047); Src4[i] = ieee_rand(-2048, 2047); } for(cpu = cpu_list; cpu->name!=0; ++cpu) { double t; int tst, sse; if (!init_cpu(cpu)) continue; /* 16 bit element blocks */ TEST_SSE(sse8_16bit, Src1, Src2, 16); printf("%s - sse8_16bit#1 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=182013834)?"| ERROR": ""); TEST_SSE(sse8_16bit, Src1, Src3, 16); printf("%s - sse8_16bit#2 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=142545203)?"| ERROR": ""); TEST_SSE(sse8_16bit, Src1, Src4, 16); printf("%s - sse8_16bit#3 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=146340935)?"| ERROR": ""); TEST_SSE(sse8_16bit, Src2, Src3, 16); printf("%s - sse8_16bit#4 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=130136661)?"| ERROR": ""); TEST_SSE(sse8_16bit, Src2, Src4, 16); printf("%s - sse8_16bit#5 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=136870353)?"| ERROR": ""); TEST_SSE(sse8_16bit, Src3, Src4, 16); printf("%s - sse8_16bit#6 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=164107772)?"| ERROR": ""); /* 8 bit element blocks */ TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src2, 8); printf("%s - sse8_8bit#1 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=1356423)?"| ERROR": ""); TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src3, 8); printf("%s - sse8_8bit#2 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=1173074)?"| ERROR": ""); TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src4, 8); printf("%s - sse8_8bit#3 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=1092357)?"| ERROR": ""); TEST_SSE(sse8_8bit, (int8_t*)Src2, (int8_t*)Src3, 8); printf("%s - sse8_8bit#4 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=1360239)?"| ERROR": ""); TEST_SSE(sse8_8bit, (int8_t*)Src2, (int8_t*)Src4, 8); printf("%s - sse8_8bit#5 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=1208414)?"| ERROR": ""); TEST_SSE(sse8_8bit, (int8_t*)Src3, (int8_t*)Src4, 8); printf("%s - sse8_8bit#6 %.3f usec sse=%d %s\n", cpu->name, t, sse, (sse!=1099285)?"| ERROR": ""); printf(" ---\n"); } } /********************************************************************* * test non-zero AC counting *********************************************************************/ #define TEST_CBP(FUNC, SRC, NB) \ t = gettime_usec(); \ emms(); \ for(tst=0; tst<NB; ++tst) { \ cbp = (FUNC)((SRC)); \ } \ emms(); \ t = (gettime_usec()-t ) / nb_tests; void test_cbp() { const int nb_tests = 10000*speed_ref; int i, n, m; CPU *cpu; DECLARE_ALIGNED_MATRIX(Src1, 6, 64, int16_t, 16); DECLARE_ALIGNED_MATRIX(Src2, 6, 64, int16_t, 16); DECLARE_ALIGNED_MATRIX(Src3, 6, 64, int16_t, 16); DECLARE_ALIGNED_MATRIX(Src4, 6, 64, int16_t, 16); DECLARE_ALIGNED_MATRIX(Src5, 6, 64, int16_t, 16); printf( "\n ===== test cbp =====\n" ); for(i=0; i<6*64; ++i) { Src1[i] = (i*i*3/8192)&(i/64)&1; /* 'random' */ Src2[i] = (i<3*64); /* half-full */ Src3[i] = ((i+32)>3*64); Src4[i] = (i==(3*64+2) || i==(5*64+9)); Src5[i] = ieee_rand(0,1) ? -1 : 1; /* +/- test */ } for(cpu = cpu_list; cpu->name!=0; ++cpu) { double t; int tst, cbp; if (!init_cpu(cpu)) continue; TEST_CBP(calc_cbp, Src1, nb_tests); printf("%s - calc_cbp#1 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x15)?"| ERROR": ""); TEST_CBP(calc_cbp, Src2, nb_tests); printf("%s - calc_cbp#2 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x38)?"| ERROR": ""); TEST_CBP(calc_cbp, Src3, nb_tests); printf("%s - calc_cbp#3 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x0f)?"| ERROR": "" ); TEST_CBP(calc_cbp, Src4, nb_tests); printf("%s - calc_cbp#4 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x05)?"| ERROR": "" ); TEST_CBP(calc_cbp, Src5, nb_tests); printf("%s - calc_cbp#4 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x3f)?"| ERROR": "" ); printf( " --- \n" ); } for(cpu = cpu_list; cpu->name!=0; ++cpu) /* bench suggested by Carlo (carlo dot bramix at libero dot it) */ { double t; int tst, cbp, err; if (!init_cpu(cpu)) continue; err = 0; for(n=0; n<6; ++n) { for(m=0; m<64; ++m) { for(i=0; i<6*64; ++i) Src1[i] = (i== (m + n*64)); TEST_CBP(calc_cbp, Src1, 1); if (cbp!= (((m!=0)<<(5-n)))) { printf( "%s - calc_cbp#5: ERROR at pos %d / %d!\n", cpu->name, n, m); err = 1; break; } } } if (!err) printf( " %s - calc_cbp#5 : OK\n", cpu->name ); } } /********************************************************************* * fdct/idct IEEE1180 compliance *********************************************************************/ typedef struct { long Errors[64]; long Sqr_Errors[64]; long Max_Errors[64]; long Nb; } STATS_8x8; void init_stats(STATS_8x8 *S) { int i; for(i=0; i<64; ++i) { S->Errors[i] = 0; S->Sqr_Errors[i] = 0; S->Max_Errors[i] = 0; } S->Nb = 0; } void store_stats(STATS_8x8 *S, short Blk[64], short Ref[64]) { int i; for(i=0; i<64; ++i) { short Err = Blk[i] - Ref[i]; S->Errors[i] += Err; S->Sqr_Errors[i] += Err * Err; if (Err<0) Err = -Err; if (S->Max_Errors[i]<Err) S->Max_Errors[i] = Err; } S->Nb++; } void print_stats(STATS_8x8 *S) { int i; double Norm; assert(S->Nb>0); Norm = 1. / (double)S->Nb; printf("\n== Max absolute values of errors ==\n"); for(i=0; i<64; i++) { printf(" %4ld", S->Max_Errors[i]); if ((i&7)==7) printf("\n"); } printf("\n== Mean square errors ==\n"); for(i=0; i<64; i++) { double Err = Norm * (double)S->Sqr_Errors[i]; printf(" %.3f", Err); if ((i&7)==7) printf("\n"); } printf("\n== Mean errors ==\n"); for(i=0; i<64; i++) { double Err = Norm * (double)S->Errors[i]; printf(" %.3f", Err); if ((i&7)==7) printf("\n"); } printf("\n"); } static const char *CHECK(double v, double l) { if (fabs(v)<=l) return "ok"; else return "FAIL!"; } void report_stats(STATS_8x8 *S, const double *Limits) { int i; double Norm, PE, PMSE, OMSE, PME, OME; assert(S->Nb>0); Norm = 1. / (double)S->Nb; PE = 0.; for(i=0; i<64; i++) { if (PE<S->Max_Errors[i]) PE = S->Max_Errors[i]; } PMSE = 0.; OMSE = 0.; for(i=0; i<64; i++) { double Err = Norm * (double)S->Sqr_Errors[i]; OMSE += Err; if (PMSE < Err) PMSE = Err; } OMSE /= 64.; PME = 0.; OME = 0.; for(i=0; i<64; i++) { double Err = Norm * (double)S->Errors[i]; OME += Err; Err = fabs(Err); if (PME < Err) PME = Err; } OME /= 64.; printf( "Peak error: %4.4f\n", PE ); printf( "Peak MSE: %4.4f\n", PMSE ); printf( "Overall MSE: %4.4f\n", OMSE ); printf( "Peak ME: %4.4f\n", PME ); printf( "Overall ME: %4.4f\n", OME ); if (Limits!=0) { printf( "[PE<=%.4f %s] ", Limits[0], CHECK(PE, Limits[0]) ); printf( "\n" ); printf( "[PMSE<=%.4f %s]", Limits[1], CHECK(PMSE, Limits[1]) ); printf( "[OMSE<=%.4f %s]", Limits[2], CHECK(OMSE, Limits[2]) ); printf( "\n" ); printf( "[PME<=%.4f %s] ", Limits[3], CHECK(PME , Limits[3]) ); printf( "[OME<=%.4f %s] ", Limits[4], CHECK(OME , Limits[4]) ); printf( "\n" ); } } ///* ////////////////////////////////////////////////////// */ /* Pseudo-random generator specified by IEEE 1180 */ static long ieee_seed = 1; static void ieee_reseed(long s) { ieee_seed = s; } static long ieee_rand(int Min, int Max) { static double z = (double) 0x7fffffff; long i,j; double x; ieee_seed = (ieee_seed * 1103515245) + 12345; i = ieee_seed & 0x7ffffffe; x = ((double) i) / z; x *= (Max-Min+1); j = (long)x; j = j + Min; assert(j>=Min && j<=Max); return (short)j; } #define CLAMP(x, M) (x) = ((x)<-(M)) ? (-(M)) : ((x)>=(M) ? ((M)-1) : (x)) static double Cos[8][8]; static void init_ref_dct() { int i, j; for(i=0; i<8; i++) { double scale = (i == 0) ? sqrt(0.125) : 0.5; for (j=0; j<8; j++) Cos[i][j] = scale*cos( (M_PI/8.0)*i*(j + 0.5) ); } } void ref_idct(short *M) { int i, j, k; double Tmp[8][8]; for(i=0; i<8; i++) { for(j=0; j<8; j++) { double Sum = 0.0; for (k=0; k<8; k++) Sum += Cos[k][j]*M[8*i+k]; Tmp[i][j] = Sum; } } for(i=0; i<8; i++) { for(j=0; j<8; j++) { double Sum = 0.0; for (k=0; k<8; k++) Sum += Cos[k][i]*Tmp[k][j]; M[8*i+j] = (short)floor(Sum + .5); } } } void ref_fdct(short *M) { int i, j, k; double Tmp[8][8]; for(i=0; i<8; i++) { for(j=0; j<8; j++) { double Sum = 0.0; for (k=0; k<8; k++) Sum += Cos[j][k]*M[8*i+k]; Tmp[i][j] = Sum; } } for(i=0; i<8; i++) { for(j=0; j<8; j++) { double Sum = 0.0; for (k=0; k<8; k++) Sum += Cos[i][k]*Tmp[k][j]; M[8*i+j] = (short)floor(Sum + 0.5); } } } void test_IEEE1180_compliance(int Min, int Max, int Sign) { static const double ILimits[5] = { 1., 0.06, 0.02, 0.015, 0.0015 }; int Loops = 10000; int i, m, n; DECLARE_ALIGNED_MATRIX(Blk0, 8, 8, short, 16); /* reference */ DECLARE_ALIGNED_MATRIX(Blk, 8, 8, short, 16); DECLARE_ALIGNED_MATRIX(iBlk, 8, 8, short, 16); DECLARE_ALIGNED_MATRIX(Ref_FDCT, 8, 8, short, 16); DECLARE_ALIGNED_MATRIX(Ref_IDCT, 8, 8, short, 16); STATS_8x8 FStats; /* forward dct stats */ STATS_8x8 IStats; /* inverse dct stats */ CPU *cpu; init_ref_dct(); for(cpu = cpu_list; cpu->name!=0; ++cpu) { if (!init_cpu(cpu)) continue; printf( "\n===== IEEE test for %s ==== (Min=%d Max=%d Sign=%d Loops=%d)\n", cpu->name, Min, Max, Sign, Loops); init_stats(&IStats); init_stats(&FStats); ieee_reseed(1); for(n=0; n<Loops; ++n) { for(i=0; i<64; ++i) Blk0[i] = (short)ieee_rand(Min,Max) * Sign; /* hmm, I'm not quite sure this is exactly */ /* the tests described in the norm. check... */ memcpy(Ref_FDCT, Blk0, 64*sizeof(short)); ref_fdct(Ref_FDCT); for(i=0; i<64; i++) CLAMP( Ref_FDCT[i], 2048 ); memcpy(Blk, Blk0, 64*sizeof(short)); emms(); fdct(Blk); emms(); for(i=0; i<64; i++) CLAMP( Blk[i], 2048 ); store_stats(&FStats, Blk, Ref_FDCT); memcpy(Ref_IDCT, Ref_FDCT, 64*sizeof(short)); ref_idct(Ref_IDCT); for (i=0; i<64; i++) CLAMP( Ref_IDCT[i], 256 ); memcpy(iBlk, Ref_FDCT, 64*sizeof(short)); emms(); idct(iBlk); emms(); for(i=0; i<64; i++) CLAMP( iBlk[i], 256 ); store_stats(&IStats, iBlk, Ref_IDCT); } printf( "\n -- FDCT report --\n" ); // print_stats(&FStats); report_stats(&FStats, 0); /* so far I know, IEEE1180 says nothing for fdct */ for(i=0; i<64; i++) Blk[i] = 0; emms(); fdct(Blk); emms(); for(m=i=0; i<64; i++) if (Blk[i]!=0) m++; printf( "FDCT(0) == 0 ? %s\n", (m!=0) ? "NOPE!" : "yup." ); printf( "\n -- IDCT report --\n" ); // print_stats(&IStats); report_stats(&IStats, ILimits); for(i=0; i<64; i++) Blk[i] = 0; emms(); idct(Blk); emms(); for(m=i=0; i<64; i++) if (Blk[i]!=0) m++; printf( "IDCT(0) == 0 ? %s\n", (m!=0) ? "NOPE!" : "yup." ); } } void test_dct_saturation(int Min, int Max) { /* test behaviour on input range fringe */ int i, n, p; CPU *cpu; // const short IDCT_MAX = 2047; /* 12bits input */ // const short IDCT_MIN = -2048; // const short IDCT_OUT = 256; /* 9bits ouput */ const int Partitions = 4; const int Loops = 10000 / Partitions; init_ref_dct(); for(cpu = cpu_list; cpu->name!=0; ++cpu) { short Blk0[64], Blk[64]; STATS_8x8 Stats; if (!init_cpu(cpu)) continue; printf( "\n===== IEEE test for %s Min=%d Max=%d =====\n", cpu->name, Min, Max ); /* FDCT tests // */ init_stats(&Stats); /* test each computation channels separately */ for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? Max : 0; ref_fdct(Blk0); emms(); fdct(Blk); emms(); store_stats(&Stats, Blk, Blk0); for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? Min : 0; ref_fdct(Blk0); emms(); fdct(Blk); emms(); store_stats(&Stats, Blk, Blk0); /* randomly saturated inputs */ for(p=0; p<Partitions; ++p) { for(n=0; n<Loops; ++n) { for(i=0; i<64; ++i) Blk0[i] = Blk[i] = (ieee_rand(0,Partitions)>=p)? Max : Min; ref_fdct(Blk0); emms(); fdct(Blk); emms(); store_stats(&Stats, Blk, Blk0); } } printf( "\n -- FDCT saturation report --\n" ); report_stats(&Stats, 0); /* IDCT tests // */ #if 0 /* no finished yet */ init_stats(&Stats); /* test each computation channel separately */ for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? IDCT_MAX : 0; ref_idct(Blk0); emms(); idct(Blk); emms(); for(i=0; i<64; i++) { CLAMP(Blk0[i], IDCT_OUT); CLAMP(Blk[i], IDCT_OUT); } store_stats(&Stats, Blk, Blk0); for(i=0; i<64; i++) Blk[i] = Blk0[i] = ((i/8)==(i%8)) ? IDCT_MIN : 0; ref_idct(Blk0); emms(); idct(Blk); emms(); for(i=0; i<64; i++) { CLAMP(Blk0[i], IDCT_OUT); CLAMP(Blk[i], IDCT_OUT); } store_stats(&Stats, Blk, Blk0); /* randomly saturated inputs */ for(p=0; p<Partitions; ++p) { for(n=0; n<Loops; ++n) { for(i=0; i<64; ++i) Blk0[i] = Blk[i] = (ieee_rand(0,Partitions)>=p)? IDCT_MAX : IDCT_MIN; ref_idct(Blk0); emms(); idct(Blk); emms(); for(i=0; i<64; i++) { CLAMP(Blk0[i],IDCT_OUT); CLAMP(Blk[i],IDCT_OUT); } store_stats(&Stats, Blk, Blk0); } } printf( "\n -- IDCT saturation report --\n" ); print_stats(&Stats); report_stats(&Stats, 0); #endif } } /********************************************************************* * measure raw decoding speed *********************************************************************/ void test_dec(const char *name, int width, int height, int ref_chksum) { FILE *f = 0; void *dechandle = 0; int xerr; xvid_gbl_init_t xinit; xvid_dec_create_t xparam; xvid_dec_frame_t xframe; double t = 0.; int nb = 0; uint8_t *buf = 0; uint8_t *yuv_out = 0; int buf_size, pos; uint32_t chksum = 0; int bps = (width+31) & ~31; memset(&xinit, 0, sizeof(xinit)); xinit.cpu_flags = cpu_mask; xinit.version = XVID_VERSION; xvid_global(NULL, 0, &xinit, NULL); memset(&xparam, 0, sizeof(xparam)); xparam.width = width; xparam.height = height; xparam.version = XVID_VERSION; xerr = xvid_decore(NULL, XVID_DEC_CREATE, &xparam, NULL); if (xerr==XVID_ERR_FAIL) { printf("ERROR: can't init decoder (err=%d)\n", xerr); return; } dechandle = xparam.handle; f = fopen(name, "rb"); if (f==0) { printf( "ERROR: can't open file '%s'\n", name); return; } fseek(f, 0, SEEK_END); buf_size = ftell(f); fseek(f, 0, SEEK_SET); if (buf_size<=0) { printf("ERROR: error while stating file\n"); goto End; } buf = malloc(buf_size); yuv_out = calloc(1, bps*height*3/2 + 15); if (buf==0 || yuv_out==0) { printf( "ERROR: malloc failed!\n" ); goto End; } if (fread(buf, buf_size, 1, f)!=1) { printf( "ERROR: file-read failed\n" ); goto End; } nb = 0; pos = 0; t = -gettime_usec(); while(1) { int y; memset(&xframe, 0, sizeof(xframe)); xframe.version = XVID_VERSION; xframe.bitstream = buf + pos; xframe.length = buf_size - pos; xframe.output.plane[0] = (uint8_t*)(((size_t)yuv_out + 15) & ~15); xframe.output.plane[1] = (uint8_t*)xframe.output.plane[0] + bps*height; xframe.output.plane[2] = (uint8_t*)xframe.output.plane[1] + bps/2; xframe.output.stride[0] = bps; xframe.output.stride[1] = bps; xframe.output.stride[2] = bps; xframe.output.csp = XVID_CSP_I420; xerr = xvid_decore(dechandle, XVID_DEC_DECODE, &xframe, 0); if (xerr<0) { printf("ERROR: decoding failed for frame #%d (err=%d)!\n", nb, xerr); break; } else if (xerr==0) break; else if (verbose>0) printf("#%d %d\n", nb, xerr ); pos += xerr; nb++; for(y=0; y<height/2; ++y) { chksum = calc_crc((uint8_t*)xframe.output.plane[0] + (2*y+0)*bps, width, chksum); chksum = calc_crc((uint8_t*)xframe.output.plane[0] + (2*y+1)*bps, width, chksum); chksum = calc_crc((uint8_t*)xframe.output.plane[1] + y*bps, width/2, chksum); chksum = calc_crc((uint8_t*)xframe.output.plane[2] + y*bps, width/2, chksum); } if (pos==buf_size) break; } t += gettime_usec(); if (ref_chksum==0) { if (t>0.) printf( "%d frames decoded in %.3f s -> %.1f FPS Checksum:0x%.8x\n", nb, t*1.e-6f, (float)(nb*1.e6f/t), chksum ); } else { printf("FPS:%.1f Checksum: 0x%.8x Expected:0x%.8x | %s\n", t>0. ? (float)(nb*1.e6f/t) : 0.f, chksum, ref_chksum, (chksum==ref_chksum) ? "OK" : "ERROR"); } End: if (yuv_out!=0) free(yuv_out); if (buf!=0) free(buf); if (dechandle!=0) { xerr= xvid_decore(dechandle, XVID_DEC_DESTROY, NULL, NULL); if (xerr==XVID_ERR_FAIL) printf("ERROR: destroy-decoder failed (err=%d)!\n", xerr); } if (f!=0) fclose(f); } /********************************************************************* * non-regression tests *********************************************************************/ void test_bugs1() { CPU *cpu; uint16_t mpeg_quant_matrices[64*8]; printf( "\n ===== (de)quant4_intra saturation bug? =====\n" ); for(cpu = cpu_list; cpu->name!=0; ++cpu) { int i; int16_t Src[8*8], Dst[8*8]; if (!init_cpu(cpu)) continue; for(i=0; i<64; ++i) Src[i] = i-32; set_intra_matrix( mpeg_quant_matrices, get_default_intra_matrix() ); dequant_mpeg_intra(Dst, Src, 31, 5, mpeg_quant_matrices); printf( "dequant_mpeg_intra with CPU=%s: ", cpu->name); printf( " Out[]= " ); for(i=0; i<64; ++i) printf( "[%d]", Dst[i]); printf( "\n" ); } printf( "\n ===== (de)quant4_inter saturation bug? =====\n" ); for(cpu = cpu_list; cpu->name!=0; ++cpu) { int i; int16_t Src[8*8], Dst[8*8]; if (!init_cpu(cpu)) continue; for(i=0; i<64; ++i) Src[i] = i-32; set_inter_matrix( mpeg_quant_matrices, get_default_inter_matrix() ); dequant_mpeg_inter(Dst, Src, 31, mpeg_quant_matrices); printf( "dequant_mpeg_inter with CPU=%s: ", cpu->name); printf( " Out[]= " ); for(i=0; i<64; ++i) printf( "[%d]", Dst[i]); printf( "\n" ); } } void test_dct_precision_diffs() { CPU *cpu; DECLARE_ALIGNED_MATRIX(Blk, 8, 8, int16_t, 16); DECLARE_ALIGNED_MATRIX(Blk0, 8, 8, int16_t, 16); printf( "\n ===== fdct/idct precision diffs =====\n" ); for(cpu = cpu_list; cpu->name!=0; ++cpu) { int i; if (!init_cpu(cpu)) continue; for(i=0; i<8*8; ++i) { Blk0[i] = (i*7-i*i) & 0x7f; Blk[i] = Blk0[i]; } fdct(Blk); idct(Blk); printf( " fdct+idct diffs with CPU=%s: \n", cpu->name ); for(i=0; i<8; ++i) { int j; for(j=0; j<8; ++j) printf( " %d ", Blk[i*8+j]-Blk0[i*8+j]); printf("\n"); } printf("\n"); } } void test_quant_bug() { const int max_Q = 31; int i, n, qm, q; CPU *cpu; DECLARE_ALIGNED_MATRIX(Src, 8, 8, int16_t, 16); DECLARE_ALIGNED_MATRIX(Dst, 8, 8, int16_t, 16); uint8_t Quant[8*8]; CPU cpu_bug_list[] = { { "PLAINC", 0 }, { "MMX ", XVID_CPU_MMX }, {0,0} }; uint16_t Crcs_Inter[2][32]; uint16_t Crcs_Intra[2][32]; DECLARE_ALIGNED_MATRIX(mpeg_quant_matrices, 8, 64, uint16_t, 16); printf( "\n ===== test MPEG4-quantize bug =====\n" ); for(i=0; i<64; ++i) Src[i] = 2048*(i-32)/32; #if 1 for(qm=1; qm<=255; ++qm) { for(i=0; i<8*8; ++i) Quant[i] = qm; set_inter_matrix( mpeg_quant_matrices, Quant ); for(n=0, cpu = cpu_bug_list; cpu->name!=0; ++cpu, ++n) { uint16_t s; if (!init_cpu(cpu)) continue; for(q=1; q<=max_Q; ++q) { emms(); quant_mpeg_inter( Dst, Src, q, mpeg_quant_matrices ); emms(); for(s=0, i=0; i<64; ++i) s+=((uint16_t)Dst[i])^i; Crcs_Inter[n][q] = s; } } for(q=1; q<=max_Q; ++q) for(i=0; i<n-1; ++i) if (Crcs_Inter[i][q]!=Crcs_Inter[i+1][q]) printf( "Discrepancy Inter: qm=%d, q=%d -> %d/%d !\n", qm, q, Crcs_Inter[i][q], Crcs_Inter[i+1][q]); } #endif #if 1 for(qm=1; qm<=255; ++qm) { for(i=0; i<8*8; ++i) Quant[i] = qm; set_intra_matrix( mpeg_quant_matrices, Quant ); for(n=0, cpu = cpu_bug_list; cpu->name!=0; ++cpu, ++n) { uint16_t s; if (!init_cpu(cpu)) continue; for(q=1; q<=max_Q; ++q) { emms(); quant_mpeg_intra( Dst, Src, q, q, mpeg_quant_matrices); emms(); for(s=0, i=0; i<64; ++i) s+=((uint16_t)Dst[i])^i; Crcs_Intra[n][q] = s; } } for(q=1; q<=max_Q; ++q) for(i=0; i<n-1; ++i) if (Crcs_Intra[i][q]!=Crcs_Intra[i+1][q]) printf( "Discrepancy Intra: qm=%d, q=%d -> %d/%d!\n", qm, q, Crcs_Inter[i][q], Crcs_Inter[i+1][q]); } #endif } /********************************************************************* * test some YUV func *********************************************************************/ #define ENTER \ for(i=0; i<(int)sizeof(Dst0); ++i) Dst0[0][i] = 0; \ t = gettime_usec(); \ emms(); #define LEAVE \ emms(); \ t = (gettime_usec() - t) / nb_tests; \ iCrc = calc_crc((uint8_t*)Dst0, sizeof(Dst0), CRC32_INITIAL) #define TEST_YUYV(FUNC, S, FLIP) \ ENTER \ for(tst=0; tst<nb_tests; ++tst) (FUNC)(Dst0[0], S*WIDTH, Src0[0], Src0[1], Src0[2], WIDTH, WIDTH/2, WIDTH, HEIGHT, (FLIP)); \ LEAVE static const int yuv_CRCs[6][2] = { {0x0f4fb96b,0x780b6a68} , {0xa986b289,0x65e49b76} , {0x7f19c152,0xd539b86e} , {0x0f4fb96b,0x780b6a68} , {0xa986b289,0x65e49b76} , {0x36ab8b57,0x1cd92fee} }; #define WIDTH 128 #define HEIGHT 32 void test_yuv() { const int nb_tests = 200*speed_ref; CPU *cpu; uint8_t Src0[3][WIDTH*HEIGHT]; uint8_t Dst0[4][WIDTH*HEIGHT]; int i, j, with_flip; double t; int tst, iCrc; colorspace_init(); ieee_reseed(1); for(i=0; i<(int)sizeof(Src0); ++i) Src0[0][i] = ieee_rand(0,255); for(i=0; i<(int)sizeof(Dst0); ++i) Dst0[0][i] = 0x5a; printf( "\n === test YUV ===\n" ); for(with_flip=0; with_flip<=1; ++with_flip) { init_cpu(&cpu_list[0]); TEST_YUYV(yv12_to_yuyv_c, 4, with_flip); printf(" yv12_to_yuyv_c %.3f usec crc32=0x%08x %s\n", t, iCrc, (iCrc!=yuv_CRCs[0][with_flip])?"| ERROR": "" ); TEST_YUYV(yv12_to_uyvy_c, 4, with_flip); printf(" yv12_to_uyvy_c %.3f usec crc32=0x%08x %s\n", t, iCrc, (iCrc!=yuv_CRCs[1][with_flip])?"| ERROR": "" ); TEST_YUYV(yv12_to_bgra_c, 4, with_flip); printf(" yv12_to_bgra_c %.3f usec crc32=0x%08x %s\n", t, iCrc, (iCrc!=yuv_CRCs[2][with_flip])?"| ERROR": "" ); #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) init_cpu(&cpu_list[1]); TEST_YUYV(yv12_to_yuyv_mmx, 4, with_flip); printf(" yv12_to_yuyv_mmx %.3f usec crc32=0x%08x %s\n", t, iCrc, (iCrc!=yuv_CRCs[3][with_flip])?"| ERROR": "" ); TEST_YUYV(yv12_to_uyvy_mmx, 4, with_flip); printf(" yv12_to_uyvy_mmx %.3f usec crc32=0x%08x %s\n", t, iCrc, (iCrc!=yuv_CRCs[4][with_flip])?"| ERROR": "" ); TEST_YUYV(yv12_to_bgra_mmx, 4, with_flip); printf(" yv12_to_bgra_mmx %.3f usec crc32=0x%08x %s\n", t, iCrc, (iCrc!=yuv_CRCs[5][with_flip])?"| ERROR": "" ); #endif #ifdef ARCH_IS_PPC init_cpu(&cpu_list[1]); TEST_YUYV(yv12_to_yuyv_altivec_c, 4, with_flip); printf(" yv12_to_yuyv_altivec_c %.3f usec crc32=0x%08x %s\n", t, iCrc, (iCrc!=yuv_CRCs[3][with_flip])?"| ERROR": "" ); TEST_YUYV(yv12_to_uyvy_altivec_c, 4, with_flip); printf(" yv12_to_uyvy_altivec_c %.3f usec crc32=0x%08x %s\n", t, iCrc, (iCrc!=yuv_CRCs[4][with_flip])?"| ERROR": "" ); #endif } printf( " --- \n" ); } #define TEST_YV2(FUNC, WITH_UV, WITH_FLIP) \ ENTER \ for(tst=0; tst<nb_tests; ++tst) (FUNC)(Dst0[0], Dst0[1], Dst0[2], WIDTH, WIDTH, \ Src0[0], (WITH_UV) ? Src0[1] : 0, (WITH_UV) ? Src0[2] : 0, WIDTH, WIDTH, \ WIDTH-2, HEIGHT-2, WITH_FLIP); \ LEAVE #define PRINT_NxN(DATA,W,H,STR) { \ int i,j; \ for(j=0; j<(H); ++j) { \ for(i=0; i<(W); ++i) printf( "0x%.2x ", (DATA)[i+j*(STR)] );\ printf("\n"); \ } \ printf("---\n"); \ } static const int yv12_CRCs[2][2] = { {0x5cab7cf0,0xdab46541} , {0xe8bae865,0x1faf77b7} }; void test_yuv2() { const int nb_tests = 800*speed_ref; CPU *cpu; uint8_t Src0[3][WIDTH*HEIGHT]; uint8_t Dst0[3][WIDTH*HEIGHT]; int with_uv, with_flip; int i, j; double t; int tst, iCrc; colorspace_init(); ieee_reseed(1); for(i=0; i<(int)sizeof(Src0); ++i) Src0[0][i] = ieee_rand(0,255); printf( "\n === test YV2 ===\n" ); for(with_flip=0; with_flip<=1; ++with_flip) { for(with_uv=0; with_uv<=1; ++with_uv) { init_cpu(&cpu_list[0]); TEST_YV2(yv12_to_yv12_c, with_uv, with_flip); printf(" yv12_to_yv12_c %.3f usec \tcrc32=0x%08x %s\n", t, iCrc, (iCrc!=yv12_CRCs[with_flip][with_uv])?"| ERROR": "" ); /* if (!with_uv) PRINT_NxN(Dst0[1], WIDTH/2, HEIGHT/2, WIDTH ); */ #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) init_cpu(&cpu_list[1]); TEST_YV2(yv12_to_yv12_mmx, with_uv, with_flip); printf(" yv12_to_yv12_mmx %.3f usec \tcrc32=0x%08x %s\n", t, iCrc, (iCrc!=yv12_CRCs[with_flip][with_uv])?"| ERROR": "" ); /* if (!with_uv) PRINT_NxN(Dst0[1], WIDTH/2, HEIGHT/2, WIDTH ); */ TEST_YV2(yv12_to_yv12_xmm, with_uv, with_flip); printf(" yv12_to_yv12_xmm %.3f usec \tcrc32=0x%08x %s\n", t, iCrc, (iCrc!=yv12_CRCs[with_flip][with_uv])?"| ERROR": "" ); #endif } printf( " --- \n" ); } printf( " ===== \n" ); } #undef WIDTH #undef HEIGHT #undef ENTER #undef LEAVE /*********************************************************************/ static uint32_t __inline log2bin_v1(uint32_t value) { int n = 0; while (value) { value >>= 1; n++; } return n; } static const uint8_t log2_tab_16[16] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4 }; static uint32_t __inline log2bin_v2(uint32_t value) { int n = 0; if (value & 0xffff0000) { value >>= 16; n += 16; } if (value & 0xff00) { value >>= 8; n += 8; } if (value & 0xf0) { value >>= 4; n += 4; } return n + log2_tab_16[value]; } void test_log2bin() { const int nb_tests = 3000*speed_ref; int n, crc1=0, crc2=0; uint32_t s, s0; double t1, t2; t1 = gettime_usec(); s0 = (int)(t1*31.241); for(s=s0, n=0; n<nb_tests; ++n, s=(s*12363+31)&0x7fffffff) crc1 += log2bin_v1(s); t1 = (gettime_usec()-t1) / nb_tests; t2 = gettime_usec(); for(s=s0, n=0; n<nb_tests; ++n, s=(s*12363+31)&0x7fffffff) crc2 += log2bin_v2(s); t2 = (gettime_usec() - t2) / nb_tests; printf( "log2bin_v1: %.3f sec crc=%d\n", t1, crc1 ); printf( "log2bin_v2: %.3f sec crc=%d\n", t2, crc2 ); if (crc1!=crc2) printf( " CRC ERROR !\n" ); } /*********************************************************************/ static void __inline old_gcd(int *num, int *den) { int i = *num; while (i > 1) { if (*num % i == 0 && *den % i == 0) { *num /= i; *den /= i; i = *num; continue; } i--; } } static uint32_t gcd(int num, int den) { int tmp; while( (tmp=num%den) ) { num = den; den = tmp; } return den; } static void __inline new_gcd(int *num, int *den) { const int div = gcd(*num, *den); if (num) { *num /= div; *den /= div; } } void test_gcd() { const int nb_tests = 10*speed_ref; int i; uint32_t crc1=0, crc2=0; uint32_t n0, n, d0, d; double t1, t2; t1 = gettime_usec(); n0 = 0xfffff & (int)(t1*31.241); d0 = 0xfffff & (int)( ((n0*4123)%17) | 1 ); for(n=n0, d=d0, i=0; i<nb_tests; ++i) { old_gcd(&n, &d); crc1 = (((crc1>>4)^d) + ((crc1<<2)^n) ) & 0xffffff; n = d; d = (d*12363+31) & 0xffff; d |= !d; } t1 = (gettime_usec()-t1) / nb_tests; t2 = gettime_usec(); for(n=n0, d=d0, i=0; i<nb_tests; ++i) { new_gcd(&n, &d); crc2 = (((crc2>>4)^d) + ((crc2<<2)^n) ) & 0xffffff; n = d; d = (d*12363+31) & 0xffff; d |= !d; } t2 = (gettime_usec() - t2) / nb_tests; printf( "old_gcd: %.3f sec crc=%d\n", t1, crc1 ); printf( "new_gcd: %.3f sec crc=%d\n", t2, crc2 ); if (crc1!=crc2) printf( " CRC ERROR !\n" ); } /********************************************************************* * test compiler *********************************************************************/ void test_compiler() { int nb_err = 0; int32_t v; if (sizeof(uint16_t)<2) { printf( "ERROR: sizeof(uint16_t)<2 !!\n" ); nb_err++; } if (sizeof(int16_t)<2) { printf( "ERROR: sizeof(int16_t)<2 !!\n" ); nb_err++; } if (sizeof(uint8_t)!=1) { printf( "ERROR: sizeof(uint8_t)!=1 !!\n" ); nb_err++; } if (sizeof(int8_t)!=1) { printf( "ERROR: sizeof(int8_t)!=1 !!\n" ); nb_err++; } if (sizeof(uint32_t)<4) { printf( "ERROR: sizeof(uint32_t)<4 !!\n" ); nb_err++; } if (sizeof(int32_t)<4) { printf( "ERROR: sizeof(int32_t)<4 !!\n" ); nb_err++; } /* yes, i know, this test is silly. But better be safe than sorry. :) */ for(v=1000; v>=0; v--) { if ( (v>>2) != v/4) nb_err++; } for(v=-1000; v!=-1; v++) { if ( (v>>2) != (v/4)-!!(v%4)) nb_err++; } if (nb_err!=0) { printf( "ERROR! please post your platform/compiler specs to xvid-devel@xvid.org !\n" ); } } /********************************************************************* * test SSIM functions *********************************************************************/ typedef int (*lumfunc)(uint8_t* ptr, int stride); typedef void (*csfunc)(uint8_t* ptro, uint8_t* ptrc, int stride, int lumo, int lumc, int* pdevo, int* pdevc, int* pcorr); extern int lum_8x8_c(uint8_t* ptr, int stride); extern int lum_8x8_mmx(uint8_t* ptr, int stride); extern int lum_2x8_c(uint8_t* ptr, int stride); extern void consim_c(uint8_t* ptro, uint8_t* ptrc, int stride, int lumo, int lumc, int* pdevo, int* pdevc, int* pcorr); extern void consim_mmx(uint8_t* ptro, uint8_t* ptrc, int stride, int lumo, int lumc, int* pdevo, int* pdevc, int* pcorr); extern void consim_sse2(uint8_t* ptro, uint8_t* ptrc, int stride, int lumo, int lumc, int* pdevo, int* pdevc, int* pcorr); void test_SSIM() { const int nb_tests = 3000*speed_ref; int tst; CPU *cpu; int i; int devs[3]; long lumo, lumc; DECLARE_ALIGNED_MATRIX(Ref1, 16, 16, uint8_t, 16); DECLARE_ALIGNED_MATRIX(Ref2, 16, 16, uint8_t, 16); lumfunc lum8x8; lumfunc lum2x8; csfunc csim; ieee_reseed(1); printf( "\n ====== test SSIM ======\n" ); for(i=0; i<16*16;++i) { long v1, v2; v1 = ieee_rand(-256, 511); v2 = ieee_rand(-256, 511); Ref1[i] = (v1<0) ? 0 : (v1>255) ? 255 : v1; Ref2[i] = (v2<0) ? 0 : (v2>255) ? 255 : v2; } lumc = ieee_rand(0, 255); lumo = ieee_rand(0, 255); for(cpu = cpu_list; cpu->name!=0; ++cpu) { double t; int m; if (!init_cpu(cpu)) continue; lum8x8 = lum_8x8_c; lum2x8 = lum_2x8_c; csim = consim_c; #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) if (cpu->cpu & XVID_CPU_MMX){ lum8x8 = lum_8x8_mmx; csim = consim_mmx; } if (cpu->cpu & XVID_CPU_MMX){ csim = consim_sse2; } #endif t = gettime_usec(); emms(); for(tst=0; tst<nb_tests; ++tst) m = lum8x8(Ref1, 16); emms(); t = (gettime_usec() - t) / nb_tests; printf("%s - ssim-lum8x8 %.3f usec m=%d %s\n", cpu->name, t, m, (m!=8230)?"| ERROR": "" ); t = gettime_usec(); emms(); for(tst=0; tst<nb_tests; ++tst) m = lum2x8(Ref1+8, 16); emms(); t = (gettime_usec() - t) / nb_tests; printf("%s - ssim-lum2x8 %.3f usec m=%d %s\n", cpu->name, t, m, (m!=681)?"| ERROR": "" ); t = gettime_usec(); emms(); for(tst=0; tst<nb_tests; ++tst) csim(Ref1, Ref2, 16, lumo, lumc, devs+0, devs+1, devs+2); emms(); t = (gettime_usec() - t) / nb_tests; printf("%s - ssim-consim %.3f usec devs=[0x%x 0x%x 0x%x] %s\n", cpu->name, t, devs[0], devs[1], devs[2], (devs[0]!=0x1bdf0f || devs[1]!=0x137258 || devs[2]!=0xcdb13)?"| ERROR": "" ); printf( " --- \n" ); } } /********************************************************************* * test bitstream functions *********************************************************************/ #define BIT_BUF_SIZE 2000 static void test_bits() { const int nb_tests = 50*speed_ref; int tst; uint32_t Crc; uint8_t Buf[BIT_BUF_SIZE]; uint32_t Extracted[BIT_BUF_SIZE*8]; /* worst case: bits read 1 by 1 */ int Lens[BIT_BUF_SIZE*8]; double t1; printf( "\n === test bitstream ===\n" ); ieee_reseed(1); Crc = 0; t1 = gettime_usec(); for(tst=0; tst<nb_tests; ++tst) { Bitstream bs; int m, m2, l, l2; for(l=0; l<BIT_BUF_SIZE; ++l) Buf[l] = (uint8_t)ieee_rand(0,255); l = BIT_BUF_SIZE - ieee_rand(1,BIT_BUF_SIZE/10); BitstreamInit(&bs, (void*)(Buf+BIT_BUF_SIZE-l), l); BitstreamReset(&bs); for(l2=l*8, m=0; l2>0; m++) { const int b = ieee_rand(1,32); Lens[m] = b; l2 -= b; if (l2<0) break; Extracted[m] = BitstreamShowBits(&bs, b); BitstreamSkip(&bs, b); // printf( "<= %d: %d 0x%x\n", m, b, Extracted[m]); } BitstreamReset(&bs); for(m2=0; m2<m; ++m2) { const int b = Lens[m2]; const uint32_t v = BitstreamGetBits(&bs, b); Crc |= (v!=Extracted[m2]); // printf( "=> %d: %d 0x%x %c\n", m2, b, v, " *"[Crc]); } } t1 = (gettime_usec() - t1) / nb_tests; printf(" test_bits %.3f usec %s\n", t1, (Crc!=0)?"| ERROR": "" ); } /********************************************************************* * main *********************************************************************/ static void arg_missing(const char *opt) { printf( "missing argument after option '%s'\n", opt); exit(-1); } int main(int argc, const char *argv[]) { int c, what = 0; int width, height; uint32_t chksum = 0; const char * test_bitstream = 0; #if defined(WIN32) && defined(ARCH_IS_X86_64) DECLARE_ALIGNED_MATRIX(xmm_save, 2, 4, uint64_t, 16); // assumes xmm6 and xmm7 won't be falsely preserved by C code for(c=0;c<4;c++) xmm_save[c] = read_counter(); prime_xmm(xmm_save); #endif cpu_mask = 0; // default => will use autodectect for(c=1; c<argc; ++c) { if (!strcmp(argv[c], "-v")) verbose++; else if (!strcmp(argv[c], "-c")) cpu_mask = 0 /* PLAIN_C */ | XVID_CPU_FORCE; else if (!strcmp(argv[c], "-mmx")) cpu_mask = XVID_CPU_MMX | XVID_CPU_FORCE; else if (!strcmp(argv[c], "-mmxext")) cpu_mask = XVID_CPU_MMXEXT | XVID_CPU_MMX | XVID_CPU_FORCE; else if (!strcmp(argv[c], "-sse2")) cpu_mask = XVID_CPU_SSE2 | XVID_CPU_MMXEXT | XVID_CPU_MMX | XVID_CPU_FORCE; else if (!strcmp(argv[c], "-sse3")) cpu_mask = XVID_CPU_SSE3 | XVID_CPU_SSE2 | XVID_CPU_MMXEXT | XVID_CPU_MMX | XVID_CPU_FORCE; else if (!strcmp(argv[c], "-sse4")) cpu_mask = XVID_CPU_SSE41 | XVID_CPU_SSE3 | XVID_CPU_SSE2 | XVID_CPU_MMXEXT | XVID_CPU_MMX | XVID_CPU_FORCE; else if (!strcmp(argv[c], "-3dnow")) cpu_mask = XVID_CPU_3DNOW | XVID_CPU_FORCE; else if (!strcmp(argv[c], "-3dnowe")) cpu_mask = XVID_CPU_3DNOW | XVID_CPU_3DNOWEXT | XVID_CPU_FORCE; else if (!strcmp(argv[c], "-altivec")) cpu_mask = XVID_CPU_ALTIVEC | XVID_CPU_FORCE; else if (!strcmp(argv[c], "-spd")) { if (++c==argc) arg_missing( argv[argc-1] ); speed_ref = atoi(argv[c]); } else if (argv[c][0]!='-') { what = atoi(argv[c]); if (what==9) { if (c+4>argc) { printf("usage: %s %d bitstream width height (checksum)\n", argv[0], what); exit(-1); } test_bitstream = argv[++c]; width = atoi(argv[++c]); height = atoi(argv[++c]); if (c+1<argc && argv[c+1][0]!='-') { if (sscanf(argv[c+1], "0x%x", &chksum)!=1) { printf( "can't read checksum value.\n" ); exit(-1); } else c++; } // printf( "[%s] %dx%d (0x%.8x)\n", test_bitstream, width, height, chksum); } } else { printf( "unrecognized option '%s'\n", argv[c]); exit(-1); } } if (what==0 || what==1) test_dct(); if (what==0 || what==2) test_mb(); if (what==0 || what==3) test_sad(); if (what==0 || what==4) test_transfer(); if (what==0 || what==5) test_quant(); if (what==0 || what==6) test_cbp(); if (what==0 || what==10) test_sse(); if (what==0 || what==11) test_log2bin(); if (what==0 || what==12) test_gcd(); if (what==0 || what==13) test_compiler(); if (what==0 || what==14) test_yuv(); if (what==0 || what==15) test_SSIM(); if (what==0 || what==16) test_yuv2(); if (what==0 || what==17) test_bits(); if (what==7) { test_IEEE1180_compliance(-256, 255, 1); test_IEEE1180_compliance(-256, 255,-1); test_IEEE1180_compliance( -5, 5, 1); test_IEEE1180_compliance( -5, 5,-1); test_IEEE1180_compliance(-300, 300, 1); test_IEEE1180_compliance(-300, 300,-1); } if (what==8) test_dct_saturation(-256, 255); if (test_bitstream) test_dec(test_bitstream, width, height, chksum); if (what==-1) { test_dct_precision_diffs(); test_bugs1(); } if (what==-2) test_quant_bug(); #if defined(WIN32) && defined(ARCH_IS_X86_64) get_xmm(xmm_save+4); if (memcmp(xmm_save, xmm_save+4, 4*sizeof(int64_t))) { printf("\nWIN64 ERROR: XMM6 and XMM7 contents not preserved!\n" " XMM6 XMM7\n" "Before: %.16I64X%.16I64X %.16I64X%.16I64X\n" "After: %.16I64X%.16I64X %.16I64X%.16I64X", xmm_save[0],xmm_save[1],xmm_save[2],xmm_save[3], xmm_save[4],xmm_save[5],xmm_save[6],xmm_save[7]); } else { printf("\nWIN64: XMM6 and XMM7 contents preserved correctly.\n"); } #endif if ((what >= 0 && what <= 6) || what == 10) { printf("\n\n" "NB: If a function isn't optimised for a specific set of intructions,\n" " a C function is used instead. So don't panic if some functions\n" " may appear to be slow.\n"); } #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) if (what == 0 || what == 5) { printf("\n" "NB: MMX mpeg4 quantization is known to have very small errors (+/-1 magnitude)\n" " for 1 or 2 coefficients a block. This is mainly caused by the fact the unit\n" " test goes far behind the usual limits of real encoding. Please do not report\n" " this error to the developers.\n"); } #endif return 0; } /*********************************************************************/
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |