--- trunk/xvidcore/examples/xvid_bench.c 2005/05/17 15:40:11 1614 +++ trunk/xvidcore/examples/xvid_bench.c 2008/11/14 15:43:28 1794 @@ -19,7 +19,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: xvid_bench.c,v 1.17 2005-05-17 15:40:11 Skal Exp $ + * $Id: xvid_bench.c,v 1.36 2008-11-14 15:43:27 Isibaar Exp $ * ****************************************************************************/ @@ -58,6 +58,7 @@ #include "utils/timer.h" #include "quant/quant_matrix.c" #include "bitstream/cbp.h" +#include "bitstream/bitstream.h" #include @@ -112,41 +113,29 @@ unsigned int cpu; } CPU; -CPU cpu_list[] = -{ { "PLAINC", 0 } +CPU cpu_list[] = { + { "PLAINC ", 0 }, #ifdef ARCH_IS_IA32 - , { "MMX ", XVID_CPU_MMX } - , { "MMXEXT", XVID_CPU_MMXEXT | XVID_CPU_MMX } - , { "SSE2 ", XVID_CPU_SSE2 | XVID_CPU_MMX } - , { "3DNOW ", XVID_CPU_3DNOW } - , { "3DNOWE", XVID_CPU_3DNOW | XVID_CPU_3DNOWEXT } + { "MMX ", XVID_CPU_MMX }, + { "MMXEXT ", XVID_CPU_MMXEXT | XVID_CPU_MMX }, + { "SSE2 ", XVID_CPU_SSE2 | XVID_CPU_MMX }, + { "SSE3 ", XVID_CPU_SSE3 | XVID_CPU_SSE2 | XVID_CPU_MMX }, + { "SSE41 ", XVID_CPU_SSE41| XVID_CPU_SSE3 | XVID_CPU_SSE2 | XVID_CPU_MMX }, + { "3DNOW ", XVID_CPU_3DNOW }, + { "3DNOWE ", XVID_CPU_3DNOW | XVID_CPU_3DNOWEXT }, #endif #ifdef ARCH_IS_PPC - , { "ALTIVEC", XVID_CPU_ALTIVEC } + { "ALTIVEC", XVID_CPU_ALTIVEC }, #endif #ifdef ARCH_IS_X86_64 - , { "X86_64", XVID_CPU_ASM} + { "X86_64 ", XVID_CPU_ASM}, #endif -//, { "IA64 ", XVID_CPU_IA64 } -//, { "TSC ", XVID_CPU_TSC } - , { 0, 0 } }; - -CPU cpu_short_list[] = -{ { "PLAINC", 0 } -#ifdef ARCH_IS_IA32 - , { "MMX ", XVID_CPU_MMX } -//, { "MMXEXT", XVID_CPU_MMXEXT | XVID_CPU_MMX } +#ifdef ARCH_IS_IA64 +// { "IA64 ", XVID_CPU_IA64 }, #endif -//, { "IA64 ", XVID_CPU_IA64 } - , { 0, 0 } }; - -CPU cpu_short_list2[] = -{ { "PLAINC", 0 } -#ifdef ARCH_IS_IA32 - , { "MMX ", XVID_CPU_MMX } - , { "SSE2 ", XVID_CPU_SSE2 | XVID_CPU_MMX } -#endif - , { 0, 0 } }; +// { "TSC ", XVID_CPU_TSC }, + { 0, 0 } +}; int init_cpu(CPU *cpu) @@ -274,6 +263,44 @@ return crc; } +void byte_swap(uint8_t *mem, int len, int element_size) { +#ifdef ARCH_IS_BIG_ENDIAN + int i; + + if(element_size == 1) { + /* No need to swap */ + } else if(element_size == 2) { + uint8_t temp[2]; + + for(i=0; i < (len/2); i++ ) { + temp[0] = mem[0]; + temp[1] = mem[1]; + mem[0] = temp[1]; + mem[1] = temp[0]; + + mem += 2; + } + } else if(element_size == 4) { + uint8_t temp[4]; + + for(i=0; i < (len/4); i++ ) { + temp[0] = mem[0]; + temp[1] = mem[1]; + temp[2] = mem[2]; + temp[3] = mem[3]; + mem[0] = temp[3]; + mem[1] = temp[2]; + mem[2] = temp[1]; + mem[3] = temp[0]; + + mem += 4; + } + } else { + printf("ERROR: byte_swap unsupported element_size(%u)\n", element_size); + } +#endif +} + /********************************************************************* * test DCT *********************************************************************/ @@ -494,10 +521,50 @@ (iCrc!=8107)?"| ERROR": "" ); #endif + /* New functions for field prediction by CK 1.10.2005 */ +#pragma NEW8X4 + TEST_MB(interpolate8x4_halfpel_h, 0); + printf("%s - interpfield-h -round0 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0x9538d6df)?"| ERROR": "" ); + + TEST_MB(interpolate8x4_halfpel_h, 1); + printf("%s - round1 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0xde5f1db4)?"| ERROR": "" ); + + + TEST_MB(interpolate8x4_halfpel_v, 0); + printf("%s - interpfield- v-round0 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0xea5a69ef)?"| ERROR": "" ); + + TEST_MB(interpolate8x4_halfpel_v, 1); + printf("%s - round1 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0x4f10ec0f)?"| ERROR": "" ); + + + TEST_MB(interpolate8x4_halfpel_hv, 0); + printf("%s - interpfield-hv-round0 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0xf97ee367)?"| ERROR": "" ); + + TEST_MB(interpolate8x4_halfpel_hv, 1); + printf("%s - round1 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, iCrc, + (iCrc!=0xb6a9f581)?"| ERROR": "" ); +/* End of 8x4 functions */ + printf( " --- \n" ); } } +#undef ENTER +#undef LEAVE +#undef TEST_MB +#undef TEST_MB2 + /********************************************************************* * test transfer *********************************************************************/ @@ -527,7 +594,8 @@ } \ emms(); \ t = (gettime_usec()-t -overhead) / nb_tests;\ -s = calc_crc((uint8_t*)(DST), sizeof((DST)), CRC32_INITIAL) +byte_swap((uint8_t*)(DST), 8*32*sizeof((DST)[0]), sizeof((DST)[0])); \ +s = calc_crc((uint8_t*)(DST), 8*32*sizeof((DST)[0]), CRC32_INITIAL) #define TEST_TRANSFER(FUNC, DST, SRC) \ TEST_TRANSFER_BEGIN(DST); \ @@ -553,7 +621,8 @@ } \ emms(); \ t = (gettime_usec()-t -overhead) / nb_tests;\ -s = calc_crc((uint8_t*)(DST), sizeof((DST)), CRC32_INITIAL) +byte_swap((uint8_t*)(DST), 8*32*sizeof((DST)[0]), sizeof((DST)[0])); \ +s = calc_crc((uint8_t*)(DST), 8*32*sizeof((DST)[0]), CRC32_INITIAL) #define TEST_TRANSFER2(FUNC, DST, SRC, R1) \ TEST_TRANSFER2_BEGIN(DST,SRC); \ @@ -570,8 +639,14 @@ const int nb_tests = 4000*speed_ref; int i; CPU *cpu; - uint8_t Src8[8*32], Dst8[8*32], Ref1[8*32], Ref2[8*32]; - int16_t Src16[8*32], Dst16[8*32]; +// uint8_t Src8[8*32], Dst8[8*32], Ref1[8*32], Ref2[8*32]; +// int16_t Src16[8*32], Dst16[8*32]; + DECLARE_ALIGNED_MATRIX(Src8, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Dst8, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Ref1, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Ref2, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Src16, 8, 32, uint16_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Dst16, 8, 32, uint16_t, CACHE_LINE); printf( "\n === test transfer ===\n" ); @@ -593,6 +668,14 @@ cpu->name, t, s, (s!=0xee7ccbb4)?"| ERROR": ""); + /* New functions for field prediction by CK 1.10.2005 */ +#pragma NEW8X4 + TEST_TRANSFER(transfer8x4_copy, Dst8, Src8); + printf("%s - 8to4 %.3f usec crc32=0x%08x %s\n", + cpu->name, t, s, + (s!=0xbb9c3db5)?"| ERROR": ""); +/* End of new functions */ + TEST_TRANSFER(transfer8x8_copy, Dst8, Src8); printf("%s - 8to8 %.3f usec crc32=0x%08x %s\n", cpu->name, t, s, @@ -606,8 +689,8 @@ TEST_TRANSFER2(transfer_8to16sub, Dst16, Src8, Ref1); { int s1, s2; - s1 = calc_crc((uint8_t*)Dst16, sizeof(Dst16), CRC32_INITIAL); - s2 = calc_crc((uint8_t*)Src8, sizeof(Src8), CRC32_INITIAL); + s1 = calc_crc((uint8_t*)Dst16, 8*32*sizeof(Dst16[0]), CRC32_INITIAL); + s2 = calc_crc((uint8_t*)Src8, 8*32*sizeof(Src8[0]), CRC32_INITIAL); printf("%s - 8to16sub %.3f usec crc32(1)=0x%08x crc32(2)=0x%08x %s %s\n", cpu->name, t, s1, s2, (s1!=0xa1e07163)?"| ERROR1": "", @@ -636,7 +719,8 @@ for(q=1; q<=max_Q; ++q) { \ for(tst=0; tst3*64); Src4[i] = (i==(3*64+2) || i==(5*64+9)); + Src5[i] = ieee_rand(0,1) ? -1 : 1; /* +/- test */ } for(cpu = cpu_list; cpu->name!=0; ++cpu) @@ -907,20 +994,53 @@ if (!init_cpu(cpu)) continue; - TEST_CBP(calc_cbp, Src1); + TEST_CBP(calc_cbp, Src1, nb_tests); printf("%s - calc_cbp#1 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x15)?"| ERROR": ""); - TEST_CBP(calc_cbp, Src2); + TEST_CBP(calc_cbp, Src2, nb_tests); printf("%s - calc_cbp#2 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x38)?"| ERROR": ""); - TEST_CBP(calc_cbp, Src3); + TEST_CBP(calc_cbp, Src3, nb_tests); printf("%s - calc_cbp#3 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x0f)?"| ERROR": "" ); - TEST_CBP(calc_cbp, Src4); + TEST_CBP(calc_cbp, Src4, nb_tests); printf("%s - calc_cbp#4 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x05)?"| ERROR": "" ); + TEST_CBP(calc_cbp, Src5, nb_tests); + printf("%s - calc_cbp#4 %.3f usec cbp=0x%02x %s\n", + cpu->name, t, cbp, (cbp!=0x3f)?"| ERROR": "" ); printf( " --- \n" ); } + + for(cpu = cpu_list; cpu->name!=0; ++cpu) /* bench suggested by Carlo (carlo dot bramix at libero dot it) */ + { + double t; + int tst, cbp, err; + + if (!init_cpu(cpu)) + continue; + + err = 0; + for(n=0; n<6; ++n) + { + for(m=0; m<64; ++m) + { + for(i=0; i<6*64; ++i) + Src1[i] = (i== (m + n*64)); + + TEST_CBP(calc_cbp, Src1, 1); + if (cbp!= (((m!=0)<<(5-n)))) + { + printf( "%s - calc_cbp#5: ERROR at pos %d / %d!\n", cpu->name, n, m); + err = 1; + break; + } + } + } + if (!err) + printf( " %s - calc_cbp#5 : OK\n", cpu->name ); + + } } /********************************************************************* @@ -1383,8 +1503,8 @@ xframe.bitstream = buf + pos; xframe.length = buf_size - pos; xframe.output.plane[0] = (uint8_t*)(((size_t)yuv_out + 15) & ~15); - xframe.output.plane[1] = xframe.output.plane[0] + bps*height; - xframe.output.plane[2] = xframe.output.plane[1] + bps/2; + xframe.output.plane[1] = (uint8_t*)xframe.output.plane[0] + bps*height; + xframe.output.plane[2] = (uint8_t*)xframe.output.plane[1] + bps/2; xframe.output.stride[0] = bps; xframe.output.stride[1] = bps; xframe.output.stride[2] = bps; @@ -1402,10 +1522,10 @@ nb++; for(y=0; y>= 1; + n++; + } + return n; +} + +static const uint8_t log2_tab_16[16] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4 }; + +static uint32_t __inline log2bin_v2(uint32_t value) +{ + int n = 0; + if (value & 0xffff0000) { + value >>= 16; + n += 16; + } + if (value & 0xff00) { + value >>= 8; + n += 8; + } + if (value & 0xf0) { + value >>= 4; + n += 4; + } + return n + log2_tab_16[value]; +} + +void test_log2bin() +{ + const int nb_tests = 3000*speed_ref; + int n, crc1=0, crc2=0; + uint32_t s, s0; + double t1, t2; + + t1 = gettime_usec(); + s0 = (int)(t1*31.241); + for(s=s0, n=0; n 1) { + if (*num % i == 0 && *den % i == 0) { + *num /= i; + *den /= i; + i = *num; + continue; + } + i--; + } +} + +static uint32_t gcd(int num, int den) +{ + int tmp; + while( (tmp=num%den) ) { num = den; den = tmp; } + return den; +} +static void __inline new_gcd(int *num, int *den) +{ + const int div = gcd(*num, *den); + if (num) { + *num /= div; + *den /= div; + } +} + +void test_gcd() +{ + const int nb_tests = 10*speed_ref; + int i; + uint32_t crc1=0, crc2=0; + uint32_t n0, n, d0, d; + double t1, t2; + + t1 = gettime_usec(); + n0 = 0xfffff & (int)(t1*31.241); + d0 = 0xfffff & (int)( ((n0*4123)%17) | 1 ); + for(n=n0, d=d0, i=0; i>4)^d) + ((crc1<<2)^n) ) & 0xffffff; + n = d; + d = (d*12363+31) & 0xffff; + d |= !d; + } + t1 = (gettime_usec()-t1) / nb_tests; + + t2 = gettime_usec(); + for(n=n0, d=d0, i=0; i>4)^d) + ((crc2<<2)^n) ) & 0xffffff; + n = d; + d = (d*12363+31) & 0xffff; + d |= !d; + } + t2 = (gettime_usec() - t2) / nb_tests; + + printf( "old_gcd: %.3f sec crc=%d\n", t1, crc1 ); + printf( "new_gcd: %.3f sec crc=%d\n", t2, crc2 ); + if (crc1!=crc2) printf( " CRC ERROR !\n" ); +} + +/********************************************************************* + * test compiler + *********************************************************************/ + +void test_compiler() { + int nb_err = 0; + int32_t v; + if (sizeof(uint16_t)<2) { + printf( "ERROR: sizeof(uint16_t)<2 !!\n" ); + nb_err++; + } + if (sizeof(int16_t)<2) { + printf( "ERROR: sizeof(int16_t)<2 !!\n" ); + nb_err++; + } + if (sizeof(uint8_t)!=1) { + printf( "ERROR: sizeof(uint8_t)!=1 !!\n" ); + nb_err++; + } + if (sizeof(int8_t)!=1) { + printf( "ERROR: sizeof(int8_t)!=1 !!\n" ); + nb_err++; + } + if (sizeof(uint32_t)<4) { + printf( "ERROR: sizeof(uint32_t)<4 !!\n" ); + nb_err++; + } + if (sizeof(int32_t)<4) { + printf( "ERROR: sizeof(int32_t)<4 !!\n" ); + nb_err++; + } + /* yes, i know, this test is silly. But better be safe than sorry. :) */ + for(v=1000; v>=0; v--) { + if ( (v>>2) != v/4) + nb_err++; + } + for(v=-1000; v!=-1; v++) { + if ( (v>>2) != (v/4)-!!(v%4)) + nb_err++; + } + if (nb_err!=0) { + printf( "ERROR! please post your platform/compiler specs to xvid-devel@xvid.org !\n" ); + } +} + +/********************************************************************* + * test SSIM functions + *********************************************************************/ + +typedef int (*lumfunc)(uint8_t* ptr, int stride); +typedef void (*csfunc)(uint8_t* ptro, uint8_t* ptrc, int stride, int lumo, int lumc, int* pdevo, int* pdevc, int* pcorr); + +extern int lum_8x8_c(uint8_t* ptr, int stride); +extern int lum_8x8_mmx(uint8_t* ptr, int stride); +extern int lum_2x8_c(uint8_t* ptr, int stride); +extern void consim_c(uint8_t* ptro, uint8_t* ptrc, int stride, int lumo, int lumc, int* pdevo, int* pdevc, int* pcorr); +extern void consim_mmx(uint8_t* ptro, uint8_t* ptrc, int stride, int lumo, int lumc, int* pdevo, int* pdevc, int* pcorr); +extern void consim_sse2(uint8_t* ptro, uint8_t* ptrc, int stride, int lumo, int lumc, int* pdevo, int* pdevc, int* pcorr); + +void test_SSIM() +{ + const int nb_tests = 3000*speed_ref; + int tst; + CPU *cpu; + int i; + int devs[3]; + long lumo, lumc; + DECLARE_ALIGNED_MATRIX(Ref1, 16, 16, uint8_t, 16); + DECLARE_ALIGNED_MATRIX(Ref2, 16, 16, uint8_t, 16); + lumfunc lum8x8; + lumfunc lum2x8; + csfunc csim; + + ieee_reseed(1); + printf( "\n ====== test SSIM ======\n" ); + for(i=0; i<16*16;++i) { + long v1, v2; + v1 = ieee_rand(-256, 511); + v2 = ieee_rand(-256, 511); + Ref1[i] = (v1<0) ? 0 : (v1>255) ? 255 : v1; + Ref2[i] = (v2<0) ? 0 : (v2>255) ? 255 : v2; + } + lumc = ieee_rand(0, 255); + lumo = ieee_rand(0, 255); + + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + double t; + int m; + if (!init_cpu(cpu)) + continue; + lum8x8 = lum_8x8_c; + lum2x8 = lum_2x8_c; + csim = consim_c; +#ifdef ARCH_IS_IA32 + if (cpu->cpu & XVID_CPU_MMX){ + lum8x8 = lum_8x8_mmx; + csim = consim_mmx; + } + if (cpu->cpu & XVID_CPU_MMX){ + csim = consim_sse2; + } +#endif + t = gettime_usec(); + emms(); + for(tst=0; tstname, t, m, + (m!=8230)?"| ERROR": "" ); + + t = gettime_usec(); + emms(); + for(tst=0; tstname, t, m, + (m!=681)?"| ERROR": "" ); + + t = gettime_usec(); + emms(); + for(tst=0; tstname, t, devs[0], devs[1], devs[2], + (devs[0]!=0x1bdf0f || devs[1]!=0x137258 || devs[2]!=0xcdb13)?"| ERROR": "" ); + printf( " --- \n" ); + } +} + +/********************************************************************* + * test bitstream functions + *********************************************************************/ + +#define BIT_BUF_SIZE 2000 + +static void test_bits() +{ + const int nb_tests = 50*speed_ref; + int tst; + uint32_t Crc; + uint8_t Buf[BIT_BUF_SIZE]; + uint32_t Extracted[BIT_BUF_SIZE*8]; /* worst case: bits read 1 by 1 */ + int Lens[BIT_BUF_SIZE*8]; + double t1; + + + printf( "\n === test bitstream ===\n" ); + ieee_reseed(1); + Crc = 0; + + t1 = gettime_usec(); + for(tst=0; tst0; m++) { + const int b = ieee_rand(1,32); + Lens[m] = b; + l2 -= b; + if (l2<0) break; + Extracted[m] = BitstreamShowBits(&bs, b); + BitstreamSkip(&bs, b); +// printf( "<= %d: %d 0x%x\n", m, b, Extracted[m]); + } + + BitstreamReset(&bs); + for(m2=0; m2 %d: %d 0x%x %c\n", m2, b, v, " *"[Crc]); + } + } + t1 = (gettime_usec() - t1) / nb_tests; + printf(" test_bits %.3f usec %s\n", t1, (Crc!=0)?"| ERROR": "" ); +} + +/********************************************************************* * main *********************************************************************/ @@ -1604,7 +2172,7 @@ int c, what = 0; int width, height; uint32_t chksum = 0; - const char * test_bitstream = 0; + const char * test_bitstream = 0; cpu_mask = 0; // default => will use autodectect for(c=1; c