--- trunk/xvidcore/examples/xvid_bench.c 2004/03/22 22:36:25 1382 +++ trunk/xvidcore/examples/xvid_bench.c 2005/09/23 12:53:35 1641 @@ -19,15 +19,13 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: xvid_bench.c,v 1.11 2004-03-22 22:36:23 edgomez Exp $ + * $Id: xvid_bench.c,v 1.25 2005-09-23 12:53:35 suxen_drol Exp $ * ****************************************************************************/ /***************************************************************************** * * 'Reference' output is at the end of file. - * Don't take the checksums and crc too seriouly, they aren't - * bullet-proof (should plug some .md5 here)... * * compiles with something like: * gcc -o xvid_bench xvid_bench.c -I../src/ -lxvidcore -lm @@ -67,7 +65,9 @@ #define M_PI 3.14159265358979323846 #endif -const int speed_ref = 100; /* on slow machines, decrease this value */ +int speed_ref = 100; /* on slow machines, decrease this value */ +int verbose = 0; +unsigned int cpu_mask; /********************************************************************* * misc @@ -83,7 +83,7 @@ #else clock_t clk; clk = clock(); - return clk * 1000000 / CLOCKS_PER_SEC; + return clk * 1000. / CLOCKS_PER_SEC; /* clock() returns time in Milliseconds */ #endif } @@ -112,35 +112,27 @@ unsigned int cpu; } CPU; -CPU cpu_list[] = -{ { "PLAINC", 0 } +CPU cpu_list[] = { + { "PLAINC ", 0 }, #ifdef ARCH_IS_IA32 - , { "MMX ", XVID_CPU_MMX } - , { "MMXEXT", XVID_CPU_MMXEXT | XVID_CPU_MMX } - , { "SSE2 ", XVID_CPU_SSE2 | XVID_CPU_MMX } - , { "3DNOW ", XVID_CPU_3DNOW } - , { "3DNOWE", XVID_CPU_3DNOW | XVID_CPU_3DNOWEXT } + { "MMX ", XVID_CPU_MMX }, + { "MMXEXT ", XVID_CPU_MMXEXT | XVID_CPU_MMX }, + { "SSE2 ", XVID_CPU_SSE2 | XVID_CPU_MMX }, + { "3DNOW ", XVID_CPU_3DNOW }, + { "3DNOWE ", XVID_CPU_3DNOW | XVID_CPU_3DNOWEXT }, #endif -//, { "IA64 ", XVID_CPU_IA64 } -//, { "TSC ", XVID_CPU_TSC } - , { 0, 0 } }; - -CPU cpu_short_list[] = -{ { "PLAINC", 0 } -#ifdef ARCH_IS_IA32 - , { "MMX ", XVID_CPU_MMX } -//, { "MMXEXT", XVID_CPU_MMXEXT | XVID_CPU_MMX } +#ifdef ARCH_IS_PPC + { "ALTIVEC", XVID_CPU_ALTIVEC }, #endif -//, { "IA64 ", XVID_CPU_IA64 } - , { 0, 0 } }; - -CPU cpu_short_list2[] = -{ { "PLAINC", 0 } -#ifdef ARCH_IS_IA32 - , { "MMX ", XVID_CPU_MMX } - , { "SSE2 ", XVID_CPU_SSE2 | XVID_CPU_MMX } +#ifdef ARCH_IS_X86_64 + { "X86_64 ", XVID_CPU_ASM}, +#endif +#ifdef ARCH_IS_IA64 +// { "IA64 ", XVID_CPU_IA64 }, #endif - , { 0, 0 } }; +// { "TSC ", XVID_CPU_TSC }, + { 0, 0 } +}; int init_cpu(CPU *cpu) @@ -253,13 +245,8 @@ }; uint32_t -calc_crc(uint8_t *mem, int len, uint32_t initial) +calc_crc(uint8_t *mem, int len, uint32_t crc) { - - register unsigned int crc; - - crc = initial; - while( len >= 8) { DO8(mem, crc); len -= 8; @@ -270,8 +257,45 @@ len--; } - return(crc); + return crc; +} + +void byte_swap(uint8_t *mem, int len, int element_size) { +#ifdef ARCH_IS_BIG_ENDIAN + int i; + if(element_size == 1) { + /* No need to swap */ + } else if(element_size == 2) { + uint8_t temp[2]; + + for(i=0; i < (len/2); i++ ) { + temp[0] = mem[0]; + temp[1] = mem[1]; + mem[0] = temp[1]; + mem[1] = temp[0]; + + mem += 2; + } + } else if(element_size == 4) { + uint8_t temp[4]; + + for(i=0; i < (len/4); i++ ) { + temp[0] = mem[0]; + temp[1] = mem[1]; + temp[2] = mem[2]; + temp[3] = mem[3]; + mem[0] = temp[3]; + mem[1] = temp[2]; + mem[2] = temp[1]; + mem[3] = temp[0]; + + mem += 4; + } + } else { + printf("ERROR: byte_swap unsupported element_size(%u)\n", element_size); + } +#endif } /********************************************************************* @@ -527,7 +551,8 @@ } \ emms(); \ t = (gettime_usec()-t -overhead) / nb_tests;\ -s = calc_crc((uint8_t*)(DST), sizeof((DST)), CRC32_INITIAL) +byte_swap((uint8_t*)(DST), 8*32*sizeof((DST)[0]), sizeof((DST)[0])); \ +s = calc_crc((uint8_t*)(DST), 8*32*sizeof((DST)[0]), CRC32_INITIAL) #define TEST_TRANSFER(FUNC, DST, SRC) \ TEST_TRANSFER_BEGIN(DST); \ @@ -553,7 +578,8 @@ } \ emms(); \ t = (gettime_usec()-t -overhead) / nb_tests;\ -s = calc_crc((uint8_t*)(DST), sizeof((DST)), CRC32_INITIAL) +byte_swap((uint8_t*)(DST), 8*32*sizeof((DST)[0]), sizeof((DST)[0])); \ +s = calc_crc((uint8_t*)(DST), 8*32*sizeof((DST)[0]), CRC32_INITIAL) #define TEST_TRANSFER2(FUNC, DST, SRC, R1) \ TEST_TRANSFER2_BEGIN(DST,SRC); \ @@ -570,8 +596,14 @@ const int nb_tests = 4000*speed_ref; int i; CPU *cpu; - uint8_t Src8[8*32], Dst8[8*32], Ref1[8*32], Ref2[8*32]; - int16_t Src16[8*32], Dst16[8*32]; +// uint8_t Src8[8*32], Dst8[8*32], Ref1[8*32], Ref2[8*32]; +// int16_t Src16[8*32], Dst16[8*32]; + DECLARE_ALIGNED_MATRIX(Src8, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Dst8, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Ref1, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Ref2, 8, 32, uint8_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Src16, 8, 32, uint16_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(Dst16, 8, 32, uint16_t, CACHE_LINE); printf( "\n === test transfer ===\n" ); @@ -606,8 +638,8 @@ TEST_TRANSFER2(transfer_8to16sub, Dst16, Src8, Ref1); { int s1, s2; - s1 = calc_crc((uint8_t*)Dst16, sizeof(Dst16), CRC32_INITIAL); - s2 = calc_crc((uint8_t*)Src8, sizeof(Src8), CRC32_INITIAL); + s1 = calc_crc((uint8_t*)Dst16, 8*32*sizeof(Dst16[0]), CRC32_INITIAL); + s2 = calc_crc((uint8_t*)Src8, 8*32*sizeof(Src8[0]), CRC32_INITIAL); printf("%s - 8to16sub %.3f usec crc32(1)=0x%08x crc32(2)=0x%08x %s %s\n", cpu->name, t, s1, s2, (s1!=0xa1e07163)?"| ERROR1": "", @@ -636,7 +668,8 @@ for(q=1; q<=max_Q; ++q) { \ for(tst=0; tst0) sse = (FUNCTION)((SRC1), (SRC2), (STRIDE)); \ + emms(); \ + t = (gettime_usec() - t)/(double)nb_tests; \ + } while(0) + + +void test_sse() +{ + const int nb_tests = 100000*speed_ref; + int i; + CPU *cpu; + DECLARE_ALIGNED_MATRIX(Src1, 8, 8, int16_t, 16); + DECLARE_ALIGNED_MATRIX(Src2, 8, 8, int16_t, 16); + DECLARE_ALIGNED_MATRIX(Src3, 8, 8, int16_t, 16); + DECLARE_ALIGNED_MATRIX(Src4, 8, 8, int16_t, 16); + + printf( "\n ===== test sse =====\n" ); + + ieee_reseed(1); + for(i=0; i<64; ++i) { + Src1[i] = ieee_rand(-2048, 2047); + Src2[i] = ieee_rand(-2048, 2047); + Src3[i] = ieee_rand(-2048, 2047); + Src4[i] = ieee_rand(-2048, 2047); + } + + for(cpu = cpu_list; cpu->name!=0; ++cpu) + { + double t; + int tst, sse; + + if (!init_cpu(cpu)) + continue; + + /* 16 bit element blocks */ + TEST_SSE(sse8_16bit, Src1, Src2, 16); + printf("%s - sse8_16bit#1 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=182013834)?"| ERROR": ""); + TEST_SSE(sse8_16bit, Src1, Src3, 16); + printf("%s - sse8_16bit#2 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=142545203)?"| ERROR": ""); + TEST_SSE(sse8_16bit, Src1, Src4, 16); + printf("%s - sse8_16bit#3 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=146340935)?"| ERROR": ""); + TEST_SSE(sse8_16bit, Src2, Src3, 16); + printf("%s - sse8_16bit#4 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=130136661)?"| ERROR": ""); + TEST_SSE(sse8_16bit, Src2, Src4, 16); + printf("%s - sse8_16bit#5 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=136870353)?"| ERROR": ""); + TEST_SSE(sse8_16bit, Src3, Src4, 16); + printf("%s - sse8_16bit#6 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=164107772)?"| ERROR": ""); + + /* 8 bit element blocks */ + TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src2, 8); + printf("%s - sse8_8bit#1 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1356423)?"| ERROR": ""); + TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src3, 8); + printf("%s - sse8_8bit#2 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1173074)?"| ERROR": ""); + TEST_SSE(sse8_8bit, (int8_t*)Src1, (int8_t*)Src4, 8); + printf("%s - sse8_8bit#3 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1092357)?"| ERROR": ""); + TEST_SSE(sse8_8bit, (int8_t*)Src2, (int8_t*)Src3, 8); + printf("%s - sse8_8bit#4 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1360239)?"| ERROR": ""); + TEST_SSE(sse8_8bit, (int8_t*)Src2, (int8_t*)Src4, 8); + printf("%s - sse8_8bit#5 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1208414)?"| ERROR": ""); + TEST_SSE(sse8_8bit, (int8_t*)Src3, (int8_t*)Src4, 8); + printf("%s - sse8_8bit#6 %.3f usec sse=%d %s\n", + cpu->name, t, sse, (sse!=1099285)?"| ERROR": ""); + + printf(" ---\n"); + } +} + +/********************************************************************* * test non-zero AC counting *********************************************************************/ -#define TEST_CBP(FUNC, SRC) \ +#define TEST_CBP(FUNC, SRC, NB) \ t = gettime_usec(); \ emms(); \ -for(tst=0; tst3*64); Src4[i] = (i==(3*64+2) || i==(5*64+9)); + Src5[i] = ieee_rand(0,1) ? -1 : 1; /* +/- test */ } for(cpu = cpu_list; cpu->name!=0; ++cpu) @@ -780,20 +943,53 @@ if (!init_cpu(cpu)) continue; - TEST_CBP(calc_cbp, Src1); - printf("%s - calc_cbp#1 %.3f usec cbp=0x%02x\n", + TEST_CBP(calc_cbp, Src1, nb_tests); + printf("%s - calc_cbp#1 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x15)?"| ERROR": ""); - TEST_CBP(calc_cbp, Src2); - printf("%s - calc_cbp#2 %.3f usec cbp=0x%02x\n", + TEST_CBP(calc_cbp, Src2, nb_tests); + printf("%s - calc_cbp#2 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x38)?"| ERROR": ""); - TEST_CBP(calc_cbp, Src3); - printf("%s - calc_cbp#3 %.3f usec cbp=0x%02x\n", + TEST_CBP(calc_cbp, Src3, nb_tests); + printf("%s - calc_cbp#3 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x0f)?"| ERROR": "" ); - TEST_CBP(calc_cbp, Src4); - printf("%s - calc_cbp#4 %.3f usec cbp=0x%02x\n", + TEST_CBP(calc_cbp, Src4, nb_tests); + printf("%s - calc_cbp#4 %.3f usec cbp=0x%02x %s\n", cpu->name, t, cbp, (cbp!=0x05)?"| ERROR": "" ); + TEST_CBP(calc_cbp, Src5, nb_tests); + printf("%s - calc_cbp#4 %.3f usec cbp=0x%02x %s\n", + cpu->name, t, cbp, (cbp!=0x3f)?"| ERROR": "" ); printf( " --- \n" ); } + + for(cpu = cpu_list; cpu->name!=0; ++cpu) /* bench suggested by Carlo (carlo dot bramix at libero dot it) */ + { + double t; + int tst, cbp, err; + + if (!init_cpu(cpu)) + continue; + + err = 0; + for(n=0; n<6; ++n) + { + for(m=0; m<64; ++m) + { + for(i=0; i<6*64; ++i) + Src1[i] = (i== (m + n*64)); + + TEST_CBP(calc_cbp, Src1, 1); + if (cbp!= (((m!=0)<<(5-n)))) + { + printf( "%s - calc_cbp#5: ERROR at pos %d / %d!\n", cpu->name, n, m); + err = 1; + break; + } + } + } + if (!err) + printf( " %s - calc_cbp#5 : OK\n", cpu->name ); + + } } /********************************************************************* @@ -1187,7 +1383,7 @@ * measure raw decoding speed *********************************************************************/ -void test_dec(const char *name, int width, int height, int with_chksum) +void test_dec(const char *name, int width, int height, int ref_chksum) { FILE *f = 0; void *dechandle = 0; @@ -1198,22 +1394,23 @@ double t = 0.; int nb = 0; uint8_t *buf = 0; - uint8_t *rgb_out = 0; + uint8_t *yuv_out = 0; int buf_size, pos; uint32_t chksum = 0; + int bps = (width+31) & ~31; memset(&xinit, 0, sizeof(xinit)); - xinit.cpu_flags = XVID_CPU_MMX | XVID_CPU_FORCE; + xinit.cpu_flags = cpu_mask; xinit.version = XVID_VERSION; xvid_global(NULL, 0, &xinit, NULL); memset(&xparam, 0, sizeof(xparam)); - xparam.width = width; + xparam.width = width; xparam.height = height; xparam.version = XVID_VERSION; xerr = xvid_decore(NULL, XVID_DEC_CREATE, &xparam, NULL); if (xerr==XVID_ERR_FAIL) { - printf("can't init decoder (err=%d)\n", xerr); + printf("ERROR: can't init decoder (err=%d)\n", xerr); return; } dechandle = xparam.handle; @@ -1221,27 +1418,26 @@ f = fopen(name, "rb"); if (f==0) { - printf( "can't open file '%s'\n", name); + printf( "ERROR: can't open file '%s'\n", name); return; } fseek(f, 0, SEEK_END); buf_size = ftell(f); fseek(f, 0, SEEK_SET); if (buf_size<=0) { - printf("error while stating file\n"); + printf("ERROR: error while stating file\n"); goto End; } - else printf( "Input size: %d\n", buf_size); - buf = malloc(buf_size); /* should be enuf' */ - rgb_out = calloc(4, width*height); /* <-room for _RGB24 */ - if (buf==0 || rgb_out==0) { - printf( "malloc failed!\n" ); + buf = malloc(buf_size); + yuv_out = calloc(1, bps*height*3/2 + 15); + if (buf==0 || yuv_out==0) { + printf( "ERROR: malloc failed!\n" ); goto End; } if (fread(buf, buf_size, 1, f)!=1) { - printf( "file-read failed\n" ); + printf( "ERROR: file-read failed\n" ); goto End; } @@ -1249,41 +1445,57 @@ pos = 0; t = -gettime_usec(); while(1) { + int y; + memset(&xframe, 0, sizeof(xframe)); xframe.version = XVID_VERSION; xframe.bitstream = buf + pos; xframe.length = buf_size - pos; - xframe.output.plane[0] = rgb_out; - xframe.output.stride[0] = width; - xframe.output.csp = XVID_CSP_BGR; + xframe.output.plane[0] = (uint8_t*)(((size_t)yuv_out + 15) & ~15); + xframe.output.plane[1] = (uint8_t*)xframe.output.plane[0] + bps*height; + xframe.output.plane[2] = (uint8_t*)xframe.output.plane[1] + bps/2; + xframe.output.stride[0] = bps; + xframe.output.stride[1] = bps; + xframe.output.stride[2] = bps; + xframe.output.csp = XVID_CSP_I420; xerr = xvid_decore(dechandle, XVID_DEC_DECODE, &xframe, 0); + if (xerr<0) { + printf("ERROR: decoding failed for frame #%d (err=%d)!\n", nb, xerr); + break; + } + else if (xerr==0) + break; + else if (verbose>0) printf("#%d %d\n", nb, xerr ); + + pos += xerr; nb++; - pos += xframe.length; - if (with_chksum) { - int k = width*height; - uint32_t *ptr = (uint32_t *)rgb_out; - while(k-->0) chksum += *ptr++; + + for(y=0; y0.) - printf( "%d frames decoded in %.3f s -> %.1f FPS\n", nb, t*1.e-6f, (float)(nb*1.e6f/t) ); - if (with_chksum) - printf("checksum: 0x%.8x\n", chksum); + if (ref_chksum==0) { + if (t>0.) + printf( "%d frames decoded in %.3f s -> %.1f FPS Checksum:0x%.8x\n", nb, t*1.e-6f, (float)(nb*1.e6f/t), chksum ); + } + else { + printf("FPS:%.1f Checksum: 0x%.8x Expected:0x%.8x | %s\n", + t>0. ? (float)(nb*1.e6f/t) : 0.f, chksum, ref_chksum, (chksum==ref_chksum) ? "OK" : "ERROR"); + } End: - if (rgb_out!=0) free(rgb_out); + if (yuv_out!=0) free(yuv_out); if (buf!=0) free(buf); if (dechandle!=0) { xerr= xvid_decore(dechandle, XVID_DEC_DESTROY, NULL, NULL); if (xerr==XVID_ERR_FAIL) - printf("destroy-decoder failed (err=%d)!\n", xerr); + printf("ERROR: destroy-decoder failed (err=%d)!\n", xerr); } if (f!=0) fclose(f); } @@ -1445,21 +1657,196 @@ } #endif } +/*********************************************************************/ + +static uint32_t __inline log2bin_v1(uint32_t value) +{ + int n = 0; + while (value) { + value >>= 1; + n++; + } + return n; +} + +static const uint8_t log2_tab_16[16] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4 }; + +static uint32_t __inline log2bin_v2(uint32_t value) +{ + int n = 0; + if (value & 0xffff0000) { + value >>= 16; + n += 16; + } + if (value & 0xff00) { + value >>= 8; + n += 8; + } + if (value & 0xf0) { + value >>= 4; + n += 4; + } + return n + log2_tab_16[value]; +} + +void test_log2bin() +{ + const int nb_tests = 3000*speed_ref; + int n, crc1=0, crc2=0; + uint32_t s, s0; + double t1, t2; + + t1 = gettime_usec(); + s0 = (int)(t1*31.241); + for(s=s0, n=0; n 1) { + if (*num % i == 0 && *den % i == 0) { + *num /= i; + *den /= i; + i = *num; + continue; + } + i--; + } +} + +static uint32_t gcd(int num, int den) +{ + int tmp; + while( (tmp=num%den) ) { num = den; den = tmp; } + return den; +} +static void __inline new_gcd(int *num, int *den) +{ + const int div = gcd(*num, *den); + if (num) { + *num /= div; + *den /= div; + } +} + +void test_gcd() +{ + const int nb_tests = 10*speed_ref; + int i; + uint32_t crc1=0, crc2=0; + uint32_t n0, n, d0, d; + double t1, t2; + + t1 = gettime_usec(); + n0 = 0xfffff & (int)(t1*31.241); + d0 = 0xfffff & (int)( ((n0*4123)%17) | 1 ); + for(n=n0, d=d0, i=0; i>4)^d) + ((crc1<<2)^n) ) & 0xffffff; + n = d; + d = (d*12363+31) & 0xffff; + d |= !d; + } + t1 = (gettime_usec()-t1) / nb_tests; + + t2 = gettime_usec(); + for(n=n0, d=d0, i=0; i>4)^d) + ((crc2<<2)^n) ) & 0xffffff; + n = d; + d = (d*12363+31) & 0xffff; + d |= !d; + } + t2 = (gettime_usec() - t2) / nb_tests; + + printf( "old_gcd: %.3f sec crc=%d\n", t1, crc1 ); + printf( "new_gcd: %.3f sec crc=%d\n", t2, crc2 ); + if (crc1!=crc2) printf( " CRC ERROR !\n" ); +} /********************************************************************* * main *********************************************************************/ -int main(int argc, char *argv[]) +static void arg_missing(const char *opt) +{ + printf( "missing argument after option '%s'\n", opt); + exit(-1); +} + +int main(int argc, const char *argv[]) { - int what = 0; - if (argc>1) what = atoi(argv[1]); + int c, what = 0; + int width, height; + uint32_t chksum = 0; + const char * test_bitstream = 0; + + cpu_mask = 0; // default => will use autodectect + for(c=1; cargc) { + printf("usage: %s %d bitstream width height (checksum)\n", argv[0], what); + exit(-1); + } + test_bitstream = argv[++c]; + width = atoi(argv[++c]); + height = atoi(argv[++c]); + if (c+15)); - } - + if (test_bitstream) + test_dec(test_bitstream, width, height, chksum); if (what==-1) { test_dct_precision_diffs(); test_bugs1(); @@ -1489,7 +1867,7 @@ if (what==-2) test_quant_bug(); - if (what >= 0 && what <= 6) { + if ((what >= 0 && what <= 6) || what == 10) { printf("\n\n" "NB: If a function isn't optimised for a specific set of intructions,\n" " a C function is used instead. So don't panic if some functions\n" @@ -1509,206 +1887,4 @@ return 0; } -/********************************************************************* - * 'Reference' output (except for timing) on an Athlon XP 2200+ - *********************************************************************/ - -/* as of 2002-01-07, there's a problem with MMX mpeg4-quantization */ -/* as of 2003-11-30, the problem is still here */ - -/********************************************************************* - - - ===== test fdct/idct ===== -PLAINC - 2.867 usec PSNR=13.291 MSE=3.000 -MMX - -0.211 usec PSNR=9.611 MSE=7.000 -MMXEXT - -0.256 usec PSNR=9.611 MSE=7.000 -3DNOW - 2.855 usec PSNR=13.291 MSE=3.000 -3DNOWE - 1.429 usec PSNR=13.291 MSE=3.000 - - === test block motion === -PLAINC - interp- h-round0 0.538 usec crc32=0x115381ba -PLAINC - round1 0.527 usec crc32=0x2b1f528f -PLAINC - interp- v-round0 0.554 usec crc32=0x423cdcc7 -PLAINC - round1 0.551 usec crc32=0x42202efe -PLAINC - interp-hv-round0 1.041 usec crc32=0xd198d387 -PLAINC - round1 1.038 usec crc32=0x9ecfd921 - --- -MMX - interp- h-round0 0.051 usec crc32=0x115381ba -MMX - round1 0.053 usec crc32=0x2b1f528f -MMX - interp- v-round0 0.048 usec crc32=0x423cdcc7 -MMX - round1 0.048 usec crc32=0x42202efe -MMX - interp-hv-round0 0.074 usec crc32=0xd198d387 -MMX - round1 0.073 usec crc32=0x9ecfd921 - --- -MMXEXT - interp- h-round0 0.020 usec crc32=0x115381ba -MMXEXT - round1 0.025 usec crc32=0x2b1f528f -MMXEXT - interp- v-round0 0.016 usec crc32=0x423cdcc7 -MMXEXT - round1 0.024 usec crc32=0x42202efe -MMXEXT - interp-hv-round0 0.037 usec crc32=0xd198d387 -MMXEXT - round1 0.037 usec crc32=0x9ecfd921 - --- -3DNOW - interp- h-round0 0.020 usec crc32=0x115381ba -3DNOW - round1 0.029 usec crc32=0x2b1f528f -3DNOW - interp- v-round0 0.016 usec crc32=0x423cdcc7 -3DNOW - round1 0.024 usec crc32=0x42202efe -3DNOW - interp-hv-round0 0.038 usec crc32=0xd198d387 -3DNOW - round1 0.039 usec crc32=0x9ecfd921 - --- -3DNOWE - interp- h-round0 0.020 usec crc32=0x115381ba -3DNOWE - round1 0.024 usec crc32=0x2b1f528f -3DNOWE - interp- v-round0 0.016 usec crc32=0x423cdcc7 -3DNOWE - round1 0.021 usec crc32=0x42202efe -3DNOWE - interp-hv-round0 0.037 usec crc32=0xd198d387 -3DNOWE - round1 0.036 usec crc32=0x9ecfd921 - --- - - ====== test SAD ====== -PLAINC - sad8 0.505 usec sad=3776 -PLAINC - sad16 1.941 usec sad=27214 -PLAINC - sad16bi 4.925 usec sad=26274 -PLAINC - dev16 4.254 usec sad=3344 - --- -MMX - sad8 0.036 usec sad=3776 -MMX - sad16 0.107 usec sad=27214 -MMX - sad16bi 0.259 usec sad=26274 -MMX - dev16 0.187 usec sad=3344 - --- -MMXEXT - sad8 0.016 usec sad=3776 -MMXEXT - sad16 0.050 usec sad=27214 -MMXEXT - sad16bi 0.060 usec sad=26274 -MMXEXT - dev16 0.086 usec sad=3344 - --- -3DNOW - sad8 0.506 usec sad=3776 -3DNOW - sad16 1.954 usec sad=27214 -3DNOW - sad16bi 0.119 usec sad=26274 -3DNOW - dev16 4.252 usec sad=3344 - --- -3DNOWE - sad8 0.017 usec sad=3776 -3DNOWE - sad16 0.038 usec sad=27214 -3DNOWE - sad16bi 0.052 usec sad=26274 -3DNOWE - dev16 0.067 usec sad=3344 - --- - - === test transfer === -PLAINC - 8to16 0.603 usec crc32=0x115814bb -PLAINC - 16to8 1.077 usec crc32=0xee7ccbb4 -PLAINC - 8to8 0.679 usec crc32=0xd37b3295 -PLAINC - 16to8add 1.341 usec crc32=0xdd817bf4 -PLAINC - 8to16sub 1.566 usec crc32(1)=0xa1e07163 crc32(2)=0xd86c5d23 -PLAINC - 8to16sub2 2.206 usec crc32=0x99b6c4c7 - --- -MMX - 8to16 -0.025 usec crc32=0x115814bb -MMX - 16to8 -0.049 usec crc32=0xee7ccbb4 -MMX - 8to8 0.014 usec crc32=0xd37b3295 -MMX - 16to8add 0.011 usec crc32=0xdd817bf4 -MMX - 8to16sub 0.108 usec crc32(1)=0xa1e07163 crc32(2)=0xd86c5d23 -MMX - 8to16sub2 0.164 usec crc32=0x99b6c4c7 - --- -MMXEXT - 8to16 -0.054 usec crc32=0x115814bb -MMXEXT - 16to8 0.010 usec crc32=0xee7ccbb4 -MMXEXT - 8to8 0.015 usec crc32=0xd37b3295 -MMXEXT - 16to8add 0.008 usec crc32=0xdd817bf4 -MMXEXT - 8to16sub 0.263 usec crc32(1)=0xa1e07163 crc32(2)=0xd86c5d23 -MMXEXT - 8to16sub2 0.178 usec crc32=0x99b6c4c7 - --- -3DNOW - 8to16 0.666 usec crc32=0x115814bb -3DNOW - 16to8 1.078 usec crc32=0xee7ccbb4 -3DNOW - 8to8 0.665 usec crc32=0xd37b3295 -3DNOW - 16to8add 1.365 usec crc32=0xdd817bf4 -3DNOW - 8to16sub 1.356 usec crc32(1)=0xa1e07163 crc32(2)=0xd86c5d23 -3DNOW - 8to16sub2 2.098 usec crc32=0x99b6c4c7 - --- -3DNOWE - 8to16 -0.024 usec crc32=0x115814bb -3DNOWE - 16to8 0.010 usec crc32=0xee7ccbb4 -3DNOWE - 8to8 0.014 usec crc32=0xd37b3295 -3DNOWE - 16to8add 0.016 usec crc32=0xdd817bf4 -3DNOWE - 8to16sub -0.000 usec crc32(1)=0xa1e07163 crc32(2)=0xd86c5d23 -3DNOWE - 8to16sub2 -0.031 usec crc32=0x99b6c4c7 - --- - - ===== test quant ===== -PLAINC - quant_mpeg_intra 98.631 usec crc32=0xfd6a21a4 -PLAINC - quant_mpeg_inter 104.876 usec crc32=0xf6de7757 -PLAINC - dequant_mpeg_intra 50.285 usec crc32=0x2def7bc7 -PLAINC - dequant_mpeg_inter 58.316 usec crc32=0xd878c722 -PLAINC - quant_h263_intra 33.803 usec crc32=0x2eba9d43 -PLAINC - quant_h263_inter 45.411 usec crc32=0xbd315a7e -PLAINC - dequant_h263_intra 39.302 usec crc32=0x9841212a -PLAINC - dequant_h263_inter 44.124 usec crc32=0xe7df8fba - --- -MMX - quant_mpeg_intra 4.273 usec crc32=0xdacabdb6 | ERROR -MMX - quant_mpeg_inter 3.576 usec crc32=0x72883ab6 | ERROR -MMX - dequant_mpeg_intra 3.793 usec crc32=0x2def7bc7 -MMX - dequant_mpeg_inter 4.808 usec crc32=0xd878c722 -MMX - quant_h263_intra 2.881 usec crc32=0x2eba9d43 -MMX - quant_h263_inter 2.550 usec crc32=0xbd315a7e -MMX - dequant_h263_intra 2.974 usec crc32=0x9841212a -MMX - dequant_h263_inter 2.906 usec crc32=0xe7df8fba - --- -MMXEXT - quant_mpeg_intra 4.221 usec crc32=0xfd6a21a4 -MMXEXT - quant_mpeg_inter 4.339 usec crc32=0xf6de7757 -MMXEXT - dequant_mpeg_intra 3.802 usec crc32=0x2def7bc7 -MMXEXT - dequant_mpeg_inter 4.821 usec crc32=0xd878c722 -MMXEXT - quant_h263_intra 2.884 usec crc32=0x2eba9d43 -MMXEXT - quant_h263_inter 2.554 usec crc32=0xbd315a7e -MMXEXT - dequant_h263_intra 2.728 usec crc32=0x9841212a -MMXEXT - dequant_h263_inter 2.611 usec crc32=0xe7df8fba - --- -3DNOW - quant_mpeg_intra 98.512 usec crc32=0xfd6a21a4 -3DNOW - quant_mpeg_inter 104.873 usec crc32=0xf6de7757 -3DNOW - dequant_mpeg_intra 50.219 usec crc32=0x2def7bc7 -3DNOW - dequant_mpeg_inter 58.254 usec crc32=0xd878c722 -3DNOW - quant_h263_intra 33.778 usec crc32=0x2eba9d43 -3DNOW - quant_h263_inter 41.998 usec crc32=0xbd315a7e -3DNOW - dequant_h263_intra 39.344 usec crc32=0x9841212a -3DNOW - dequant_h263_inter 43.607 usec crc32=0xe7df8fba - --- -3DNOWE - quant_mpeg_intra 98.490 usec crc32=0xfd6a21a4 -3DNOWE - quant_mpeg_inter 104.889 usec crc32=0xf6de7757 -3DNOWE - dequant_mpeg_intra 3.277 usec crc32=0x2def7bc7 -3DNOWE - dequant_mpeg_inter 4.485 usec crc32=0xd878c722 -3DNOWE - quant_h263_intra 1.882 usec crc32=0x2eba9d43 -3DNOWE - quant_h263_inter 2.246 usec crc32=0xbd315a7e -3DNOWE - dequant_h263_intra 3.457 usec crc32=0x9841212a -3DNOWE - dequant_h263_inter 3.275 usec crc32=0xe7df8fba - --- - - ===== test cbp ===== -PLAINC - calc_cbp#1 0.168 usec cbp=0x15 -PLAINC - calc_cbp#2 0.168 usec cbp=0x38 -PLAINC - calc_cbp#3 0.157 usec cbp=0x0f -PLAINC - calc_cbp#4 0.235 usec cbp=0x05 - --- -MMX - calc_cbp#1 0.070 usec cbp=0x15 -MMX - calc_cbp#2 0.062 usec cbp=0x38 -MMX - calc_cbp#3 0.062 usec cbp=0x0f -MMX - calc_cbp#4 0.061 usec cbp=0x05 - --- -MMXEXT - calc_cbp#1 0.062 usec cbp=0x15 -MMXEXT - calc_cbp#2 0.061 usec cbp=0x38 -MMXEXT - calc_cbp#3 0.061 usec cbp=0x0f -MMXEXT - calc_cbp#4 0.061 usec cbp=0x05 - --- -3DNOW - calc_cbp#1 0.168 usec cbp=0x15 -3DNOW - calc_cbp#2 0.168 usec cbp=0x38 -3DNOW - calc_cbp#3 0.157 usec cbp=0x0f -3DNOW - calc_cbp#4 0.238 usec cbp=0x05 - --- -3DNOWE - calc_cbp#1 0.049 usec cbp=0x15 -3DNOWE - calc_cbp#2 0.049 usec cbp=0x38 -3DNOWE - calc_cbp#3 0.049 usec cbp=0x0f -3DNOWE - calc_cbp#4 0.049 usec cbp=0x05 - --- - - -NB: If a function isn't optimised for a specific set of intructions, - a C function is used instead. So don't panic if some functions - may appear to be slow. - -NB: MMX mpeg4 quantization is known to have very small errors (+/-1 magnitude) - for 1 or 2 coefficients a block. This is mainly caused by the fact the unit - test goes far behind the usual limits of real encoding. Please do not report - this error to the developers - -*********************************************************************/ +/*********************************************************************/