37 |
* - 22.12.2001 API change: added xvid_init() - Isibaar |
* - 22.12.2001 API change: added xvid_init() - Isibaar |
38 |
* - 16.12.2001 initial version; (c)2001 peter ross <pross@cs.rmit.edu.au> |
* - 16.12.2001 initial version; (c)2001 peter ross <pross@cs.rmit.edu.au> |
39 |
* |
* |
40 |
* $Id: xvid.c,v 1.33.2.21 2003-01-03 16:25:14 suxen_drol Exp $ |
* $Id: xvid.c,v 1.33.2.22 2003-01-04 06:14:32 suxen_drol Exp $ |
41 |
* |
* |
42 |
****************************************************************************/ |
****************************************************************************/ |
43 |
|
|
65 |
#include "utils/timer.h" |
#include "utils/timer.h" |
66 |
#include "bitstream/mbcoding.h" |
#include "bitstream/mbcoding.h" |
67 |
|
|
68 |
#if defined(ARCH_X86) && defined(EXPERIMENTAL_SSE2_CODE) |
#if defined(ARCH_X86) |
69 |
|
|
70 |
#ifdef WIN32 |
#if defined(_MSC_VER) |
71 |
#include <windows.h> |
#include <windows.h> |
72 |
#else |
#else |
73 |
#include <signal.h> |
#include <signal.h> |
74 |
#include <setjmp.h> |
#include <setjmp.h> |
|
#endif |
|
|
|
|
|
|
|
|
#ifndef WIN32 |
|
75 |
|
|
76 |
static jmp_buf mark; |
static jmp_buf mark; |
77 |
|
|
94 |
int |
int |
95 |
sigill_check(void (*func)()) |
sigill_check(void (*func)()) |
96 |
{ |
{ |
97 |
#ifdef WIN32 |
#if defined(_MSC_VER) |
98 |
_try { |
_try { |
99 |
func(); |
func(); |
100 |
} |
} |
128 |
} |
} |
129 |
#endif |
#endif |
130 |
|
|
131 |
|
|
132 |
|
/* detect cpu flags */ |
133 |
|
static unsigned int |
134 |
|
detect_cpu_flags() |
135 |
|
{ |
136 |
|
/* enable native assembly optimizations by default */ |
137 |
|
unsigned int cpu_flags = XVID_CPU_ASM; |
138 |
|
|
139 |
|
#if defined(ARCH_X86) |
140 |
|
cpu_flags |= check_cpu_features(); |
141 |
|
if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger)) |
142 |
|
cpu_flags &= ~XVID_CPU_SSE; |
143 |
|
|
144 |
|
if ((cpu_flags & XVID_CPU_SSE2) && sigill_check(sse2_os_trigger)) |
145 |
|
cpu_flags &= ~XVID_CPU_SSE2; |
146 |
|
#endif |
147 |
|
|
148 |
|
#if defined(ARCH_PPC) |
149 |
|
#if defined(ARCH_PPC_ALTIVEC) |
150 |
|
cpu_flags |= XVID_CPU_ALTIVEC; |
151 |
|
#endif |
152 |
|
#endif |
153 |
|
|
154 |
|
return cpu_flags; |
155 |
|
} |
156 |
|
|
157 |
|
|
158 |
/***************************************************************************** |
/***************************************************************************** |
159 |
* XviD Init Entry point |
* XviD Init Entry point |
160 |
* |
* |
188 |
|
|
189 |
} else { |
} else { |
190 |
|
|
191 |
cpu_flags = check_cpu_features(); |
cpu_flags = detect_cpu_flags(); |
|
|
|
|
#if defined(ARCH_X86) && defined(EXPERIMENTAL_SSE2_CODE) |
|
|
if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger)) |
|
|
cpu_flags &= ~XVID_CPU_SSE; |
|
|
|
|
|
if ((cpu_flags & XVID_CPU_SSE2) && sigill_check(sse2_os_trigger)) |
|
|
cpu_flags &= ~XVID_CPU_SSE2; |
|
|
#endif |
|
192 |
} |
} |
193 |
|
|
194 |
if ((init_param->cpu_flags & XVID_CPU_CHKONLY)) |
if ((init_param->cpu_flags & XVID_CPU_CHKONLY)) |
256 |
interpolate8x8_avg4 = interpolate8x8_avg4_c; |
interpolate8x8_avg4 = interpolate8x8_avg4_c; |
257 |
|
|
258 |
/* reduced resolution */ |
/* reduced resolution */ |
|
|
|
259 |
copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_C; |
copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_C; |
260 |
add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_C; |
add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_C; |
|
#ifdef ARCH_X86 |
|
|
vfilter_31 = xvid_VFilter_31_x86; |
|
|
hfilter_31 = xvid_HFilter_31_x86; |
|
|
#else |
|
261 |
vfilter_31 = xvid_VFilter_31_C; |
vfilter_31 = xvid_VFilter_31_C; |
262 |
hfilter_31 = xvid_HFilter_31_C; |
hfilter_31 = xvid_HFilter_31_C; |
|
#endif |
|
263 |
filter_18x18_to_8x8 = xvid_Filter_18x18_To_8x8_C; |
filter_18x18_to_8x8 = xvid_Filter_18x18_To_8x8_C; |
264 |
filter_diff_18x18_to_8x8 = xvid_Filter_Diff_18x18_To_8x8_C; |
filter_diff_18x18_to_8x8 = xvid_Filter_Diff_18x18_To_8x8_C; |
265 |
|
|
317 |
|
|
318 |
// Halfpel8_Refine = Halfpel8_Refine_c; |
// Halfpel8_Refine = Halfpel8_Refine_c; |
319 |
|
|
320 |
#ifdef ARCH_X86 |
#if defined(ARCH_X86) |
321 |
|
|
322 |
|
if ((cpu_flags & XVID_CPU_ASM)) |
323 |
|
{ |
324 |
|
vfilter_31 = xvid_VFilter_31_x86; |
325 |
|
hfilter_31 = xvid_HFilter_31_x86; |
326 |
|
} |
327 |
|
|
328 |
if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) || |
if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) || |
329 |
(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) || |
(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) || |
333 |
emms = emms_mmx; |
emms = emms_mmx; |
334 |
} |
} |
335 |
|
|
336 |
if ((cpu_flags & XVID_CPU_MMX) > 0) { |
if ((cpu_flags & XVID_CPU_MMX)) { |
337 |
|
|
338 |
/* Forward and Inverse Discrete Cosine Transformation functions */ |
/* Forward and Inverse Discrete Cosine Transformation functions */ |
339 |
fdct = fdct_mmx; |
fdct = fdct_mmx; |
403 |
sad8bi = sad8bi_mmx; |
sad8bi = sad8bi_mmx; |
404 |
dev16 = dev16_mmx; |
dev16 = dev16_mmx; |
405 |
sad16v = sad16v_mmx; |
sad16v = sad16v_mmx; |
|
|
|
406 |
} |
} |
407 |
|
|
408 |
/* these 3dnow functions are faster than mmx, but slower than xmm. */ |
/* these 3dnow functions are faster than mmx, but slower than xmm. */ |
409 |
if ((cpu_flags & XVID_CPU_3DNOW) > 0) { |
if ((cpu_flags & XVID_CPU_3DNOW)) { |
410 |
|
|
411 |
|
emms = emms_3dn; |
412 |
|
|
413 |
/* ME functions */ |
/* ME functions */ |
414 |
sad16bi = sad16bi_3dn; |
sad16bi = sad16bi_3dn; |
419 |
} |
} |
420 |
|
|
421 |
|
|
422 |
if ((cpu_flags & XVID_CPU_MMXEXT) > 0) { |
if ((cpu_flags & XVID_CPU_MMXEXT)) { |
423 |
|
|
424 |
/* Inverse DCT */ |
/* Inverse DCT */ |
425 |
idct = idct_xmm; |
idct = idct_xmm; |
457 |
sad16v = sad16v_xmm; |
sad16v = sad16v_xmm; |
458 |
} |
} |
459 |
|
|
460 |
if ((cpu_flags & XVID_CPU_3DNOW) > 0) { |
if ((cpu_flags & XVID_CPU_3DNOW)) { |
461 |
|
|
462 |
/* Interpolation */ |
/* Interpolation */ |
463 |
interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dn; |
interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dn; |
465 |
interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dn; |
interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dn; |
466 |
} |
} |
467 |
|
|
468 |
if ((cpu_flags & XVID_CPU_3DNOWEXT) > 0) { |
if ((cpu_flags & XVID_CPU_3DNOWEXT)) { |
469 |
|
|
470 |
/* Inverse DCT */ |
/* Inverse DCT */ |
471 |
idct = idct_3dne; |
idct = idct_3dne; |
501 |
} |
} |
502 |
|
|
503 |
|
|
504 |
if ((cpu_flags & XVID_CPU_SSE2) > 0) { |
if ((cpu_flags & XVID_CPU_SSE2)) { |
|
#ifdef EXPERIMENTAL_SSE2_CODE |
|
505 |
|
|
506 |
calc_cbp = calc_cbp_sse2; |
calc_cbp = calc_cbp_sse2; |
507 |
|
|
511 |
quant_inter = quant_inter_sse2; |
quant_inter = quant_inter_sse2; |
512 |
dequant_inter = dequant_inter_sse2; |
dequant_inter = dequant_inter_sse2; |
513 |
|
|
514 |
/* ME */ |
#if defined(EXPERIMENTAL_SSE2_CODE) |
515 |
|
/* ME; slower than xmm */ |
516 |
sad16 = sad16_sse2; |
sad16 = sad16_sse2; |
517 |
dev16 = dev16_sse2; |
dev16 = dev16_sse2; |
518 |
|
#endif |
519 |
/* Forward and Inverse DCT */ |
/* Forward and Inverse DCT */ |
520 |
idct = idct_sse2; |
idct = idct_sse2; |
521 |
fdct = fdct_sse2; |
fdct = fdct_sse2; |
|
#endif |
|
522 |
} |
} |
|
|
|
523 |
#endif |
#endif |
524 |
|
|
525 |
#ifdef ARCH_IA64 |
#if defined(ARCH_IA64) |
526 |
if ((cpu_flags & XVID_CPU_IA64) > 0) { //use assembler routines? |
if ((cpu_flags & XVID_CPU_ASM)) { //use assembler routines? |
527 |
idct_ia64_init(); |
idct_ia64_init(); |
528 |
fdct = fdct_ia64; |
fdct = fdct_ia64; |
529 |
idct = idct_ia64; //not yet working, crashes |
idct = idct_ia64; //not yet working, crashes |
549 |
} |
} |
550 |
#endif |
#endif |
551 |
|
|
552 |
#ifdef ARCH_PPC |
#if defined(ARCH_PPC) |
553 |
#ifdef ARCH_PPC_ALTIVEC |
if ((cpu_flags & XVID_CPU_ASM)) |
554 |
|
{ |
555 |
|
calc_cbp = calc_cbp_ppc; |
556 |
|
} |
557 |
|
|
558 |
|
if ((cpu_flags & XVID_CPU_ALTIVEC)) |
559 |
|
{ |
560 |
calc_cbp = calc_cbp_altivec; |
calc_cbp = calc_cbp_altivec; |
561 |
fdct = fdct_altivec; |
fdct = fdct_altivec; |
562 |
idct = idct_altivec; |
idct = idct_altivec; |
564 |
sad16 = sad16_altivec; |
sad16 = sad16_altivec; |
565 |
sad8 = sad8_altivec; |
sad8 = sad8_altivec; |
566 |
dev16 = dev16_altivec; |
dev16 = dev16_altivec; |
567 |
#else |
} |
|
calc_cbp = calc_cbp_ppc; |
|
|
#endif |
|
568 |
#endif |
#endif |
569 |
|
|
570 |
return XVID_ERR_OK; |
return XVID_ERR_OK; |
829 |
printf("xvid_init_test\n"); |
printf("xvid_init_test\n"); |
830 |
|
|
831 |
#if defined(ARCH_X86) |
#if defined(ARCH_X86) |
832 |
cpu_flags = check_cpu_features(); |
cpu_flags = detect_cpu_flags(); |
833 |
idct_int32_init(); |
idct_int32_init(); |
834 |
emms_mmx(); |
emms_mmx(); |
835 |
|
|