[svn] / branches / dev-api-4 / xvidcore / src / bitstream / x86_asm / cbp_sse2.asm Repository:
ViewVC logotype

Annotation of /branches/dev-api-4/xvidcore/src/bitstream/x86_asm/cbp_sse2.asm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 655 - (view) (download)
Original Path: trunk/xvidcore/src/bitstream/x86_asm/cbp_sse2.asm

1 : Isibaar 262 ;/**************************************************************************
2 :     ; *
3 : edgomez 655 ; * XVID MPEG-4 VIDEO CODEC
4 :     ; * sse2 cbp calc
5 : Isibaar 262 ; *
6 : edgomez 655 ; * This file is part of XviD, a free MPEG-4 video encoder/decoder
7 : Isibaar 262 ; *
8 : edgomez 655 ; * XviD is free software; you can redistribute it and/or modify it
9 :     ; * under the terms of the GNU General Public License as published by
10 :     ; * the Free Software Foundation; either version 2 of the License, or
11 :     ; * (at your option) any later version.
12 : Isibaar 262 ; *
13 : edgomez 655 ; * This program is distributed in the hope that it will be useful,
14 :     ; * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 :     ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 :     ; * GNU General Public License for more details.
17 : Isibaar 262 ; *
18 : edgomez 655 ; * You should have received a copy of the GNU General Public License
19 :     ; * along with this program; if not, write to the Free Software
20 :     ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 : Isibaar 262 ; *
22 : edgomez 655 ; * Under section 8 of the GNU General Public License, the copyright
23 :     ; * holders of XVID explicitly forbid distribution in the following
24 :     ; * countries:
25 : Isibaar 262 ; *
26 : edgomez 655 ; * - Japan
27 :     ; * - United States of America
28 : Isibaar 262 ; *
29 : edgomez 655 ; * Linking XviD statically or dynamically with other modules is making a
30 :     ; * combined work based on XviD. Thus, the terms and conditions of the
31 :     ; * GNU General Public License cover the whole combination.
32 : Isibaar 262 ; *
33 : edgomez 655 ; * As a special exception, the copyright holders of XviD give you
34 :     ; * permission to link XviD with independent modules that communicate with
35 :     ; * XviD solely through the VFW1.1 and DShow interfaces, regardless of the
36 :     ; * license terms of these independent modules, and to copy and distribute
37 :     ; * the resulting combined work under terms of your choice, provided that
38 :     ; * every copy of the combined work is accompanied by a complete copy of
39 :     ; * the source code of XviD (the version of XviD used to produce the
40 :     ; * combined work), being distributed under the terms of the GNU General
41 :     ; * Public License plus this exception. An independent module is a module
42 :     ; * which is not derived from or based on XviD.
43 :     ; *
44 :     ; * Note that people who make modified versions of XviD are not obligated
45 :     ; * to grant this special exception for their modified versions; it is
46 :     ; * their choice whether to do so. The GNU General Public License gives
47 :     ; * permission to release a modified version without this exception; this
48 :     ; * exception also makes it possible to release a modified version which
49 :     ; * carries forward this exception.
50 :     ; *
51 :     ; * $Id: cbp_sse2.asm,v 1.2 2002-11-17 00:57:58 edgomez Exp $
52 :     ; *
53 : Isibaar 262 ; *************************************************************************/
54 :    
bits 32

section .data

;---------------------------------------------------------------------------
; cglobal name
;
; Declares `name` as a global symbol.
; When PREFIX is defined, the exported symbol is `_name`, and every later
; use of `name` in this file is rewritten to `_name` via %define.
;---------------------------------------------------------------------------
%macro cglobal 1
    %ifndef PREFIX
        global  %1
    %else
        global  _%1
        %define %1 _%1
    %endif
%endmacro
67 :    
align 16

; Mask applied to the first eight 16-bit coefficients of a block:
; word 0 (the DC value) is cleared, words 1..7 are kept unchanged.
ignore_dc:
        dw      0
        times 7 dw 0FFFFh

section .text

cglobal calc_cbp_sse2
75 :    
76 :     ;===========================================================================
77 :     ;
78 :     ; uint32_t calc_cbp_sse2(const int16_t coeff[6][64]);
79 :     ;
80 :     ; not enabled - slower than mmx?
81 :     ;
82 :     ;===========================================================================
83 :    
;---------------------------------------------------------------------------
; LOOP_SSE2 blk
;
; ORs together the eight 16-byte rows of block `blk` (128 bytes starting at
; edx + blk*128) and reduces the result to a single zero / non-zero value.
;
; In:    edx  = base address of the coefficient array; rows are read with
;               movdqa / memory-operand por, so edx must be 16-byte aligned
;        xmm7 = mask ANDed onto the first row (the caller loads ignore_dc)
;        xmm6 = all zero (reference operand for psadbw)
; Out:   ecx  = non-zero iff any byte of the masked block is non-zero
; Clobb: xmm0, xmm1, ecx
;
; EFLAGS are left untouched: the caller must `test ecx, ecx` itself.
;---------------------------------------------------------------------------
%macro LOOP_SSE2 1
        movdqa  xmm0, [edx+(%1)*128]        ; row 0
        pand    xmm0, xmm7                  ; apply the mask to row 0 only
        movdqa  xmm1, [edx+(%1)*128+16]     ; row 1

        ; two independent OR chains: even rows in xmm0, odd rows in xmm1
        por     xmm0, [edx+(%1)*128+32]
        por     xmm1, [edx+(%1)*128+48]
        por     xmm0, [edx+(%1)*128+64]
        por     xmm1, [edx+(%1)*128+80]
        por     xmm0, [edx+(%1)*128+96]
        por     xmm1, [edx+(%1)*128+112]

        por     xmm0, xmm1                  ; xmm0 = OR of all eight rows
        psadbw  xmm0, xmm6                  ; vs. zero: one byte-sum per 64-bit half
        movhlps xmm1, xmm0                  ; high qword of xmm0 -> low qword of xmm1
        por     xmm0, xmm1                  ; merge both sums into the low qword
        movd    ecx, xmm0                   ; ecx != 0  <=>  some byte was non-zero
%endmacro
103 :    
align 16

;---------------------------------------------------------------------------
; uint32_t calc_cbp_sse2(const int16_t coeff[6][64]);
;
; Builds a 6-bit coded-block-pattern: a bit is set when the corresponding
; block holds a non-zero coefficient, ignoring the first (DC) coefficient.
; Block 0 maps to bit 5, block 1 to bit 4, ... block 5 to bit 0.
;
; Target: 32-bit code, argument read from the stack at [esp+4].
; In:     [esp+4] = coeff; read with aligned 16-byte loads inside LOOP_SSE2,
;                   so it must be 16-byte aligned
; Out:    eax = cbp
; Clobb:  ecx, edx, xmm0, xmm1, xmm6, xmm7, flags
;---------------------------------------------------------------------------
calc_cbp_sse2:
        mov     edx, [esp+4]            ; edx = coeff
        xor     eax, eax                ; cbp = 0

        movdqu  xmm7, [ignore_dc]       ; mask that clears the DC word
                                        ; (movdqu: no alignment requirement)
        pxor    xmm6, xmm6              ; zero operand for psadbw

        LOOP_SSE2 0
        test    ecx, ecx                ; ecx != 0 -> block 0 is coded
        jz      .blk2
        or      eax, (1<<5)
.blk2:
        LOOP_SSE2 1
        test    ecx, ecx
        jz      .blk3
        or      eax, (1<<4)
.blk3:
        LOOP_SSE2 2
        test    ecx, ecx
        jz      .blk4
        or      eax, (1<<3)
.blk4:
        LOOP_SSE2 3
        test    ecx, ecx
        jz      .blk5
        or      eax, (1<<2)
.blk5:
        LOOP_SSE2 4
        test    ecx, ecx
        jz      .blk6
        or      eax, (1<<1)
.blk6:
        LOOP_SSE2 5
        test    ecx, ecx
        jz      .finished
        or      eax, (1<<0)
.finished:

        ret

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4