Parent Directory
|
Revision Log
Revision 1989 - (view) (download)
1 : | Isibaar | 1989 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - Forward DCT - | ||
5 : | * | ||
6 : | * Copyright (C) 2006-2011 Xvid Solutions GmbH | ||
7 : | * | ||
8 : | * This program is free software ; you can redistribute it and/or modify | ||
9 : | * it under the terms of the GNU General Public License as published by | ||
10 : | * the Free Software Foundation ; either version 2 of the License, or | ||
11 : | * (at your option) any later version. | ||
12 : | * | ||
13 : | * This program is distributed in the hope that it will be useful, | ||
14 : | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
15 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | * GNU General Public License for more details. | ||
17 : | * | ||
18 : | * You should have received a copy of the GNU General Public License | ||
19 : | * along with this program ; if not, write to the Free Software | ||
20 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | * | ||
22 : | * $Id$ | ||
23 : | * | ||
24 : | ****************************************************************************/ | ||
25 : | |||
26 : | Isibaar | 1979 | /* |
27 : | * Authors: Skal | ||
28 : | * | ||
29 : | * "Fast and precise" LLM implementation of FDCT/IDCT, where | ||
30 : | * rotations are decomposed using: | ||
31 : | * tmp = (x+y).cos t | ||
32 : | * x' = tmp + y.(sin t - cos t) | ||
33 : | * y' = tmp - x.(sin t + cos t) | ||
34 : | * | ||
35 : | * See details at the end of this file... | ||
36 : | * | ||
37 : | * Reference (e.g.): | ||
38 : | * Loeffler C., Ligtenberg A., and Moschytz C.S.: | ||
39 : | * Practical Fast 1D DCT Algorithm with Eleven Multiplications, | ||
40 : | * Proc. ICASSP 1989, 988-991. | ||
41 : | * | ||
42 : | * IEEE-1180-like error specs for FDCT: | ||
43 : | * Peak error: 1.0000 | ||
44 : | * Peak MSE: 0.0340 | ||
45 : | * Overall MSE: 0.0200 | ||
46 : | * Peak ME: 0.0191 | ||
47 : | * Overall ME: -0.0033 | ||
48 : | * | ||
49 : | ********************************************************/ | ||
50 : | |||
51 : | #include "fdct.h" | ||
52 : | |||
53 : | /* function pointer */ | ||
54 : | fdctFuncPtr fdct; | ||
55 : | |||
56 : | /* | ||
57 : | ////////////////////////////////////////////////////////// | ||
58 : | */ | ||
59 : | |||
60 : | #define BUTF(a, b, tmp) \ | ||
61 : | (tmp) = (a)+(b); \ | ||
62 : | (b) = (a)-(b); \ | ||
63 : | (a) = (tmp) | ||
64 : | |||
65 : | #define LOAD_BUTF(m1, m2, a, b, tmp, S) \ | ||
66 : | (m1) = (S)[(a)] + (S)[(b)]; \ | ||
67 : | (m2) = (S)[(a)] - (S)[(b)] | ||
68 : | |||
69 : | #define ROTATE(m1,m2,c,k1,k2,tmp,Fix,Rnd) \ | ||
70 : | (tmp) = ( (m1) + (m2) )*(c); \ | ||
71 : | (m1) *= k1; \ | ||
72 : | (m2) *= k2; \ | ||
73 : | (tmp) += (Rnd); \ | ||
74 : | (m1) = ((m1)+(tmp))>>(Fix); \ | ||
75 : | (m2) = ((m2)+(tmp))>>(Fix); | ||
76 : | |||
77 : | #define ROTATE2(m1,m2,c,k1,k2,tmp) \ | ||
78 : | (tmp) = ( (m1) + (m2) )*(c); \ | ||
79 : | (m1) *= k1; \ | ||
80 : | (m2) *= k2; \ | ||
81 : | (m1) = (m1)+(tmp); \ | ||
82 : | (m2) = (m2)+(tmp); | ||
83 : | |||
84 : | #define ROTATE0(m1,m2,c,k1,k2,tmp) \ | ||
85 : | (m1) = ( (m2) )*(c); \ | ||
86 : | (m2) = (m2)*k2+(m1); | ||
87 : | |||
88 : | #define SHIFTL(x,n) ((x)<<(n)) | ||
89 : | #define SHIFTR(x, n) ((x)>>(n)) | ||
90 : | #define HALF(n) (1<<((n)-1)) | ||
91 : | |||
92 : | #define IPASS 3 | ||
93 : | #define FPASS 2 | ||
94 : | #define FIX 16 | ||
95 : | |||
96 : | #if 1 | ||
97 : | |||
98 : | #define ROT6_C 35468 | ||
99 : | #define ROT6_SmC 50159 | ||
100 : | #define ROT6_SpC 121095 | ||
101 : | #define ROT17_C 77062 | ||
102 : | #define ROT17_SmC 25571 | ||
103 : | #define ROT17_SpC 128553 | ||
104 : | #define ROT37_C 58981 | ||
105 : | #define ROT37_SmC 98391 | ||
106 : | #define ROT37_SpC 19571 | ||
107 : | #define ROT13_C 167963 | ||
108 : | #define ROT13_SmC 134553 | ||
109 : | #define ROT13_SpC 201373 | ||
110 : | |||
111 : | #else | ||
112 : | |||
113 : | #include <math.h> | ||
114 : | #define FX(x) ( (int)floor((x)*(1<<FIX) + .5 ) ) | ||
115 : | |||
116 : | static const double c1 = cos(1.*M_PI/16); | ||
117 : | static const double c2 = cos(2.*M_PI/16); | ||
118 : | static const double c3 = cos(3.*M_PI/16); | ||
119 : | static const double c4 = cos(4.*M_PI/16); | ||
120 : | static const double c5 = cos(5.*M_PI/16); | ||
121 : | static const double c6 = cos(6.*M_PI/16); | ||
122 : | static const double c7 = cos(7.*M_PI/16); | ||
123 : | |||
124 : | static const int ROT6_C = FX(c2-c6); // 0.541 | ||
125 : | static const int ROT6_SmC = FX(2*c6); // 0.765 | ||
126 : | static const int ROT6_SpC = FX(2*c2); // 1.847 | ||
127 : | |||
128 : | static const int ROT17_C = FX(c1+c7); // 1.175 | ||
129 : | static const int ROT17_SmC = FX(2*c7); // 0.390 | ||
130 : | static const int ROT17_SpC = FX(2*c1); // 1.961 | ||
131 : | |||
132 : | static const int ROT37_C = FX((c3-c7)/c4); // 0.899 | ||
133 : | static const int ROT37_SmC = FX(2*(c5+c7)); // 1.501 | ||
134 : | static const int ROT37_SpC = FX(2*(c1-c3)); // 0.298 | ||
135 : | |||
136 : | static const int ROT13_C = FX((c1+c3)/c4); // 2.562 | ||
137 : | static const int ROT13_SmC = FX(2*(c3+c7)); // 2.053 | ||
138 : | static const int ROT13_SpC = FX(2*(c1+c5)); // 3.072 | ||
139 : | |||
140 : | #endif | ||
141 : | |||
142 : | /* | ||
143 : | ////////////////////////////////////////////////////////// | ||
144 : | // Forward transform | ||
145 : | */ | ||
146 : | |||
147 : | void fdct_int32( short *const In ) | ||
148 : | { | ||
149 : | short *pIn; | ||
150 : | int i; | ||
151 : | |||
152 : | pIn = In; | ||
153 : | for(i=8; i>0; --i) | ||
154 : | { | ||
155 : | int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, Spill; | ||
156 : | |||
157 : | // even | ||
158 : | |||
159 : | LOAD_BUTF(mm1,mm6, 1, 6, mm0, pIn); | ||
160 : | LOAD_BUTF(mm2,mm5, 2, 5, mm0, pIn); | ||
161 : | LOAD_BUTF(mm3,mm4, 3, 4, mm0, pIn); | ||
162 : | LOAD_BUTF(mm0,mm7, 0, 7, Spill, pIn); | ||
163 : | |||
164 : | BUTF(mm1, mm2, Spill); | ||
165 : | BUTF(mm0, mm3, Spill); | ||
166 : | |||
167 : | ROTATE(mm3, mm2, ROT6_C, ROT6_SmC, -ROT6_SpC, Spill, FIX-FPASS, HALF(FIX-FPASS)); | ||
168 : | pIn[2] = mm3; | ||
169 : | pIn[6] = mm2; | ||
170 : | |||
171 : | BUTF(mm0, mm1, Spill); | ||
172 : | pIn[0] = SHIFTL(mm0, FPASS); | ||
173 : | pIn[4] = SHIFTL(mm1, FPASS); | ||
174 : | |||
175 : | |||
176 : | // odd | ||
177 : | |||
178 : | mm3 = mm5 + mm7; | ||
179 : | mm2 = mm4 + mm6; | ||
180 : | ROTATE(mm2, mm3, ROT17_C, -ROT17_SpC, -ROT17_SmC, mm0, FIX-FPASS, HALF(FIX-FPASS)); | ||
181 : | ROTATE(mm4, mm7, -ROT37_C, ROT37_SpC, ROT37_SmC, mm0, FIX-FPASS, HALF(FIX-FPASS)); | ||
182 : | mm7 += mm3; | ||
183 : | mm4 += mm2; | ||
184 : | pIn[1] = mm7; | ||
185 : | pIn[7] = mm4; | ||
186 : | |||
187 : | ROTATE(mm5, mm6, -ROT13_C, ROT13_SmC, ROT13_SpC, mm0, FIX-FPASS, HALF(FIX-FPASS)); | ||
188 : | mm5 += mm3; | ||
189 : | mm6 += mm2; | ||
190 : | pIn[3] = mm6; | ||
191 : | pIn[5] = mm5; | ||
192 : | |||
193 : | pIn += 8; | ||
194 : | } | ||
195 : | |||
196 : | pIn = In; | ||
197 : | for(i=8; i>0; --i) | ||
198 : | { | ||
199 : | int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, Spill; | ||
200 : | |||
201 : | // even | ||
202 : | |||
203 : | LOAD_BUTF(mm1,mm6, 1*8, 6*8, mm0, pIn); | ||
204 : | LOAD_BUTF(mm2,mm5, 2*8, 5*8, mm0, pIn); | ||
205 : | BUTF(mm1, mm2, mm0); | ||
206 : | |||
207 : | LOAD_BUTF(mm3,mm4, 3*8, 4*8, mm0, pIn); | ||
208 : | LOAD_BUTF(mm0,mm7, 0*8, 7*8, Spill, pIn); | ||
209 : | BUTF(mm0, mm3, Spill); | ||
210 : | |||
211 : | ROTATE(mm3, mm2, ROT6_C, ROT6_SmC, -ROT6_SpC, Spill, 0, HALF(FIX+FPASS+3)); | ||
212 : | pIn[2*8] = (int16_t)SHIFTR(mm3,FIX+FPASS+3); | ||
213 : | pIn[6*8] = (int16_t)SHIFTR(mm2,FIX+FPASS+3); | ||
214 : | |||
215 : | mm0 += HALF(FPASS+3) - 1; | ||
216 : | BUTF(mm0, mm1, Spill); | ||
217 : | pIn[0*8] = (int16_t)SHIFTR(mm0, FPASS+3); | ||
218 : | pIn[4*8] = (int16_t)SHIFTR(mm1, FPASS+3); | ||
219 : | |||
220 : | // odd | ||
221 : | |||
222 : | mm3 = mm5 + mm7; | ||
223 : | mm2 = mm4 + mm6; | ||
224 : | |||
225 : | ROTATE(mm2, mm3, ROT17_C, -ROT17_SpC, -ROT17_SmC, mm0, 0, HALF(FIX+FPASS+3)); | ||
226 : | ROTATE2(mm4, mm7, -ROT37_C, ROT37_SpC, ROT37_SmC, mm0); | ||
227 : | mm7 += mm3; | ||
228 : | mm4 += mm2; | ||
229 : | pIn[7*8] = (int16_t)SHIFTR(mm4,FIX+FPASS+3); | ||
230 : | pIn[1*8] = (int16_t)SHIFTR(mm7,FIX+FPASS+3); | ||
231 : | |||
232 : | ROTATE2(mm5, mm6, -ROT13_C, ROT13_SmC, ROT13_SpC, mm0); | ||
233 : | mm5 += mm3; | ||
234 : | mm6 += mm2; | ||
235 : | pIn[5*8] = (int16_t)SHIFTR(mm5,FIX+FPASS+3); | ||
236 : | pIn[3*8] = (int16_t)SHIFTR(mm6,FIX+FPASS+3); | ||
237 : | |||
238 : | pIn++; | ||
239 : | } | ||
240 : | } | ||
241 : | #undef FIX | ||
242 : | #undef FPASS | ||
243 : | #undef IPASS | ||
244 : | |||
245 : | #undef BUTF | ||
246 : | #undef LOAD_BUTF | ||
247 : | #undef ROTATE | ||
248 : | #undef ROTATE2 | ||
249 : | #undef SHIFTL | ||
250 : | #undef SHIFTR | ||
251 : | |||
252 : | ////////////////////////////////////////////////////////// | ||
253 : | // - Data flow schematics for FDCT - | ||
254 : | // Output is scaled by 2.sqrt(2) | ||
255 : | // Initial butterflies (in0/in7, etc.) are not fully depicted. | ||
256 : | // Note: Rot6 coeffs are multiplied by sqrt(2). | ||
257 : | ////////////////////////////////////////////////////////// | ||
258 : | /* | ||
259 : | <---------Stage1 =even part=-----------> | ||
260 : | |||
261 : | in3 mm3 +_____.___-___________.____* out6 | ||
262 : | x \ / | | ||
263 : | in4 mm4 \ | | ||
264 : | / \ | | ||
265 : | in0 mm0 +_____o___+__.___-___ | ___* out4 | ||
266 : | x \ / | | ||
267 : | in7 mm7 \ (Rot6) | ||
268 : | / \ | | ||
269 : | in1 mm1 +_____o___+__o___+___ | ___* out0 | ||
270 : | x \ / | | ||
271 : | in6 mm6 / | | ||
272 : | / \ | | ||
273 : | in2 mm2 +_____.___-___________o____* out2 | ||
274 : | x | ||
275 : | in5 mm5 | ||
276 : | |||
277 : | <---------Stage2 =odd part=----------------> | ||
278 : | |||
279 : | mm7*___._________.___-___[xSqrt2]___* out3 | ||
280 : | | \ / | ||
281 : | (Rot3) \ | ||
282 : | | / \ | ||
283 : | mm5*__ | ___o____o___+___.___-______* out7 | ||
284 : | | | \ / | ||
285 : | | (Rot1) \ | ||
286 : | | | / \ | ||
287 : | mm6*__ |____.____o___+___o___+______* out1 | ||
288 : | | \ / | ||
289 : | | / | ||
290 : | | / \ | ||
291 : | mm4*___o_________.___-___[xSqrt2]___* out5 | ||
292 : | |||
293 : | |||
294 : | |||
295 : | Alternative schematics for stage 2: | ||
296 : | ----------------------------------- | ||
297 : | |||
298 : | mm7 *___[xSqrt2]____o___+____o_______* out1 | ||
299 : | \ / | | ||
300 : | / (Rot1) | ||
301 : | / \ | | ||
302 : | mm6 *____o___+______.___-___ | __.___* out5 | ||
303 : | \ / | | | ||
304 : | / | | | ||
305 : | / \ | | | ||
306 : | mm5 *____.___-______.___-____.__ | __* out7 | ||
307 : | \ / | | ||
308 : | \ (Rot3) | ||
309 : | / \ | | ||
310 : | mm4 *___[xSqrt2]____o___+________o___* out3 | ||
311 : | |||
312 : | */ |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |