1 |
/***************************************************************************** |
/* idct.c, inverse fast discrete cosine transform */ |
2 |
* |
|
3 |
* XVID MPEG-4 VIDEO CODEC |
/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */ |
4 |
* - inverse fast discrete cosine transformation - integer C version |
|
5 |
* |
/* |
6 |
* These routines are from Independent JPEG Group's free JPEG software |
* Disclaimer of Warranty |
|
* Copyright (C) 1991-1998, Thomas G. Lane (see the file README.IJG) |
|
|
* |
|
|
* This file is part of XviD, a free MPEG-4 video encoder/decoder |
|
|
* |
|
|
* XviD is free software; you can redistribute it and/or modify it |
|
|
* under the terms of the GNU General Public License as published by |
|
|
* the Free Software Foundation; either version 2 of the License, or |
|
|
* (at your option) any later version. |
|
|
* |
|
|
* This program is distributed in the hope that it will be useful, |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
|
* GNU General Public License for more details. |
|
|
* |
|
|
* You should have received a copy of the GNU General Public License |
|
|
* along with this program; if not, write to the Free Software |
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
|
* |
|
|
* Under section 8 of the GNU General Public License, the copyright |
|
|
* holders of XVID explicitly forbid distribution in the following |
|
|
* countries: |
|
|
* |
|
|
* - Japan |
|
|
* - United States of America |
|
|
* |
|
|
* Linking XviD statically or dynamically with other modules is making a |
|
|
* combined work based on XviD. Thus, the terms and conditions of the |
|
|
* GNU General Public License cover the whole combination. |
|
|
* |
|
|
* As a special exception, the copyright holders of XviD give you |
|
|
* permission to link XviD with independent modules that communicate with |
|
|
* XviD solely through the VFW1.1 and DShow interfaces, regardless of the |
|
|
* license terms of these independent modules, and to copy and distribute |
|
|
* the resulting combined work under terms of your choice, provided that |
|
|
* every copy of the combined work is accompanied by a complete copy of |
|
|
* the source code of XviD (the version of XviD used to produce the |
|
|
* combined work), being distributed under the terms of the GNU General |
|
|
* Public License plus this exception. An independent module is a module |
|
|
* which is not derived from or based on XviD. |
|
|
* |
|
|
* Note that people who make modified versions of XviD are not obligated |
|
|
* to grant this special exception for their modified versions; it is |
|
|
* their choice whether to do so. The GNU General Public License gives |
|
|
* permission to release a modified version without this exception; this |
|
|
* exception also makes it possible to release a modified version which |
|
|
* carries forward this exception. |
|
7 |
* |
* |
8 |
* $Id: idct.c,v 1.5 2002-11-26 23:44:10 edgomez Exp $ |
* These software programs are available to the user without any license fee or |
9 |
|
* royalty on an "as is" basis. The MPEG Software Simulation Group disclaims |
10 |
|
* any and all warranties, whether express, implied, or statuary, including any |
11 |
|
* implied warranties or merchantability or of fitness for a particular |
12 |
|
* purpose. In no event shall the copyright-holder be liable for any |
13 |
|
* incidental, punitive, or consequential damages of any kind whatsoever |
14 |
|
* arising from the use of these programs. |
15 |
|
* |
16 |
|
* This disclaimer of warranty extends to the user of these programs and user's |
17 |
|
* customers, employees, agents, transferees, successors, and assigns. |
18 |
|
* |
19 |
|
* The MPEG Software Simulation Group does not represent or warrant that the |
20 |
|
* programs furnished hereunder are free of infringement of any third-party |
21 |
|
* patents. |
22 |
|
* |
23 |
|
* Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, |
24 |
|
* are subject to royalty fees to patent holders. Many of these patents are |
25 |
|
* general enough such that they are unavoidable regardless of implementation |
26 |
|
* design. |
27 |
|
* |
28 |
|
* MPEG2AVI |
29 |
|
* -------- |
30 |
|
* v0.16B33 renamed the initialization function to init_idct_int32() |
31 |
|
* v0.16B32 removed the unused idct_row() and idct_col() functions |
32 |
|
* v0.16B3 changed var declarations to static, to enforce data align |
33 |
|
* v0.16B22 idct_FAST() renamed to idct_int32() |
34 |
|
* also merged idct_FAST() into a single function, to help VC++ |
35 |
|
* optimize it. |
36 |
* |
* |
37 |
*************************************************************************/ |
* v0.14 changed int to long, to avoid confusion when compiling on x86 |
38 |
|
* platform ( in VC++ "int" -> 32bits ) |
39 |
|
*/ |
40 |
|
|
41 |
/**********************************************************/ |
/**********************************************************/ |
42 |
/* inverse two dimensional DCT, Chen-Wang algorithm */ |
/* inverse two dimensional DCT, Chen-Wang algorithm */ |
52 |
/* this code assumes >> to be a two's-complement arithmetic */ |
/* this code assumes >> to be a two's-complement arithmetic */ |
53 |
/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2 */ |
/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2 */ |
54 |
|
|
55 |
|
//#include <windows.h> |
56 |
#include "idct.h" |
#include "idct.h" |
57 |
|
|
58 |
#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */ |
#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */ |
64 |
|
|
65 |
|
|
66 |
/* global declarations */ |
/* global declarations */ |
67 |
/*void init_idct_int32 (void); */ |
//void init_idct_int32 (void); |
68 |
/*void idct_int32 (short *block); */ |
//void idct_int32 (short *block); |
69 |
|
|
70 |
/* private data */ |
/* private data */ |
71 |
static short iclip[1024]; /* clipping table */ |
static short iclip[1024]; /* clipping table */ |
72 |
static short *iclp; |
static short *iclp; |
73 |
|
|
74 |
/* private prototypes */ |
/* private prototypes */ |
75 |
/*static void idctrow _ANSI_ARGS_((short *blk)); */ |
//static void idctrow _ANSI_ARGS_((short *blk)); |
76 |
/*static void idctcol _ANSI_ARGS_((short *blk)); */ |
//static void idctcol _ANSI_ARGS_((short *blk)); |
77 |
|
|
78 |
/* row (horizontal) IDCT |
/* row (horizontal) IDCT |
79 |
* |
* |
85 |
* c[1..7] = 128*sqrt(2) |
* c[1..7] = 128*sqrt(2) |
86 |
*/ |
*/ |
87 |
|
|
88 |
#if 0 |
/* |
89 |
static void idctrow(blk) |
static void idctrow(blk) |
90 |
short *blk; |
short *blk; |
91 |
{ |
{ |
92 |
int X0, X1, X2, X3, X4, X5, X6, X7, X8; |
int X0, X1, X2, X3, X4, X5, X6, X7, X8; |
93 |
|
|
94 |
/* shortcut */ |
// shortcut |
95 |
if (!((X1 = blk[4]<<11) | (X2 = blk[6]) | (X3 = blk[2]) | |
if (!((X1 = blk[4]<<11) | (X2 = blk[6]) | (X3 = blk[2]) | |
96 |
(X4 = blk[1]) | (X5 = blk[7]) | (X6 = blk[5]) | (X7 = blk[3]))) |
(X4 = blk[1]) | (X5 = blk[7]) | (X6 = blk[5]) | (X7 = blk[3]))) |
97 |
{ |
{ |
99 |
return; |
return; |
100 |
} |
} |
101 |
|
|
102 |
X0 = (blk[0]<<11) + 128; /* for proper rounding in the fourth stage */ |
X0 = (blk[0]<<11) + 128; // for proper rounding in the fourth stage |
103 |
|
|
104 |
/* first stage */ |
// first stage |
105 |
X8 = W7*(X4+X5); |
X8 = W7*(X4+X5); |
106 |
X4 = X8 + (W1-W7)*X4; |
X4 = X8 + (W1-W7)*X4; |
107 |
X5 = X8 - (W1+W7)*X5; |
X5 = X8 - (W1+W7)*X5; |
109 |
X6 = X8 - (W3-W5)*X6; |
X6 = X8 - (W3-W5)*X6; |
110 |
X7 = X8 - (W3+W5)*X7; |
X7 = X8 - (W3+W5)*X7; |
111 |
|
|
112 |
/* second stage */ |
// second stage |
113 |
X8 = X0 + X1; |
X8 = X0 + X1; |
114 |
X0 -= X1; |
X0 -= X1; |
115 |
X1 = W6*(X3+X2); |
X1 = W6*(X3+X2); |
120 |
X6 = X5 + X7; |
X6 = X5 + X7; |
121 |
X5 -= X7; |
X5 -= X7; |
122 |
|
|
123 |
/* third stage */ |
// third stage |
124 |
X7 = X8 + X3; |
X7 = X8 + X3; |
125 |
X8 -= X3; |
X8 -= X3; |
126 |
X3 = X0 + X2; |
X3 = X0 + X2; |
128 |
X2 = (181*(X4+X5)+128)>>8; |
X2 = (181*(X4+X5)+128)>>8; |
129 |
X4 = (181*(X4-X5)+128)>>8; |
X4 = (181*(X4-X5)+128)>>8; |
130 |
|
|
131 |
/* fourth stage */ |
// fourth stage |
132 |
blk[0] = (X7+X1)>>8; |
blk[0] = (X7+X1)>>8; |
133 |
blk[1] = (X3+X2)>>8; |
blk[1] = (X3+X2)>>8; |
134 |
blk[2] = (X0+X4)>>8; |
blk[2] = (X0+X4)>>8; |
137 |
blk[5] = (X0-X4)>>8; |
blk[5] = (X0-X4)>>8; |
138 |
blk[6] = (X3-X2)>>8; |
blk[6] = (X3-X2)>>8; |
139 |
blk[7] = (X7-X1)>>8; |
blk[7] = (X7-X1)>>8; |
140 |
} |
}*/ |
|
#endif |
|
141 |
|
|
142 |
/* column (vertical) IDCT |
/* column (vertical) IDCT |
143 |
* |
* |
148 |
* where: c[0] = 1/1024 |
* where: c[0] = 1/1024 |
149 |
* c[1..7] = (1/1024)*sqrt(2) |
* c[1..7] = (1/1024)*sqrt(2) |
150 |
*/ |
*/ |
151 |
#if 0 |
/* |
152 |
static void idctcol(blk) |
static void idctcol(blk) |
153 |
short *blk; |
short *blk; |
154 |
{ |
{ |
155 |
int X0, X1, X2, X3, X4, X5, X6, X7, X8; |
int X0, X1, X2, X3, X4, X5, X6, X7, X8; |
156 |
|
|
157 |
/* shortcut */ |
// shortcut |
158 |
if (!((X1 = (blk[8*4]<<8)) | (X2 = blk[8*6]) | (X3 = blk[8*2]) | |
if (!((X1 = (blk[8*4]<<8)) | (X2 = blk[8*6]) | (X3 = blk[8*2]) | |
159 |
(X4 = blk[8*1]) | (X5 = blk[8*7]) | (X6 = blk[8*5]) | (X7 = blk[8*3]))) |
(X4 = blk[8*1]) | (X5 = blk[8*7]) | (X6 = blk[8*5]) | (X7 = blk[8*3]))) |
160 |
{ |
{ |
165 |
|
|
166 |
X0 = (blk[8*0]<<8) + 8192; |
X0 = (blk[8*0]<<8) + 8192; |
167 |
|
|
168 |
/* first stage */ |
// first stage |
169 |
X8 = W7*(X4+X5) + 4; |
X8 = W7*(X4+X5) + 4; |
170 |
X4 = (X8+(W1-W7)*X4)>>3; |
X4 = (X8+(W1-W7)*X4)>>3; |
171 |
X5 = (X8-(W1+W7)*X5)>>3; |
X5 = (X8-(W1+W7)*X5)>>3; |
173 |
X6 = (X8-(W3-W5)*X6)>>3; |
X6 = (X8-(W3-W5)*X6)>>3; |
174 |
X7 = (X8-(W3+W5)*X7)>>3; |
X7 = (X8-(W3+W5)*X7)>>3; |
175 |
|
|
176 |
/* second stage */ |
// second stage |
177 |
X8 = X0 + X1; |
X8 = X0 + X1; |
178 |
X0 -= X1; |
X0 -= X1; |
179 |
X1 = W6*(X3+X2) + 4; |
X1 = W6*(X3+X2) + 4; |
184 |
X6 = X5 + X7; |
X6 = X5 + X7; |
185 |
X5 -= X7; |
X5 -= X7; |
186 |
|
|
187 |
/* third stage */ |
// third stage |
188 |
X7 = X8 + X3; |
X7 = X8 + X3; |
189 |
X8 -= X3; |
X8 -= X3; |
190 |
X3 = X0 + X2; |
X3 = X0 + X2; |
192 |
X2 = (181*(X4+X5)+128)>>8; |
X2 = (181*(X4+X5)+128)>>8; |
193 |
X4 = (181*(X4-X5)+128)>>8; |
X4 = (181*(X4-X5)+128)>>8; |
194 |
|
|
195 |
/* fourth stage */ |
// fourth stage |
196 |
blk[8*0] = iclp[(X7+X1)>>14]; |
blk[8*0] = iclp[(X7+X1)>>14]; |
197 |
blk[8*1] = iclp[(X3+X2)>>14]; |
blk[8*1] = iclp[(X3+X2)>>14]; |
198 |
blk[8*2] = iclp[(X0+X4)>>14]; |
blk[8*2] = iclp[(X0+X4)>>14]; |
201 |
blk[8*5] = iclp[(X0-X4)>>14]; |
blk[8*5] = iclp[(X0-X4)>>14]; |
202 |
blk[8*6] = iclp[(X3-X2)>>14]; |
blk[8*6] = iclp[(X3-X2)>>14]; |
203 |
blk[8*7] = iclp[(X7-X1)>>14]; |
blk[8*7] = iclp[(X7-X1)>>14]; |
204 |
} |
}*/ |
|
#endif |
|
205 |
|
|
206 |
/* function pointer */ |
// function pointer |
207 |
idctFuncPtr idct; |
idctFuncPtr idct; |
208 |
|
|
209 |
/* two dimensional inverse discrete cosine transform */ |
/* two dimensional inverse discrete cosine transform */ |
210 |
/*void j_rev_dct(block) */ |
//void j_rev_dct(block) |
211 |
/*short *block; */ |
//short *block; |
212 |
void |
void |
213 |
idct_int32(short *const block) |
idct_int32(short *const block) |
214 |
{ |
{ |
215 |
|
|
216 |
/* idct_int32_init() must be called before the first call to this function! */ |
// idct_int32_init() must be called before the first call to this function! |
217 |
|
|
218 |
|
|
219 |
/*int i; |
/*int i; |
229 |
static long X0, X1, X2, X3, X4, X5, X6, X7, X8; |
static long X0, X1, X2, X3, X4, X5, X6, X7, X8; |
230 |
|
|
231 |
|
|
232 |
for (i = 0; i < 8; i++) /* idct rows */ |
for (i = 0; i < 8; i++) // idct rows |
233 |
{ |
{ |
234 |
blk = block + (i << 3); |
blk = block + (i << 3); |
235 |
if (! |
if (! |
241 |
continue; |
continue; |
242 |
} |
} |
243 |
|
|
244 |
X0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */ |
X0 = (blk[0] << 11) + 128; // for proper rounding in the fourth stage |
245 |
|
|
246 |
/* first stage */ |
// first stage |
247 |
X8 = W7 * (X4 + X5); |
X8 = W7 * (X4 + X5); |
248 |
X4 = X8 + (W1 - W7) * X4; |
X4 = X8 + (W1 - W7) * X4; |
249 |
X5 = X8 - (W1 + W7) * X5; |
X5 = X8 - (W1 + W7) * X5; |
251 |
X6 = X8 - (W3 - W5) * X6; |
X6 = X8 - (W3 - W5) * X6; |
252 |
X7 = X8 - (W3 + W5) * X7; |
X7 = X8 - (W3 + W5) * X7; |
253 |
|
|
254 |
/* second stage */ |
// second stage |
255 |
X8 = X0 + X1; |
X8 = X0 + X1; |
256 |
X0 -= X1; |
X0 -= X1; |
257 |
X1 = W6 * (X3 + X2); |
X1 = W6 * (X3 + X2); |
262 |
X6 = X5 + X7; |
X6 = X5 + X7; |
263 |
X5 -= X7; |
X5 -= X7; |
264 |
|
|
265 |
/* third stage */ |
// third stage |
266 |
X7 = X8 + X3; |
X7 = X8 + X3; |
267 |
X8 -= X3; |
X8 -= X3; |
268 |
X3 = X0 + X2; |
X3 = X0 + X2; |
270 |
X2 = (181 * (X4 + X5) + 128) >> 8; |
X2 = (181 * (X4 + X5) + 128) >> 8; |
271 |
X4 = (181 * (X4 - X5) + 128) >> 8; |
X4 = (181 * (X4 - X5) + 128) >> 8; |
272 |
|
|
273 |
/* fourth stage */ |
// fourth stage |
274 |
|
|
275 |
blk[0] = (short) ((X7 + X1) >> 8); |
blk[0] = (short) ((X7 + X1) >> 8); |
276 |
blk[1] = (short) ((X3 + X2) >> 8); |
blk[1] = (short) ((X3 + X2) >> 8); |
281 |
blk[6] = (short) ((X3 - X2) >> 8); |
blk[6] = (short) ((X3 - X2) >> 8); |
282 |
blk[7] = (short) ((X7 - X1) >> 8); |
blk[7] = (short) ((X7 - X1) >> 8); |
283 |
|
|
284 |
} /* end for ( i = 0; i < 8; ++i ) IDCT-rows */ |
} // end for ( i = 0; i < 8; ++i ) IDCT-rows |
285 |
|
|
286 |
|
|
287 |
|
|
288 |
for (i = 0; i < 8; i++) /* idct columns */ |
for (i = 0; i < 8; i++) // idct columns |
289 |
{ |
{ |
290 |
blk = block + i; |
blk = block + i; |
291 |
/* shortcut */ |
// shortcut |
292 |
if (! |
if (! |
293 |
((X1 = (blk[8 * 4] << 8)) | (X2 = blk[8 * 6]) | (X3 = |
((X1 = (blk[8 * 4] << 8)) | (X2 = blk[8 * 6]) | (X3 = |
294 |
blk[8 * |
blk[8 * |
304 |
|
|
305 |
X0 = (blk[8 * 0] << 8) + 8192; |
X0 = (blk[8 * 0] << 8) + 8192; |
306 |
|
|
307 |
/* first stage */ |
// first stage |
308 |
X8 = W7 * (X4 + X5) + 4; |
X8 = W7 * (X4 + X5) + 4; |
309 |
X4 = (X8 + (W1 - W7) * X4) >> 3; |
X4 = (X8 + (W1 - W7) * X4) >> 3; |
310 |
X5 = (X8 - (W1 + W7) * X5) >> 3; |
X5 = (X8 - (W1 + W7) * X5) >> 3; |
312 |
X6 = (X8 - (W3 - W5) * X6) >> 3; |
X6 = (X8 - (W3 - W5) * X6) >> 3; |
313 |
X7 = (X8 - (W3 + W5) * X7) >> 3; |
X7 = (X8 - (W3 + W5) * X7) >> 3; |
314 |
|
|
315 |
/* second stage */ |
// second stage |
316 |
X8 = X0 + X1; |
X8 = X0 + X1; |
317 |
X0 -= X1; |
X0 -= X1; |
318 |
X1 = W6 * (X3 + X2) + 4; |
X1 = W6 * (X3 + X2) + 4; |
323 |
X6 = X5 + X7; |
X6 = X5 + X7; |
324 |
X5 -= X7; |
X5 -= X7; |
325 |
|
|
326 |
/* third stage */ |
// third stage |
327 |
X7 = X8 + X3; |
X7 = X8 + X3; |
328 |
X8 -= X3; |
X8 -= X3; |
329 |
X3 = X0 + X2; |
X3 = X0 + X2; |
331 |
X2 = (181 * (X4 + X5) + 128) >> 8; |
X2 = (181 * (X4 + X5) + 128) >> 8; |
332 |
X4 = (181 * (X4 - X5) + 128) >> 8; |
X4 = (181 * (X4 - X5) + 128) >> 8; |
333 |
|
|
334 |
/* fourth stage */ |
// fourth stage |
335 |
blk[8 * 0] = iclp[(X7 + X1) >> 14]; |
blk[8 * 0] = iclp[(X7 + X1) >> 14]; |
336 |
blk[8 * 1] = iclp[(X3 + X2) >> 14]; |
blk[8 * 1] = iclp[(X3 + X2) >> 14]; |
337 |
blk[8 * 2] = iclp[(X0 + X4) >> 14]; |
blk[8 * 2] = iclp[(X0 + X4) >> 14]; |
342 |
blk[8 * 7] = iclp[(X7 - X1) >> 14]; |
blk[8 * 7] = iclp[(X7 - X1) >> 14]; |
343 |
} |
} |
344 |
|
|
345 |
} /* end function idct_int32(block) */ |
} // end function idct_int32(block) |
346 |
|
|
347 |
|
|
348 |
/*void */ |
//void |
349 |
/*idct_int32_init() */ |
//idct_int32_init() |
350 |
void |
void |
351 |
idct_int32_init(void) |
idct_int32_init() |
352 |
{ |
{ |
353 |
int i; |
int i; |
354 |
|
|