Annotation of /trunk/xvidcore/src/image/qpel.c

Revision 1530 - (view) (download)

1 :	edgomez	1382	/*****************************************************************************
2 :			*
3 :			* XVID MPEG-4 VIDEO CODEC
4 :			* - QPel interpolation -
5 :			*
6 :			* Copyright(C) 2003 Pascal Massimino <skal@planet-d.net>
7 :			*
8 :			* This program is free software ; you can redistribute it and/or modify
9 :			* it under the terms of the GNU General Public License as published by
10 :			* the Free Software Foundation ; either version 2 of the License, or
11 :			* (at your option) any later version.
12 :			*
13 :			* This program is distributed in the hope that it will be useful,
14 :			* but WITHOUT ANY WARRANTY ; without even the implied warranty of
15 :			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 :			* GNU General Public License for more details.
17 :			*
18 :			* You should have received a copy of the GNU General Public License
19 :			* along with this program ; if not, write to the Free Software
20 :			* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 :			*
22 :	edgomez	1530	* $Id: qpel.c,v 1.3 2004-08-10 21:58:55 edgomez Exp $
23 :	edgomez	1382	*
24 :			****************************************************************************/
25 :
26 :			#ifndef XVID_AUTO_INCLUDE
27 :
28 :			#include "../portab.h"
29 :			#include "qpel.h"
30 :
31 :			/* Quarterpel FIR definition
32 :			****************************************************************************/
33 :
/* Quarter-pel FIR weights for 8-sample blocks.
 * Row i is the contribution of source sample i (0..8) to each of the
 * 8 outputs (column k). Every column adds up to 32, which is why the
 * filters normalise with ">> 5". */
static const int32_t FIR_Tab_8[9][8] = {
  /* src 0 */ { 14,  -3,   2,  -1,   0,   0,   0,   0 },
  /* src 1 */ { 23,  19,  -6,   3,  -1,   0,   0,   0 },
  /* src 2 */ { -7,  20,  20,  -6,   3,  -1,   0,   0 },
  /* src 3 */ {  3,  -6,  20,  20,  -6,   3,  -1,   0 },
  /* src 4 */ { -1,   3,  -6,  20,  20,  -6,   3,  -1 },
  /* src 5 */ {  0,  -1,   3,  -6,  20,  20,  -6,   3 },
  /* src 6 */ {  0,   0,  -1,   3,  -6,  20,  20,  -7 },
  /* src 7 */ {  0,   0,   0,  -1,   3,  -6,  19,  23 },
  /* src 8 */ {  0,   0,   0,   0,  -1,   2,  -3,  14 }
};
45 :
/* Quarter-pel FIR weights for 16-sample blocks.
 * Row i is the contribution of source sample i (0..16) to each of the
 * 16 outputs (column k). Every column adds up to 32. */
static const int32_t FIR_Tab_16[17][16] = {
  /* src  0 */ { 14, -3,  2, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* src  1 */ { 23, 19, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* src  2 */ { -7, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* src  3 */ {  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* src  4 */ { -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* src  5 */ {  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0 },
  /* src  6 */ {  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0 },
  /* src  7 */ {  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0 },
  /* src  8 */ {  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0 },
  /* src  9 */ {  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0 },
  /* src 10 */ {  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0 },
  /* src 11 */ {  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0 },
  /* src 12 */ {  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1 },
  /* src 13 */ {  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3 },
  /* src 14 */ {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -7 },
  /* src 15 */ {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 19, 23 },
  /* src 16 */ {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  2, -3, 14 }
};
65 :
66 :			/* Implementation
67 :			****************************************************************************/
68 :
69 :			#define XVID_AUTO_INCLUDE
70 :	edgomez	1530	/* First auto include this file to generate reference code for SIMD versions
71 :			* This set of functions are good for educational purpose, because they're
72 :			* straightforward to understand, use loops and so on... But obviously they
73 :			* sux when it comes to speed */
74 :			#define REFERENCE_CODE
75 :	edgomez	1382
76 :			/* 16x? filters */
77 :
78 :			#define SIZE 16
79 :			#define TABLE FIR_Tab_16
80 :
81 :			#define STORE(d,s) (d) = (s)
82 :	edgomez	1530	#define FUNC_H H_Pass_16_C_ref
83 :			#define FUNC_V V_Pass_16_C_ref
84 :			#define FUNC_HA H_Pass_Avrg_16_C_ref
85 :			#define FUNC_VA V_Pass_Avrg_16_C_ref
86 :			#define FUNC_HA_UP H_Pass_Avrg_Up_16_C_ref
87 :			#define FUNC_VA_UP V_Pass_Avrg_Up_16_C_ref
88 :
89 :			#include "qpel.c"
90 :
91 :			/* note: B-frame always uses Rnd=0... */
92 :			#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
93 :			#define FUNC_H H_Pass_16_Add_C_ref
94 :			#define FUNC_V V_Pass_16_Add_C_ref
95 :			#define FUNC_HA H_Pass_Avrg_16_Add_C_ref
96 :			#define FUNC_VA V_Pass_Avrg_16_Add_C_ref
97 :			#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C_ref
98 :			#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C_ref
99 :
100 :			#include "qpel.c"
101 :
102 :			#undef SIZE
103 :			#undef TABLE
104 :
105 :			/* 8x? filters */
106 :
107 :			#define SIZE 8
108 :			#define TABLE FIR_Tab_8
109 :
110 :			#define STORE(d,s) (d) = (s)
111 :			#define FUNC_H H_Pass_8_C_ref
112 :			#define FUNC_V V_Pass_8_C_ref
113 :			#define FUNC_HA H_Pass_Avrg_8_C_ref
114 :			#define FUNC_VA V_Pass_Avrg_8_C_ref
115 :			#define FUNC_HA_UP H_Pass_Avrg_Up_8_C_ref
116 :			#define FUNC_VA_UP V_Pass_Avrg_Up_8_C_ref
117 :
118 :			#include "qpel.c"
119 :
120 :			/* note: B-frame always uses Rnd=0... */
121 :			#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
122 :			#define FUNC_H H_Pass_8_Add_C_ref
123 :			#define FUNC_V V_Pass_8_Add_C_ref
124 :			#define FUNC_HA H_Pass_Avrg_8_Add_C_ref
125 :			#define FUNC_VA V_Pass_Avrg_8_Add_C_ref
126 :			#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C_ref
127 :			#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C_ref
128 :
129 :			#include "qpel.c"
130 :
131 :			#undef SIZE
132 :			#undef TABLE
133 :
134 :			/* Then we define more optimized C version where loops are unrolled, where
135 :			* FIR coeffcients are not read from memory but are hardcoded in instructions
136 :			* They should be faster */
137 :			#undef REFERENCE_CODE
138 :
139 :			/* 16x? filters */
140 :
141 :			#define SIZE 16
142 :
143 :			#define STORE(d,s) (d) = (s)
144 :	edgomez	1382	#define FUNC_H H_Pass_16_C
145 :			#define FUNC_V V_Pass_16_C
146 :			#define FUNC_HA H_Pass_Avrg_16_C
147 :			#define FUNC_VA V_Pass_Avrg_16_C
148 :			#define FUNC_HA_UP H_Pass_Avrg_Up_16_C
149 :			#define FUNC_VA_UP V_Pass_Avrg_Up_16_C
150 :
151 :	edgomez	1530	#include "qpel.c"
152 :	edgomez	1382
153 :			/* note: B-frame always uses Rnd=0... */
154 :			#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
155 :			#define FUNC_H H_Pass_16_Add_C
156 :			#define FUNC_V V_Pass_16_Add_C
157 :			#define FUNC_HA H_Pass_Avrg_16_Add_C
158 :			#define FUNC_VA V_Pass_Avrg_16_Add_C
159 :			#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C
160 :			#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C
161 :
162 :	edgomez	1530	#include "qpel.c"
163 :	edgomez	1382
164 :			#undef SIZE
165 :			#undef TABLE
166 :
167 :			/* 8x? filters */
168 :
169 :			#define SIZE 8
170 :			#define TABLE FIR_Tab_8
171 :
172 :			#define STORE(d,s) (d) = (s)
173 :			#define FUNC_H H_Pass_8_C
174 :			#define FUNC_V V_Pass_8_C
175 :			#define FUNC_HA H_Pass_Avrg_8_C
176 :			#define FUNC_VA V_Pass_Avrg_8_C
177 :			#define FUNC_HA_UP H_Pass_Avrg_Up_8_C
178 :			#define FUNC_VA_UP V_Pass_Avrg_Up_8_C
179 :
180 :	edgomez	1530	#include "qpel.c"
181 :	edgomez	1382
182 :			/* note: B-frame always uses Rnd=0... */
183 :			#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
184 :			#define FUNC_H H_Pass_8_Add_C
185 :			#define FUNC_V V_Pass_8_Add_C
186 :			#define FUNC_HA H_Pass_Avrg_8_Add_C
187 :			#define FUNC_VA V_Pass_Avrg_8_Add_C
188 :			#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C
189 :			#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C
190 :
191 :	edgomez	1530	#include "qpel.c"
192 :	edgomez	1382
193 :			#undef SIZE
194 :			#undef TABLE
195 :			#undef XVID_AUTO_INCLUDE
196 :
197 :	edgomez	1530	/* Global scope hooks
198 :	edgomez	1382	****************************************************************************/
199 :
200 :			XVID_QP_FUNCS *xvid_QP_Funcs = 0;
201 :			XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0;
202 :
203 :	edgomez	1530	/* Reference plain C impl. declaration
204 :	edgomez	1382	****************************************************************************/
205 :
206 :	edgomez	1530	XVID_QP_FUNCS xvid_QP_Funcs_C_ref = {
207 :			H_Pass_16_C_ref, H_Pass_Avrg_16_C_ref, H_Pass_Avrg_Up_16_C_ref,
208 :			V_Pass_16_C_ref, V_Pass_Avrg_16_C_ref, V_Pass_Avrg_Up_16_C_ref,
209 :
210 :			H_Pass_8_C_ref, H_Pass_Avrg_8_C_ref, H_Pass_Avrg_Up_8_C_ref,
211 :			V_Pass_8_C_ref, V_Pass_Avrg_8_C_ref, V_Pass_Avrg_Up_8_C_ref
212 :			};
213 :
214 :			XVID_QP_FUNCS xvid_QP_Add_Funcs_C_ref = {
215 :			H_Pass_16_Add_C_ref, H_Pass_Avrg_16_Add_C_ref, H_Pass_Avrg_Up_16_Add_C_ref,
216 :			V_Pass_16_Add_C_ref, V_Pass_Avrg_16_Add_C_ref, V_Pass_Avrg_Up_16_Add_C_ref,
217 :
218 :			H_Pass_8_Add_C_ref, H_Pass_Avrg_8_Add_C_ref, H_Pass_Avrg_Up_8_Add_C_ref,
219 :			V_Pass_8_Add_C_ref, V_Pass_Avrg_8_Add_C_ref, V_Pass_Avrg_Up_8_Add_C_ref
220 :			};
221 :
222 :			/* Plain C impl. declaration (faster than ref one)
223 :			****************************************************************************/
224 :
225 :	edgomez	1382	XVID_QP_FUNCS xvid_QP_Funcs_C = {
226 :			H_Pass_16_C, H_Pass_Avrg_16_C, H_Pass_Avrg_Up_16_C,
227 :			V_Pass_16_C, V_Pass_Avrg_16_C, V_Pass_Avrg_Up_16_C,
228 :
229 :			H_Pass_8_C, H_Pass_Avrg_8_C, H_Pass_Avrg_Up_8_C,
230 :			V_Pass_8_C, V_Pass_Avrg_8_C, V_Pass_Avrg_Up_8_C
231 :			};
232 :
233 :			XVID_QP_FUNCS xvid_QP_Add_Funcs_C = {
234 :			H_Pass_16_Add_C, H_Pass_Avrg_16_Add_C, H_Pass_Avrg_Up_16_Add_C,
235 :			V_Pass_16_Add_C, V_Pass_Avrg_16_Add_C, V_Pass_Avrg_Up_16_Add_C,
236 :
237 :			H_Pass_8_Add_C, H_Pass_Avrg_8_Add_C, H_Pass_Avrg_Up_8_Add_C,
238 :			V_Pass_8_Add_C, V_Pass_Avrg_8_Add_C, V_Pass_Avrg_Up_8_Add_C
239 :			};
240 :
/* mmx impl. declaration (see qpel_mmx.asm)
 ****************************************************************************/

#ifdef ARCH_IS_IA32

/* Plain-store passes, 16 wide. */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_16_mmx);

/* Plain-store passes, 8 wide. */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx);

/* "Add" passes, 16 wide (note the "_Add_16" spelling of these symbols). */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_Add_16_mmx);

/* "Add" passes, 8 wide (these symbols use the "_8_Add" spelling). */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx);

/* MMX filters, plain store. */
XVID_QP_FUNCS xvid_QP_Funcs_mmx = {
  xvid_H_Pass_16_mmx,
  xvid_H_Pass_Avrg_16_mmx,
  xvid_H_Pass_Avrg_Up_16_mmx,
  xvid_V_Pass_16_mmx,
  xvid_V_Pass_Avrg_16_mmx,
  xvid_V_Pass_Avrg_Up_16_mmx,

  xvid_H_Pass_8_mmx,
  xvid_H_Pass_Avrg_8_mmx,
  xvid_H_Pass_Avrg_Up_8_mmx,
  xvid_V_Pass_8_mmx,
  xvid_V_Pass_Avrg_8_mmx,
  xvid_V_Pass_Avrg_Up_8_mmx
};

/* MMX filters, "Add" store. */
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = {
  xvid_H_Pass_Add_16_mmx,
  xvid_H_Pass_Avrg_Add_16_mmx,
  xvid_H_Pass_Avrg_Up_Add_16_mmx,
  xvid_V_Pass_Add_16_mmx,
  xvid_V_Pass_Avrg_Add_16_mmx,
  xvid_V_Pass_Avrg_Up_Add_16_mmx,

  xvid_H_Pass_8_Add_mmx,
  xvid_H_Pass_Avrg_8_Add_mmx,
  xvid_H_Pass_Avrg_Up_8_Add_mmx,
  xvid_V_Pass_8_Add_mmx,
  xvid_V_Pass_Avrg_8_Add_mmx,
  xvid_V_Pass_Avrg_Up_8_Add_mmx
};

#endif /* ARCH_IS_IA32 */
289 :
/* tables for ASM
 ****************************************************************************/

#ifdef ARCH_IS_IA32
/* 8b -> 64b expansion table: one row of four 16-bit words per byte value */
uint16_t xvid_Expand_mmx[256][4];
#endif

/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm)
 *
 * 17 tables, 2K each => 34K
 * Mirroring can be achieved by composing 11 basic tables
 * (for instance: (23,19,-6,3)=(20,20,-6,3)+(3,-1,0,0)).
 * Using symmetries (and bswap) could further reduce
 * the memory to 7 tables (->14K).
 *
 * Each table name lists the magnitudes of its four taps; the signs are
 * applied when xvid_Init_QP() fills the tables. */

int16_t xvid_FIR_1_0_0_0[256][4];
int16_t xvid_FIR_3_1_0_0[256][4];
int16_t xvid_FIR_6_3_1_0[256][4];
int16_t xvid_FIR_14_3_2_1[256][4];
int16_t xvid_FIR_20_6_3_1[256][4];
int16_t xvid_FIR_20_20_6_3[256][4];
int16_t xvid_FIR_23_19_6_3[256][4];
int16_t xvid_FIR_7_20_20_6[256][4];
int16_t xvid_FIR_6_20_20_6[256][4];
int16_t xvid_FIR_6_20_20_7[256][4];
int16_t xvid_FIR_3_6_20_20[256][4];
int16_t xvid_FIR_3_6_19_23[256][4];
int16_t xvid_FIR_1_3_6_20[256][4];
int16_t xvid_FIR_1_2_3_14[256][4];
int16_t xvid_FIR_0_1_3_6[256][4];
int16_t xvid_FIR_0_0_1_3[256][4];
int16_t xvid_FIR_0_0_0_1[256][4];
322 :
/* Fill one 256-entry lookup table: row v holds the products of v with the
 * four taps A, B, C, D, in that order, stored as 16-bit values. */
static void Init_FIR_Table(int16_t Tab[][4],
                           int A, int B, int C, int D)
{
  int16_t (*row)[4] = Tab;
  int v;

  for (v = 0; v < 256; v++, row++) {
    (*row)[0] = (int16_t)(v * A);
    (*row)[1] = (int16_t)(v * B);
    (*row)[2] = (int16_t)(v * C);
    (*row)[3] = (int16_t)(v * D);
  }
}
334 :
335 :
336 :			void xvid_Init_QP()
337 :			{
338 :			#ifdef ARCH_IS_IA32
339 :			int i;
340 :
341 :			for(i=0; i<256; ++i) {
342 :			xvid_Expand_mmx[i][0] = i;
343 :			xvid_Expand_mmx[i][1] = i;
344 :			xvid_Expand_mmx[i][2] = i;
345 :			xvid_Expand_mmx[i][3] = i;
346 :			}
347 :			#endif
348 :
349 :			/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) */
350 :
351 :			Init_FIR_Table(xvid_FIR_1_0_0_0, -1, 0, 0, 0);
352 :			Init_FIR_Table(xvid_FIR_3_1_0_0, 3, -1, 0, 0);
353 :			Init_FIR_Table(xvid_FIR_6_3_1_0, -6, 3, -1, 0);
354 :			Init_FIR_Table(xvid_FIR_14_3_2_1, 14, -3, 2, -1);
355 :			Init_FIR_Table(xvid_FIR_20_6_3_1, 20, -6, 3, -1);
356 :			Init_FIR_Table(xvid_FIR_20_20_6_3, 20, 20, -6, 3);
357 :			Init_FIR_Table(xvid_FIR_23_19_6_3, 23, 19, -6, 3);
358 :			Init_FIR_Table(xvid_FIR_7_20_20_6, -7, 20, 20, -6);
359 :			Init_FIR_Table(xvid_FIR_6_20_20_6, -6, 20, 20, -6);
360 :			Init_FIR_Table(xvid_FIR_6_20_20_7, -6, 20, 20, -7);
361 :			Init_FIR_Table(xvid_FIR_3_6_20_20, 3, -6, 20, 20);
362 :			Init_FIR_Table(xvid_FIR_3_6_19_23, 3, -6, 19, 23);
363 :			Init_FIR_Table(xvid_FIR_1_3_6_20, -1, 3, -6, 20);
364 :			Init_FIR_Table(xvid_FIR_1_2_3_14, -1, 2, -3, 14);
365 :			Init_FIR_Table(xvid_FIR_0_1_3_6, 0, -1, 3, -6);
366 :			Init_FIR_Table(xvid_FIR_0_0_1_3, 0, 0, -1, 3);
367 :			Init_FIR_Table(xvid_FIR_0_0_0_1, 0, 0, 0, -1);
368 :
369 :			}
370 :
371 :			#endif /* !XVID_AUTO_INCLUDE */
372 :
373 :	edgomez	1530	#if defined(XVID_AUTO_INCLUDE) && defined(REFERENCE_CODE)
374 :
375 :	edgomez	1382	/*****************************************************************************
376 :			* "reference" filters impl. in plain C
377 :			****************************************************************************/
378 :
379 :			static
380 :			void FUNC_H(uint8_t Dst, const uint8_t Src, int32_t H, int32_t BpS, int32_t Rnd)
381 :			{
382 :			while(H-->0) {
383 :			int32_t i, k;
384 :			int32_t Sums[SIZE] = { 0 };
385 :			for(i=0; i<=SIZE; ++i)
386 :			for(k=0; k<SIZE; ++k)
387 :			Sums[k] += TABLE[i][k] * Src[i];
388 :
389 :			for(i=0; i<SIZE; ++i) {
390 :			int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
391 :			if (C<0) C = 0; else if (C>255) C = 255;
392 :			STORE(Dst[i], C);
393 :			}
394 :			Src += BpS;
395 :			Dst += BpS;
396 :			}
397 :			}
398 :
399 :			static
400 :			void FUNC_V(uint8_t Dst, const uint8_t Src, int32_t W, int32_t BpS, int32_t Rnd)
401 :			{
402 :			while(W-->0) {
403 :			int32_t i, k;
404 :			int32_t Sums[SIZE] = { 0 };
405 :			const uint8_t *S = Src++;
406 :			uint8_t *D = Dst++;
407 :			for(i=0; i<=SIZE; ++i) {
408 :			for(k=0; k<SIZE; ++k)
409 :			Sums[k] += TABLE[i][k] * S[0];
410 :			S += BpS;
411 :			}
412 :
413 :			for(i=0; i<SIZE; ++i) {
414 :			int32_t C = ( Sums[i] + 16-Rnd )>>5;
415 :			if (C<0) C = 0; else if (C>255) C = 255;
416 :			STORE(D[0], C);
417 :			D += BpS;
418 :			}
419 :			}
420 :			}
421 :
422 :			static
423 :			void FUNC_HA(uint8_t Dst, const uint8_t Src, int32_t H, int32_t BpS, int32_t Rnd)
424 :			{
425 :			while(H-->0) {
426 :			int32_t i, k;
427 :			int32_t Sums[SIZE] = { 0 };
428 :			for(i=0; i<=SIZE; ++i)
429 :			for(k=0; k<SIZE; ++k)
430 :			Sums[k] += TABLE[i][k] * Src[i];
431 :
432 :			for(i=0; i<SIZE; ++i) {
433 :			int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
434 :			if (C<0) C = 0; else if (C>255) C = 255;
435 :			C = (C+Src[i]+1-Rnd) >> 1;
436 :			STORE(Dst[i], C);
437 :			}
438 :			Src += BpS;
439 :			Dst += BpS;
440 :			}
441 :			}
442 :
443 :			static
444 :			void FUNC_HA_UP(uint8_t Dst, const uint8_t Src, int32_t H, int32_t BpS, int32_t Rnd)
445 :			{
446 :			while(H-->0) {
447 :			int32_t i, k;
448 :			int32_t Sums[SIZE] = { 0 };
449 :			for(i=0; i<=SIZE; ++i)
450 :			for(k=0; k<SIZE; ++k)
451 :			Sums[k] += TABLE[i][k] * Src[i];
452 :
453 :			for(i=0; i<SIZE; ++i) {
454 :			int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
455 :			if (C<0) C = 0; else if (C>255) C = 255;
456 :			C = (C+Src[i+1]+1-Rnd) >> 1;
457 :			STORE(Dst[i], C);
458 :			}
459 :			Src += BpS;
460 :			Dst += BpS;
461 :			}
462 :			}
463 :
464 :			static
465 :			void FUNC_VA(uint8_t Dst, const uint8_t Src, int32_t W, int32_t BpS, int32_t Rnd)
466 :			{
467 :			while(W-->0) {
468 :			int32_t i, k;
469 :			int32_t Sums[SIZE] = { 0 };
470 :			const uint8_t *S = Src;
471 :			uint8_t *D = Dst;
472 :
473 :			for(i=0; i<=SIZE; ++i) {
474 :			for(k=0; k<SIZE; ++k)
475 :			Sums[k] += TABLE[i][k] * S[0];
476 :			S += BpS;
477 :			}
478 :
479 :			S = Src;
480 :			for(i=0; i<SIZE; ++i) {
481 :			int32_t C = ( Sums[i] + 16-Rnd )>>5;
482 :			if (C<0) C = 0; else if (C>255) C = 255;
483 :			C = ( C+S[0]+1-Rnd ) >> 1;
484 :			STORE(D[0], C);
485 :			D += BpS;
486 :			S += BpS;
487 :			}
488 :			Src++;
489 :			Dst++;
490 :			}
491 :			}
492 :
493 :			static
494 :			void FUNC_VA_UP(uint8_t Dst, const uint8_t Src, int32_t W, int32_t BpS, int32_t Rnd)
495 :			{
496 :			while(W-->0) {
497 :			int32_t i, k;
498 :			int32_t Sums[SIZE] = { 0 };
499 :			const uint8_t *S = Src;
500 :			uint8_t *D = Dst;
501 :
502 :			for(i=0; i<=SIZE; ++i) {
503 :			for(k=0; k<SIZE; ++k)
504 :			Sums[k] += TABLE[i][k] * S[0];
505 :			S += BpS;
506 :			}
507 :
508 :			S = Src + BpS;
509 :			for(i=0; i<SIZE; ++i) {
510 :			int32_t C = ( Sums[i] + 16-Rnd )>>5;
511 :			if (C<0) C = 0; else if (C>255) C = 255;
512 :			C = ( C+S[0]+1-Rnd ) >> 1;
513 :			STORE(D[0], C);
514 :			D += BpS;
515 :			S += BpS;
516 :			}
517 :			Dst++;
518 :			Src++;
519 :			}
520 :			}
521 :
522 :			#undef STORE
523 :			#undef FUNC_H
524 :			#undef FUNC_V
525 :			#undef FUNC_HA
526 :			#undef FUNC_VA
527 :			#undef FUNC_HA_UP
528 :			#undef FUNC_VA_UP
529 :
530 :	edgomez	1530	#elif defined(XVID_AUTO_INCLUDE) && !defined(REFERENCE_CODE)
531 :
532 :			/*****************************************************************************
533 :			* "fast" filters impl. in plain C
534 :			****************************************************************************/
535 :
/* Clamp the un-normalised sum C to the 0..255 range (dividing by 32 on the
 * way) and hand it to STORE for destination D. C must be a plain variable:
 * it is overwritten. */
#define CLIP_STORE(D,C) \
  do { \
    if ((C)<0) (C) = 0; else if ((C)>(255<<5)) (C) = 255; else (C) = (C)>>5; \
    STORE(D, C); \
  } while (0)

/* Unrolled horizontal pass.
 * Each output is written out as an explicit sum whose taps are the matching
 * column of FIR_Tab_16 (SIZE==16) or FIR_Tab_8 (otherwise); symmetric taps
 * are factored to save multiplications.
 *   Dst, Src : first byte of the destination / source row
 *   H        : number of rows to process
 *   BpS      : distance in bytes between two rows, for both Dst and Src
 *   RND      : value subtracted from the rounding constant 16 */
static void
FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
    CLIP_STORE(Dst[ 0],C);
    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
    CLIP_STORE(Dst[ 1],C);
    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
    CLIP_STORE(Dst[ 2],C);
    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
    CLIP_STORE(Dst[ 3],C);
    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
    CLIP_STORE(Dst[ 4],C);
    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
    CLIP_STORE(Dst[ 5],C);
    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
    CLIP_STORE(Dst[ 6],C);
    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
    CLIP_STORE(Dst[ 7],C);
    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
    CLIP_STORE(Dst[ 8],C);
    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
    CLIP_STORE(Dst[ 9],C);
    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
    CLIP_STORE(Dst[10],C);
    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
    CLIP_STORE(Dst[11],C);
    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
    CLIP_STORE(Dst[12],C);
    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
    CLIP_STORE(Dst[13],C);
    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
    CLIP_STORE(Dst[14],C);
    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
    CLIP_STORE(Dst[15],C);
    Src += BpS;
    Dst += BpS;
  }
#else
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
    CLIP_STORE(Dst[0],C);
    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
    CLIP_STORE(Dst[1],C);
    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
    CLIP_STORE(Dst[2],C);
    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
    CLIP_STORE(Dst[3],C);
    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
    CLIP_STORE(Dst[4],C);
    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
    CLIP_STORE(Dst[5],C);
    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
    CLIP_STORE(Dst[6],C);
    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
    CLIP_STORE(Dst[7],C);
    Src += BpS;
    Dst += BpS;
  }
#endif
}
#undef CLIP_STORE
606 :
/* Clamp the un-normalised sum C to 0..255 (dividing by 32 on the way),
 * average it with source byte i, (C + Src[i] + 1 - RND) >> 1, and hand the
 * result to STORE for Dst[i]. C must be a plain variable: it is overwritten.
 * Uses Src, Dst and RND from the enclosing function. */
#define CLIP_STORE(i,C) \
  do { \
    if ((C)<0) (C) = 0; else if ((C)>(255<<5)) (C) = 255; else (C) = (C)>>5; \
    (C) = ((C)+Src[i]+1-RND) >> 1; \
    STORE(Dst[i], C); \
  } while (0)

/* Unrolled horizontal pass with averaging against the source byte at the
 * same position. The tap sums are the columns of FIR_Tab_16 (SIZE==16) or
 * FIR_Tab_8 (otherwise), written out explicitly.
 *   Dst, Src : first byte of the destination / source row
 *   H        : number of rows to process
 *   BpS      : distance in bytes between two rows, for both Dst and Src
 *   RND      : subtracted from both rounding constants */
static void
FUNC_HA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
    CLIP_STORE( 1,C);
    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
    CLIP_STORE( 2,C);
    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
    CLIP_STORE( 3,C);
    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
    CLIP_STORE( 4,C);
    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
    CLIP_STORE( 5,C);
    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
    CLIP_STORE( 6,C);
    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
    CLIP_STORE( 7,C);
    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
    CLIP_STORE( 8,C);
    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
    CLIP_STORE( 9,C);
    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
    CLIP_STORE(10,C);
    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
    CLIP_STORE(11,C);
    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
    CLIP_STORE(12,C);
    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
    CLIP_STORE(13,C);
    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
    CLIP_STORE(14,C);
    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
    CLIP_STORE(15,C);
    Src += BpS;
    Dst += BpS;
  }
#else
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
    CLIP_STORE(1,C);
    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
    CLIP_STORE(2,C);
    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
    CLIP_STORE(3,C);
    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
    CLIP_STORE(4,C);
    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
    CLIP_STORE(5,C);
    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
    CLIP_STORE(6,C);
    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
    CLIP_STORE(7,C);
    Src += BpS;
    Dst += BpS;
  }
#endif
}
#undef CLIP_STORE
678 :
/* Clamp the un-normalised sum C to 0..255 (dividing by 32 on the way),
 * average it with source byte i+1, (C + Src[i+1] + 1 - RND) >> 1, and hand
 * the result to STORE for Dst[i]. C must be a plain variable: it is
 * overwritten. Uses Src, Dst and RND from the enclosing function. */
#define CLIP_STORE(i,C) \
  do { \
    if ((C)<0) (C) = 0; else if ((C)>(255<<5)) (C) = 255; else (C) = (C)>>5; \
    (C) = ((C)+Src[(i)+1]+1-RND) >> 1; \
    STORE(Dst[i], C); \
  } while (0)

/* Unrolled horizontal pass with averaging against the source byte one
 * position to the right. The tap sums are the columns of FIR_Tab_16
 * (SIZE==16) or FIR_Tab_8 (otherwise), written out explicitly.
 *   Dst, Src : first byte of the destination / source row
 *   H        : number of rows to process
 *   BpS      : distance in bytes between two rows, for both Dst and Src
 *   RND      : subtracted from both rounding constants */
static void
FUNC_HA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
    CLIP_STORE( 1,C);
    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
    CLIP_STORE( 2,C);
    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
    CLIP_STORE( 3,C);
    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
    CLIP_STORE( 4,C);
    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
    CLIP_STORE( 5,C);
    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
    CLIP_STORE( 6,C);
    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
    CLIP_STORE( 7,C);
    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
    CLIP_STORE( 8,C);
    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
    CLIP_STORE( 9,C);
    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
    CLIP_STORE(10,C);
    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
    CLIP_STORE(11,C);
    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
    CLIP_STORE(12,C);
    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
    CLIP_STORE(13,C);
    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
    CLIP_STORE(14,C);
    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
    CLIP_STORE(15,C);
    Src += BpS;
    Dst += BpS;
  }
#else
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
    CLIP_STORE(1,C);
    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
    CLIP_STORE(2,C);
    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
    CLIP_STORE(3,C);
    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
    CLIP_STORE(4,C);
    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
    CLIP_STORE(5,C);
    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
    CLIP_STORE(6,C);
    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
    CLIP_STORE(7,C);
    Src += BpS;
    Dst += BpS;
  }
#endif
}
#undef CLIP_STORE
750 :
/*****************************************************************************
 * vertical passes
 ****************************************************************************/
/* Note: for vertical passes, width (W) needs only be 8 or 16. */

/* Clamp the un-normalised sum C to the 0..255 range (dividing by 32 on the
 * way) and hand it to STORE for destination D. C must be a plain variable:
 * it is overwritten. */
#define CLIP_STORE(D,C) \
  do { \
    if ((C)<0) (C) = 0; else if ((C)>(255<<5)) (C) = 255; else (C) = (C)>>5; \
    STORE(D, C); \
  } while (0)

/* Unrolled vertical pass.
 * Same tap sums as the unrolled horizontal pass, but sample k of a column
 * is read at Src[BpS*k] and output k is written at Dst[BpS*k].
 *   Dst, Src : top byte of the first destination / source column
 *   H        : number of columns to process (the loop advances Src and Dst
 *              by one byte per iteration, despite the parameter's name)
 *   BpS      : distance in bytes between two rows, for both Dst and Src
 *   RND      : value subtracted from the rounding constant 16 */
static void
FUNC_V(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(Dst[BpS* 0],C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(Dst[BpS* 1],C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(Dst[BpS* 2],C);
    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE(Dst[BpS* 3],C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE(Dst[BpS* 4],C);
    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE(Dst[BpS* 5],C);
    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE(Dst[BpS* 6],C);
    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE(Dst[BpS* 7],C);
    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE(Dst[BpS* 8],C);
    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE(Dst[BpS* 9],C);
    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(Dst[BpS*10],C);
    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(Dst[BpS*11],C);
    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(Dst[BpS*12],C);
    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
    CLIP_STORE(Dst[BpS*13],C);
    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(Dst[BpS*14],C);
    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(Dst[BpS*15],C);
    Src += 1;
    Dst += 1;
  }
#else
  while(H-->0) {
    int C;
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(Dst[BpS*0],C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(Dst[BpS*1],C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(Dst[BpS*2],C);
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(Dst[BpS*3],C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(Dst[BpS*4],C);
    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
    CLIP_STORE(Dst[BpS*5],C);
    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(Dst[BpS*6],C);
    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(Dst[BpS*7],C);
    Src += 1;
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
826 :
/*
 * CLIP_STORE(i,C): scale the filter sum C down by 5 bits and clip it to
 * 0..255, average the result with the source sample of the same row
 * (Src[BpS*i]) using rounding term 1-RND, then pass it to STORE for
 * Dst[BpS*i].  Expects Src, Dst, BpS and RND to be in scope.
 * Wrapped in do/while(0) so that it expands to a single statement.
 */
#define CLIP_STORE(i,C) \
  do { \
    if ((C)<0) (C) = 0; else if ((C)>(255<<5)) (C) = 255; else (C) = (C)>>5; \
    (C) = ((C)+Src[BpS*(i)]+1-RND) >> 1; \
    STORE(Dst[BpS*(i)], (C)); \
  } while (0)

/*
 * Vertical FIR pass, averaged with the co-located source sample.
 *
 * Each loop iteration handles one column: it reads SIZE+1 samples from Src
 * at offsets BpS*0 .. BpS*SIZE, filters them into SIZE values, and lets
 * CLIP_STORE average value i with Src[BpS*i] before storing to Dst[BpS*i].
 * Src and Dst then advance by one byte to the next column.
 *
 *   Dst  destination pointer
 *   Src  source pointer (read only)
 *   H    number of columns to process (loop count)
 *   BpS  distance, in bytes, between two vertically adjacent samples
 *   RND  rounding control; used both in the filter bias (16-RND) and in
 *        the averaging step (1-RND)
 */
static void
FUNC_VA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    /* leading edge: asymmetric taps */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE( 0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE( 1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE( 2,C);
    /* interior: symmetric 8-tap filter */
    C = 16-RND - (Src[BpS* 0]+Src[BpS* 7]) + 3*(Src[BpS* 1]+Src[BpS* 6]) - 6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE( 3,C);
    C = 16-RND - (Src[BpS* 1]+Src[BpS* 8]) + 3*(Src[BpS* 2]+Src[BpS* 7]) - 6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE( 4,C);
    C = 16-RND - (Src[BpS* 2]+Src[BpS* 9]) + 3*(Src[BpS* 3]+Src[BpS* 8]) - 6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE( 5,C);
    C = 16-RND - (Src[BpS* 3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9]) - 6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE( 6,C);
    C = 16-RND - (Src[BpS* 4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10]) - 6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE( 7,C);
    C = 16-RND - (Src[BpS* 5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11]) - 6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE( 8,C);
    C = 16-RND - (Src[BpS* 6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12]) - 6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE( 9,C);
    C = 16-RND - (Src[BpS* 7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13]) - 6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(10,C);
    C = 16-RND - (Src[BpS* 8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14]) - 6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(11,C);
    C = 16-RND - (Src[BpS* 9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15]) - 6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(12,C);
    /* trailing edge: asymmetric taps, mirror of the leading edge */
    C = 16-RND - Src[BpS*10] + 3*Src[BpS*11] - 6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) + 2*Src[BpS*16];
    CLIP_STORE(13,C);
    C = 16-RND - Src[BpS*11] + 3*(Src[BpS*12]-Src[BpS*16]) - 6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(14,C);
    C = 16-RND - Src[BpS*12] + 3*Src[BpS*13] - 7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(15,C);
    /* next column */
    Src += 1;
    Dst += 1;
  }
#else
  while(H-->0) {
    int C;
    /* leading edge: asymmetric taps */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(2,C);
    /* interior: symmetric 8-tap filter */
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6]) - 6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7]) - 6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(4,C);
    /* trailing edge: asymmetric taps, mirror of the leading edge */
    C = 16-RND - Src[BpS*2] + 3*Src[BpS*3] - 6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) + 2*Src[BpS*8];
    CLIP_STORE(5,C);
    C = 16-RND - Src[BpS*3] + 3*(Src[BpS*4]-Src[BpS*8]) - 6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(6,C);
    C = 16-RND - Src[BpS*4] + 3*Src[BpS*5] - 7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(7,C);
    /* next column */
    Src += 1;
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
898 :
/*
 * CLIP_STORE(i,C): scale the filter sum C down by 5 bits and clip it to
 * 0..255, average the result with the source sample one row further down
 * (Src[BpS*i+BpS]) using rounding term 1-RND, then pass it to STORE for
 * Dst[BpS*i].  Expects Src, Dst, BpS and RND to be in scope.
 * Wrapped in do/while(0) so that it expands to a single statement.
 */
#define CLIP_STORE(i,C) \
  do { \
    if ((C)<0) (C) = 0; else if ((C)>(255<<5)) (C) = 255; else (C) = (C)>>5; \
    (C) = ((C)+Src[BpS*(i)+BpS]+1-RND) >> 1; \
    STORE(Dst[BpS*(i)], (C)); \
  } while (0)

/*
 * Vertical FIR pass, averaged with the source sample of the following row.
 *
 * Each loop iteration handles one column: it reads SIZE+1 samples from Src
 * at offsets BpS*0 .. BpS*SIZE, filters them into SIZE values, and lets
 * CLIP_STORE average value i with Src[BpS*(i+1)] before storing to
 * Dst[BpS*i].  Src and Dst then advance by one byte to the next column.
 *
 *   Dst  destination pointer
 *   Src  source pointer (read only)
 *   H    number of columns to process (loop count)
 *   BpS  distance, in bytes, between two vertically adjacent samples
 *   RND  rounding control; used both in the filter bias (16-RND) and in
 *        the averaging step (1-RND)
 */
static void
FUNC_VA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
  while(H-->0) {
    int C;
    /* leading edge: asymmetric taps */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE( 0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE( 1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE( 2,C);
    /* interior: symmetric 8-tap filter */
    C = 16-RND - (Src[BpS* 0]+Src[BpS* 7]) + 3*(Src[BpS* 1]+Src[BpS* 6]) - 6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
    CLIP_STORE( 3,C);
    C = 16-RND - (Src[BpS* 1]+Src[BpS* 8]) + 3*(Src[BpS* 2]+Src[BpS* 7]) - 6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
    CLIP_STORE( 4,C);
    C = 16-RND - (Src[BpS* 2]+Src[BpS* 9]) + 3*(Src[BpS* 3]+Src[BpS* 8]) - 6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
    CLIP_STORE( 5,C);
    C = 16-RND - (Src[BpS* 3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9]) - 6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
    CLIP_STORE( 6,C);
    C = 16-RND - (Src[BpS* 4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10]) - 6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
    CLIP_STORE( 7,C);
    C = 16-RND - (Src[BpS* 5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11]) - 6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
    CLIP_STORE( 8,C);
    C = 16-RND - (Src[BpS* 6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12]) - 6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
    CLIP_STORE( 9,C);
    C = 16-RND - (Src[BpS* 7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13]) - 6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
    CLIP_STORE(10,C);
    C = 16-RND - (Src[BpS* 8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14]) - 6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
    CLIP_STORE(11,C);
    C = 16-RND - (Src[BpS* 9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15]) - 6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
    CLIP_STORE(12,C);
    /* trailing edge: asymmetric taps, mirror of the leading edge */
    C = 16-RND - Src[BpS*10] + 3*Src[BpS*11] - 6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) + 2*Src[BpS*16];
    CLIP_STORE(13,C);
    C = 16-RND - Src[BpS*11] + 3*(Src[BpS*12]-Src[BpS*16]) - 6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
    CLIP_STORE(14,C);
    C = 16-RND - Src[BpS*12] + 3*Src[BpS*13] - 7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
    CLIP_STORE(15,C);
    /* next column */
    Src += 1;
    Dst += 1;
  }
#else
  while(H-->0) {
    int C;
    /* leading edge: asymmetric taps */
    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
    CLIP_STORE(0,C);
    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
    CLIP_STORE(1,C);
    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
    CLIP_STORE(2,C);
    /* interior: symmetric 8-tap filter */
    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6]) - 6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
    CLIP_STORE(3,C);
    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7]) - 6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
    CLIP_STORE(4,C);
    /* trailing edge: asymmetric taps, mirror of the leading edge */
    C = 16-RND - Src[BpS*2] + 3*Src[BpS*3] - 6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) + 2*Src[BpS*8];
    CLIP_STORE(5,C);
    C = 16-RND - Src[BpS*3] + 3*(Src[BpS*4]-Src[BpS*8]) - 6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
    CLIP_STORE(6,C);
    C = 16-RND - Src[BpS*4] + 3*Src[BpS*5] - 7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
    CLIP_STORE(7,C);
    /* next column */
    Src += 1;
    Dst += 1;
  }
#endif
}
#undef CLIP_STORE
970 :
/*
 * Drop the store-operation macro and the function-name macros used by the
 * interpolation routines above, so the names no longer expand after this
 * point.
 */
#undef STORE
#undef FUNC_H
#undef FUNC_V
#undef FUNC_HA
#undef FUNC_VA
#undef FUNC_HA_UP
#undef FUNC_VA_UP
978 :
979 :
980 :			#endif /* XVID_AUTO_INCLUDE && !defined(REF) */

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4