19 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
20 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 |
* |
* |
22 |
* $Id: qpel.c,v 1.2 2004-03-22 22:36:23 edgomez Exp $ |
* $Id: qpel.c,v 1.3 2004-08-10 21:58:55 edgomez Exp $ |
23 |
* |
* |
24 |
****************************************************************************/ |
****************************************************************************/ |
25 |
|
|
67 |
****************************************************************************/ |
****************************************************************************/ |
68 |
|
|
69 |
#define XVID_AUTO_INCLUDE |
#define XVID_AUTO_INCLUDE |
70 |
|
/* First auto include this file to generate reference code for SIMD versions |
71 |
|
* This set of functions are good for educational purpose, because they're |
72 |
|
* straightforward to understand, use loops and so on... But obviously they |
73 |
|
* sux when it comes to speed */ |
74 |
|
#define REFERENCE_CODE |
75 |
|
|
76 |
/* 16x? filters */ |
/* 16x? filters */ |
77 |
|
|
79 |
#define TABLE FIR_Tab_16 |
#define TABLE FIR_Tab_16 |
80 |
|
|
81 |
#define STORE(d,s) (d) = (s) |
#define STORE(d,s) (d) = (s) |
82 |
|
#define FUNC_H H_Pass_16_C_ref |
83 |
|
#define FUNC_V V_Pass_16_C_ref |
84 |
|
#define FUNC_HA H_Pass_Avrg_16_C_ref |
85 |
|
#define FUNC_VA V_Pass_Avrg_16_C_ref |
86 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C_ref |
87 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C_ref |
88 |
|
|
89 |
|
#include "qpel.c" /* self-include ourself */ |
90 |
|
|
91 |
|
/* note: B-frame always uses Rnd=0... */ |
92 |
|
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
93 |
|
#define FUNC_H H_Pass_16_Add_C_ref |
94 |
|
#define FUNC_V V_Pass_16_Add_C_ref |
95 |
|
#define FUNC_HA H_Pass_Avrg_16_Add_C_ref |
96 |
|
#define FUNC_VA V_Pass_Avrg_16_Add_C_ref |
97 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C_ref |
98 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C_ref |
99 |
|
|
100 |
|
#include "qpel.c" /* self-include ourself */ |
101 |
|
|
102 |
|
#undef SIZE |
103 |
|
#undef TABLE |
104 |
|
|
105 |
|
/* 8x? filters */ |
106 |
|
|
107 |
|
#define SIZE 8 |
108 |
|
#define TABLE FIR_Tab_8 |
109 |
|
|
110 |
|
#define STORE(d,s) (d) = (s) |
111 |
|
#define FUNC_H H_Pass_8_C_ref |
112 |
|
#define FUNC_V V_Pass_8_C_ref |
113 |
|
#define FUNC_HA H_Pass_Avrg_8_C_ref |
114 |
|
#define FUNC_VA V_Pass_Avrg_8_C_ref |
115 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C_ref |
116 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C_ref |
117 |
|
|
118 |
|
#include "qpel.c" /* self-include ourself */ |
119 |
|
|
120 |
|
/* note: B-frame always uses Rnd=0... */ |
121 |
|
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
122 |
|
#define FUNC_H H_Pass_8_Add_C_ref |
123 |
|
#define FUNC_V V_Pass_8_Add_C_ref |
124 |
|
#define FUNC_HA H_Pass_Avrg_8_Add_C_ref |
125 |
|
#define FUNC_VA V_Pass_Avrg_8_Add_C_ref |
126 |
|
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C_ref |
127 |
|
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C_ref |
128 |
|
|
129 |
|
#include "qpel.c" /* self-include ourself */ |
130 |
|
|
131 |
|
#undef SIZE |
132 |
|
#undef TABLE |
133 |
|
|
134 |
|
/* Then we define more optimized C version where loops are unrolled, where |
135 |
|
* FIR coeffcients are not read from memory but are hardcoded in instructions |
136 |
|
* They should be faster */ |
137 |
|
#undef REFERENCE_CODE |
138 |
|
|
139 |
|
/* 16x? filters */ |
140 |
|
|
141 |
|
#define SIZE 16 |
142 |
|
|
143 |
|
#define STORE(d,s) (d) = (s) |
144 |
#define FUNC_H H_Pass_16_C |
#define FUNC_H H_Pass_16_C |
145 |
#define FUNC_V V_Pass_16_C |
#define FUNC_V V_Pass_16_C |
146 |
#define FUNC_HA H_Pass_Avrg_16_C |
#define FUNC_HA H_Pass_Avrg_16_C |
148 |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C |
149 |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C |
150 |
|
|
151 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
152 |
|
|
153 |
/* note: B-frame always uses Rnd=0... */ |
/* note: B-frame always uses Rnd=0... */ |
154 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
159 |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C |
160 |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C |
161 |
|
|
162 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
163 |
|
|
164 |
#undef SIZE |
#undef SIZE |
165 |
#undef TABLE |
#undef TABLE |
177 |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C |
178 |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C |
179 |
|
|
180 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
181 |
|
|
182 |
/* note: B-frame always uses Rnd=0... */ |
/* note: B-frame always uses Rnd=0... */ |
183 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
188 |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C |
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C |
189 |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C |
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C |
190 |
|
|
191 |
#include __FILE__ /* self-include ourself */ |
#include "qpel.c" /* self-include ourself */ |
192 |
|
|
193 |
#undef SIZE |
#undef SIZE |
194 |
#undef TABLE |
#undef TABLE |
|
|
|
195 |
#undef XVID_AUTO_INCLUDE |
#undef XVID_AUTO_INCLUDE |
196 |
|
|
197 |
/* general-purpose hooks |
/* Global scope hooks |
|
* TODO: embed in enc/dec structure? |
|
198 |
****************************************************************************/ |
****************************************************************************/ |
199 |
|
|
200 |
XVID_QP_FUNCS *xvid_QP_Funcs = 0; |
XVID_QP_FUNCS *xvid_QP_Funcs = 0; |
201 |
XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0; |
XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0; |
202 |
|
|
203 |
/* plain C impl. declaration |
/* Reference plain C impl. declaration |
204 |
* TODO: should be declared elsewhere? |
****************************************************************************/ |
205 |
|
|
206 |
|
XVID_QP_FUNCS xvid_QP_Funcs_C_ref = { |
207 |
|
H_Pass_16_C_ref, H_Pass_Avrg_16_C_ref, H_Pass_Avrg_Up_16_C_ref, |
208 |
|
V_Pass_16_C_ref, V_Pass_Avrg_16_C_ref, V_Pass_Avrg_Up_16_C_ref, |
209 |
|
|
210 |
|
H_Pass_8_C_ref, H_Pass_Avrg_8_C_ref, H_Pass_Avrg_Up_8_C_ref, |
211 |
|
V_Pass_8_C_ref, V_Pass_Avrg_8_C_ref, V_Pass_Avrg_Up_8_C_ref |
212 |
|
}; |
213 |
|
|
214 |
|
XVID_QP_FUNCS xvid_QP_Add_Funcs_C_ref = { |
215 |
|
H_Pass_16_Add_C_ref, H_Pass_Avrg_16_Add_C_ref, H_Pass_Avrg_Up_16_Add_C_ref, |
216 |
|
V_Pass_16_Add_C_ref, V_Pass_Avrg_16_Add_C_ref, V_Pass_Avrg_Up_16_Add_C_ref, |
217 |
|
|
218 |
|
H_Pass_8_Add_C_ref, H_Pass_Avrg_8_Add_C_ref, H_Pass_Avrg_Up_8_Add_C_ref, |
219 |
|
V_Pass_8_Add_C_ref, V_Pass_Avrg_8_Add_C_ref, V_Pass_Avrg_Up_8_Add_C_ref |
220 |
|
}; |
221 |
|
|
222 |
|
/* Plain C impl. declaration (faster than ref one) |
223 |
****************************************************************************/ |
****************************************************************************/ |
224 |
|
|
225 |
XVID_QP_FUNCS xvid_QP_Funcs_C = { |
XVID_QP_FUNCS xvid_QP_Funcs_C = { |
239 |
}; |
}; |
240 |
|
|
241 |
/* mmx impl. declaration (see. qpel_mmx.asm |
/* mmx impl. declaration (see. qpel_mmx.asm |
|
* TODO: should be declared elsewhere? |
|
242 |
****************************************************************************/ |
****************************************************************************/ |
243 |
|
|
244 |
#ifdef ARCH_IS_IA32 |
#ifdef ARCH_IS_IA32 |
256 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx); |
257 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx); |
258 |
|
|
|
XVID_QP_FUNCS xvid_QP_Funcs_mmx = { |
|
|
xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx, |
|
|
xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx, |
|
|
|
|
|
xvid_H_Pass_8_mmx, xvid_H_Pass_Avrg_8_mmx, xvid_H_Pass_Avrg_Up_8_mmx, |
|
|
xvid_V_Pass_8_mmx, xvid_V_Pass_Avrg_8_mmx, xvid_V_Pass_Avrg_Up_8_mmx |
|
|
}; |
|
|
|
|
259 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx); |
260 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx); |
261 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx); |
270 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx); |
271 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx); |
272 |
|
|
273 |
|
XVID_QP_FUNCS xvid_QP_Funcs_mmx = { |
274 |
|
xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx, |
275 |
|
xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx, |
276 |
|
|
277 |
|
xvid_H_Pass_8_mmx, xvid_H_Pass_Avrg_8_mmx, xvid_H_Pass_Avrg_Up_8_mmx, |
278 |
|
xvid_V_Pass_8_mmx, xvid_V_Pass_Avrg_8_mmx, xvid_V_Pass_Avrg_Up_8_mmx |
279 |
|
}; |
280 |
|
|
281 |
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = { |
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = { |
282 |
xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx, |
xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx, |
283 |
xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx, |
xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx, |
370 |
|
|
371 |
#endif /* !XVID_AUTO_INCLUDE */ |
#endif /* !XVID_AUTO_INCLUDE */ |
372 |
|
|
373 |
|
#if defined(XVID_AUTO_INCLUDE) && defined(REFERENCE_CODE) |
374 |
|
|
375 |
/***************************************************************************** |
/***************************************************************************** |
376 |
* "reference" filters impl. in plain C |
* "reference" filters impl. in plain C |
377 |
****************************************************************************/ |
****************************************************************************/ |
378 |
|
|
|
#ifdef XVID_AUTO_INCLUDE |
|
|
|
|
379 |
static |
static |
380 |
void FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) |
void FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) |
381 |
{ |
{ |
527 |
#undef FUNC_HA_UP |
#undef FUNC_HA_UP |
528 |
#undef FUNC_VA_UP |
#undef FUNC_VA_UP |
529 |
|
|
530 |
#endif /* XVID_AUTO_INCLUDE */ |
#elif defined(XVID_AUTO_INCLUDE) && !defined(REFERENCE_CODE) |
531 |
|
|
532 |
|
/***************************************************************************** |
533 |
|
* "fast" filters impl. in plain C |
534 |
|
****************************************************************************/ |
535 |
|
|
536 |
|
/* Clamp accumulator C to [0, 255<<5], normalise by >>5, then emit via the
 * configurable STORE macro (plain store or add/average, per self-include). */
#define CLIP_STORE(D,C) \
	if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
	STORE(D, C)

/* Horizontal qpel FIR pass, fully unrolled with hardcoded coefficients.
 * Dst/Src: row pointers; H: row count; BpS: bytes per row stride;
 * RND: rounding control (0 or 1). Width is SIZE (8 or 16) pixels. */
static void
FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(Dst[ 0],C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(Dst[ 1],C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(Dst[ 2],C);
		C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
		CLIP_STORE(Dst[ 3],C);
		C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
		CLIP_STORE(Dst[ 4],C);
		C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
		CLIP_STORE(Dst[ 5],C);
		C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
		CLIP_STORE(Dst[ 6],C);
		C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
		CLIP_STORE(Dst[ 7],C);
		C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
		CLIP_STORE(Dst[ 8],C);
		C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
		CLIP_STORE(Dst[ 9],C);
		C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
		CLIP_STORE(Dst[10],C);
		C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
		CLIP_STORE(Dst[11],C);
		C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
		CLIP_STORE(Dst[12],C);
		C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
		CLIP_STORE(Dst[13],C);
		C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
		CLIP_STORE(Dst[14],C);
		C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
		CLIP_STORE(Dst[15],C);
		Src += BpS;
		Dst += BpS;
	}
#else
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(Dst[0],C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(Dst[1],C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(Dst[2],C);
		C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
		CLIP_STORE(Dst[3],C);
		C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
		CLIP_STORE(Dst[4],C);
		C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
		CLIP_STORE(Dst[5],C);
		C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(Dst[6],C);
		C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(Dst[7],C);
		Src += BpS;
		Dst += BpS;
	}
#endif
}
#undef CLIP_STORE
606 |
|
|
607 |
|
/* Clamp/normalise C, then average it with the co-sited source pixel Src[i]
 * (rounding controlled by RND) before emitting through STORE. */
#define CLIP_STORE(i,C) \
	if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
	C = (C+Src[i]+1-RND) >> 1; \
	STORE(Dst[i], C)

/* Horizontal qpel FIR pass, averaged with the source pixel at the same
 * position ("Avrg" variant). Parameters as FUNC_H. */
static void
FUNC_HA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE( 1,C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE( 2,C);
		C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
		CLIP_STORE( 3,C);
		C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
		CLIP_STORE( 4,C);
		C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
		CLIP_STORE( 5,C);
		C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
		CLIP_STORE( 6,C);
		C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
		CLIP_STORE( 7,C);
		C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
		CLIP_STORE( 8,C);
		C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
		CLIP_STORE( 9,C);
		C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
		CLIP_STORE(10,C);
		C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
		CLIP_STORE(11,C);
		C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
		CLIP_STORE(12,C);
		C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
		CLIP_STORE(13,C);
		C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
		CLIP_STORE(14,C);
		C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
		CLIP_STORE(15,C);
		Src += BpS;
		Dst += BpS;
	}
#else
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(1,C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(2,C);
		C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
		CLIP_STORE(3,C);
		C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
		CLIP_STORE(4,C);
		C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
		CLIP_STORE(5,C);
		C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(6,C);
		C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(7,C);
		Src += BpS;
		Dst += BpS;
	}
#endif
}
#undef CLIP_STORE
678 |
|
|
679 |
|
/* Clamp/normalise C, then average with the NEXT source pixel Src[i+1]
 * ("Up" variant: rounds toward the upper neighbour). */
#define CLIP_STORE(i,C) \
	if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
	C = (C+Src[i+1]+1-RND) >> 1; \
	STORE(Dst[i], C)

/* Horizontal qpel FIR pass, averaged with the right-hand source pixel
 * ("Avrg_Up" variant). Parameters as FUNC_H. */
static void
FUNC_HA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE( 1,C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE( 2,C);
		C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
		CLIP_STORE( 3,C);
		C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
		CLIP_STORE( 4,C);
		C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
		CLIP_STORE( 5,C);
		C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
		CLIP_STORE( 6,C);
		C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
		CLIP_STORE( 7,C);
		C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
		CLIP_STORE( 8,C);
		C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
		CLIP_STORE( 9,C);
		C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
		CLIP_STORE(10,C);
		C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
		CLIP_STORE(11,C);
		C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
		CLIP_STORE(12,C);
		C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
		CLIP_STORE(13,C);
		C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
		CLIP_STORE(14,C);
		C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
		CLIP_STORE(15,C);
		Src += BpS;
		Dst += BpS;
	}
#else
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(1,C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(2,C);
		C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
		CLIP_STORE(3,C);
		C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
		CLIP_STORE(4,C);
		C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
		CLIP_STORE(5,C);
		C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(6,C);
		C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(7,C);
		Src += BpS;
		Dst += BpS;
	}
#endif
}
#undef CLIP_STORE
750 |
|
|
751 |
|
/*****************************************************************************
 * Vertical passes
 * Note: for vertical passes, width (W) needs only be 8 or 16.
 ****************************************************************************/
|
/* Clamp accumulator C to [0, 255<<5], normalise by >>5, emit via STORE. */
#define CLIP_STORE(D,C) \
	if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
	STORE(D, C)

/* Vertical qpel FIR pass, fully unrolled. Here H counts COLUMNS: the loop
 * walks one pixel right per iteration (Src/Dst += 1) and each iteration
 * filters a whole column of SIZE pixels using stride BpS. */
static void
FUNC_V(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
		CLIP_STORE(Dst[BpS* 0],C);
		C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
		CLIP_STORE(Dst[BpS* 1],C);
		C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
		CLIP_STORE(Dst[BpS* 2],C);
		C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
		CLIP_STORE(Dst[BpS* 3],C);
		C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
		CLIP_STORE(Dst[BpS* 4],C);
		C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
		CLIP_STORE(Dst[BpS* 5],C);
		C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
		CLIP_STORE(Dst[BpS* 6],C);
		C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
		CLIP_STORE(Dst[BpS* 7],C);
		C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
		CLIP_STORE(Dst[BpS* 8],C);
		C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
		CLIP_STORE(Dst[BpS* 9],C);
		C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
		CLIP_STORE(Dst[BpS*10],C);
		C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
		CLIP_STORE(Dst[BpS*11],C);
		C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
		CLIP_STORE(Dst[BpS*12],C);
		C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
		CLIP_STORE(Dst[BpS*13],C);
		C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
		CLIP_STORE(Dst[BpS*14],C);
		C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
		CLIP_STORE(Dst[BpS*15],C);
		Src += 1;
		Dst += 1;
	}
#else
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
		CLIP_STORE(Dst[BpS*0],C);
		C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
		CLIP_STORE(Dst[BpS*1],C);
		C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
		CLIP_STORE(Dst[BpS*2],C);
		C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
		CLIP_STORE(Dst[BpS*3],C);
		C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
		CLIP_STORE(Dst[BpS*4],C);
		C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
		CLIP_STORE(Dst[BpS*5],C);
		C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
		CLIP_STORE(Dst[BpS*6],C);
		C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
		CLIP_STORE(Dst[BpS*7],C);
		Src += 1;
		Dst += 1;
	}
#endif
}
#undef CLIP_STORE
826 |
|
|
827 |
|
/* Clamp/normalise C, then average with the co-sited source pixel
 * Src[BpS*i] before emitting through STORE. */
#define CLIP_STORE(i,C) \
	if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
	C = (C+Src[BpS*i]+1-RND) >> 1; \
	STORE(Dst[BpS*i], C)

/* Vertical qpel FIR pass, averaged with the source pixel at the same
 * position ("Avrg" variant). H counts columns; see FUNC_V. */
static void
FUNC_VA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
		CLIP_STORE( 1,C);
		C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
		CLIP_STORE( 2,C);
		C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
		CLIP_STORE( 3,C);
		C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
		CLIP_STORE( 4,C);
		C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
		CLIP_STORE( 5,C);
		C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
		CLIP_STORE( 6,C);
		C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
		CLIP_STORE( 7,C);
		C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
		CLIP_STORE( 8,C);
		C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
		CLIP_STORE( 9,C);
		C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
		CLIP_STORE(10,C);
		C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
		CLIP_STORE(11,C);
		C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
		CLIP_STORE(12,C);
		C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
		CLIP_STORE(13,C);
		C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
		CLIP_STORE(14,C);
		C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
		CLIP_STORE(15,C);
		Src += 1;
		Dst += 1;
	}
#else
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
		CLIP_STORE(1,C);
		C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
		CLIP_STORE(2,C);
		C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
		CLIP_STORE(3,C);
		C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
		CLIP_STORE(4,C);
		C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
		CLIP_STORE(5,C);
		C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
		CLIP_STORE(6,C);
		C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
		CLIP_STORE(7,C);
		Src += 1;
		Dst += 1;
	}
#endif
}
#undef CLIP_STORE
898 |
|
|
899 |
|
/* Clamp/normalise C, then average with the NEXT source pixel one row down,
 * Src[BpS*i+BpS] ("Up" variant). */
#define CLIP_STORE(i,C) \
	if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
	C = (C+Src[BpS*i+BpS]+1-RND) >> 1; \
	STORE(Dst[BpS*i], C)

/* Vertical qpel FIR pass, averaged with the lower source neighbour
 * ("Avrg_Up" variant). H counts columns; see FUNC_V. */
static void
FUNC_VA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
		CLIP_STORE( 1,C);
		C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
		CLIP_STORE( 2,C);
		C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
		CLIP_STORE( 3,C);
		C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
		CLIP_STORE( 4,C);
		C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
		CLIP_STORE( 5,C);
		C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
		CLIP_STORE( 6,C);
		C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
		CLIP_STORE( 7,C);
		C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
		CLIP_STORE( 8,C);
		C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
		CLIP_STORE( 9,C);
		C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
		CLIP_STORE(10,C);
		C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
		CLIP_STORE(11,C);
		C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
		CLIP_STORE(12,C);
		C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
		CLIP_STORE(13,C);
		C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
		CLIP_STORE(14,C);
		C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
		CLIP_STORE(15,C);
		Src += 1;
		Dst += 1;
	}
#else
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
		CLIP_STORE(1,C);
		C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
		CLIP_STORE(2,C);
		C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
		CLIP_STORE(3,C);
		C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
		CLIP_STORE(4,C);
		C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
		CLIP_STORE(5,C);
		C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
		CLIP_STORE(6,C);
		C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
		CLIP_STORE(7,C);
		Src += 1;
		Dst += 1;
	}
#endif
}
#undef CLIP_STORE
970 |
|
|
971 |
|
/* Tear down the per-instantiation macros so the next self-include pass
 * can redefine them. */
#undef STORE
#undef FUNC_H
#undef FUNC_V
#undef FUNC_HA
#undef FUNC_VA
#undef FUNC_HA_UP
#undef FUNC_VA_UP

#endif /* XVID_AUTO_INCLUDE && !REFERENCE_CODE */