[svn] / trunk / xvidcore / src / quant / ppc_asm / quant_h263_altivec.c Repository:
ViewVC logotype

Annotation of /trunk/xvidcore/src/quant/ppc_asm/quant_h263_altivec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1413 - (view) (download)

1 : edgomez 1413 /*****************************************************************************
2 :     *
3 :     * XVID MPEG-4 VIDEO CODEC
4 :     * - MPEG4 Quantization H263 implementation with altivec optimization -
5 :     *
6 :     * Copyright(C) 2004 Christoph Naegeli <chn@kbw.ch>
7 :     *
8 :     * This program is free software ; you can redistribute it and/or modify
9 :     * it under the terms of the GNU General Public License as published by
10 :     * the Free Software Foundation ; either version 2 of the License, or
11 :     * (at your option) any later version.
12 :     *
13 :     * This program is distributed in the hope that it will be useful,
14 :     * but WITHOUT ANY WARRANTY ; without even the implied warranty of
15 :     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 :     * GNU General Public License for more details.
17 :     *
18 :     * You should have received a copy of the GNU General Public License
19 :     * along with this program ; if not, write to the Free Software
20 :     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 :     *
22 :     * $Id: quant_h263_altivec.c,v 1.1 2004-04-05 20:36:37 edgomez Exp $
23 :     *
24 :     ****************************************************************************/
25 :    
26 :     #ifdef HAVE_ALTIVEC_H
27 :     #include <altivec.h>
28 :     #endif
29 :    
30 :     #include "../../portab.h"
31 :     #include "../../global.h"
32 :    
33 :     #undef DEBUG
34 :     #include <stdio.h>
35 :    
36 :    
37 :     /*****************************************************************************
38 :     * Local data
39 :     ****************************************************************************/
40 :    
/*
 * Divide-by-multiply table.
 *
 * Each entry is a fixed-point reciprocal with SCALEBITS fractional bits, so
 * that a division can be replaced by a multiplication followed by a right
 * shift of SCALEBITS bits.  A 16 bit shift is enough in this case.
 *
 * Entry q holds FIX(2 * q) for q = 1..31; entry 0 is 0.
 */

#define SCALEBITS 16
#define FIX(X) ((1L << SCALEBITS) / (X) + 1)

static const uint32_t multipliers[32] = {
	/*  0 ..  7 */
	0,       FIX(2),  FIX(4),  FIX(6),  FIX(8),  FIX(10), FIX(12), FIX(14),
	/*  8 .. 15 */
	FIX(16), FIX(18), FIX(20), FIX(22), FIX(24), FIX(26), FIX(28), FIX(30),
	/* 16 .. 23 */
	FIX(32), FIX(34), FIX(36), FIX(38), FIX(40), FIX(42), FIX(44), FIX(46),
	/* 24 .. 31 */
	FIX(48), FIX(50), FIX(52), FIX(54), FIX(56), FIX(58), FIX(60), FIX(62)
};
58 :    
59 :    
60 :     /*****************************************************************************
61 :     * Function definitions
62 :     ****************************************************************************/
63 :    
64 :    
/*
 * quantize intra-block
 *
 * QUANT_H263_INTRA_ALTIVEC() quantizes eight consecutive 16 bit values.
 *
 * It expands in a scope that must provide the following variables:
 *   data, coeff          - input / output pointers (both advanced by 8)
 *   zerovec              - all-zero vector
 *   mult                 - reciprocal multiplier splatted to every lane
 *   quant_m_2            - threshold splatted to every lane
 *   acLevel, zero_mask, m2_mask, even, odd - scratch vectors
 *
 * The whole expansion is wrapped in do { } while (0) so that it behaves as
 * a single statement wherever it is used.
 */

#define QUANT_H263_INTRA_ALTIVEC() \
do { \
	/* load eight values from a possibly unaligned address */ \
	acLevel = vec_perm(vec_ld(0, data), vec_ld(16, data), vec_lvsl(0, data)); \
	/* despite its name, zero_mask flags the lanes that are negative */ \
	zero_mask = vec_cmplt(acLevel, (vector signed short)zerovec); \
	acLevel = vec_abs(acLevel); \
	\
	/* lanes whose magnitude is below quant_m_2 become zero */ \
	m2_mask = vec_cmpgt(quant_m_2, (vector unsigned short)acLevel); \
	acLevel = vec_sel(acLevel, (vector signed short)zerovec, m2_mask); \
	\
	/* 16x16 -> 32 bit products for the even and the odd lanes */ \
	even = vec_mule(mult, (vector unsigned short)acLevel); \
	odd = vec_mulo(mult, (vector unsigned short)acLevel); \
	\
	/* shift right by 16; the count is formed as 8 + 8 because 16 is */ \
	/* outside the literal range vec_splat_u32 accepts */ \
	even = vec_sr(even, vec_add(vec_splat_u32(8), vec_splat_u32(8))); \
	odd = vec_sr(odd, vec_add(vec_splat_u32(8), vec_splat_u32(8))); \
	\
	/* re-interleave the lanes and narrow back to 16 bit */ \
	acLevel = (vector signed short)vec_pack(vec_mergeh(even, odd), vec_mergel(even, odd)); \
	/* restore the sign: (x ^ mask) + (mask & 1) negates the flagged lanes */ \
	acLevel = vec_xor(acLevel, zero_mask); \
	acLevel = vec_add(acLevel, vec_and(zero_mask, vec_splat_s16(1))); \
	/* aligned store of the eight results */ \
	vec_st(acLevel, 0, coeff); \
	\
	coeff += 8; \
	data += 8; \
} while (0)
89 :    
90 :     /* This function assumes:
91 :     * coeff is 16 byte aligned
92 :     * data is unaligned
93 :     */
94 :    
95 :     uint32_t
96 :     quant_h263_intra_altivec_c(int16_t *coeff,
97 :     int16_t *data,
98 :     const uint32_t quant,
99 :     const uint32_t dcscalar,
100 :     const uint16_t *mpeg_quant_matrices)
101 :     {
102 :     vector unsigned char zerovec;
103 :     vector unsigned short mult;
104 :     vector unsigned short quant_m_2;
105 :     vector signed short acLevel;
106 :    
107 :     register vector unsigned int even;
108 :     register vector unsigned int odd;
109 :    
110 :     vector bool short zero_mask;
111 :     vector bool short m2_mask;
112 :    
113 :     register int16_t *origin_coeff = coeff;
114 :     register int16_t *origin_data = data;
115 :    
116 :     #ifdef DEBUG
117 :     if(((unsigned)coeff) & 15)
118 :     fprintf(stderr, "quant_h263_intra_altivec_c:incorrect align, coeff: %x\n", coeff);
119 :     #endif
120 :    
121 :     zerovec = vec_splat_u8(0);
122 :    
123 :     *((unsigned short*)&mult) = (unsigned short)multipliers[quant];
124 :     mult = vec_splat(mult, 0);
125 :    
126 :     *((unsigned short*)&quant_m_2) = (unsigned short)quant;
127 :     quant_m_2 = vec_splat(quant_m_2, 0);
128 :     quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1));
129 :    
130 :     QUANT_H263_INTRA_ALTIVEC();
131 :     QUANT_H263_INTRA_ALTIVEC();
132 :     QUANT_H263_INTRA_ALTIVEC();
133 :     QUANT_H263_INTRA_ALTIVEC();
134 :    
135 :     QUANT_H263_INTRA_ALTIVEC();
136 :     QUANT_H263_INTRA_ALTIVEC();
137 :     QUANT_H263_INTRA_ALTIVEC();
138 :     QUANT_H263_INTRA_ALTIVEC();
139 :    
140 :     // noch erstes setzen
141 :     origin_coeff[0] = DIV_DIV(origin_data[0], (int32_t)dcscalar);
142 :    
143 :     return 0;
144 :     }
145 :    
146 :    
/*
 * QUANT_H263_INTER_ALTIVEC() quantizes eight consecutive 16 bit values of an
 * inter block and accumulates the quantized magnitudes.
 *
 * It expands in a scope that must provide the following variables:
 *   data, coeff          - input / output pointers (both advanced by 8)
 *   zerovec              - all-zero vector
 *   mult                 - reciprocal multiplier splatted to every lane
 *   quant_m_2, quant_d_2 - threshold and bias splatted to every lane
 *   sum_short            - per-lane running sum of the quantized magnitudes
 *   acLevel, zero_mask, m2_mask, even, odd - scratch vectors
 *
 * The whole expansion is wrapped in do { } while (0) so that it behaves as
 * a single statement wherever it is used.
 */
#define QUANT_H263_INTER_ALTIVEC() \
do { \
	/* load eight values from a possibly unaligned address */ \
	acLevel = vec_perm(vec_ld(0, data), vec_ld(16, data), vec_lvsl(0, data)); \
	/* despite its name, zero_mask flags the lanes that are negative */ \
	zero_mask = vec_cmplt(acLevel, (vector signed short)zerovec); \
	acLevel = vec_abs(acLevel); \
	/* subtract the bias from the magnitude */ \
	acLevel = (vector signed short)vec_sub((vector unsigned short)acLevel, quant_d_2); \
	\
	/* signed compare: lanes below quant_m_2 (including those that went */ \
	/* negative in the subtraction) become zero */ \
	m2_mask = vec_cmpgt((vector signed short)quant_m_2, acLevel); \
	acLevel = vec_sel(acLevel, (vector signed short)zerovec, m2_mask); \
	\
	/* 16x16 -> 32 bit products for the even and the odd lanes */ \
	even = vec_mule((vector unsigned short)acLevel, mult); \
	odd = vec_mulo((vector unsigned short)acLevel, mult); \
	\
	/* shift right by 16; the count is formed as 8 + 8 because 16 is */ \
	/* outside the literal range vec_splat_u32 accepts */ \
	even = vec_sr(even, vec_add(vec_splat_u32(8), vec_splat_u32(8))); \
	odd = vec_sr(odd, vec_add(vec_splat_u32(8), vec_splat_u32(8))); \
	\
	/* re-interleave the lanes and narrow back to 16 bit */ \
	acLevel = (vector signed short)vec_pack(vec_mergeh(even, odd), vec_mergel(even, odd)); \
	/* accumulate the magnitudes before the sign is restored */ \
	sum_short = vec_add(sum_short, (vector unsigned short)acLevel); \
	\
	/* restore the sign: (x ^ mask) + (mask & 1) negates the flagged lanes */ \
	acLevel = vec_xor(acLevel, zero_mask); \
	acLevel = vec_add(acLevel, vec_and(zero_mask, vec_splat_s16(1))); \
	\
	/* aligned store of the eight results */ \
	vec_st(acLevel, 0, coeff); \
	\
	coeff += 8; \
	data += 8; \
} while (0)
172 :    
173 :     /* This function assumes:
174 :     * coeff is 16 byte aligned
175 :     * data is unaligned
176 :     */
177 :    
178 :     uint32_t
179 :     quant_h263_inter_altivec_c(int16_t *coeff,
180 :     int16_t *data,
181 :     const uint32_t quant,
182 :     const uint16_t *mpeg_quant_matrices)
183 :     {
184 :     vector unsigned char zerovec;
185 :     vector unsigned short mult;
186 :     vector unsigned short quant_m_2;
187 :     vector unsigned short quant_d_2;
188 :     vector unsigned short sum_short;
189 :     vector signed short acLevel;
190 :    
191 :     vector unsigned int even;
192 :     vector unsigned int odd;
193 :    
194 :     vector bool short m2_mask;
195 :     vector bool short zero_mask;
196 :    
197 :     uint32_t result;
198 :    
199 :     #ifdef DEBUG
200 :     if(((unsigned)coeff) & 0x15)
201 :     fprintf(stderr, "quant_h263_inter_altivec_c:incorrect align, coeff: %x\n", coeff);
202 :     #endif
203 :    
204 :     /* initialisation stuff */
205 :     zerovec = vec_splat_u8(0);
206 :     *((unsigned short*)&mult) = (unsigned short)multipliers[quant];
207 :     mult = vec_splat(mult, 0);
208 :     *((unsigned short*)&quant_m_2) = (unsigned short)quant;
209 :     quant_m_2 = vec_splat(quant_m_2, 0);
210 :     quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1));
211 :     *((unsigned short*)&quant_d_2) = (unsigned short)quant;
212 :     quant_d_2 = vec_splat(quant_d_2, 0);
213 :     quant_d_2 = vec_sr(quant_d_2, vec_splat_u16(1));
214 :     sum_short = (vector unsigned short)zerovec;
215 :    
216 :     /* Quantize */
217 :     QUANT_H263_INTER_ALTIVEC();
218 :     QUANT_H263_INTER_ALTIVEC();
219 :     QUANT_H263_INTER_ALTIVEC();
220 :     QUANT_H263_INTER_ALTIVEC();
221 :    
222 :     QUANT_H263_INTER_ALTIVEC();
223 :     QUANT_H263_INTER_ALTIVEC();
224 :     QUANT_H263_INTER_ALTIVEC();
225 :     QUANT_H263_INTER_ALTIVEC();
226 :    
227 :     /* Calculate the return value */
228 :     even = (vector unsigned int)vec_sum4s((vector signed short)sum_short, (vector signed int)zerovec);
229 :     even = (vector unsigned int)vec_sums((vector signed int)even, (vector signed int)zerovec);
230 :     even = vec_splat(even, 3);
231 :     vec_ste(even, 0, &result);
232 :     return result;
233 :     }
234 :    
235 :    
236 :    
/*
 * dequantize intra-block & clamp to [-2048,2047]
 *
 * DEQUANT_H263_INTRA_ALTIVEC() dequantizes eight consecutive 16 bit values.
 *
 * It expands in a scope that must provide the following variables:
 *   coeff_ptr, data_ptr  - input / output pointers (both advanced by 8)
 *   zerovec              - all-zero vector
 *   quant_m_2, quant_add - scale and offset splatted to every lane
 *   vec_2048             - 2048 in every lane
 *   acLevel, t, equal_zero, less_zero, overflow, even, odd, high, low
 *                        - scratch vectors
 *
 * The whole expansion is wrapped in do { } while (0) so that it behaves as
 * a single statement wherever it is used.
 */


#define DEQUANT_H263_INTRA_ALTIVEC() \
do { \
	/* load eight values from a possibly unaligned address */ \
	acLevel = vec_perm(vec_ld(0,coeff_ptr), vec_ld(16,coeff_ptr), vec_lvsl(0,coeff_ptr)); \
	equal_zero = vec_cmpeq(acLevel, (vector signed short)zerovec); \
	less_zero = vec_cmplt(acLevel, (vector signed short)zerovec); \
	acLevel = vec_abs(acLevel); \
	\
	/* 16x16 -> 32 bit products for the even and the odd lanes */ \
	even = vec_mule((vector unsigned short)acLevel, quant_m_2); \
	odd = vec_mulo((vector unsigned short)acLevel, quant_m_2); \
	\
	/* back to source order: high = first four lanes, low = last four */ \
	high = vec_mergeh(even,odd); \
	low = vec_mergel(even,odd); \
	\
	/* add quant_add, widened to 32 bit, to every lane that was not zero */ \
	t = vec_sel(quant_add, (vector unsigned short)zerovec, equal_zero); \
	high = vec_add(high, (vector unsigned int)vec_mergeh((vector unsigned short)zerovec, t)); \
	low = vec_add(low, (vector unsigned int)vec_mergel((vector unsigned short)zerovec, t)); \
	\
	/* narrow to 16 bit with signed saturation */ \
	acLevel = vec_packs((vector signed int)high, (vector signed int)low); \
	\
	/* clamp the magnitude to 2048, or to 2047 where the input was not */ \
	/* negative */ \
	overflow = vec_cmpgt(acLevel, vec_2048); \
	acLevel = vec_sel(acLevel, vec_2048, overflow); \
	overflow = (vector bool short)vec_and(overflow, vec_xor(less_zero, vec_splat_s16(-1))); \
	overflow = (vector bool short)vec_and(overflow, vec_splat_s16(1)); \
	acLevel = vec_sub(acLevel, (vector signed short)overflow); \
	\
	/* restore the sign: (x ^ mask) + (mask & 1) negates the flagged lanes */ \
	acLevel = vec_xor(acLevel, less_zero); \
	acLevel = vec_add(acLevel, vec_and(less_zero, vec_splat_s16(1))); \
	\
	/* aligned store of the eight results */ \
	vec_st(acLevel, 0, data_ptr); \
	\
	data_ptr += 8; \
	coeff_ptr += 8; \
} while (0)
272 :    
273 :     /* This function assumes:
274 :     * data is 16 byte aligned
275 :     * coeff is unaligned
276 :     */
277 :    
278 :     uint32_t
279 :     dequant_h263_intra_altivec_c(int16_t *data,
280 :     const int16_t *coeff,
281 :     const uint32_t quant,
282 :     const uint32_t dcscalar,
283 :     const uint16_t *mpeg_quant_matrices)
284 :     {
285 :     vector signed short acLevel;
286 :     vector signed short vec_2048;
287 :     vector unsigned short quant_add;
288 :     vector unsigned short quant_m_2;
289 :     vector unsigned short t;
290 :    
291 :     vector bool short equal_zero;
292 :     vector bool short less_zero;
293 :     vector bool short overflow;
294 :    
295 :     register vector unsigned int even;
296 :     register vector unsigned int odd;
297 :     register vector unsigned int high;
298 :     register vector unsigned int low;
299 :    
300 :     register vector unsigned char zerovec;
301 :    
302 :     register int16_t *data_ptr;
303 :     register int16_t *coeff_ptr;
304 :    
305 :     #ifdef DEBUG
306 :     if(((unsigned)data) & 0x15)
307 :     fprintf(stderr, "dequant_h263_intra_altivec_c:incorrect align, data: %x\n", data);
308 :     #endif
309 :    
310 :     /* initialize */
311 :     *((unsigned short*)&quant_add) = (unsigned short)(quant & 1 ? quant : quant - 1);
312 :     quant_add = vec_splat(quant_add,0);
313 :    
314 :     *((unsigned short*)&quant_m_2) = (unsigned short)(quant << 1);
315 :     quant_m_2 = vec_splat(quant_m_2,0);
316 :    
317 :     vec_2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11));
318 :     zerovec = vec_splat_u8(0);
319 :    
320 :     data_ptr = (int16_t*)data;
321 :     coeff_ptr = (int16_t*)coeff;
322 :    
323 :     /* dequant */
324 :     DEQUANT_H263_INTRA_ALTIVEC();
325 :     DEQUANT_H263_INTRA_ALTIVEC();
326 :     DEQUANT_H263_INTRA_ALTIVEC();
327 :     DEQUANT_H263_INTRA_ALTIVEC();
328 :    
329 :     DEQUANT_H263_INTRA_ALTIVEC();
330 :     DEQUANT_H263_INTRA_ALTIVEC();
331 :     DEQUANT_H263_INTRA_ALTIVEC();
332 :     DEQUANT_H263_INTRA_ALTIVEC();
333 :    
334 :     /* data[0] is special */
335 :     data[0] = coeff[0] * dcscalar;
336 :     if(data[0] < -2048)
337 :     data[0] = -2048;
338 :     else if(data[0] > 2047)
339 :     data[0] = 2047;
340 :    
341 :     return 0;
342 :     }
343 :    
344 :    
/*
 * dequantize inter-block & clamp to [-2048,2047]
 *
 * DEQUANT_H263_INTER_ALTIVEC() dequantizes eight consecutive 16 bit values.
 *
 * It expands in a scope that must provide the following variables:
 *   coeff, data          - input / output pointers (both advanced by 8)
 *   zerovec              - all-zero vector
 *   quant_m_2, quant_add - scale and offset splatted to every lane
 *   vec_2048             - 2048 in every lane
 *   acLevel, t, equal_zero, less_zero, overflow, even, odd, high, low
 *                        - scratch vectors
 *
 * The whole expansion is wrapped in do { } while (0) so that it behaves as
 * a single statement wherever it is used.
 */

#define DEQUANT_H263_INTER_ALTIVEC() \
do { \
	/* load eight values from a possibly unaligned address */ \
	acLevel = vec_perm(vec_ld(0,coeff), vec_ld(16,coeff), vec_lvsl(0,coeff)); \
	equal_zero = vec_cmpeq(acLevel, (vector signed short)zerovec); \
	less_zero = vec_cmplt(acLevel, (vector signed short)zerovec); \
	acLevel = vec_abs(acLevel); \
	\
	/* 16x16 -> 32 bit products, then back to source order: */ \
	/* high = first four lanes, low = last four */ \
	even = vec_mule((vector unsigned short)acLevel, quant_m_2); \
	odd = vec_mulo((vector unsigned short)acLevel, quant_m_2); \
	high = vec_mergeh(even,odd); \
	low = vec_mergel(even,odd); \
	\
	/* add quant_add, widened to 32 bit, to every lane that was not zero, */ \
	/* then narrow to 16 bit with signed saturation */ \
	t = vec_sel(quant_add, (vector unsigned short)zerovec, equal_zero); \
	high = vec_add(high, (vector unsigned int)vec_mergeh((vector unsigned short)zerovec, t)); \
	low = vec_add(low, (vector unsigned int)vec_mergel((vector unsigned short)zerovec, t)); \
	acLevel = vec_packs((vector signed int)high, (vector signed int)low); \
	\
	/* clamp the magnitude to 2048, or to 2047 where the input was not */ \
	/* negative */ \
	overflow = vec_cmpgt(acLevel,vec_2048); \
	acLevel = vec_sel(acLevel, vec_2048, overflow); \
	overflow = (vector bool short)vec_and(overflow, vec_xor(less_zero, vec_splat_s16(-1))); \
	overflow = (vector bool short)vec_and(overflow, vec_splat_s16(1)); \
	acLevel = vec_sub(acLevel, (vector signed short)overflow); \
	\
	/* restore the sign: (x ^ mask) + (mask & 1) negates the flagged lanes */ \
	acLevel = vec_xor(acLevel, less_zero); \
	acLevel = vec_add(acLevel, vec_and(less_zero, vec_splat_s16(1))); \
	\
	/* aligned store of the eight results */ \
	vec_st(acLevel, 0, data); \
	data += 8; \
	coeff += 8; \
} while (0)
376 :    
377 :    
378 :     /* This function assumes:
379 :     * data is 16 byte aligned
380 :     * coeff is unaligned
381 :     */
382 :    
383 :     uint32_t
384 :     dequant_h263_inter_altivec_c(int16_t *data,
385 :     int16_t *coeff,
386 :     const uint32_t quant,
387 :     const uint16_t *mpeg_quant_matrices)
388 :     {
389 :     vector signed short acLevel;
390 :     vector signed short vec_2048;
391 :    
392 :     vector unsigned short quant_m_2;
393 :     vector unsigned short quant_add;
394 :     vector unsigned short t;
395 :    
396 :     register vector unsigned int even;
397 :     register vector unsigned int odd;
398 :     register vector unsigned int high;
399 :     register vector unsigned int low;
400 :    
401 :     register vector unsigned char zerovec;
402 :    
403 :     vector bool short equal_zero;
404 :     vector bool short less_zero;
405 :     vector bool short overflow;
406 :    
407 :     #ifdef DEBUG
408 :     /* print alignment errors if this is on */
409 :     if(((unsigned)data) & 0x15)
410 :     fprintf(stderr, "dequant_h263_inter_altivec_c:incorrect align, data: %x\n", data);
411 :     #endif
412 :    
413 :     /* initialize */
414 :     *((unsigned short*)&quant_m_2) = (unsigned short)(quant << 1);
415 :     quant_m_2 = vec_splat(quant_m_2,0);
416 :    
417 :     *((unsigned short*)&quant_add) = (unsigned short)(quant & 1 ? quant : quant - 1);
418 :     quant_add = vec_splat(quant_add,0);
419 :    
420 :     vec_2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11));
421 :     zerovec = vec_splat_u8(0);
422 :    
423 :     /* dequant */
424 :     DEQUANT_H263_INTER_ALTIVEC();
425 :     DEQUANT_H263_INTER_ALTIVEC();
426 :     DEQUANT_H263_INTER_ALTIVEC();
427 :     DEQUANT_H263_INTER_ALTIVEC();
428 :    
429 :     DEQUANT_H263_INTER_ALTIVEC();
430 :     DEQUANT_H263_INTER_ALTIVEC();
431 :     DEQUANT_H263_INTER_ALTIVEC();
432 :     DEQUANT_H263_INTER_ALTIVEC();
433 :    
434 :     return 0;
435 :     }

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4