Parent Directory | Revision Log
Revision 1570 -
(view)
(download)
Original Path: trunk/xvidcore/src/quant/ppc_asm/quant_h263_altivec.c
1 : | edgomez | 1413 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - MPEG4 Quantization H263 implementation with altivec optimization - | ||
5 : | * | ||
6 : | * Copyright(C) 2004 Christoph Naegeli <chn@kbw.ch> | ||
7 : | * | ||
8 : | * This program is free software ; you can redistribute it and/or modify | ||
9 : | * it under the terms of the GNU General Public License as published by | ||
10 : | * the Free Software Foundation ; either version 2 of the License, or | ||
11 : | * (at your option) any later version. | ||
12 : | * | ||
13 : | * This program is distributed in the hope that it will be useful, | ||
14 : | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
15 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | * GNU General Public License for more details. | ||
17 : | * | ||
18 : | * You should have received a copy of the GNU General Public License | ||
19 : | * along with this program ; if not, write to the Free Software | ||
20 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | * | ||
22 : | edgomez | 1570 | * $Id: quant_h263_altivec.c,v 1.2 2004-12-09 23:02:54 edgomez Exp $ |
23 : | edgomez | 1413 | * |
24 : | ****************************************************************************/ | ||
25 : | |||
26 : | #ifdef HAVE_ALTIVEC_H | ||
27 : | #include <altivec.h> | ||
28 : | #endif | ||
29 : | |||
30 : | #include "../../portab.h" | ||
31 : | #include "../../global.h" | ||
32 : | |||
33 : | #undef DEBUG | ||
34 : | #include <stdio.h> | ||
35 : | |||
36 : | |||
37 : | /***************************************************************************** | ||
38 : | * Local data | ||
39 : | ****************************************************************************/ | ||
40 : | |||
/* Divide-by-multiply table.
 *
 * Entry i holds FIX(2*i), i.e. the 16 bit fixed point reciprocal of 2*i
 * (plus one, as FIX() adds 1 after the integer division).  The quantizer
 * functions index this table with `quant`, multiply by the entry and shift
 * the product right by SCALEBITS instead of dividing by 2*quant.
 * A 16 bit shift is enough in this case.
 * Entry 0 is 0 because a reciprocal of zero does not exist.
 */

#define SCALEBITS	16
#define FIX(X)		((1L << SCALEBITS) / (X) + 1)

static const uint32_t multipliers[32] =
{
	0,       FIX(2),  FIX(4),  FIX(6),
	FIX(8),  FIX(10), FIX(12), FIX(14),
	FIX(16), FIX(18), FIX(20), FIX(22),
	FIX(24), FIX(26), FIX(28), FIX(30),
	FIX(32), FIX(34), FIX(36), FIX(38),
	FIX(40), FIX(42), FIX(44), FIX(46),
	FIX(48), FIX(50), FIX(52), FIX(54),
	FIX(56), FIX(58), FIX(60), FIX(62)
};
58 : | |||
59 : | |||
60 : | /***************************************************************************** | ||
61 : | * Function definitions | ||
62 : | ****************************************************************************/ | ||
63 : | |||
64 : | |||
/* quantize intra-block
 *
 * One expansion quantizes 8 values read from `data` and stores them to
 * `coeff`, then advances both pointers by 8 elements.  It relies on the
 * following variables of the enclosing function: data, coeff, acLevel,
 * zero_mask, m2_mask, even, odd, zerovec, mult and quant_m_2.
 * The body is wrapped in do { } while (0) so that an expansion followed
 * by ';' is always exactly one statement.
 */

#define QUANT_H263_INTRA_ALTIVEC() \
do { \
	/* load 8 values from the (possibly unaligned) data pointer and \
	 * remember which of them are negative */ \
	acLevel = vec_perm(vec_ld(0, data), vec_ld(16, data), vec_lvsl(0, data)); \
	zero_mask = vec_cmplt(acLevel, (vector signed short)zerovec); \
	acLevel = vec_abs(acLevel); \
	\
	/* magnitudes smaller than quant_m_2 are forced to zero */ \
	m2_mask = vec_cmpgt(quant_m_2, (vector unsigned short)acLevel); \
	acLevel = vec_sel(acLevel, (vector signed short)zerovec, m2_mask); \
	\
	/* 16x16 -> 32 bit products of the even and the odd elements */ \
	even = vec_mule(mult, (vector unsigned short)acLevel); \
	odd = vec_mulo(mult, (vector unsigned short)acLevel); \
	\
	/* shift right by 16; the shift count is built as 8 + 8 */ \
	even = vec_sr(even, vec_add(vec_splat_u32(8), vec_splat_u32(8))); \
	odd = vec_sr(odd, vec_add(vec_splat_u32(8), vec_splat_u32(8))); \
	\
	/* interleave even/odd again, pack to 16 bit and negate the \
	 * elements that were negative on input (xor with mask, add 1) */ \
	acLevel = (vector signed short)vec_pack(vec_mergeh(even, odd), vec_mergel(even, odd)); \
	acLevel = vec_xor(acLevel, zero_mask); \
	acLevel = vec_add(acLevel, vec_and(zero_mask, vec_splat_s16(1))); \
	vec_st(acLevel, 0, coeff); \
	\
	coeff += 8; \
	data += 8; \
} while (0)
89 : | |||
90 : | /* This function assumes: | ||
91 : | * coeff is 16 byte aligned | ||
92 : | * data is unaligned | ||
93 : | */ | ||
94 : | |||
95 : | uint32_t | ||
96 : | quant_h263_intra_altivec_c(int16_t *coeff, | ||
97 : | int16_t *data, | ||
98 : | const uint32_t quant, | ||
99 : | const uint32_t dcscalar, | ||
100 : | const uint16_t *mpeg_quant_matrices) | ||
101 : | { | ||
102 : | vector unsigned char zerovec; | ||
103 : | vector unsigned short mult; | ||
104 : | vector unsigned short quant_m_2; | ||
105 : | vector signed short acLevel; | ||
106 : | |||
107 : | register vector unsigned int even; | ||
108 : | register vector unsigned int odd; | ||
109 : | |||
110 : | vector bool short zero_mask; | ||
111 : | vector bool short m2_mask; | ||
112 : | |||
113 : | register int16_t *origin_coeff = coeff; | ||
114 : | register int16_t *origin_data = data; | ||
115 : | |||
116 : | #ifdef DEBUG | ||
117 : | if(((unsigned)coeff) & 15) | ||
118 : | edgomez | 1570 | fprintf(stderr, "quant_h263_intra_altivec_c:incorrect align, coeff: %lx\n", (long)coeff); |
119 : | edgomez | 1413 | #endif |
120 : | |||
121 : | zerovec = vec_splat_u8(0); | ||
122 : | |||
123 : | *((unsigned short*)&mult) = (unsigned short)multipliers[quant]; | ||
124 : | mult = vec_splat(mult, 0); | ||
125 : | |||
126 : | *((unsigned short*)&quant_m_2) = (unsigned short)quant; | ||
127 : | quant_m_2 = vec_splat(quant_m_2, 0); | ||
128 : | quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1)); | ||
129 : | |||
130 : | QUANT_H263_INTRA_ALTIVEC(); | ||
131 : | QUANT_H263_INTRA_ALTIVEC(); | ||
132 : | QUANT_H263_INTRA_ALTIVEC(); | ||
133 : | QUANT_H263_INTRA_ALTIVEC(); | ||
134 : | |||
135 : | QUANT_H263_INTRA_ALTIVEC(); | ||
136 : | QUANT_H263_INTRA_ALTIVEC(); | ||
137 : | QUANT_H263_INTRA_ALTIVEC(); | ||
138 : | QUANT_H263_INTRA_ALTIVEC(); | ||
139 : | |||
140 : | // noch erstes setzen | ||
141 : | origin_coeff[0] = DIV_DIV(origin_data[0], (int32_t)dcscalar); | ||
142 : | |||
143 : | return 0; | ||
144 : | } | ||
145 : | |||
146 : | |||
/* quantize inter-block
 *
 * One expansion quantizes 8 values read from `data`, stores them to
 * `coeff`, adds the quantized magnitudes to `sum_short` and advances both
 * pointers by 8 elements.  It relies on the following variables of the
 * enclosing function: data, coeff, acLevel, zero_mask, m2_mask, even, odd,
 * zerovec, mult, quant_m_2, quant_d_2 and sum_short.
 * The body is wrapped in do { } while (0) so that an expansion followed
 * by ';' is always exactly one statement.
 */

#define QUANT_H263_INTER_ALTIVEC() \
do { \
	/* load 8 values from the (possibly unaligned) data pointer and \
	 * remember which of them are negative */ \
	acLevel = vec_perm(vec_ld(0, data), vec_ld(16, data), vec_lvsl(0, data)); \
	zero_mask = vec_cmplt(acLevel, (vector signed short)zerovec); \
	acLevel = vec_abs(acLevel); \
	/* magnitude - quant_d_2 (modular 16 bit subtraction) */ \
	acLevel = (vector signed short)vec_sub((vector unsigned short)acLevel, quant_d_2); \
	\
	/* signed compare: everything below quant_m_2, including results \
	 * that went negative in the subtraction, is forced to zero */ \
	m2_mask = vec_cmpgt((vector signed short)quant_m_2, acLevel); \
	acLevel = vec_sel(acLevel, (vector signed short)zerovec, m2_mask); \
	\
	/* 16x16 -> 32 bit products of the even and the odd elements */ \
	even = vec_mule((vector unsigned short)acLevel, mult); \
	odd = vec_mulo((vector unsigned short)acLevel, mult); \
	\
	/* shift right by 16; the shift count is built as 8 + 8 */ \
	even = vec_sr(even, vec_add(vec_splat_u32(8), vec_splat_u32(8))); \
	odd = vec_sr(odd, vec_add(vec_splat_u32(8), vec_splat_u32(8))); \
	\
	/* interleave even/odd again, pack to 16 bit and accumulate the \
	 * still unsigned magnitudes */ \
	acLevel = (vector signed short)vec_pack(vec_mergeh(even, odd), vec_mergel(even, odd)); \
	sum_short = vec_add(sum_short, (vector unsigned short)acLevel); \
	\
	/* negate the elements that were negative on input (xor, add 1) */ \
	acLevel = vec_xor(acLevel, zero_mask); \
	acLevel = vec_add(acLevel, vec_and(zero_mask, vec_splat_s16(1))); \
	\
	vec_st(acLevel, 0, coeff); \
	\
	coeff += 8; \
	data += 8; \
} while (0)
172 : | |||
173 : | /* This function assumes: | ||
174 : | * coeff is 16 byte aligned | ||
175 : | * data is unaligned | ||
176 : | */ | ||
177 : | |||
178 : | uint32_t | ||
179 : | quant_h263_inter_altivec_c(int16_t *coeff, | ||
180 : | int16_t *data, | ||
181 : | const uint32_t quant, | ||
182 : | const uint16_t *mpeg_quant_matrices) | ||
183 : | { | ||
184 : | vector unsigned char zerovec; | ||
185 : | vector unsigned short mult; | ||
186 : | vector unsigned short quant_m_2; | ||
187 : | vector unsigned short quant_d_2; | ||
188 : | vector unsigned short sum_short; | ||
189 : | vector signed short acLevel; | ||
190 : | |||
191 : | vector unsigned int even; | ||
192 : | vector unsigned int odd; | ||
193 : | |||
194 : | vector bool short m2_mask; | ||
195 : | vector bool short zero_mask; | ||
196 : | |||
197 : | uint32_t result; | ||
198 : | |||
199 : | #ifdef DEBUG | ||
200 : | if(((unsigned)coeff) & 0x15) | ||
201 : | edgomez | 1570 | fprintf(stderr, "quant_h263_inter_altivec_c:incorrect align, coeff: %lx\n", (long)coeff); |
202 : | edgomez | 1413 | #endif |
203 : | |||
204 : | /* initialisation stuff */ | ||
205 : | zerovec = vec_splat_u8(0); | ||
206 : | *((unsigned short*)&mult) = (unsigned short)multipliers[quant]; | ||
207 : | mult = vec_splat(mult, 0); | ||
208 : | *((unsigned short*)&quant_m_2) = (unsigned short)quant; | ||
209 : | quant_m_2 = vec_splat(quant_m_2, 0); | ||
210 : | quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1)); | ||
211 : | *((unsigned short*)&quant_d_2) = (unsigned short)quant; | ||
212 : | quant_d_2 = vec_splat(quant_d_2, 0); | ||
213 : | quant_d_2 = vec_sr(quant_d_2, vec_splat_u16(1)); | ||
214 : | sum_short = (vector unsigned short)zerovec; | ||
215 : | |||
216 : | /* Quantize */ | ||
217 : | QUANT_H263_INTER_ALTIVEC(); | ||
218 : | QUANT_H263_INTER_ALTIVEC(); | ||
219 : | QUANT_H263_INTER_ALTIVEC(); | ||
220 : | QUANT_H263_INTER_ALTIVEC(); | ||
221 : | |||
222 : | QUANT_H263_INTER_ALTIVEC(); | ||
223 : | QUANT_H263_INTER_ALTIVEC(); | ||
224 : | QUANT_H263_INTER_ALTIVEC(); | ||
225 : | QUANT_H263_INTER_ALTIVEC(); | ||
226 : | |||
227 : | /* Calculate the return value */ | ||
228 : | even = (vector unsigned int)vec_sum4s((vector signed short)sum_short, (vector signed int)zerovec); | ||
229 : | even = (vector unsigned int)vec_sums((vector signed int)even, (vector signed int)zerovec); | ||
230 : | even = vec_splat(even, 3); | ||
231 : | vec_ste(even, 0, &result); | ||
232 : | return result; | ||
233 : | } | ||
234 : | |||
235 : | |||
236 : | |||
/* dequantize intra-block & clamp to [-2048,2047]
 *
 * One expansion dequantizes 8 values read from `coeff_ptr`, stores them
 * to `data_ptr` and advances both pointers by 8 elements.  It relies on
 * the following variables of the enclosing function: coeff_ptr, data_ptr,
 * acLevel, equal_zero, less_zero, overflow, even, odd, high, low, t,
 * zerovec, quant_m_2, quant_add and vec_2048.
 * The body is wrapped in do { } while (0) so that an expansion followed
 * by ';' is always exactly one statement.
 */

#define DEQUANT_H263_INTRA_ALTIVEC() \
do { \
	/* load 8 values from the (possibly unaligned) coeff pointer and \
	 * remember which are zero and which are negative */ \
	acLevel = vec_perm(vec_ld(0,coeff_ptr), vec_ld(16,coeff_ptr), vec_lvsl(0,coeff_ptr)); \
	equal_zero = vec_cmpeq(acLevel, (vector signed short)zerovec); \
	less_zero = vec_cmplt(acLevel, (vector signed short)zerovec); \
	acLevel = vec_abs(acLevel); \
	\
	/* magnitude * quant_m_2 as 32 bit products, back in element order */ \
	even = vec_mule((vector unsigned short)acLevel, quant_m_2); \
	odd = vec_mulo((vector unsigned short)acLevel, quant_m_2); \
	\
	high = vec_mergeh(even,odd); \
	low = vec_mergel(even,odd); \
	\
	/* add quant_add, widened to 32 bit, except where the input was zero */ \
	t = vec_sel(quant_add, (vector unsigned short)zerovec, equal_zero); \
	high = vec_add(high, (vector unsigned int)vec_mergeh((vector unsigned short)zerovec, t)); \
	low = vec_add(low, (vector unsigned int)vec_mergel((vector unsigned short)zerovec, t)); \
	\
	/* saturating pack to signed 16 bit */ \
	acLevel = vec_packs((vector signed int)high, (vector signed int)low); \
	\
	/* magnitudes above 2048 become 2048; where the input was not \
	 * negative 1 is subtracted again, giving 2047 */ \
	overflow = vec_cmpgt(acLevel, vec_2048); \
	acLevel = vec_sel(acLevel, vec_2048, overflow); \
	overflow = (vector bool short)vec_and(overflow, vec_xor(less_zero, vec_splat_s16(-1))); \
	overflow = (vector bool short)vec_and(overflow, vec_splat_s16(1)); \
	acLevel = vec_sub(acLevel, (vector signed short)overflow); \
	\
	/* negate the elements that were negative on input (xor, add 1) */ \
	acLevel = vec_xor(acLevel, less_zero); \
	acLevel = vec_add(acLevel, vec_and(less_zero, vec_splat_s16(1))); \
	\
	vec_st(acLevel, 0, data_ptr); \
	\
	data_ptr += 8; \
	coeff_ptr += 8; \
} while (0)
272 : | |||
273 : | /* This function assumes: | ||
274 : | * data is 16 byte aligned | ||
275 : | * coeff is unaligned | ||
276 : | */ | ||
277 : | |||
278 : | uint32_t | ||
279 : | dequant_h263_intra_altivec_c(int16_t *data, | ||
280 : | const int16_t *coeff, | ||
281 : | const uint32_t quant, | ||
282 : | const uint32_t dcscalar, | ||
283 : | const uint16_t *mpeg_quant_matrices) | ||
284 : | { | ||
285 : | vector signed short acLevel; | ||
286 : | vector signed short vec_2048; | ||
287 : | vector unsigned short quant_add; | ||
288 : | vector unsigned short quant_m_2; | ||
289 : | vector unsigned short t; | ||
290 : | |||
291 : | vector bool short equal_zero; | ||
292 : | vector bool short less_zero; | ||
293 : | vector bool short overflow; | ||
294 : | |||
295 : | register vector unsigned int even; | ||
296 : | register vector unsigned int odd; | ||
297 : | register vector unsigned int high; | ||
298 : | register vector unsigned int low; | ||
299 : | |||
300 : | register vector unsigned char zerovec; | ||
301 : | |||
302 : | register int16_t *data_ptr; | ||
303 : | register int16_t *coeff_ptr; | ||
304 : | |||
305 : | #ifdef DEBUG | ||
306 : | if(((unsigned)data) & 0x15) | ||
307 : | edgomez | 1570 | fprintf(stderr, "dequant_h263_intra_altivec_c:incorrect align, data: %lx\n", (long)data); |
308 : | edgomez | 1413 | #endif |
309 : | |||
310 : | /* initialize */ | ||
311 : | *((unsigned short*)&quant_add) = (unsigned short)(quant & 1 ? quant : quant - 1); | ||
312 : | quant_add = vec_splat(quant_add,0); | ||
313 : | |||
314 : | *((unsigned short*)&quant_m_2) = (unsigned short)(quant << 1); | ||
315 : | quant_m_2 = vec_splat(quant_m_2,0); | ||
316 : | |||
317 : | vec_2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11)); | ||
318 : | zerovec = vec_splat_u8(0); | ||
319 : | |||
320 : | data_ptr = (int16_t*)data; | ||
321 : | coeff_ptr = (int16_t*)coeff; | ||
322 : | |||
323 : | /* dequant */ | ||
324 : | DEQUANT_H263_INTRA_ALTIVEC(); | ||
325 : | DEQUANT_H263_INTRA_ALTIVEC(); | ||
326 : | DEQUANT_H263_INTRA_ALTIVEC(); | ||
327 : | DEQUANT_H263_INTRA_ALTIVEC(); | ||
328 : | |||
329 : | DEQUANT_H263_INTRA_ALTIVEC(); | ||
330 : | DEQUANT_H263_INTRA_ALTIVEC(); | ||
331 : | DEQUANT_H263_INTRA_ALTIVEC(); | ||
332 : | DEQUANT_H263_INTRA_ALTIVEC(); | ||
333 : | |||
334 : | /* data[0] is special */ | ||
335 : | data[0] = coeff[0] * dcscalar; | ||
336 : | if(data[0] < -2048) | ||
337 : | data[0] = -2048; | ||
338 : | else if(data[0] > 2047) | ||
339 : | data[0] = 2047; | ||
340 : | |||
341 : | return 0; | ||
342 : | } | ||
343 : | |||
344 : | |||
/* dequantize inter-block & clamp to [-2048,2047]
 *
 * One expansion dequantizes 8 values read from `coeff`, stores them to
 * `data` and advances both pointers by 8 elements.  It relies on the
 * following variables of the enclosing function: coeff, data, acLevel,
 * equal_zero, less_zero, overflow, even, odd, high, low, t, zerovec,
 * quant_m_2, quant_add and vec_2048.
 * The body is wrapped in do { } while (0) so that an expansion followed
 * by ';' is always exactly one statement.
 */

#define DEQUANT_H263_INTER_ALTIVEC() \
do { \
	/* load 8 values from the (possibly unaligned) coeff pointer and \
	 * remember which are zero and which are negative */ \
	acLevel = vec_perm(vec_ld(0,coeff), vec_ld(16,coeff), vec_lvsl(0,coeff)); \
	equal_zero = vec_cmpeq(acLevel, (vector signed short)zerovec); \
	less_zero = vec_cmplt(acLevel, (vector signed short)zerovec); \
	acLevel = vec_abs(acLevel); \
	\
	/* magnitude * quant_m_2 as 32 bit products, back in element order */ \
	even = vec_mule((vector unsigned short)acLevel, quant_m_2); \
	odd = vec_mulo((vector unsigned short)acLevel, quant_m_2); \
	high = vec_mergeh(even,odd); \
	low = vec_mergel(even,odd); \
	\
	/* add quant_add, widened to 32 bit, except where the input was zero, \
	 * then pack to signed 16 bit with saturation */ \
	t = vec_sel(quant_add, (vector unsigned short)zerovec, equal_zero); \
	high = vec_add(high, (vector unsigned int)vec_mergeh((vector unsigned short)zerovec, t)); \
	low = vec_add(low, (vector unsigned int)vec_mergel((vector unsigned short)zerovec, t)); \
	acLevel = vec_packs((vector signed int)high, (vector signed int)low); \
	\
	/* magnitudes above 2048 become 2048; where the input was not \
	 * negative 1 is subtracted again, giving 2047 */ \
	overflow = vec_cmpgt(acLevel,vec_2048); \
	acLevel = vec_sel(acLevel, vec_2048, overflow); \
	overflow = (vector bool short)vec_and(overflow, vec_xor(less_zero, vec_splat_s16(-1))); \
	overflow = (vector bool short)vec_and(overflow, vec_splat_s16(1)); \
	acLevel = vec_sub(acLevel, (vector signed short)overflow); \
	\
	/* negate the elements that were negative on input (xor, add 1) */ \
	acLevel = vec_xor(acLevel, less_zero); \
	acLevel = vec_add(acLevel, vec_and(less_zero, vec_splat_s16(1))); \
	\
	vec_st(acLevel, 0, data); \
	data += 8; \
	coeff += 8; \
} while (0)
376 : | |||
377 : | |||
378 : | /* This function assumes: | ||
379 : | * data is 16 byte aligned | ||
380 : | * coeff is unaligned | ||
381 : | */ | ||
382 : | |||
383 : | uint32_t | ||
384 : | dequant_h263_inter_altivec_c(int16_t *data, | ||
385 : | int16_t *coeff, | ||
386 : | const uint32_t quant, | ||
387 : | const uint16_t *mpeg_quant_matrices) | ||
388 : | { | ||
389 : | vector signed short acLevel; | ||
390 : | vector signed short vec_2048; | ||
391 : | |||
392 : | vector unsigned short quant_m_2; | ||
393 : | vector unsigned short quant_add; | ||
394 : | vector unsigned short t; | ||
395 : | |||
396 : | register vector unsigned int even; | ||
397 : | register vector unsigned int odd; | ||
398 : | register vector unsigned int high; | ||
399 : | register vector unsigned int low; | ||
400 : | |||
401 : | register vector unsigned char zerovec; | ||
402 : | |||
403 : | vector bool short equal_zero; | ||
404 : | vector bool short less_zero; | ||
405 : | vector bool short overflow; | ||
406 : | |||
407 : | #ifdef DEBUG | ||
408 : | /* print alignment errors if this is on */ | ||
409 : | if(((unsigned)data) & 0x15) | ||
410 : | edgomez | 1570 | fprintf(stderr, "dequant_h263_inter_altivec_c:incorrect align, data: %lx\n", (long)data); |
411 : | edgomez | 1413 | #endif |
412 : | |||
413 : | /* initialize */ | ||
414 : | *((unsigned short*)&quant_m_2) = (unsigned short)(quant << 1); | ||
415 : | quant_m_2 = vec_splat(quant_m_2,0); | ||
416 : | |||
417 : | *((unsigned short*)&quant_add) = (unsigned short)(quant & 1 ? quant : quant - 1); | ||
418 : | quant_add = vec_splat(quant_add,0); | ||
419 : | |||
420 : | vec_2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11)); | ||
421 : | zerovec = vec_splat_u8(0); | ||
422 : | |||
423 : | /* dequant */ | ||
424 : | DEQUANT_H263_INTER_ALTIVEC(); | ||
425 : | DEQUANT_H263_INTER_ALTIVEC(); | ||
426 : | DEQUANT_H263_INTER_ALTIVEC(); | ||
427 : | DEQUANT_H263_INTER_ALTIVEC(); | ||
428 : | |||
429 : | DEQUANT_H263_INTER_ALTIVEC(); | ||
430 : | DEQUANT_H263_INTER_ALTIVEC(); | ||
431 : | DEQUANT_H263_INTER_ALTIVEC(); | ||
432 : | DEQUANT_H263_INTER_ALTIVEC(); | ||
433 : | |||
434 : | return 0; | ||
435 : | } |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |