Parent Directory | Revision Log
Revision 1413 - (view) (download)
1 : | edgomez | 1413 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - Altivec 8bit<->16bit transfer - | ||
5 : | * | ||
6 : | * Copyright(C) 2004 Christoph Naegeli <chn@kbw.ch> | ||
7 : | * | ||
8 : | * This program is free software ; you can redistribute it and/or modify | ||
9 : | * it under the terms of the GNU General Public License as published by | ||
10 : | * the Free Software Foundation ; either version 2 of the License, or | ||
11 : | * (at your option) any later version. | ||
12 : | * | ||
13 : | * This program is distributed in the hope that it will be useful, | ||
14 : | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
15 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | * GNU General Public License for more details. | ||
17 : | * | ||
18 : | * You should have received a copy of the GNU General Public License | ||
19 : | * along with this program ; if not, write to the Free Software | ||
20 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | * | ||
22 : | * $Id: mem_transfer_altivec.c,v 1.1 2004-04-05 20:36:37 edgomez Exp $ | ||
23 : | * | ||
24 : | ****************************************************************************/ | ||
25 : | |||
26 : | #ifdef HAVE_ALTIVEC_H | ||
27 : | #include <altivec.h> | ||
28 : | #endif | ||
29 : | |||
30 : | #include "../../portab.h" | ||
31 : | |||
32 : | |||
33 : | /* Turn this on if you like debugging the alignment */ | ||
34 : | #undef DEBUG | ||
35 : | |||
36 : | #include <stdio.h> | ||
37 : | |||
38 : | /* | ||
39 : | * This Function assumes dst is 16 byte aligned src is unaligned and stride is | ||
40 : | * a multiple of 16. | ||
41 : | */ | ||
42 : | |||
43 : | #define COPY8TO16() \ | ||
44 : | s = vec_perm(vec_ld(0, src), vec_ld(16, src), perm); /* load the next 8 bytes */ \ | ||
45 : | *dst++ = (vector signed short)vec_mergeh(zerovec, s); /* convert and save */ \ | ||
46 : | src += stride | ||
47 : | |||
48 : | void | ||
49 : | transfer_8to16copy_altivec_c(vector signed short *dst, | ||
50 : | uint8_t * src, | ||
51 : | uint32_t stride) | ||
52 : | { | ||
53 : | register vector unsigned char perm; | ||
54 : | register vector unsigned char s; | ||
55 : | register vector unsigned char zerovec; | ||
56 : | |||
57 : | #ifdef DEBUG | ||
58 : | /* check the alignment */ | ||
59 : | if(((unsigned long)dst) & 0xf) | ||
60 : | fprintf(stderr, "transfer_8to16copy_altivec:incorrect align, dst: %x\n", dst); | ||
61 : | if(stride & 0xf) | ||
62 : | fprintf(stderr, "transfer_8to16copy_altivec:incorrect align, stride: %u\n", stride); | ||
63 : | #endif | ||
64 : | |||
65 : | /* initialisation */ | ||
66 : | perm = vec_lvsl(0, src); | ||
67 : | zerovec = vec_splat_u8(0); | ||
68 : | |||
69 : | /* to the actual copy */ | ||
70 : | COPY8TO16(); | ||
71 : | COPY8TO16(); | ||
72 : | COPY8TO16(); | ||
73 : | COPY8TO16(); | ||
74 : | |||
75 : | COPY8TO16(); | ||
76 : | COPY8TO16(); | ||
77 : | COPY8TO16(); | ||
78 : | COPY8TO16(); | ||
79 : | } | ||
80 : | |||
81 : | |||
/*
 * This function assumes dst is 8 byte aligned and stride is a multiple of 8
 * src may be unaligned
 */

/*
 * Store one row of 8 coefficients as bytes: saturate-pack the 8 shorts to
 * unsigned bytes, rotate them to dst's position inside its 16-byte line,
 * then merge with the neighbouring destination bytes via vec_sel so the
 * full 16-byte vec_st only modifies the intended 8 bytes.
 */
#define COPY16TO8() \
	s = vec_perm(src[0], src[1], load_src_perm); /* load 8 shorts (src may be unaligned) */ \
	packed = vec_packsu(s, vec_splat_s16(0)); /* saturate to unsigned bytes, data in low 8 bytes */ \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsl(0, dst)); /* rotate keep/replace stencil to dst's offset */ \
	packed = vec_perm(packed, packed, vec_lvsl(0, dst)); /* rotate data to dst's offset */ \
	packed = vec_sel(packed, vec_ld(0, dst), mask); /* preserve the other 8 bytes already at dst */ \
	vec_st(packed, 0, dst); \
	src++; /* advance one vector = 8 shorts */ \
	dst += stride

void transfer_16to8copy_altivec_c(uint8_t *dst,
						vector signed short *src,
						uint32_t stride)
{
	register vector signed short s;       /* current row of 8 coefficients */
	register vector unsigned char packed; /* row packed down to bytes */
	register vector unsigned char mask_stencil; /* 8 zero bytes then 8 0xFF bytes */
	register vector unsigned char mask;   /* stencil rotated to dst's alignment */
	register vector unsigned char load_src_perm; /* unaligned-load permute for src */

#ifdef DEBUG
	/* if this is on, print alignment errors */
	if(((unsigned long) dst) & 0x7)
		fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, dst %x\n", dst);
	if(stride & 0x7)
		fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, stride %u\n", stride);
#endif
	/* Initialisation stuff */
	/* src misalignment is fixed (we advance by whole vectors), compute once */
	load_src_perm = vec_lvsl(0, (unsigned char*)src);
	/* stencil: 0x00 in bytes 0-7 (take packed data), 0xFF in bytes 8-15 (keep dst) */
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	/* 8 rows, fully unrolled */
	COPY16TO8();
	COPY16TO8();
	COPY16TO8();
	COPY16TO8();

	COPY16TO8();
	COPY16TO8();
	COPY16TO8();
	COPY16TO8();
}
128 : | |||
129 : | |||
130 : | |||
131 : | /* | ||
132 : | * This function assumes dst is 8 byte aligned and src is unaligned. Stride has | ||
133 : | * to be a multiple of 8 | ||
134 : | */ | ||
135 : | |||
136 : | #define COPY8TO8() \ | ||
137 : | tmp = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \ | ||
138 : | tmp = vec_sel(vec_perm(tmp, tmp, vec_lvsl(0, dst)), vec_ld(0, dst), vec_perm(mask, mask, vec_lvsl(0, dst))); \ | ||
139 : | vec_st(tmp, 0, dst); \ | ||
140 : | dst += stride; \ | ||
141 : | src += stride | ||
142 : | |||
143 : | void | ||
144 : | transfer8x8_copy_altivec_c( uint8_t * dst, | ||
145 : | uint8_t * src, | ||
146 : | uint32_t stride) | ||
147 : | { | ||
148 : | register vector unsigned char tmp; | ||
149 : | register vector unsigned char mask; | ||
150 : | |||
151 : | #ifdef DEBUG | ||
152 : | if(((unsigned long)dst) & 0x7) | ||
153 : | fprintf("transfer8x8_copy_altivec:incorrect align, dst: %x\n", dst); | ||
154 : | if(stride & 0x7) | ||
155 : | fprintf("transfer8x8_copy_altivec:incorrect stride, stride: %u\n", stride); | ||
156 : | #endif | ||
157 : | mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); | ||
158 : | |||
159 : | COPY8TO8(); | ||
160 : | COPY8TO8(); | ||
161 : | COPY8TO8(); | ||
162 : | COPY8TO8(); | ||
163 : | |||
164 : | COPY8TO8(); | ||
165 : | COPY8TO8(); | ||
166 : | COPY8TO8(); | ||
167 : | COPY8TO8(); | ||
168 : | } | ||
169 : | |||
170 : | |||
/*
 * This function assumes dct is 16 bytes aligned, cur and ref are 8 bytes
 * aligned and stride is a multiple of 8
 */

/*
 * One row: compute the 16-bit difference cur - ref into *dct, and also
 * copy the ref row back into cur (the vec_sel/vec_st pair below writes
 * ref's bytes over cur's 8 bytes while preserving the neighbouring 8).
 */
#define SUB8TO16() \
	c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); /* load cur row */ \
	r = vec_perm(vec_ld(0, ref), vec_ld(16, ref), vec_lvsl(0, ref)); /* load ref row */ \
	t = vec_sel(vec_perm(r, r, vec_lvsl(0, cur)), vec_ld(0, cur), vec_perm(mask, mask, vec_lvsl(0, cur))); /* merge ref into cur's line */ \
	vec_st(t, 0, cur); /* write ref row back into cur */ \
	t = vec_splat_u8(0); /* reuse t as a zero vector */ \
	cs = (vector signed short)vec_mergeh(t, c); /* zero-extend cur to 16 bit */ \
	rs = (vector signed short)vec_mergeh(t, r); /* zero-extend ref to 16 bit */ \
	*dct++ = vec_sub(cs, rs); \
	cur += stride; \
	ref += stride

void
transfer_8to16sub_altivec_c(vector signed short *dct,
						uint8_t *cur,
						uint8_t *ref,
						uint32_t stride)
{
	vector unsigned char c;    /* current row, bytes */
	vector unsigned char r;    /* reference row, bytes */
	vector unsigned char t;    /* scratch: merged store value, then zero */
	vector unsigned char mask; /* keep/replace stencil for the cur store */
	vector signed short cs;    /* current row widened to 16 bit */
	vector signed short rs;    /* reference row widened to 16 bit */

#ifdef DEBUG
	if(((unsigned long)dct) & 0xf)
		fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, dct: %x\n", dct);
	if(((unsigned long)cur) & 0x7)
		fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, cur: %x\n", cur);
	if(((unsigned long)ref) & 0x7)
		fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, ref: %x\n", ref);
	if(stride & 0x7)
		fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, stride: %u\n", stride);
#endif

	/* Initialisation */
	/* stencil: 0x00 in bytes 0-7 (take ref data), 0xFF in bytes 8-15 (keep cur) */
	mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	/* 8 rows, fully unrolled */
	SUB8TO16();
	SUB8TO16();
	SUB8TO16();
	SUB8TO16();

	SUB8TO16();
	SUB8TO16();
	SUB8TO16();
	SUB8TO16();
}
225 : | |||
226 : | /* | ||
227 : | * This function assumes that dct is 16 bytes aligned, cur and ref is 8 bytes aligned | ||
228 : | * and stride is a multiple of 8 | ||
229 : | */ | ||
230 : | |||
231 : | #define SUBRO8TO16() \ | ||
232 : | c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \ | ||
233 : | r = vec_perm(vec_ld(0, ref), vec_ld(16, ref), vec_lvsl(0, ref)); \ | ||
234 : | cs = (vector signed short)vec_mergeh(z, c); \ | ||
235 : | rs = (vector signed short)vec_mergeh(z, r); \ | ||
236 : | *dct++ = vec_sub(cs, rs); \ | ||
237 : | cur += stride; \ | ||
238 : | ref += stride | ||
239 : | |||
240 : | void | ||
241 : | transfer_8to16subro_altivec_c(vector signed short *dct, | ||
242 : | uint8_t *cur, | ||
243 : | uint8_t *ref, | ||
244 : | uint32_t stride) | ||
245 : | { | ||
246 : | register vector unsigned char c; | ||
247 : | register vector unsigned char r; | ||
248 : | register vector unsigned char z; | ||
249 : | register vector signed short cs; | ||
250 : | register vector signed short rs; | ||
251 : | |||
252 : | #ifdef DEBUG | ||
253 : | /* if this is on, print alignment errors */ | ||
254 : | if(((unsigned long)dct) & 0xf) | ||
255 : | fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, dct: %x\n", dct); | ||
256 : | if(((unsigned long)cur) & 0x7) | ||
257 : | fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, cur: %x\n", cur); | ||
258 : | if(((unsigned long)ref) & 0x7) | ||
259 : | fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, ref: %x\n", ref); | ||
260 : | if(stride & 0x7) | ||
261 : | fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, stride: %u\n", stride); | ||
262 : | #endif | ||
263 : | |||
264 : | z = vec_splat_u8(0); | ||
265 : | |||
266 : | SUBRO8TO16(); | ||
267 : | SUBRO8TO16(); | ||
268 : | SUBRO8TO16(); | ||
269 : | SUBRO8TO16(); | ||
270 : | |||
271 : | SUBRO8TO16(); | ||
272 : | SUBRO8TO16(); | ||
273 : | SUBRO8TO16(); | ||
274 : | SUBRO8TO16(); | ||
275 : | } | ||
276 : | |||
277 : | |||
278 : | /* | ||
279 : | * This function assumes: | ||
280 : | * dct: 16 bytes alignment | ||
281 : | * cur: 8 bytes alignment | ||
282 : | * ref1: unaligned | ||
283 : | * ref2: unaligned | ||
284 : | * stride: multiple of 8 | ||
285 : | */ | ||
286 : | |||
287 : | #define SUB28TO16() \ | ||
288 : | r1 = vec_perm(vec_ld(0, ref1), vec_ld(16, ref1), vec_lvsl(0, ref1)); \ | ||
289 : | r2 = vec_perm(vec_ld(0, ref2), vec_ld(16, ref2), vec_lvsl(0, ref2)); \ | ||
290 : | c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \ | ||
291 : | r = vec_avg(r1, r2); \ | ||
292 : | vec_st(vec_sel(r, vec_ld(0, cur), vec_perm(mask, mask, vec_lvsl(0, cur))), 0, cur); \ | ||
293 : | cs = (vector signed short)vec_mergeh(vec_splat_u8(0), c); \ | ||
294 : | rs = (vector signed short)vec_mergeh(vec_splat_u8(0), r); \ | ||
295 : | *dct++ = vec_sub(cs, rs); \ | ||
296 : | cur += stride; \ | ||
297 : | ref1 += stride; \ | ||
298 : | ref2 += stride | ||
299 : | |||
300 : | void | ||
301 : | transfer_8to16sub2_altivec_c(vector signed short *dct, | ||
302 : | uint8_t *cur, | ||
303 : | uint8_t *ref1, | ||
304 : | uint8_t *ref2, | ||
305 : | const uint32_t stride) | ||
306 : | { | ||
307 : | vector unsigned char r1; | ||
308 : | vector unsigned char r2; | ||
309 : | vector unsigned char r; | ||
310 : | vector unsigned char c; | ||
311 : | vector unsigned char mask; | ||
312 : | vector signed short cs; | ||
313 : | vector signed short rs; | ||
314 : | |||
315 : | #ifdef DEBUG | ||
316 : | /* Dump alignment erros if DEBUG is set */ | ||
317 : | if(((unsigned long)dct) & 0xf) | ||
318 : | fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %x\n", dct); | ||
319 : | if(((unsigned long)cur) & 0x7) | ||
320 : | fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, cur: %x\n", cur); | ||
321 : | if(stride & 0x7) | ||
322 : | fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %u\n", stride); | ||
323 : | #endif | ||
324 : | |||
325 : | /* Initialisation */ | ||
326 : | mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); | ||
327 : | |||
328 : | SUB28TO16(); | ||
329 : | SUB28TO16(); | ||
330 : | SUB28TO16(); | ||
331 : | SUB28TO16(); | ||
332 : | |||
333 : | SUB28TO16(); | ||
334 : | SUB28TO16(); | ||
335 : | SUB28TO16(); | ||
336 : | SUB28TO16(); | ||
337 : | } | ||
338 : | |||
339 : | |||
340 : | |||
341 : | /* | ||
342 : | * This function assumes: | ||
343 : | * dst: 8 byte aligned | ||
344 : | * src: unaligned | ||
345 : | * stride: multiple of 8 | ||
346 : | */ | ||
347 : | |||
348 : | #define ADD16TO8() \ | ||
349 : | s = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \ | ||
350 : | d = vec_perm(vec_ld(0, dst), vec_ld(16, dst), vec_lvsl(0, dst)); \ | ||
351 : | ds = (vector signed short)vec_mergeh(vec_splat_u8(0), d); \ | ||
352 : | ds = vec_add(ds, s); \ | ||
353 : | packed = vec_packsu(ds, vec_splat_s16(0)); \ | ||
354 : | mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); \ | ||
355 : | mask = vec_perm(mask, mask, vec_lvsl(0, dst)); \ | ||
356 : | packed = vec_perm(packed, packed, vec_lvsl(0, dst)); \ | ||
357 : | packed = vec_sel(packed, vec_ld(0, dst), mask); \ | ||
358 : | vec_st(packed, 0, dst); \ | ||
359 : | src += 8; \ | ||
360 : | dst += stride | ||
361 : | |||
362 : | void | ||
363 : | transfer_16to8add_altivec_c(uint8_t *dst, | ||
364 : | int16_t *src, | ||
365 : | uint32_t stride) | ||
366 : | { | ||
367 : | vector signed short s; | ||
368 : | vector signed short ds; | ||
369 : | vector unsigned char d; | ||
370 : | vector unsigned char packed; | ||
371 : | vector unsigned char mask; | ||
372 : | |||
373 : | #ifdef DEBUG | ||
374 : | /* if this is set, dump alignment errors */ | ||
375 : | if(((unsigned long)dst) & 0x7) | ||
376 : | fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %x\n", dst); | ||
377 : | if(stride & 0x7) | ||
378 : | fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %u\n", stride); | ||
379 : | #endif | ||
380 : | |||
381 : | ADD16TO8(); | ||
382 : | ADD16TO8(); | ||
383 : | ADD16TO8(); | ||
384 : | ADD16TO8(); | ||
385 : | |||
386 : | ADD16TO8(); | ||
387 : | ADD16TO8(); | ||
388 : | ADD16TO8(); | ||
389 : | ADD16TO8(); | ||
390 : | } |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |