[svn] / trunk / xvidcore / src / utils / ppc_asm / mem_transfer_altivec.c Repository:
ViewVC logotype

Diff of /trunk/xvidcore/src/utils/ppc_asm/mem_transfer_altivec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1569, Thu Dec 9 04:58:12 2004 UTC revision 1570, Thu Dec 9 23:02:54 2004 UTC
# Line 19  Line 19 
19   *  along with this program ; if not, write to the Free Software   *  along with this program ; if not, write to the Free Software
20   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
21   *   *
22   * $Id: mem_transfer_altivec.c,v 1.1 2004-04-05 20:36:37 edgomez Exp $   * $Id: mem_transfer_altivec.c,v 1.2 2004-12-09 23:02:54 edgomez Exp $
23   *   *
24   ****************************************************************************/   ****************************************************************************/
25    
# Line 35  Line 35 
35    
36  #include <stdio.h>  #include <stdio.h>
37    
38  /*  /* This function assumes:
39   * This Function assumes dst is 16 byte aligned src is unaligned and stride is   *      dst: 16 byte aligned
  * a multiple of 16.  
40   */   */
41    
42  #define COPY8TO16() \  #define COPY8TO16() \
43  s = vec_perm(vec_ld(0, src), vec_ld(16, src), perm); /* load the next 8 bytes */ \  s = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));\
44  *dst++ = (vector signed short)vec_mergeh(zerovec, s); /* convert and save */ \  vec_st((vector signed short)vec_mergeh(zerovec,s),0,dst);\
45  src += stride  src += stride;\
46    dst += 8
47    
48  void  void
49  transfer_8to16copy_altivec_c(vector signed short *dst,  transfer_8to16copy_altivec_c(int16_t *dst,
50                              uint8_t * src,                              uint8_t * src,
51                              uint32_t stride)                              uint32_t stride)
52  {  {
     register vector unsigned char perm;  
53      register vector unsigned char s;      register vector unsigned char s;
54      register vector unsigned char zerovec;      register vector unsigned char zerovec;
55    
56  #ifdef DEBUG  #ifdef DEBUG
57      /* check the alignment */          /* Check the alignment */
58      if(((unsigned long)dst) & 0xf)          if((long)dst & 0xf)
59          fprintf(stderr, "transfer_8to16copy_altivec:incorrect align, dst: %x\n", dst);                  fprintf(stderr, "transfer_8to16copy_altivec_c:incorrect align, dst: %lx\n", (long)dst);
     if(stride & 0xf)  
         fprintf(stderr, "transfer_8to16copy_altivec:incorrect align, stride: %u\n", stride);  
60  #endif  #endif
61    
62      /* initialisation */          /* initialization */
     perm = vec_lvsl(0, src);  
63      zerovec = vec_splat_u8(0);      zerovec = vec_splat_u8(0);
64    
     /* to the actual copy */  
65      COPY8TO16();      COPY8TO16();
66      COPY8TO16();      COPY8TO16();
67      COPY8TO16();      COPY8TO16();
# Line 107  Line 102 
102  #ifdef DEBUG  #ifdef DEBUG
103      /* if this is on, print alignment errors */      /* if this is on, print alignment errors */
104      if(((unsigned long) dst) & 0x7)      if(((unsigned long) dst) & 0x7)
105          fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, dst %x\n", dst);          fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, dst %lx\n", (long)dst);
106      if(stride & 0x7)      if(stride & 0x7)
107          fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, stride %u\n", stride);          fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, stride %u\n", stride);
108  #endif  #endif
# Line 135  Line 130 
130    
131  #define COPY8TO8() \  #define COPY8TO8() \
132  tmp = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \  tmp = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \
133  tmp = vec_sel(vec_perm(tmp, tmp, vec_lvsl(0, dst)), vec_ld(0, dst), vec_perm(mask, mask, vec_lvsl(0, dst))); \  t0 = vec_perm(tmp, tmp, vec_lvsl(0, dst));\
134    t1 = vec_perm(mask, mask, vec_lvsl(0, dst));\
135    tmp = vec_sel(t0, vec_ld(0, dst), t1);\
136  vec_st(tmp, 0, dst); \  vec_st(tmp, 0, dst); \
137  dst += stride; \  dst += stride; \
138  src += stride  src += stride
# Line 147  Line 144 
144  {  {
145      register vector unsigned char tmp;      register vector unsigned char tmp;
146      register vector unsigned char mask;      register vector unsigned char mask;
147            register vector unsigned char t0, t1;
148    
149  #ifdef DEBUG  #ifdef DEBUG
150      if(((unsigned long)dst) & 0x7)      if(((unsigned long)dst) & 0x7)
151          fprintf("transfer8x8_copy_altivec:incorrect align, dst: %x\n", dst);          fprintf(stderr, "transfer8x8_copy_altivec:incorrect align, dst: %lx\n", (long)dst);
152      if(stride & 0x7)      if(stride & 0x7)
153          fprintf("transfer8x8_copy_altivec:incorrect stride, stride: %u\n", stride);          fprintf(stderr, "transfer8x8_copy_altivec:incorrect stride, stride: %u\n", stride);
154  #endif  #endif
155      mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));      mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));
156    
# Line 168  Line 166 
166  }  }
167    
168    
 /*  
  * This function assumes dct is 16 bytes aligned, cur and ref are 8 bytes  
  * aligned and stride is a multiple of 8  
  */  
   
169  #define SUB8TO16() \  #define SUB8TO16() \
170  c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \  c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \
171  r = vec_perm(vec_ld(0, ref), vec_ld(16, ref), vec_lvsl(0, ref)); \  r = vec_perm(vec_ld(0, ref), vec_ld(16, ref), vec_lvsl(0, ref)); \
172  t = vec_sel(vec_perm(r, r, vec_lvsl(0, cur)), vec_ld(0, cur), vec_perm(mask, mask, vec_lvsl(0, cur))); \          cs = (vector signed short)vec_mergeh(ox00,c);\
173  vec_st(t, 0, cur); \          rs = (vector signed short)vec_mergeh(ox00,r);\
174  t = vec_splat_u8(0); \          \
175  cs = (vector signed short)vec_mergeh(t, c); \          c = vec_lvsr(0,cur);\
176  rs = (vector signed short)vec_mergeh(t, r); \          mask = vec_perm(mask_00ff, mask_00ff, c);\
177  *dct++ = vec_sub(cs, rs); \          r = vec_perm(r, r, c);\
178            r = vec_sel(r, vec_ld(0,cur), mask);\
179            vec_st(r,0,cur);\
180            vec_st( vec_sub(cs,rs), 0, dct );\
181            \
182            dct += 8;\
183  cur += stride; \  cur += stride; \
184  ref += stride  ref += stride
185    
186    
187    /* This function assumes:
188     *      dct: 16 Byte aligned
189     *      cur:  8 Byte aligned
190     *      stride: multiple of 8
191     */
192    
193  void  void
194  transfer_8to16sub_altivec_c(vector signed short *dct,  transfer_8to16sub_altivec_c(int16_t * dct,
195                      uint8_t *cur,                      uint8_t *cur,
196                      uint8_t *ref,                      uint8_t *ref,
197                      uint32_t stride)                                                          const uint32_t stride)
198  {  {
199      vector unsigned char c;          register vector unsigned char c,r;
200      vector unsigned char r;          register vector unsigned char ox00;
201      vector unsigned char t;          register vector unsigned char mask_00ff;
202      vector unsigned char mask;          register vector unsigned char mask;
203      vector signed short cs;          register vector signed short cs,rs;
     vector signed short rs;  
204    
205  #ifdef DEBUG  #ifdef DEBUG
206      if(((unsigned long)dct) & 0xf)          if((long)dct & 0xf)
207          fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, dct: %x\n", dct);                  fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, dct: %lx\n", (long)dct);
208      if(((unsigned long)cur) & 0x7)          if((long)cur & 0x7)
209          fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, cur: %x\n", cur);                  fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, cur: %lx\n", (long)cur);
     if(((unsigned long)ref) & 0x7)  
         fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, ref: %x\n", ref);  
210      if(stride & 0x7)      if(stride & 0x7)
211          fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, stride: %u\n", stride);                  fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect stride, stride: %lu\n", (long)stride);
212  #endif  #endif
213            /* initialize */
214      /* Initialisation */          ox00 = vec_splat_u8(0);
215      mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));          mask_00ff = vec_pack((vector unsigned short)ox00,vec_splat_u16(-1));
216    
217      SUB8TO16();      SUB8TO16();
218      SUB8TO16();      SUB8TO16();
# Line 223  Line 225 
225      SUB8TO16();      SUB8TO16();
226  }  }
227    
 /*  
   * This function assumes that dct is 16 bytes aligned, cur and ref is 8 bytes aligned  
   * and stride is a multiple of 8  
 */  
228    
229  #define SUBRO8TO16() \  #define SUBRO8TO16() \
230  c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \  c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \
231  r = vec_perm(vec_ld(0, ref), vec_ld(16, ref), vec_lvsl(0, ref)); \  r = vec_perm(vec_ld(0, ref), vec_ld(16, ref), vec_lvsl(0, ref)); \
232  cs = (vector signed short)vec_mergeh(z, c); \  cs = (vector signed short)vec_mergeh(z, c); \
233  rs = (vector signed short)vec_mergeh(z, r); \  rs = (vector signed short)vec_mergeh(z, r); \
234  *dct++ = vec_sub(cs, rs); \          vec_st( vec_sub(cs,rs), 0, dct );\
235            dct += 8;\
236  cur += stride; \  cur += stride; \
237  ref += stride  ref += stride
238    
239    
240    /* This function assumes:
241     *      dct: 16 Byte aligned
242     */
243    
244  void  void
245  transfer_8to16subro_altivec_c(vector signed short *dct,  transfer_8to16subro_altivec_c(int16_t * dct,
246                        uint8_t *cur,                                          const uint8_t * cur,
247                        uint8_t *ref,                                          const uint8_t * ref,
248                        uint32_t stride)                                          const uint32_t stride)
249  {  {
250      register vector unsigned char c;      register vector unsigned char c;
251      register vector unsigned char r;      register vector unsigned char r;
# Line 250  Line 254 
254      register vector signed short rs;      register vector signed short rs;
255    
256  #ifdef DEBUG  #ifdef DEBUG
257      /* if this is on, print alignment errors */          /* Check the alignment assumptions if this is on */
258      if(((unsigned long)dct) & 0xf)          if((long)dct & 0xf)
259          fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, dct: %x\n", dct);                  fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, dct: %lx\n", (long)dct);
     if(((unsigned long)cur) & 0x7)  
         fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, cur: %x\n", cur);  
     if(((unsigned long)ref) & 0x7)  
         fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, ref: %x\n", ref);  
     if(stride & 0x7)  
         fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, stride: %u\n", stride);  
260  #endif  #endif
261            /* initialize */
262      z = vec_splat_u8(0);      z = vec_splat_u8(0);
263    
264      SUBRO8TO16();      SUBRO8TO16();
# Line 274  Line 272 
272      SUBRO8TO16();      SUBRO8TO16();
273  }  }
274    
   
275  /*  /*
276   * This function assumes:   * This function assumes:
277   *  dct: 16 bytes alignment   *  dct: 16 bytes alignment
# Line 289  Line 286 
286  r2 = vec_perm(vec_ld(0, ref2), vec_ld(16, ref2), vec_lvsl(0, ref2)); \  r2 = vec_perm(vec_ld(0, ref2), vec_ld(16, ref2), vec_lvsl(0, ref2)); \
287  c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \  c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \
288  r = vec_avg(r1, r2); \  r = vec_avg(r1, r2); \
 vec_st(vec_sel(r, vec_ld(0, cur), vec_perm(mask, mask, vec_lvsl(0, cur))), 0, cur); \  
289  cs = (vector signed short)vec_mergeh(vec_splat_u8(0), c); \  cs = (vector signed short)vec_mergeh(vec_splat_u8(0), c); \
290  rs = (vector signed short)vec_mergeh(vec_splat_u8(0), r); \  rs = (vector signed short)vec_mergeh(vec_splat_u8(0), r); \
291    c = vec_perm(mask, mask, vec_lvsl(0, cur));\
292    r = vec_sel(r, vec_ld(0, cur), c);\
293    vec_st(r, 0, cur); \
294  *dct++ = vec_sub(cs, rs); \  *dct++ = vec_sub(cs, rs); \
295  cur += stride; \  cur += stride; \
296  ref1 += stride; \  ref1 += stride; \
# Line 315  Line 314 
314  #ifdef DEBUG  #ifdef DEBUG
315      /* Dump alignment errors if DEBUG is set */      /* Dump alignment errors if DEBUG is set */
316      if(((unsigned long)dct) & 0xf)      if(((unsigned long)dct) & 0xf)
317          fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %x\n", dct);          fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %lx\n", (long)dct);
318      if(((unsigned long)cur) & 0x7)      if(((unsigned long)cur) & 0x7)
319          fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, cur: %x\n", cur);          fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, cur: %lx\n", (long)cur);
320      if(stride & 0x7)      if(stride & 0x7)
321          fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %u\n", stride);          fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %u\n", stride);
322  #endif  #endif
# Line 373  Line 372 
372  #ifdef DEBUG  #ifdef DEBUG
373      /* if this is set, dump alignment errors */      /* if this is set, dump alignment errors */
374      if(((unsigned long)dst) & 0x7)      if(((unsigned long)dst) & 0x7)
375          fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %x\n", dst);          fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %lx\n", (long)dst);
376      if(stride & 0x7)      if(stride & 0x7)
377          fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %u\n", stride);          fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %u\n", stride);
378  #endif  #endif

Legend:
Removed from v.1569  
changed lines
  Added in v.1570

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4