[svn] / trunk / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Diff of /trunk/xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory | Revision Log | View Patch

revision 1423, Mon Apr 12 14:05:08 2004 UTC | revision 1570, Thu Dec 9 23:02:54 2004 UTC
# Line 17  Line 17 
17      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18    
19    
20      $Id: sad_altivec.c,v 1.10 2004-04-12 14:05:08 edgomez Exp $      $Id: sad_altivec.c,v 1.11 2004-12-09 23:02:54 edgomez Exp $
21  */  */
22    
23  #ifdef HAVE_ALTIVEC_H  #ifdef HAVE_ALTIVEC_H
# Line 46  Line 46 
46  /*  /*
47   * This function assumes cur and stride are 16 bytes aligned and ref is unaligned   * This function assumes cur and stride are 16 bytes aligned and ref is unaligned
48   */   */
49  unsigned long  
50  sad16_altivec_c(const vector unsigned char *cur,  uint32_t
51                            const vector unsigned char *ref,  sad16_altivec_c(vector unsigned char *cur,
52                            unsigned long stride,                            vector unsigned char *ref,
53                            const unsigned long best_sad)                            uint32_t stride,
54                              const uint32_t best_sad)
55  {  {
56          vector unsigned char perm;          vector unsigned char perm;
57          vector unsigned char t1, t2;          vector unsigned char t1, t2;
58          vector unsigned int sad;          vector unsigned int sad;
59          vector unsigned int sumdiffs;          vector unsigned int sumdiffs;
60          vector unsigned int best_vec;          vector unsigned int best_vec;
61          unsigned long result;          uint32_t result;
62    
63    
64  #ifdef DEBUG  #ifdef DEBUG
65          /* print alignment errors if DEBUG is on */          /* print alignment errors if DEBUG is on */
66          if (((unsigned long) cur) & 0xf)          if (((unsigned long) cur) & 0xf)
67                  fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);                  fprintf(stderr, "sad16_altivec:incorrect align, cur: %lx\n", (long)cur);
68          if (stride & 0xf)          if (stride & 0xf)
69                  fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);                  fprintf(stderr, "sad16_altivec:incorrect align, stride: %lu\n", stride);
70  #endif  #endif
71          /* initialization */          /* initialization */
72          sad = vec_splat_u32(0);          sad = vec_splat_u32(0);
73          sumdiffs = sad;          sumdiffs = sad;
74          stride >>= 4;          stride >>= 4;
75          perm = vec_lvsl(0, (unsigned char *) ref);          perm = vec_lvsl(0, (unsigned char *) ref);
76          *((unsigned long *) &best_vec) = best_sad;          *((uint32_t*)&best_vec) = best_sad;
77          best_vec = vec_splat(best_vec, 0);          best_vec = vec_splat(best_vec, 0);
78    
79          /* perform sum of differences between current and previous */          /* perform sum of differences between current and previous */
# Line 99  Line 100 
100    bail:    bail:
101          /* copy vector sum into unaligned result */          /* copy vector sum into unaligned result */
102          sumdiffs = vec_splat(sumdiffs, 3);          sumdiffs = vec_splat(sumdiffs, 3);
103          vec_ste(sumdiffs, 0, (unsigned long *) &result);          vec_ste(sumdiffs, 0, (uint32_t*) &result);
104          return result;          return result;
105  }  }
106    
107    
108  #define SAD8() \  #define SAD8() \
109  t1  = vec_perm(cur[0], cur[stride], perm_cur);  /* align current vector  */ \          c = vec_perm(vec_ld(0,cur),vec_ld(16,cur),vec_lvsl(0,cur));\
110  t2  = vec_perm(ref[0], ref[1], perm_ref1);  /* align current vector  */ \          r = vec_perm(vec_ld(0,ref),vec_ld(16,ref),vec_lvsl(0,ref));\
111  tp  = vec_perm(ref[stride], ref[stride+1], perm_ref1);  /* align current vector  */ \          c = vec_sub(vec_max(c,r),vec_min(c,r));\
112  t2  = vec_perm(t2,tp,perm_ref2); \          sad = vec_sum4s(c,sad);\
113  tp  = vec_max(t1, t2);                  /* find largest of two           */ \          cur += stride;\
114  t1  = vec_min(t1, t2);                   /* find smaller of two           */ \          ref += stride
 tp  = vec_sub(tp, t1);                   /* find absolute difference      */ \  
 sad = vec_sum4s(tp, sad);                /* accumulate sum of differences */ \  
 cur += stride<<1; ref += stride<<1;  
115    
116  /*  /*
117   * This function assumes cur is 8 bytes aligned, stride is 16 bytes   * This function assumes nothing
  * aligned and ref is unaligned  
118   */   */
 unsigned long  
 sad8_altivec_c(const vector unsigned char *cur,  
                          const vector unsigned char *ref,  
                          unsigned long stride)  
 {  
         vector unsigned char t1, t2, tp;  
         vector unsigned int sad;  
         vector unsigned int sumdiffs;  
         vector unsigned char perm_cur;  
         vector unsigned char perm_ref1, perm_ref2;  
         unsigned long result;  
119    
120  #ifdef DEBUG  uint32_t
121          /* print alignment errors if DEBUG is on */  sad8_altivec_c(const uint8_t * cur,
122          if (((unsigned long) cur) & 0x7)             const uint8_t *ref,
123                  fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);             const uint32_t stride)
124          if (stride & 0xf)  {
125                  fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);          uint32_t result = 0;
 #endif  
126    
127          /* check if cur is 8 or 16 bytes aligned an create the perm_cur vector */          register vector unsigned int sad;
128          perm_ref1 = vec_lvsl(0, (unsigned char*)ref);          register vector unsigned char c;
129          perm_ref2 = vec_add(vec_lvsl(0, (unsigned char*)NULL), vec_pack(vec_splat_u16(0), vec_splat_u16(8)));          register vector unsigned char r;
         perm_cur = vec_add(perm_ref2, vec_splat(vec_lvsl(0, (unsigned char*)cur), 0));  
130    
131          /* initialization */          /* initialize */
132          sad = vec_splat_u32(0);          sad = vec_splat_u32(0);
         stride >>= 4;  
133    
134          /* perform sum of differences between current and previous */          /* Perform sad operations */
135          SAD8();          SAD8();
136          SAD8();          SAD8();
137          SAD8();          SAD8();
138          SAD8();          SAD8();
139    
140          /* sum all parts of difference into one 32 bit quantity */          SAD8();
141          sumdiffs = (vector unsigned int)vec_sums((vector signed int) sad, vec_splat_s32(0));          SAD8();
142            SAD8();
143            SAD8();
144    
145            /* finish addition, add the first 2 together */
146            sad = vec_and(sad, (vector unsigned int)vec_pack(vec_splat_u16(-1),vec_splat_u16(0)));
147            sad = (vector unsigned int)vec_sums((vector signed int)sad, vec_splat_s32(0));
148            sad = vec_splat(sad,3);
149            vec_ste(sad, 0, &result);
150    
         /* copy vector sum into unaligned result */  
         sumdiffs = vec_splat(sumdiffs, 3);  
         vec_ste(sumdiffs, 0, (unsigned int *) &result);  
151          return result;          return result;
152  }  }
153    
154    
155    
156    
157  #define MEAN16() \  #define MEAN16() \
158  mean = vec_sum4s(*ptr,mean);\  mean = vec_sum4s(*ptr,mean);\
159  ptr += stride  ptr += stride
# Line 179  Line 169 
169   * This function assumes cur is 16 bytes aligned and stride is 16 bytes   * This function assumes cur is 16 bytes aligned and stride is 16 bytes
170   * aligned   * aligned
171  */  */
172  unsigned long  
173  dev16_altivec_c(const vector unsigned char *cur,  uint32_t
174                            unsigned long stride)  dev16_altivec_c(vector unsigned char *cur,
175                              uint32_t stride)
176  {  {
177          vector unsigned char t2, t3, mn;          vector unsigned char t2, t3, mn;
178          vector unsigned int mean, dev;          vector unsigned int mean, dev;
179          vector unsigned int sumdiffs;          vector unsigned int sumdiffs;
180          const vector unsigned char *ptr;          vector unsigned char *ptr;
181          unsigned long result;          uint32_t result;
182    
183  #ifdef DEBUG  #ifdef DEBUG
184          /* print alignment errors if DEBUG is on */          /* print alignment errors if DEBUG is on */
185          if(((unsigned long)cur) & 0x7)          if(((unsigned long)cur) & 0x7)
186              fprintf(stderr, "dev16_altivec:incorrect align, cur: %x\n", cur);              fprintf(stderr, "dev16_altivec:incorrect align, cur: %lx\n", (long)cur);
187          if(stride & 0xf)          if(stride & 0xf)
188              fprintf(stderr, "dev16_altivec:incorrect align, stride: %ld\n", stride);              fprintf(stderr, "dev16_altivec:incorrect align, stride: %lu\n", stride);
189  #endif  #endif
190    
191          dev = mean = vec_splat_u32(0);          dev = mean = vec_splat_u32(0);
# Line 250  Line 241 
241    
242          /* copy vector sum into unaligned result */          /* copy vector sum into unaligned result */
243          sumdiffs = vec_splat(sumdiffs, 3);          sumdiffs = vec_splat(sumdiffs, 3);
244          vec_ste(sumdiffs, 0, (unsigned int *) &result);          vec_ste(sumdiffs, 0, (uint32_t*) &result);
245          return result;          return result;
246  }  }
247    
# Line 270  Line 261 
261   * This function assumes cur is 16 bytes aligned, stride is 16 bytes   * This function assumes cur is 16 bytes aligned, stride is 16 bytes
262   * aligned and ref1 and ref2 is unaligned   * aligned and ref1 and ref2 is unaligned
263  */  */
264  unsigned long  
265    uint32_t
266  sad16bi_altivec_c(vector unsigned char *cur,  sad16bi_altivec_c(vector unsigned char *cur,
267                          vector unsigned char *ref1,                          vector unsigned char *ref1,
268                          vector unsigned char *ref2,                          vector unsigned char *ref2,
269                          unsigned long stride)                          uint32_t stride)
270  {  {
271      vector unsigned char t1, t2;      vector unsigned char t1, t2;
272      vector unsigned char mask1, mask2;      vector unsigned char mask1, mask2;
273      vector unsigned char sad;      vector unsigned char sad;
274      vector unsigned int sum;      vector unsigned int sum;
275      unsigned long result;      uint32_t result;
276    
277  #ifdef DEBUG  #ifdef DEBUG
278      /* print alignment errors if this is on */      /* print alignment errors if this is on */
279      if(cur & 0xf)      if((long)cur & 0xf)
280          fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %x\n", cur);          fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lx\n", (long)cur);
281      if(stride & 0xf)      if(stride & 0xf)
282          fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %ld\n", stride);          fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lu\n", stride);
283  #endif  #endif
284    
285      /* Initialisation stuff */      /* Initialisation stuff */
# Line 319  Line 311 
311    
312      sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0));      sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0));
313      sum = vec_splat(sum, 3);      sum = vec_splat(sum, 3);
314      vec_ste(sum, 0, (unsigned int*)&result);      vec_ste(sum, 0, (uint32_t*)&result);
315    
316      return result;      return result;
317  }  }

Legend:
Removed from v.1423  
changed lines
  Added in v.1570

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4