[svn] / branches / dev-api-4 / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Diff of /branches/dev-api-4/xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory | Revision Log | View Patch

revision 98, Wed Apr 3 14:17:05 2002 UTC | revision 652, Sun Nov 17 00:35:33 2002 UTC
# Line 1  Line 1 
1  /*  /*****************************************************************************
2     *
3     *  XVID MPEG-4 VIDEO CODEC
4     *  - altivec sum of absolute difference (C version)
5     *
6     *  Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
7     *
8     *  This file is part of XviD, a free MPEG-4 video encoder/decoder
9     *
10     *  XviD is free software; you can redistribute it and/or modify it
11     *  under the terms of the GNU General Public License as published by
12     *  the Free Software Foundation; either version 2 of the License, or
13     *  (at your option) any later version.
14     *
15     *  This program is distributed in the hope that it will be useful,
16     *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17     *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18     *  GNU General Public License for more details.
19     *
20     *  You should have received a copy of the GNU General Public License
21     *  along with this program; if not, write to the Free Software
22     *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
23     *
24     *  Under section 8 of the GNU General Public License, the copyright
25     *  holders of XVID explicitly forbid distribution in the following
26     *  countries:
27     *
28     *    - Japan
29     *    - United States of America
30     *
31     *  Linking XviD statically or dynamically with other modules is making a
32     *  combined work based on XviD.  Thus, the terms and conditions of the
33     *  GNU General Public License cover the whole combination.
34     *
35     *  As a special exception, the copyright holders of XviD give you
36     *  permission to link XviD with independent modules that communicate with
37     *  XviD solely through the VFW1.1 and DShow interfaces, regardless of the
38     *  license terms of these independent modules, and to copy and distribute
39     *  the resulting combined work under terms of your choice, provided that
40     *  every copy of the combined work is accompanied by a complete copy of
41     *  the source code of XviD (the version of XviD used to produce the
42     *  combined work), being distributed under the terms of the GNU General
43     *  Public License plus this exception.  An independent module is a module
44     *  which is not derived from or based on XviD.
45     *
46     *  Note that people who make modified versions of XviD are not obligated
47     *  to grant this special exception for their modified versions; it is
48     *  their choice whether to do so.  The GNU General Public License gives
49     *  permission to release a modified version without this exception; this
50     *  exception also makes it possible to release a modified version which
51     *  carries forward this exception.
52     *
53     * $Id: sad_altivec.c,v 1.5 2002-11-17 00:32:06 edgomez Exp $
54     *
55     ****************************************************************************/
56    
57    #define G_REG
58    
59    #ifdef G_REG
60    register vector unsigned char perm0 asm("%v29");
61    register vector unsigned char perm1 asm("%v30");
62    register vector unsigned int zerovec asm("%v31");
63    #endif
64    
65      Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>  #include <stdio.h>
66    
67      This program is free software; you can redistribute it and/or modify  #undef DEBUG
     it under the terms of the GNU General Public License as published by  
     the Free Software Foundation; either version 2 of the License, or  
     (at your option) any later version.  
   
     This program is distributed in the hope that it will be useful,  
     but WITHOUT ANY WARRANTY; without even the implied warranty of  
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  
     GNU General Public License for more details.  
   
     You should have received a copy of the GNU General Public License  
     along with this program; if not, write to the Free Software  
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  
   
   
     $Id: sad_altivec.c,v 1.1 2002-04-03 14:17:05 canard Exp $  
     $Source: /home/xvid/cvs_copy/cvs-server-root/xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $  
     $Date: 2002-04-03 14:17:05 $  
     $Author: canard $  
68    
69  */  static const vector unsigned char perms[2] = {
70            (vector unsigned char) (        /* Used when cur is aligned */
71                                                               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
72                                                               0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17),
73            (vector unsigned char) (        /* Used when cur is unaligned */
74                                                               0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
75                                                               0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f),
76    };
77    
78  #include <stdio.h>  #ifdef G_REG
79    void
80    sadInit_altivec(void)
81    {
82            perm0 = perms[0];
83            perm1 = perms[1];
84            zerovec = (vector unsigned int) (0);
85    }
86    static inline const vector unsigned char
87    get_perm(unsigned long i)
88    {
89            return i ? perm1 : perm0;
90    }
91    
92    #define ZERODEF
93    #define ZEROVEC zerovec
94    #else
95    void
96    sadInit_altivec(void)
97    {
98    }
99    static inline const vector unsigned char
100    get_perm(unsigned long i)
101    {
102            return perms[i];
103    }
104    
105    #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0)
106    #define ZEROVEC zerovec
107    #endif
108    
 #undef DEBUG  
109    
110  #define SAD16() \  #define SAD16() \
111  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \
# Line 47  Line 126 
126  {  {
127    vector unsigned char perm;    vector unsigned char perm;
128    vector unsigned char t1, t2, t3, t4 ;    vector unsigned char t1, t2, t3, t4 ;
129    vector unsigned int sad, zero;          vector unsigned int sad;
130    vector signed int sumdiffs, best_vec;    vector signed int sumdiffs, best_vec;
131    unsigned long result;    unsigned long result;
132    
133            ZERODEF;
134    
135  #ifdef DEBUG  #ifdef DEBUG
136    if (((unsigned long)cur) & 0xf)    if (((unsigned long)cur) & 0xf)
137          fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);          fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
# Line 60  Line 141 
141          fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);          fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
142  #endif  #endif
143    /* initialization */    /* initialization */
144    zero = (vector unsigned int)(0);          sad = (vector unsigned int) (ZEROVEC);
   sad  = (vector unsigned int)(0);  
145    stride >>= 4;    stride >>= 4;
146    perm = vec_lvsl(0, (unsigned char *)ref);    perm = vec_lvsl(0, (unsigned char *)ref);
147    *((unsigned long *)&best_vec) = best_sad;    *((unsigned long *)&best_vec) = best_sad;
# Line 73  Line 153 
153    SAD16();    SAD16();
154    SAD16();    SAD16();
155    /* Temp sum for exit */    /* Temp sum for exit */
156    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
157    if (vec_all_ge(sumdiffs, best_vec))    if (vec_all_ge(sumdiffs, best_vec))
158          goto bail;          goto bail;
159    SAD16();    SAD16();
160    SAD16();    SAD16();
161    SAD16();    SAD16();
162    SAD16();    SAD16();
163    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
164    if (vec_all_ge(sumdiffs, best_vec))    if (vec_all_ge(sumdiffs, best_vec))
165          goto bail;          goto bail;
166    SAD16();    SAD16();
# Line 93  Line 173 
173    SAD16();    SAD16();
174    
175    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
176    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
177  bail:  bail:
178    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
179    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
# Line 112  Line 192 
192  sad = vec_sum4s(t5, sad);                /* accumulate sum of differences */ \  sad = vec_sum4s(t5, sad);                /* accumulate sum of differences */ \
193  cur += stride<<1; ref += stride<<1;  cur += stride<<1; ref += stride<<1;
194    
 static const vector unsigned char perms[2] = {  
         (vector unsigned char)( /* Used when cur is aligned */  
                 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  
                 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17  
         ),  
         (vector unsigned char)( /* Used when cur is unaligned */  
                 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,  
                 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f  
         ),  
 };  
   
195  /*  /*
196   * This function assumes cur is 8 bytes aligned, stride is 16 bytes   * This function assumes cur is 8 bytes aligned, stride is 16 bytes
197   * aligned and ref is unaligned   * aligned and ref is unaligned
# Line 133  Line 202 
202                  unsigned long stride)                  unsigned long stride)
203  {  {
204    vector unsigned char t1, t2, t3, t4, t5, tp ;    vector unsigned char t1, t2, t3, t4, t5, tp ;
205    vector unsigned int sad, zero;          vector unsigned int sad;
206    vector signed int sumdiffs;    vector signed int sumdiffs;
207    vector unsigned char perm_cur;    vector unsigned char perm_cur;
208    vector unsigned char perm_ref1, perm_ref2;    vector unsigned char perm_ref1, perm_ref2;
209    unsigned long result;    unsigned long result;
210    
211            ZERODEF;
212    
213  #ifdef DEBUG  #ifdef DEBUG
214    if (((unsigned long)cur) & 0x7)    if (((unsigned long)cur) & 0x7)
215          fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);          fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
# Line 148  Line 219 
219          fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);          fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
220  #endif  #endif
221    
222    perm_cur = perms[(((unsigned long)cur)>>3) & 0x01];          perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01);
223    perm_ref1 = vec_lvsl(0, (unsigned char *)ref);    perm_ref1 = vec_lvsl(0, (unsigned char *)ref);
224    perm_ref2 = perms[0];          perm_ref2 = get_perm(0);
225    
226    /* initialization */    /* initialization */
227    zero = (vector unsigned int)(0);          sad = (vector unsigned int) (ZEROVEC);
   sad  = (vector unsigned int)(0);  
228    stride >>= 4;    stride >>= 4;
229    
230    /* perform sum of differences between current and previous */    /* perform sum of differences between current and previous */
# Line 164  Line 234 
234    SAD8();    SAD8();
235    
236    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
237    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
238    
239    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
240    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
# Line 188  Line 258 
258                  unsigned long stride)                  unsigned long stride)
259  {  {
260    vector unsigned char t2,t3,t4, mn;    vector unsigned char t2,t3,t4, mn;
261    vector unsigned int mean, dev, zero;          vector unsigned int mean, dev;
262    vector signed int sumdiffs;    vector signed int sumdiffs;
263    vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15;          vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
264                    c13, c14, c15;
265    unsigned long result;    unsigned long result;
266    
267    zero = (vector unsigned int)(0);          ZERODEF;
268    mean = (vector unsigned int)(0);  
269    dev = (vector unsigned int)(0);          mean = (vector unsigned int) (ZEROVEC);
270            dev = (vector unsigned int) (ZEROVEC);
271    stride >>= 4;    stride >>= 4;
272    
273    MEAN16(0);    MEAN16(0);
# Line 215  Line 287 
287    MEAN16(14);    MEAN16(14);
288    MEAN16(15);    MEAN16(15);
289    
290    sumdiffs = vec_sums((vector signed int) mean, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);
291    mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs,          mn = vec_perm((vector unsigned char) sumdiffs,
292          (vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14));                                    (vector unsigned char) sumdiffs, (vector unsigned char) (14,
293                                                                                                                                                       14,
294                                                                                                                                                       14,
295                                                                                                                                                       14,
296                                                                                                                                                       14,
297                                                                                                                                                       14,
298                                                                                                                                                       14,
299                                                                                                                                                       14,
300                                                                                                                                                       14,
301                                                                                                                                                       14,
302                                                                                                                                                       14,
303                                                                                                                                                       14,
304                                                                                                                                                       14,
305                                                                                                                                                       14,
306                                                                                                                                                       14,
307                                                                                                                                                       14));
308    DEV16(0);    DEV16(0);
309    DEV16(1);    DEV16(1);
310    DEV16(2);    DEV16(2);
# Line 236  Line 323 
323    DEV16(15);    DEV16(15);
324    
325    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
326    sumdiffs = vec_sums((vector signed int) dev, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);
327    
328    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
329    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );

Legend:
Removed from v.98  
changed lines
  Added in v.652

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4