
Diff of /trunk/xvidcore/src/motion/x86_asm/sad_mmx.asm


Diff between revision 262 (Sun Jul 7 09:45:40 2002 UTC) and revision 652 (Sun Nov 17 00:35:33 2002 UTC)
# Line 1  Line 1
- ;/**************************************************************************
+ ;/*****************************************************************************
  ; *
  ; *     XVID MPEG-4 VIDEO CODEC
  ; *     mmx sum of absolute difference
  ; *
- ; *     This program is free software; you can redistribute it and/or modify
- ; *     it under the terms of the GNU General Public License as published by
+ ; *  Copyright(C) 2002 Peter Ross <pross@xvid.org>
+ ; *
+ ; *  This file is part of XviD, a free MPEG-4 video encoder/decoder
+ ; *
+ ; *  XviD is free software; you can redistribute it and/or modify it
+ ; *  under the terms of the GNU General Public License as published by
  ; *     the Free Software Foundation; either version 2 of the License, or
  ; *     (at your option) any later version.
  ; *
# Line 15  Line 19
  ; *
  ; *     You should have received a copy of the GNU General Public License
  ; *     along with this program; if not, write to the Free Software
- ; *     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  ; *
- ; *************************************************************************/
- ;/**************************************************************************
- ; *
- ; *     History:
- ; *
- ; * 23.07.2002  sad[16,8]bi_xmm; <pross@xvid.org>
- ; * 04.06.2002  cleanup -Skal-
- ; *     12.11.2001      inital version; (c)2001 peter ross <pross@cs.rmit.edu.au>
- ; *
- ; *************************************************************************/
+ ; *  Under section 8 of the GNU General Public License, the copyright
+ ; *  holders of XVID explicitly forbid distribution in the following
+ ; *  countries:
+ ; *
+ ; *    - Japan
+ ; *    - United States of America
+ ; *
+ ; *  Linking XviD statically or dynamically with other modules is making a
+ ; *  combined work based on XviD.  Thus, the terms and conditions of the
+ ; *  GNU General Public License cover the whole combination.
+ ; *
+ ; *  As a special exception, the copyright holders of XviD give you
+ ; *  permission to link XviD with independent modules that communicate with
+ ; *  XviD solely through the VFW1.1 and DShow interfaces, regardless of the
+ ; *  license terms of these independent modules, and to copy and distribute
+ ; *  the resulting combined work under terms of your choice, provided that
+ ; *  every copy of the combined work is accompanied by a complete copy of
+ ; *  the source code of XviD (the version of XviD used to produce the
+ ; *  combined work), being distributed under the terms of the GNU General
+ ; *  Public License plus this exception.  An independent module is a module
+ ; *  which is not derived from or based on XviD.
+ ; *
+ ; *  Note that people who make modified versions of XviD are not obligated
+ ; *  to grant this special exception for their modified versions; it is
+ ; *  their choice whether to do so.  The GNU General Public License gives
+ ; *  permission to release a modified version without this exception; this
+ ; *  exception also makes it possible to release a modified version which
+ ; *  carries forward this exception.
+ ; *
+ ; * $Id: sad_mmx.asm,v 1.10 2002-11-17 00:32:06 edgomez Exp $
+ ; *
+ ; ****************************************************************************/

  bits 32

  %macro cglobal 1
          %ifdef PREFIX
                  global _%1
                  %define %1 _%1
          %else
# Line 57  Line 83 
  ; uint32_t sad16_mmx(const uint8_t * const cur,
  ;                                        const uint8_t * const ref,
  ;                                        const uint32_t stride,
+ ;                                        const uint32_t best_sad);
+ ;
  ; (early termination ignore; slows this down)
  ;
  ;===========================================================================
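The best_sad argument is new to the prototype in this revision, but as the comment above says it is ignored: checking it for early termination slowed the MMX version down. For reference, a scalar C sketch of what sad16_mmx computes (the body below paraphrases the prototype's contract; it is not code from the repository):

#include <stdint.h>
#include <stdlib.h>

/* Scalar equivalent of sad16_mmx: sum of absolute differences over a
   16x16 block. best_sad is accepted but unused, matching the
   "early termination" note above. */
uint32_t sad16_c(const uint8_t *cur, const uint8_t *ref,
                 uint32_t stride, uint32_t best_sad)
{
    uint32_t sad = 0;
    (void)best_sad;
    for (int y = 0; y < 16; y++) {
        for (int x = 0; x < 16; x++)
            sad += abs(cur[x] - ref[x]);
        cur += stride;
        ref += stride;
    }
    return sad;
}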
# Line 206  Line 234 
  ;
  ; uint32_t sad16bi_mmx(const uint8_t * const cur,
  ; const uint8_t * const ref1,
+ ; const uint8_t * const ref2,
+ ; const uint32_t stride);
+ ;
+ ;===========================================================================
+ %macro SADBI_16x16_MMX 2    ; SADBI_16x16_MMX( int_ptr_offset, bool_increment_ptr );
+
+    movq mm0, [edx+%1]
+    movq mm2, [ebx+%1]
+    movq mm1, mm0
+    movq mm3, mm2
+
+ %if %2 != 0
+    add edx, ecx
+ %endif
+
+    punpcklbw mm0, mm7
+    punpckhbw mm1, mm7
+    punpcklbw mm2, mm7
+    punpckhbw mm3, mm7
+
+ %if %2 != 0
+    add ebx, ecx
+ %endif
+
+    paddusw mm0, mm2            ; mm01 = ref1 + ref2
+    paddusw mm1, mm3
+    paddusw mm0, [mmx_one]      ; mm01 += 1
+    paddusw mm1, [mmx_one]
+    psrlw mm0, 1                ; mm01 >>= 1
+    psrlw mm1, 1
+
+    movq mm2, [eax+%1]
+    movq mm3, mm2
+    punpcklbw mm2, mm7          ; mm23 = src
+    punpckhbw mm3, mm7
+
+ %if %2 != 0
+    add eax, ecx
+ %endif
+
+    movq mm4, mm0
+    movq mm5, mm1
+    psubusw mm0, mm2
+    psubusw mm1, mm3
+    psubusw mm2, mm4
+    psubusw mm3, mm5
+    por mm0, mm2                ; mm01 = ABS(mm01 - mm23)
+    por mm1, mm3
+
+    paddusw mm6, mm0            ; mm6 += mm01
+    paddusw mm6, mm1
+
+ %endmacro
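Each invocation of SADBI_16x16_MMX handles 8 pixels: the ref1/ref2 bytes are unpacked to words against the zeroed mm7, averaged with rounding via paddusw/psrlw, and the absolute difference against the source is built from the two saturating subtractions ORed together (the smaller operand saturates to zero, so por yields |a - b|). In scalar terms, per pixel (my reading of the macro, not repository code):

#include <stdint.h>

/* Per-pixel operation of SADBI_16x16_MMX: rounded average of the two
   references, then absolute difference against the source. */
static inline uint32_t sadbi_pixel(uint8_t src, uint8_t r1, uint8_t r2)
{
    uint32_t avg = (r1 + r2 + 1) >> 1;         /* paddusw, paddusw mmx_one, psrlw */
    return avg > src ? avg - src : src - avg;  /* psubusw x2, por */
}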
+
+ align 16
+ sad16bi_mmx:
+    push ebx
+    mov eax, [esp+4+ 4] ; Src
+    mov edx, [esp+4+ 8] ; Ref1
+    mov ebx, [esp+4+12] ; Ref2
+    mov ecx, [esp+4+16] ; Stride
+
+    pxor mm6, mm6 ; accum2
+    pxor mm7, mm7
+ .Loop
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+    SADBI_16x16_MMX 0, 0
+    SADBI_16x16_MMX 8, 1
+
+    pmaddwd mm6, [mmx_one] ; collapse
+    movq mm7, mm6
+    psrlq mm7, 32
+    paddd mm6, mm7
+
+    movd eax, mm6
+    pop ebx
+    ret
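Despite the .Loop label, sad16bi_mmx never branches back: the body is fully unrolled as 16 rows of two 8-pixel halves. The closing pmaddwd against [mmx_one] is the usual MMX horizontal-sum step, multiplying each word lane by 1 and adding pairs into two dwords, which psrlq/paddd then fold into one. A scalar C reference for the whole function (a sketch built on the prototype above; names other than sad16bi_mmx are mine):

#include <stdint.h>

/* Scalar equivalent of sad16bi_mmx: SAD between cur and the rounded
   average of ref1 and ref2, over a 16x16 block. */
uint32_t sad16bi_c(const uint8_t *cur, const uint8_t *ref1,
                   const uint8_t *ref2, uint32_t stride)
{
    uint32_t sad = 0;
    for (int y = 0; y < 16; y++) {
        for (int x = 0; x < 16; x++) {
            int avg = (ref1[x] + ref2[x] + 1) >> 1;
            sad += avg > cur[x] ? avg - cur[x] : cur[x] - avg;
        }
        cur += stride;
        ref1 += stride;
        ref2 += stride;
    }
    return sad;
}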
+
+ ;===========================================================================
+ ;
+ ; uint32_t sad8bi_mmx(const uint8_t * const cur,
+ ; const uint8_t * const ref1,
+ ; const uint8_t * const ref2,
+ ; const uint32_t stride);
+ ;
+ ;===========================================================================
+ align 16
+ sad8bi_mmx:
+    push ebx
+    mov eax, [esp+4+ 4] ; Src
+    mov edx, [esp+4+ 8] ; Ref1
+    mov ebx, [esp+4+12] ; Ref2
+    mov ecx, [esp+4+16] ; Stride
+
+    pxor mm6, mm6 ; accum2
+    pxor mm7, mm7
+ .Loop
+    SADBI_16x16_MMX 0, 1
+    SADBI_16x16_MMX 0, 1
+    SADBI_16x16_MMX 0, 1
+    SADBI_16x16_MMX 0, 1
+    SADBI_16x16_MMX 0, 1
+    SADBI_16x16_MMX 0, 1
+    SADBI_16x16_MMX 0, 1
+    SADBI_16x16_MMX 0, 1
+
+    pmaddwd mm6, [mmx_one] ; collapse
+    movq mm7, mm6
+    psrlq mm7, 32
+    paddd mm6, mm7
+
+    movd eax, mm6
+    pop ebx
+    ret
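sad8bi_mmx reuses the same macro with offset 0 and the increment flag always set, so each invocation covers one full 8-pixel row and the eight unrolled invocations walk an 8x8 block. The scalar equivalent only changes the loop bounds (same caveats as the sketch above):

#include <stdint.h>

/* Scalar equivalent of sad8bi_mmx: as sad16bi_c, but over an 8x8 block. */
uint32_t sad8bi_c(const uint8_t *cur, const uint8_t *ref1,
                  const uint8_t *ref2, uint32_t stride)
{
    uint32_t sad = 0;
    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            int avg = (ref1[x] + ref2[x] + 1) >> 1;
            sad += avg > cur[x] ? avg - cur[x] : cur[x] - avg;
        }
        cur += stride;
        ref1 += stride;
        ref2 += stride;
    }
    return sad;
}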
+
+
+
+
+ ;===========================================================================
+ ;
+ ; uint32_t dev16_mmx(const uint8_t * const cur,
+ ;                                       const uint32_t stride);
+ ;
+ ;===========================================================================
+
+ %macro MEAN_16x16_MMX 0
     movq mm0, [eax]
     movq mm2, [eax+8]
     lea eax, [eax+ecx]
# Line 326  Line 510

     movd eax, mm6
     ret
+
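The diff elides the body of dev16_mmx (the hunk jumps from line 392 to line 510), but the MEAN_16x16_MMX helper and the prototype suggest the usual XviD deviation metric: the sum of absolute deviations of a 16x16 block from its mean. A scalar sketch under that assumption; the mean-rounding details of the elided assembly may differ:

#include <stdint.h>
#include <stdlib.h>

/* Scalar sketch of dev16_mmx: sum of absolute deviations of a 16x16
   block from its integer mean (assumption: mean = sum / 256, truncated). */
uint32_t dev16_c(const uint8_t *cur, uint32_t stride)
{
    uint32_t sum = 0, dev = 0;
    const uint8_t *p = cur;
    for (int y = 0; y < 16; y++, p += stride)
        for (int x = 0; x < 16; x++)
            sum += p[x];
    uint32_t mean = sum / 256;
    for (int y = 0; y < 16; y++, cur += stride)
        for (int x = 0; x < 16; x++)
            dev += (uint32_t)abs((int)cur[x] - (int)mean);
    return dev;
}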
