;/****************************************************************************
; *
; *  XVID MPEG-4 VIDEO CODEC
; *  - MMX and XMM YV12->YV12 conversion -
; *
; *  Copyright(C) 2001 Michael Militzer <isibaar@xvid.org>
; *
; *  This program is free software; you can redistribute it and/or modify it
; *  under the terms of the GNU General Public License as published by
; *  the Free Software Foundation; either version 2 of the License, or
; *  (at your option) any later version.
; *
; *  This program is distributed in the hope that it will be useful,
; *  but WITHOUT ANY WARRANTY; without even the implied warranty of
; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; *  GNU General Public License for more details.
; *
; *  You should have received a copy of the GNU General Public License
; *  along with this program; if not, write to the Free Software
; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
; *
; * $Id: colorspace_yuv_mmx.asm,v 1.6 2006-10-30 10:52:00 Skal Exp $
; *
; ***************************************************************************/

BITS 32
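
; cglobal declares an exported function: with PREFIX defined the symbol gets
; a leading underscore (for targets whose C symbols are underscore-decorated),
; and with MARK_FUNCS it is additionally tagged as an ELF function whose size
; extends to the .endfunc label that closes each routine.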
%macro cglobal 1
  %ifdef PREFIX
    %ifdef MARK_FUNCS
      global _%1:function %1.endfunc-%1
      %define %1 _%1:function %1.endfunc-%1
    %else
      global _%1
      %define %1 _%1
    %endif
  %else
    %ifdef MARK_FUNCS
      global %1:function %1.endfunc-%1
    %else
      global %1
    %endif
  %endif
%endmacro

;=============================================================================
; Helper macros
;=============================================================================

;------------------------------------------------------------------------------
; PLANE_COPY ( DST, DST_STRIDE, SRC, SRC_STRIDE, WIDTH, HEIGHT, OPT )
; DST          dst buffer
; DST_STRIDE   dst stride
; SRC          src buffer
; SRC_STRIDE   src stride
; WIDTH        width
; HEIGHT       height
; OPT          0=plain mmx, 1=xmm
;------------------------------------------------------------------------------
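; Each row is copied as width = 64*n + 16*m + r bytes: the n 64-byte and the
; m 16-byte blocks go through the MMX registers (with non-temporal prefetch
; and movntq stores in the xmm version), and the r leftover bytes are moved
; with rep movsb.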

%macro PLANE_COPY 7
%define DST         %1
%define DST_STRIDE  %2
%define SRC         %3
%define SRC_STRIDE  %4
%define WIDTH       %5
%define HEIGHT      %6
%define OPT         %7

  mov eax, WIDTH
  mov ebp, HEIGHT             ; $ebp$ = height
  mov esi, SRC
  mov edi, DST

  mov ebx, eax
  shr eax, 6                  ; $eax$ = width / 64
  and ebx, 63                 ; remainder = width % 64
  mov edx, ebx
  shr ebx, 4                  ; $ebx$ = remainder / 16
  and edx, 15                 ; $edx$ = remainder % 16

%%loop64_start_pc:
  push edi
  push esi
  mov ecx, eax                ; width64
  test eax, eax
  jz %%loop16_start_pc

%%loop64_pc:
%if OPT == 1                  ; xmm
  prefetchnta [esi + 64]      ; non-temporal prefetch
  prefetchnta [esi + 96]
%endif
  movq mm1, [esi     ]        ; read from src
  movq mm2, [esi +  8]
  movq mm3, [esi + 16]
  movq mm4, [esi + 24]
  movq mm5, [esi + 32]
  movq mm6, [esi + 40]
  movq mm7, [esi + 48]
  movq mm0, [esi + 56]

%if OPT == 0                  ; plain mmx
  movq [edi     ], mm1        ; write to dst
  movq [edi +  8], mm2
  movq [edi + 16], mm3
  movq [edi + 24], mm4
  movq [edi + 32], mm5
  movq [edi + 40], mm6
  movq [edi + 48], mm7
  movq [edi + 56], mm0
%else
  movntq [edi     ], mm1      ; write to dst, bypassing the cache
  movntq [edi +  8], mm2
  movntq [edi + 16], mm3
  movntq [edi + 24], mm4
  movntq [edi + 32], mm5
  movntq [edi + 40], mm6
  movntq [edi + 48], mm7
  movntq [edi + 56], mm0
%endif

  add esi, 64
  add edi, 64
  loop %%loop64_pc

%%loop16_start_pc:
  mov ecx, ebx                ; width16
  test ebx, ebx
  jz %%loop1_start_pc

%%loop16_pc:
  movq mm1, [esi]
  movq mm2, [esi + 8]
%if OPT == 0                  ; plain mmx
  movq [edi], mm1
  movq [edi + 8], mm2
%else
  movntq [edi], mm1
  movntq [edi + 8], mm2
%endif

  add esi, 16
  add edi, 16
  loop %%loop16_pc

%%loop1_start_pc:
  mov ecx, edx
  rep movsb

  pop esi
  pop edi
  add esi, SRC_STRIDE
  add edi, DST_STRIDE
  dec ebp
  jg near %%loop64_start_pc
%endmacro
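
; Invoked below as, e.g.:
;   PLANE_COPY [y_dst], [y_dst_stride], [y_src], [y_src_stride], [width], [height], OPT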

;------------------------------------------------------------------------------
; PLANE_FILL ( DST, DST_STRIDE, WIDTH, HEIGHT, OPT )
; DST          dst buffer
; DST_STRIDE   dst stride
; WIDTH        width
; HEIGHT       height
; OPT          0=plain mmx, 1=xmm
;------------------------------------------------------------------------------
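; Fills a plane with the byte 0x80 (the neutral chroma value): 0x80808080 is
; broadcast into all eight bytes of mm0 via movd + punpckldq, then stored in
; 64- and 16-byte blocks, with rep stosb covering the remainder.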

%macro PLANE_FILL 5
%define DST         %1
%define DST_STRIDE  %2
%define WIDTH       %3
%define HEIGHT      %4
%define OPT         %5

  mov esi, WIDTH
  mov ebp, HEIGHT             ; $ebp$ = height
  mov edi, DST

  mov eax, 0x80808080
  mov ebx, esi
  shr esi, 6                  ; $esi$ = width / 64
  and ebx, 63                 ; remainder = width % 64
  movd mm0, eax
  mov edx, ebx
  shr ebx, 4                  ; $ebx$ = remainder / 16
  and edx, 15                 ; $edx$ = remainder % 16
  punpckldq mm0, mm0

%%loop64_start_pf:
  push edi
  mov ecx, esi                ; width64
  test esi, esi
  jz %%loop16_start_pf

%%loop64_pf:

%if OPT == 0                  ; plain mmx
  movq [edi     ], mm0        ; write to dst
  movq [edi +  8], mm0
  movq [edi + 16], mm0
  movq [edi + 24], mm0
  movq [edi + 32], mm0
  movq [edi + 40], mm0
  movq [edi + 48], mm0
  movq [edi + 56], mm0
%else
  movntq [edi     ], mm0      ; write to dst, bypassing the cache
  movntq [edi +  8], mm0
  movntq [edi + 16], mm0
  movntq [edi + 24], mm0
  movntq [edi + 32], mm0
  movntq [edi + 40], mm0
  movntq [edi + 48], mm0
  movntq [edi + 56], mm0
%endif

  add edi, 64
  loop %%loop64_pf

%%loop16_start_pf:
  mov ecx, ebx                ; width16
  test ebx, ebx
  jz %%loop1_start_pf

%%loop16_pf:
%if OPT == 0                  ; plain mmx
  movq [edi    ], mm0
  movq [edi + 8], mm0
%else
  movntq [edi    ], mm0
  movntq [edi + 8], mm0
%endif

  add edi, 16
  loop %%loop16_pf

%%loop1_start_pf:
  mov ecx, edx
  rep stosb

  pop edi
  add edi, DST_STRIDE
  dec ebp
  jg near %%loop64_start_pf
%endmacro
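
; Invoked below as, e.g.:
;   PLANE_FILL [u_dst], [uv_dst_stride], [width2], [height2], OPT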

;------------------------------------------------------------------------------
; MAKE_YV12_TO_YV12( NAME, OPT )
; NAME         function name
; OPT          0=plain mmx, 1=xmm
;------------------------------------------------------------------------------

%macro MAKE_YV12_TO_YV12 2
%define NAME    %1
%define OPT     %2
ALIGN 16
cglobal NAME
NAME:
%define pushsize  16
%define localsize 12
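
; Stack layout after the prologue: localsize bytes of locals, pushsize (16)
; bytes of saved registers, then 4 bytes of return address, so the Nth
; (1-based) cdecl dword argument sits at esp + localsize + pushsize + 4*N;
; vflip, at +52, is the thirteenth argument.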

%define vflip          esp + localsize + pushsize + 52
%define height         esp + localsize + pushsize + 48
%define width          esp + localsize + pushsize + 44
%define uv_src_stride  esp + localsize + pushsize + 40
%define y_src_stride   esp + localsize + pushsize + 36
%define v_src          esp + localsize + pushsize + 32
%define u_src          esp + localsize + pushsize + 28
%define y_src          esp + localsize + pushsize + 24
%define uv_dst_stride  esp + localsize + pushsize + 20
%define y_dst_stride   esp + localsize + pushsize + 16
%define v_dst          esp + localsize + pushsize + 12
%define u_dst          esp + localsize + pushsize + 8
%define y_dst          esp + localsize + pushsize + 4

  push ebx
  push esi
  push edi
  push ebp

%define width2   esp + localsize - 4
%define height2  esp + localsize - 8

  sub esp, localsize

  mov eax, [width]
  mov ebx, [height]
  shr eax, 1                  ; width2  = width / 2
  shr ebx, 1                  ; height2 = height / 2
  mov [width2], eax
  mov [height2], ebx

  mov eax, [vflip]
  test eax, eax
  jz near .go

  ; flipping support
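  ; (each source plane is pointed at its last row and its stride negated,
  ;  so the copies below walk the source image bottom-up)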
  mov eax, [height]
  mov esi, [y_src]
  mov ecx, [y_src_stride]
  sub eax, 1
  imul eax, ecx
  add esi, eax                ; y_src += (height-1) * y_src_stride
  neg ecx
  mov [y_src], esi
  mov [y_src_stride], ecx     ; y_src_stride = -y_src_stride

  mov eax, [height2]
  mov esi, [u_src]
  mov edi, [v_src]
  mov ecx, [uv_src_stride]
  test esi, esi
  jz .go
  test edi, edi
  jz .go
  sub eax, 1                  ; eax = height2 - 1
  imul eax, ecx
  add esi, eax                ; u_src += (height2-1) * uv_src_stride
  add edi, eax                ; v_src += (height2-1) * uv_src_stride
  neg ecx
  mov [u_src], esi
  mov [v_src], edi
  mov [uv_src_stride], ecx    ; uv_src_stride = -uv_src_stride

.go:

  PLANE_COPY [y_dst], [y_dst_stride], [y_src], [y_src_stride], [width], [height], OPT

  mov eax, [u_src]
  or eax, [v_src]             ; if both chroma src pointers are NULL...
  jz near .UVFill_0x80        ; ...fill the chroma planes with 0x80 instead

  PLANE_COPY [u_dst], [uv_dst_stride], [u_src], [uv_src_stride], [width2], [height2], OPT
  PLANE_COPY [v_dst], [uv_dst_stride], [v_src], [uv_src_stride], [width2], [height2], OPT

.Done_UVPlane:
  add esp, localsize
  pop ebp
  pop edi
  pop esi
  pop ebx

  ret

.UVFill_0x80:
  PLANE_FILL [u_dst], [uv_dst_stride], [width2], [height2], OPT
  PLANE_FILL [v_dst], [uv_dst_stride], [width2], [height2], OPT
  jmp near .Done_UVPlane
.endfunc
%endmacro

;=============================================================================
; Code
;=============================================================================

SECTION .text

MAKE_YV12_TO_YV12 yv12_to_yv12_mmx, 0

MAKE_YV12_TO_YV12 yv12_to_yv12_xmm, 1
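
; yv12_to_yv12_mmx uses plain MMX loads/stores; yv12_to_yv12_xmm additionally
; uses prefetchnta and movntq (SSE extensions to MMX) to stream data past the
; cache. A hypothetical C-side declaration, assuming the argument layout given
; by the %defines above, would be:
;
;   void yv12_to_yv12_mmx(uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst,
;                         int y_dst_stride, int uv_dst_stride,
;                         uint8_t *y_src, uint8_t *u_src, uint8_t *v_src,
;                         int y_src_stride, int uv_src_stride,
;                         int width, int height, int vflip);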