[svn] / tags / release-0_9_2 / xvidcore / src / image / x86_asm / colorspace_yuv_mmx.asm Repository:
ViewVC logotype

View of /tags/release-0_9_2/xvidcore/src/image/x86_asm/colorspace_yuv_mmx.asm

Parent Directory Parent Directory | Revision Log Revision Log

Revision 851 - (download) (annotate)
Sat Feb 15 15:22:19 2003 UTC (21 years, 6 months ago) by edgomez
Original Path: trunk/xvidcore/src/image/x86_asm/colorspace_yuv_mmx.asm
File size: 7422 byte(s)
Moved dev-api-3 to HEAD -- Nasty but efficient -- Merging work has been done too
;  This file is part of XviD, a free MPEG-4 video encoder/decoder
;  This program is free software; you can redistribute it and/or modify it
;  under the terms of the GNU General Public License as published by
;  the Free Software Foundation; either version 2 of the License, or
;  (at your option) any later version.
;  This program is distributed in the hope that it will be useful, but
;  WITHOUT ANY WARRANTY; without even the implied warranty of
;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;  GNU General Public License for more details.
;  You should have received a copy of the GNU General Public License
;  along with this program; if not, write to the Free Software
;  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
;  yuv_to_yuv.asm, MMX optimized color conversion
;  Copyright (C) 2001 - Michael Militzer <isibaar@xvid.org>
;  For more information visit the XviD homepage: http://www.xvid.org
;  Revision history:
;  24.11.2001 initial version  (Isibaar)
;  23.07.2002 thread safe (edgomez)
;  $Id: colorspace_yuv_mmx.asm,v 1.2 2003-02-15 15:22:18 edgomez Exp $ 


; cglobal -- export a symbol, optionally with a leading underscore
;
; %1	symbol name
;
; When PREFIX is defined the symbol is exported as _%1 and %1 is redefined
; to expand to _%1, so later uses of the plain name (including the label that
; defines it) refer to the underscored symbol. Otherwise %1 is exported as is.
%macro cglobal 1
	%ifdef PREFIX
		global _%1
		%define %1 _%1
	%else
		global %1
	%endif
%endmacro



; PLANE_COPY -- copy one image plane, row by row
;
; Every parameter is substituted textually as the source operand of a
; `mov` (DST, SRC, WIDTH, HEIGHT) or `add` (DST_DIF, SRC_DIF) instruction.
;
; DST		dst buffer
; DST_DIF	dst stride difference (e.g. stride - width)
; SRC		src buffer
; SRC_DIF	src stride difference (e.g. stride - width)
; WIDTH		width (bytes copied per row)
; HEIGHT	height (number of rows)
; OPT		0=plain mmx, 1=xmm
;
; Each row is copied in three stages: 64-byte chunks, then 16-byte chunks of
; the remainder, then the last (width % 16) bytes with `rep movsb`.
;
; Clobbers: eax, ebx, ecx, edx, esi, edi, ebp, mm0-mm7, flags.
; No `emms` is issued by this macro.
; The row loop tests HEIGHT only after a row has been copied, so HEIGHT must
; be non-zero.
; NOTE(review): `rep movsb` presumably relies on the direction flag being
; clear on entry -- confirm against the calling convention in use.
%macro	PLANE_COPY	7
%define DST			%1
%define DST_DIF		%2
%define SRC			%3
%define SRC_DIF		%4
%define WIDTH		%5
%define HEIGHT		%6
%define OPT			%7

	mov eax, WIDTH
	mov ebp, HEIGHT		; $ebp$ = height (row counter)
	mov esi, SRC
	mov edi, DST

	; Split the width once; eax/ebx/edx stay untouched by the copy loops
	; (which count in ecx) and are reused for every row.
	mov ebx, eax
	shr eax, 6			; $eax$ = width / 64
	and ebx, 63			; remainder = width % 64
	mov edx, ebx
	shr ebx, 4			; $ebx$ = remainder / 16
	and edx, 15			; $edx$ = remainder % 16

%%loop64_start:			; start of a row
	or eax, eax
	jz %%loop16_start	; row narrower than 64 bytes
	mov ecx, eax		; width64

%%loop64:
%if OPT == 1			; xmm
	prefetchnta [esi + 64]	; non temporal prefetch
	prefetchnta [esi + 96]
%endif
	movq mm1, [esi]		; read from src
	movq mm2, [esi + 8]
	movq mm3, [esi + 16]
	movq mm4, [esi + 24]
	movq mm5, [esi + 32]
	movq mm6, [esi + 40]
	movq mm7, [esi + 48]
	movq mm0, [esi + 56]

%if OPT == 0			; plain mmx
	movq [edi], mm1		; write to dst
	movq [edi + 8], mm2
	movq [edi + 16], mm3
	movq [edi + 24], mm4
	movq [edi + 32], mm5
	movq [edi + 40], mm6
	movq [edi + 48], mm7
	movq [edi + 56], mm0
%else					; xmm: non temporal stores
	movntq [edi], mm1		; write to dst
	movntq [edi + 8], mm2
	movntq [edi + 16], mm3
	movntq [edi + 24], mm4
	movntq [edi + 32], mm5
	movntq [edi + 40], mm6
	movntq [edi + 48], mm7
	movntq [edi + 56], mm0
%endif

	add esi, 64
	add edi, 64
	dec ecx
	jnz %%loop64

%%loop16_start:
	or ebx, ebx
	jz %%loop1_start	; no 16-byte chunks left in this row
	mov ecx, ebx		; width16

%%loop16:
	movq mm1, [esi]
	movq mm2, [esi + 8]
%if OPT == 0			; plain mmx
	movq [edi], mm1
	movq [edi + 8], mm2
%else					; xmm: non temporal stores
	movntq [edi], mm1
	movntq [edi + 8], mm2
%endif

	add esi, 16
	add edi, 16
	dec ecx
	jnz %%loop16

%%loop1_start:
	mov ecx, edx		; trailing (width % 16) bytes
	rep movsb

	add esi, SRC_DIF	; advance both pointers to the next row
	add edi, DST_DIF
	dec ebp
	jnz near %%loop64_start
%endmacro

; MAKE_YV12_TO_YV12 -- emit one exported planar YV12 -> YV12 copy routine
;
; NAME	function name
; OPT	0=plain mmx, 1=xmm
;
; Prototype of the generated function (all arguments are read from the stack,
; starting just above the return address):
;
; yv12_to_yv12_mmx(uint8_t * y_dst, uint8_t * u_dst, uint8_t * v_dst, 
; 				int y_dst_stride, int uv_dst_stride,
; 				uint8_t * y_src, uint8_t * u_src, uint8_t * v_src, 
; 				int y_src_stride, int uv_src_stride,
; 				int width, int height, int vflip)
;
; The luma plane is copied at width x height, both chroma planes at
; (width/2) x (height/2). When vflip is non-zero the source pointers are moved
; to the last row of each plane and the source strides are negated, so the
; image is copied upside down. These updates are written back into the
; function's own stack argument slots.
;
; ebx, esi, edi and ebp are saved and restored; eax, ecx, edx and mm0-mm7 are
; clobbered. No `emms` is issued.
%macro	MAKE_YV12_TO_YV12	2
%define	NAME		%1
%define	OPT			%2
align 16
cglobal NAME
NAME:
%define pushsize	16
%define localsize	24

; Incoming arguments, addressed relative to esp after the four pushes and the
; `sub esp, localsize` below.
%define vflip			esp + localsize + pushsize + 52
%define height			esp + localsize + pushsize + 48
%define width        	esp + localsize + pushsize + 44
%define uv_src_stride	esp + localsize + pushsize + 40
%define y_src_stride	esp + localsize + pushsize + 36
%define v_src			esp	+ localsize + pushsize + 32
%define u_src   		esp + localsize + pushsize + 28
%define y_src		    esp + localsize + pushsize + 24
%define uv_dst_stride	esp + localsize + pushsize + 20
%define y_dst_stride	esp + localsize + pushsize + 16
%define v_dst			esp	+ localsize + pushsize + 12
%define u_dst   		esp + localsize + pushsize + 8
%define y_dst		    esp + localsize + pushsize + 4
%define _ip				esp + localsize + pushsize + 0

	push ebx	;	esp + localsize + 12
	push esi	;	esp + localsize + 8
	push edi	;	esp + localsize + 4
	push ebp	;	esp + localsize + 0

; Local variables living in the `localsize` bytes reserved below.
%define width2			esp + localsize - 4
%define height2			esp + localsize - 8
%define y_src_dif		esp + localsize - 12
%define y_dst_dif		esp + localsize - 16
%define uv_src_dif		esp + localsize - 20
%define uv_dst_dif		esp + localsize - 24

	sub esp, localsize

	mov eax, [width]
	mov ebx, [height]
	shr eax, 1					; calculate width/2, height/2
	shr ebx, 1
	mov [width2], eax
	mov [height2], ebx

	mov ebp, [vflip]
	or ebp, ebp
	jz near .dont_flip

; flipping support
	mov eax, [height]
	mov esi, [y_src]
	mov edx, [y_src_stride]
	sub eax, 1					; eax = height - 1 (index of the last row)
	imul eax, edx				; eax = (height-1) * y_src_stride
	add esi, eax				; y_src += (height-1) * y_src_stride
	neg edx
	mov [y_src], esi
	mov [y_src_stride], edx		; y_src_stride = -y_src_stride

	mov eax, [height2]
	mov esi, [u_src]
	mov edi, [v_src]
	mov edx, [uv_src_stride]
	sub eax, 1					; eax = height2 - 1
	imul eax, edx				; eax = (height2-1) * uv_src_stride
	add esi, eax				; u_src += (height2-1) * uv_src_stride
	add edi, eax				; v_src += (height2-1) * uv_src_stride
	neg edx
	mov [u_src], esi
	mov [v_src], edi
	mov [uv_src_stride], edx	; uv_src_stride = -uv_src_stride

.dont_flip:

	mov eax, [y_src_stride]
	mov ebx, [y_dst_stride]
	mov ecx, [uv_src_stride]
	mov edx, [uv_dst_stride]
	sub eax, [width]
	sub ebx, [width]
	sub ecx, [width2]
	sub edx, [width2]
	mov [y_src_dif], eax		; y_src_dif = y_src_stride - width
	mov [y_dst_dif], ebx		; y_dst_dif = y_dst_stride - width
	mov [uv_src_dif], ecx		; uv_src_dif = uv_src_stride - width2
	mov [uv_dst_dif], edx		; uv_dst_dif = uv_dst_stride - width2

	PLANE_COPY	[y_dst], [y_dst_dif],  [y_src], [y_src_dif],  [width],  [height], OPT
	PLANE_COPY	[u_dst], [uv_dst_dif], [u_src], [uv_src_dif], [width2], [height2], OPT
	PLANE_COPY	[v_dst], [uv_dst_dif], [v_src], [uv_src_dif], [width2], [height2], OPT

	add esp, localsize
	pop ebp
	pop edi
	pop esi
	pop ebx

	ret
%endmacro


; Instantiate the copy routine twice via MAKE_YV12_TO_YV12:
; second argument 0 selects the plain MMX variant, 1 selects the XMM variant.
MAKE_YV12_TO_YV12	yv12_to_yv12_mmx, 0
MAKE_YV12_TO_YV12	yv12_to_yv12_xmm, 1

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4