From fe57ed4f2566e30384d0c786998842405d8e8990 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger
Date: Thu, 1 Jun 2006 22:56:40 +0000
Subject: Fix extended swizzling in vertex programs by introducing special swizzle instruction, extend the 2 bit rsw field to 3 bit like used in other places. While here, also fix up rsw (negation), dph and try to fix up rsq with negative values (doesn't work, bug seems elsewhere) in the sse codegen code.

---
 src/mesa/x86/rtasm/x86sse.c | 16 ++++++++++++++++
 src/mesa/x86/rtasm/x86sse.h |  2 ++
 2 files changed, 18 insertions(+)

(limited to 'src/mesa/x86/rtasm')

diff --git a/src/mesa/x86/rtasm/x86sse.c b/src/mesa/x86/rtasm/x86sse.c
index 9f34004ba0..6137aef8ec 100644
--- a/src/mesa/x86/rtasm/x86sse.c
+++ b/src/mesa/x86/rtasm/x86sse.c
@@ -424,6 +424,14 @@ void sse_maxps( struct x86_function *p,
    emit_modrm( p, dst, src );
 }
 
+void sse_maxss( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
+   emit_modrm( p, dst, src );
+}
+
 void sse_divss( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
@@ -456,6 +464,14 @@ void sse_mulps( struct x86_function *p,
    emit_modrm( p, dst, src );
 }
 
+void sse_mulss( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   emit_3ub(p, 0xF3, X86_TWOB, 0x59);
+   emit_modrm( p, dst, src );
+}
+
 void sse_addps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
diff --git a/src/mesa/x86/rtasm/x86sse.h b/src/mesa/x86/rtasm/x86sse.h
index 430cf2f939..5ec5489431 100644
--- a/src/mesa/x86/rtasm/x86sse.h
+++ b/src/mesa/x86/rtasm/x86sse.h
@@ -156,6 +156,7 @@ void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
 void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, GLubyte cc );
 void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -165,6 +166,7 @@ void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src
 void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
-- 
cgit v1.2.3