summary | refs | log | tree | commit | diff
path: root/src/gallium
diff options
context:
space:
mode:
author Keith Whitwell <keith@tungstengraphics.com> 2008-05-22 13:41:49 +0100
committer Keith Whitwell <keith@tungstengraphics.com> 2008-05-23 09:16:57 +0100
commit c684ffa02d8d43ee04b99ee63ccd1adb66e81c1a (patch)
tree 4084055b225ed1bc6062a8aa568e4591e5412f32 /src/gallium
parent a5c3b499fa40f46298389900e74f1db04f99166a (diff)
draw: clean up internal immediates in aos sse
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.c | 64
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.h | 5
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 10
3 files changed, 55 insertions, 24 deletions
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index fde92c7226..0b8600696a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -44,12 +44,6 @@
#ifdef PIPE_ARCH_X86
-#define DISASSEM 0
-
-
-
-
-
static INLINE boolean eq( struct x86_reg a,
struct x86_reg b )
{
@@ -92,13 +86,6 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
}
-struct x86_reg aos_get_internal( struct aos_compilation *cp,
- unsigned imm )
-{
- return get_reg_ptr( cp,
- AOS_FILE_INTERNAL,
- imm );
-}
#define X87_CW_EXCEPTION_INV_OP (1<<0)
#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
@@ -123,6 +110,9 @@ static void init_internals( struct aos_machine *machine )
float inv = 1.0f/255.0f;
float f255 = 255.0f;
+ ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
+ *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
+
ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
@@ -337,6 +327,39 @@ struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
+static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx )
+{
+ struct x86_reg reg = aos_get_shader_reg( cp, file, idx );
+
+ if (reg.file != file_XMM) {
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ sse_movups(cp->func, tmp, reg);
+ aos_adopt_xmm_reg( cp, tmp, file, idx, FALSE );
+ reg = tmp;
+ }
+
+ return reg;
+}
+
+
+
+struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
+ unsigned imm )
+{
+ return aos_get_shader_reg_xmm( cp, AOS_FILE_INTERNAL, imm );
+}
+
+
+struct x86_reg aos_get_internal( struct aos_compilation *cp,
+ unsigned imm )
+{
+ return aos_get_shader_reg( cp, AOS_FILE_INTERNAL, imm );
+}
+
+
+
/* Emulate pshufd insn in regular SSE, if necessary:
@@ -461,15 +484,15 @@ static struct x86_reg fetch_src( struct aos_compilation *cp,
arg0 = dst;
}
- if (negs) {
- struct x86_reg imm_negs = aos_get_internal(cp, IMM_NEGS);
+ if (negs && negs != 0xf) {
+ struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
struct x86_reg tmp = aos_get_xmm_reg(cp);
/* Load 1,-1,0,0
* Use neg as arg to pshufd
* Multiply
*/
- emit_pshufd(cp, tmp, imm_negs,
+ emit_pshufd(cp, tmp, imm_swz,
SHUF((negs & 1) ? 1 : 0,
(negs & 2) ? 1 : 0,
(negs & 4) ? 1 : 0,
@@ -479,12 +502,17 @@ static struct x86_reg fetch_src( struct aos_compilation *cp,
aos_release_xmm_reg(cp, tmp.idx);
arg0 = dst;
}
+ else if (negs) {
+ struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS);
+ sse_mulps(cp->func, dst, imm_negs);
+ arg0 = dst;
+ }
+
if (abs && abs != 0xf) {
ERROR(cp, "unsupported partial abs");
}
-
- if (abs) {
+ else if (abs) {
struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
struct x86_reg tmp = aos_get_xmm_reg(cp);
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index c2afd4e9a0..efdc9a38f4 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -142,13 +142,16 @@ boolean aos_emit_outputs( struct aos_compilation *cp );
#define IMM_ONES 0 /* 1, 1,1,1 */
-#define IMM_NEGS 1 /* 1,-1,0,0 */
+#define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */
#define IMM_IDENTITY 2 /* 0, 0,0,1 */
#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
#define IMM_255 4 /* 255, 255, 255, 255 */
+#define IMM_NEGS 5 /* -1,-1,-1,-1 */
struct x86_reg aos_get_internal( struct aos_compilation *cp,
unsigned imm );
+struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
+ unsigned imm );
#define ERROR(cp, msg) \
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index 72b2b3d11d..0dda9df97d 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -54,7 +54,7 @@ static void emit_load_R32G32B32( struct aos_compilation *cp,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
- sse_shufps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
+ sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
sse_movlps(cp->func, data, src_ptr);
}
@@ -63,7 +63,7 @@ static void emit_load_R32G32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
- sse_movups(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) );
+ sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
sse_movlps(cp->func, data, src_ptr);
}
@@ -73,7 +73,7 @@ static void emit_load_R32( struct aos_compilation *cp,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, src_ptr);
- sse_orps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) );
+ sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
}
@@ -82,8 +82,8 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, src_ptr);
- sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ));
- sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ));
+ sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
+ sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
sse2_cvtdq2ps(cp->func, data, data);
sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
}