summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Michal Krol <michal@ubuntu-vbox.(none)> 2008-06-03 10:59:46 +0200
committer: Michal Krol <michal@ubuntu-vbox.(none)> 2008-06-03 10:59:46 +0200
commit: 183d490ab139483c88d0b0f541714919de86235c (patch)
tree: 48f920578fca10b9e1e07c609ca537c8b3e82965 /src
parent: 58cccc8d6b49c75eeabe9b61055e69de824ff757 (diff)
draw: Fix fetch_src(). Resurrect SSE version of DP3.
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.c 43
1 files changed, 6 insertions, 37 deletions
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 5bfcd96ac3..891f8c211a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -532,10 +532,11 @@ static struct x86_reg fetch_src( struct aos_compilation *cp,
if (swz != SSE_SWIZZLE_NOOP || negs != 0 || abs != 0) {
struct x86_reg dst = aos_get_xmm_reg(cp);
- if (swz != SSE_SWIZZLE_NOOP) {
+ if (swz != SSE_SWIZZLE_NOOP)
emit_pshufd(cp, dst, arg0, swz);
- arg0 = dst;
- }
+ else
+ sse_movaps(cp->func, dst, arg0);
+ arg0 = dst;
if (negs && negs != 0xf) {
struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
@@ -550,15 +551,13 @@ static struct x86_reg fetch_src( struct aos_compilation *cp,
(negs & 2) ? 1 : 0,
(negs & 4) ? 1 : 0,
(negs & 8) ? 1 : 0));
- sse_mulps(cp->func, dst, arg0);
+ sse_mulps(cp->func, dst, tmp);
aos_release_xmm_reg(cp, tmp.idx);
- arg0 = dst;
}
else if (negs) {
struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS);
sse_mulps(cp->func, dst, imm_negs);
- arg0 = dst;
}
@@ -571,10 +570,9 @@ static struct x86_reg fetch_src( struct aos_compilation *cp,
sse_movaps(cp->func, tmp, arg0);
sse_mulps(cp->func, tmp, neg);
- sse_maxps(cp->func, dst, arg0);
+ sse_maxps(cp->func, dst, tmp);
aos_release_xmm_reg(cp, tmp.idx);
- arg0 = dst;
}
}
@@ -962,33 +960,6 @@ static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
-#if 1
-
-/* The x87 version.
- */
-static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
-{
- struct x86_reg st1 = x86_make_reg( file_x87, 1 );
-
- x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
- x87_fld_src( cp, &op->FullSrcRegisters[1], 0 );
- x87_fmulp( cp->func, st1 );
- x87_fld_src( cp, &op->FullSrcRegisters[0], 1 );
- x87_fld_src( cp, &op->FullSrcRegisters[1], 1 );
- x87_fmulp( cp->func, st1 );
- x87_faddp( cp->func, st1 );
- x87_fld_src( cp, &op->FullSrcRegisters[0], 2 );
- x87_fld_src( cp, &op->FullSrcRegisters[1], 2 );
- x87_fmulp( cp->func, st1 );
- x87_faddp( cp->func, st1 );
-
- x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
-
- return TRUE;
-}
-
-#else
-
/* The dotproduct instructions don't really do that well in sse:
* XXX: produces wrong results -- disabled.
*/
@@ -1012,8 +983,6 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
-#endif
-
static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);