summary | refs | log | tree | commit | diff
path: root/src/gallium/auxiliary/draw
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/draw')
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.c 66
1 files changed, 64 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 441877d46f..41bdd012d5 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -31,6 +31,7 @@
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
@@ -43,6 +44,7 @@
#ifdef PIPE_ARCH_X86
#define DISASSEM 0
+#define FAST_MATH 1
static const char *files[] =
{
@@ -1380,14 +1382,28 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
+
+
/* A wrapper for powf().
* Makes sure it is cdecl and operates on floats.
*/
static float PIPE_CDECL _powerf( float x, float y )
{
+#if FAST_MATH /* FAST_MATH build: call util_fast_pow() in place of libm powf() */
+ return util_fast_pow(x, y);
+#else /* !FAST_MATH: plain libm powf() */
return powf( x, y );
+#endif /* FAST_MATH */
}
+#if FAST_MATH /* cdecl wrapper returning util_fast_exp2(x). emit_EXPBASE2()
+               * loads this function's address into a register and emits a
+               * call through it, so it is only built when FAST_MATH is set.
+               */
+static float PIPE_CDECL _exp2(float x)
+{
+ return util_fast_exp2(x);
+}
+#endif /* FAST_MATH */
+
+
/* Really not sufficient -- need to check for conditions that could
* generate inf/nan values, which will slow things down hugely.
*/
@@ -1442,6 +1458,48 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst
}
+#if FAST_MATH /* emit_EXPBASE2: emit x86 code that evaluates the instruction by
+               * calling the _exp2() C helper on op->FullSrcRegisters[0] and
+               * writing the value it returns to op->FullDstRegisters[0]
+               * through x87_fstp_dest4().  Always returns TRUE.
+               */
+static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ uint i;
+
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too.
+ * Hence each of the 8 XMM regs is spilled if marked dirty and then
+ * released before the call sequence is emitted.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) ); /* ESP -= 4: slot for the single float argument */
+
+ x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); /* load source 0 via x87; trailing 0 presumably selects the channel -- TODO confirm */
+ x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); /* store it at [ESP+0], i.e. into the argument slot */
+
+ /* tmp_EAX has been pushed & will be restored below */
+ x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _exp2 );
+ x86_call( cp->func, cp->tmp_EAX );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 4) ); /* ESP += 4: drop the argument slot again */
+
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ /* Note retval on x87 stack:
+ * the counter is bumped by hand because the value arrives from the
+ * called function rather than from an emitted x87 load (x87_stack
+ * presumably tracks the x87 stack depth -- TODO confirm).
+ */
+ cp->func->x87_stack++;
+
+ x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); /* presumably pops the result into all four dest channels -- TODO confirm */
+
+ return TRUE;
+}
+#endif /* FAST_MATH */
+
+
static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
@@ -1662,7 +1720,9 @@ emit_instruction( struct aos_compilation *cp,
return emit_RND(cp, inst);
case TGSI_OPCODE_EXPBASE2:
-#if 0
+#if FAST_MATH
+ return emit_EXPBASE2(cp, inst);
+#elif 0
/* this seems to fail for "larger" exponents.
* See glean tvertProg1's EX2 test.
*/
@@ -1827,6 +1887,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
struct aos_compilation cp;
unsigned fixup, label;
+ util_init_math();
+
tgsi_parse_init( &parse, varient->base.vs->state.tokens );
memset(&cp, 0, sizeof(cp));
@@ -2135,4 +2197,4 @@ struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
-#endif
+#endif /* PIPE_ARCH_X86 */