summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/draw/draw_vs_aos.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.c')
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c79
1 files changed, 76 insertions, 3 deletions
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 441877d46f..a556477a76 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -29,8 +29,10 @@
*/
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_debug.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
@@ -43,6 +45,7 @@
#ifdef PIPE_ARCH_X86
#define DISASSEM 0
+#define FAST_MATH 1
static const char *files[] =
{
@@ -117,21 +120,26 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
switch (file) {
case TGSI_FILE_INPUT:
+ assert(idx < MAX_INPUTS);
return x86_make_disp(ptr, Offset(struct aos_machine, input[idx]));
case TGSI_FILE_OUTPUT:
return x86_make_disp(ptr, Offset(struct aos_machine, output[idx]));
case TGSI_FILE_TEMPORARY:
+ assert(idx < MAX_TEMPS);
return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx]));
case AOS_FILE_INTERNAL:
+ assert(idx < MAX_INTERNALS);
return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx]));
case TGSI_FILE_IMMEDIATE:
+ assert(idx < MAX_IMMEDIATES); /* just a sanity check */
return x86_make_disp(aos_get_x86(cp, 0, X86_IMMEDIATES), idx * 4 * sizeof(float));
case TGSI_FILE_CONSTANT:
+ assert(idx < MAX_CONSTANTS); /* just a sanity check */
return x86_make_disp(aos_get_x86(cp, 1, X86_CONSTANTS), idx * 4 * sizeof(float));
default:
@@ -1380,14 +1388,28 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
+
+
/* A wrapper for powf().
* Makes sure it is cdecl and operates on floats.
*/
static float PIPE_CDECL _powerf( float x, float y )
{
+#if FAST_MATH
+ return util_fast_pow(x, y);
+#else
return powf( x, y );
+#endif
}
+#if FAST_MATH
+static float PIPE_CDECL _exp2(float x)
+{
+ return util_fast_exp2(x);
+}
+#endif
+
+
/* Really not sufficient -- need to check for conditions that could
* generate inf/nan values, which will slow things down hugely.
*/
@@ -1442,6 +1464,48 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst
}
+#if FAST_MATH
+static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ uint i;
+
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) );
+
+ x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
+ x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) );
+
+ /* tmp_EAX has been pushed & will be restored below */
+ x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _exp2 );
+ x86_call( cp->func, cp->tmp_EAX );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 4) );
+
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ /* Note retval on x87 stack:
+ */
+ cp->func->x87_stack++;
+
+ x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
+
+ return TRUE;
+}
+#endif
+
+
static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
@@ -1662,7 +1726,9 @@ emit_instruction( struct aos_compilation *cp,
return emit_RND(cp, inst);
case TGSI_OPCODE_EXPBASE2:
-#if 0
+#if FAST_MATH
+ return emit_EXPBASE2(cp, inst);
+#elif 0
/* this seems to fail for "larger" exponents.
* See glean tvertProg1's EX2 test.
*/
@@ -1827,6 +1893,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
struct aos_compilation cp;
unsigned fixup, label;
+ util_init_math();
+
tgsi_parse_init( &parse, varient->base.vs->state.tokens );
memset(&cp, 0, sizeof(cp));
@@ -2046,6 +2114,11 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
start,
count,
output_buffer );
+
+ /* Sanity spot checks to make sure we didn't trash our constants */
+ assert(machine->internal[IMM_ONES][0] == 1.0f);
+ assert(machine->internal[IMM_IDENTITY][0] == 0.0f);
+ assert(machine->internal[IMM_NEGS][0] == -1.0f);
}
@@ -2135,4 +2208,4 @@ struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
-#endif
+#endif /* PIPE_ARCH_X86 */