From 9311bc253e0942af621b7efab0549ed75c0ce4a8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 10 Jul 2005 11:23:10 +0000 Subject: Add runtime compiled x87 implementations for most of the remaining opcodes. When enabled via environment vars, gears runs and almost looks right but other apps are still quite buggy. --- src/mesa/tnl/t_vb_arbprogram.c | 53 +++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 19 deletions(-) (limited to 'src/mesa/tnl/t_vb_arbprogram.c') diff --git a/src/mesa/tnl/t_vb_arbprogram.c b/src/mesa/tnl/t_vb_arbprogram.c index 508233cf31..13041949d1 100644 --- a/src/mesa/tnl/t_vb_arbprogram.c +++ b/src/mesa/tnl/t_vb_arbprogram.c @@ -110,6 +110,10 @@ static GLfloat ApproxPower(GLfloat x, GLfloat y) return (GLfloat) _mesa_pow(x, y); } +static GLfloat rough_approx_log2_0_1(GLfloat x) +{ + return LOG2(x); +} @@ -273,13 +277,11 @@ static void do_EXP( struct arb_vp_machine *m, union instruction op ) const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; GLfloat tmp = arg0[0]; GLfloat flr_tmp = FLOORF(tmp); + GLfloat frac_tmp = tmp - flr_tmp; - /* KW: nvvertexec has an optimized version of this which is pretty - * hard to understand/validate, but avoids the RoughApproxExp2. - */ - result[0] = (GLfloat) (1 << (int)flr_tmp); - result[1] = tmp - flr_tmp; - result[2] = RoughApproxExp2(tmp); + result[0] = ldexpf(1.0, (int)flr_tmp); + result[1] = frac_tmp; + result[2] = ldexpf(rough_approx_log2_0_1(frac_tmp), (int)flr_tmp); result[3] = 1.0F; } @@ -322,18 +324,22 @@ static void do_LIT( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; - - const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */ GLfloat tmp[4]; - tmp[0] = MAX2(arg0[0], 0.0F); - tmp[1] = MAX2(arg0[1], 0.0F); - tmp[3] = CLAMP(arg0[3], -(128.0F - epsilon), (128.0F - epsilon)); + tmp[0] = 1.0; + tmp[1] = 0.0; + tmp[2] = 0.0; + tmp[3] = 1.0; - result[0] = 1.0; - result[1] = tmp[0]; - result[2] = (tmp[0] > 0.0) ? RoughApproxPower(tmp[1], tmp[3]) : 0.0F; - result[3] = 1.0; + if (arg0[0] > 0.0) { + tmp[1] = arg0[0]; + + if (arg0[1] > 0.0) { + tmp[2] = RoughApproxPower(arg0[1], arg0[3]); + } + } + + COPY_4V(result, tmp); } @@ -353,7 +359,7 @@ static void do_LOG( struct arb_vp_machine *m, union instruction op ) result[0] = (GLfloat) (exponent - 1); result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */ - result[2] = result[0] + LOG2(result[1]); + result[2] = exponent + LOG2(mantissa); result[3] = 1.0; } @@ -1201,9 +1207,6 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage) p = (struct tnl_compiled_program *)program->TnlData; assert(p); - /* Initialize regs where necessary: - */ - ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1); m->nr_inputs = m->nr_outputs = 0; @@ -1386,6 +1389,15 @@ static GLboolean init_vertex_program( GLcontext *ctx, m->File[0] = ALIGN_MALLOC(REG_MAX * sizeof(GLfloat) * 4, 16); + /* Initialize regs where necessary: + */ + ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1); + ASSIGN_4V(m->File[0][REG_ONES], 1, 1, 1, 1); + ASSIGN_4V(m->File[0][REG_SWZ], -1, 1, 0, 0); + ASSIGN_4V(m->File[0][REG_NEG], -1, -1, -1, -1); + ASSIGN_4V(m->File[0][REG_LIT], 1, 0, 0, 1); + ASSIGN_4V(m->File[0][REG_LIT2], 1, .5, .2, 1); /* debug value */ + if (_mesa_getenv("MESA_EXPERIMENTAL")) m->try_codegen = 1; @@ -1402,6 +1414,9 @@ static GLboolean init_vertex_program( GLcontext *ctx, if (ctx->_MaintainTnlProgram) _mesa_allow_light_in_model( ctx, GL_FALSE ); + m->fpucntl_rnd_neg = RND_NEG_FPU; /* const value */ + m->fpucntl_restore = RESTORE_FPU; /* const value */ + return GL_TRUE; } -- cgit v1.2.3