From 54dac2c84310536cce962101de29546d3eb80175 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 9 Feb 2007 00:36:40 +0100 Subject: optimize generated vertex programs a bit Use new internal state to avoid per-vertex normalization of static spot direction vector. Use internal state for simpler per-vertex fog computations (MAD instead of SUB/MUL for linear fog, EX2 instead of POW for EXP/EXP2 fog). Simplify point size calc (2 MADs instead of MOV, MUL, MUL, DP3), and while there fix it up (RSQ instead of RCP). All untested... --- src/mesa/main/imports.h | 8 ++++++ src/mesa/shader/arbprogparse.c | 2 +- src/mesa/shader/program.c | 30 +++++++++++++++++++++ src/mesa/shader/program.h | 2 ++ src/mesa/tnl/t_vp_build.c | 61 +++++++++++++++++++++--------------------- 5 files changed, 71 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index d9885dbeec..0633b3b8bf 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -138,6 +138,14 @@ typedef union { GLfloat f; GLint i; } fi_type; #define M_E (2.7182818284590452354) #endif +#ifndef ONE_DIV_LN2 +#define ONE_DIV_LN2 (1.442695040888963456) +#endif + +#ifndef ONE_DIV_SQRT_LN2 +#define ONE_DIV_SQRT_LN2 (1.201122408786449815) +#endif + #ifndef FLT_MAX_EXP #define FLT_MAX_EXP 128 #endif diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c index 72d4909372..b9ff08de5f 100644 --- a/src/mesa/shader/arbprogparse.c +++ b/src/mesa/shader/arbprogparse.c @@ -4101,7 +4101,7 @@ _mesa_parse_arb_vertex_program(GLcontext *ctx, GLenum target, program->Base.Parameters = ap.Base.Parameters; #if DEBUG_VP - _mesa_printf("____________Vertex program %u __________\n", program->Base.ID); + _mesa_printf("____________Vertex program %u __________\n", program->Base.Id); _mesa_print_program(&program->Base); #endif } diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c index d301f19090..7e6cd26c55 100644 --- a/src/mesa/shader/program.c 
+++ b/src/mesa/shader/program.c @@ -996,6 +996,30 @@ _mesa_fetch_state(GLcontext *ctx, const enum state_index state[], } break; } + case STATE_FOG_PARAMS_OPTIMIZED: + /* this makes it possible to use simpler per-vertex fog calcs. POW + (for EXP/EXP2 fog) might be more expensive than EX2 on some hw, + plus it needs another constant (e) anyway. Linear fog can now be + done with a single MAD. + linear: fogcoord * -1/(end-start) + end/(end-start) + exp: 2^-(density/ln(2) * fogcoord) + exp2: 2^-((density/sqrt(ln(2)) * fogcoord)^2) */ + value[0] = -1.0F / (ctx->Fog.End - ctx->Fog.Start); + value[1] = ctx->Fog.End / (ctx->Fog.End - ctx->Fog.Start); + value[2] = ctx->Fog.Density * ONE_DIV_LN2; + value[3] = ctx->Fog.Density * ONE_DIV_SQRT_LN2; + break; + case STATE_SPOT_DIR_NORMALIZED: { + /* here, state[2] is the light number */ + /* pre-normalize spot dir */ + const GLuint ln = (GLuint) state[2]; + value[0] = ctx->Light.Light[ln].EyeDirection[0]; + value[1] = ctx->Light.Light[ln].EyeDirection[1]; + value[2] = ctx->Light.Light[ln].EyeDirection[2]; + NORMALIZE_3FV(value); + value[3] = ctx->Light.Light[ln]._CosCutoff; + break; + } default: /* unknown state indexes are silently ignored * should be handled by the driver. @@ -1075,6 +1099,10 @@ make_state_flags(const GLint state[]) return _NEW_MODELVIEW; case STATE_TEXRECT_SCALE: return _NEW_TEXTURE; + case STATE_FOG_PARAMS_OPTIMIZED: + return _NEW_FOG; + case STATE_SPOT_DIR_NORMALIZED: + return _NEW_LIGHT; default: /* unknown state indexes are silently ignored and * no flag set, since it is handled by the driver. 
@@ -1232,6 +1260,8 @@ append_token(char *dst, enum state_index k) case STATE_INTERNAL: case STATE_NORMAL_SCALE: case STATE_POSITION_NORMALIZED: + case STATE_FOG_PARAMS_OPTIMIZED: + case STATE_SPOT_DIR_NORMALIZED: append(dst, "(internal)"); break; default: diff --git a/src/mesa/shader/program.h b/src/mesa/shader/program.h index af06c03598..a0bde07762 100644 --- a/src/mesa/shader/program.h +++ b/src/mesa/shader/program.h @@ -190,6 +190,8 @@ enum state_index { STATE_NORMAL_SCALE, STATE_TEXRECT_SCALE, STATE_POSITION_NORMALIZED, /* normalized light position */ + STATE_FOG_PARAMS_OPTIMIZED, /* for faster fog calc */ + STATE_SPOT_DIR_NORMALIZED, /* pre-normalized spot dir */ STATE_INTERNAL_DRIVER /* first available state index for drivers (must be last) */ }; diff --git a/src/mesa/tnl/t_vp_build.c b/src/mesa/tnl/t_vp_build.c index 805d05ae72..0b6f506f4e 100644 --- a/src/mesa/tnl/t_vp_build.c +++ b/src/mesa/tnl/t_vp_build.c @@ -806,14 +806,13 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p, /* Calculate spot attenuation: */ if (!p->state->unit[i].light_spotcutoff_is_180) { - struct ureg spot_dir = register_param3(p, STATE_LIGHT, i, - STATE_SPOT_DIRECTION); + struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, + STATE_SPOT_DIR_NORMALIZED, i); struct ureg spot = get_temp(p); struct ureg slt = get_temp(p); - - emit_normalize_vec3( p, spot, spot_dir ); /* XXX: precompute! 
*/ - emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot); - emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir,W), spot); + + emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); + emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); emit_op2(p, OPCODE_MUL, att, 0, slt, spot); @@ -1103,29 +1102,26 @@ static void build_fog( struct tnl_program *p ) } if (p->state->tnl_do_vertex_fog) { - struct ureg params = register_param1(p, STATE_FOG_PARAMS); + struct ureg params = register_param1(p, STATE_FOG_PARAMS_OPTIMIZED); struct ureg tmp = get_temp(p); switch (p->state->fog_mode) { case FOG_LINEAR: { struct ureg id = get_identity_param(p); - emit_op2(p, OPCODE_SUB, tmp, 0, swizzle1(params,Z), input); - emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W)); + emit_op3(p, OPCODE_MAD, tmp, 0, input, swizzle1(params,X), swizzle1(params,Y)); emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); break; } case FOG_EXP: emit_op1(p, OPCODE_ABS, tmp, 0, input); - emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,X)); - emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, - register_const1f(p, M_E), negate(tmp)); + emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,Z)); + emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp)); break; case FOG_EXP2: - emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,X)); + emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W)); emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); - emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, - register_const1f(p, M_E), negate(tmp)); + emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp)); break; } @@ -1317,8 +1313,6 @@ static void build_texture_transform( struct tnl_program *p ) } -/* Seems like it could be tighter: - */ static void build_pointsize( struct tnl_program *p ) { struct ureg eye = get_eye_position(p); @@ -1327,20 +1321,25 @@ static void 
build_pointsize( struct tnl_program *p ) struct ureg out = register_output(p, VERT_RESULT_PSIZ); struct ureg ut = get_temp(p); - /* 1, -Z, Z * Z, 1 */ - emit_op1(p, OPCODE_MOV, ut, 0, swizzle1(get_identity_param(p), W)); - emit_op2(p, OPCODE_MUL, ut, WRITEMASK_YZ, ut, negate(swizzle1(eye, Z))); - emit_op2(p, OPCODE_MUL, ut, WRITEMASK_Z, ut, negate(swizzle1(eye, Z))); - - - /* p1 + p2 * dist + p3 * dist * dist, 0 */ - emit_op2(p, OPCODE_DP3, ut, 0, ut, state_attenuation); - - /* 1 / factor */ - emit_op1(p, OPCODE_RCP, ut, 0, ut ); - - /* out = pointSize / factor */ - emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); + /* p1 + dist * (p2 + dist * p3); */ + emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)), + swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); + emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)), + ut, swizzle1(state_attenuation, X)); + + /* 1 / sqrt(factor) */ + emit_op1(p, OPCODE_RSQ, ut, 0, ut ); + +#if 1 + /* out = pointSize / sqrt(factor) */ + emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); +#else + /* not sure, might make sense to do clamping here, + but it's not done in t_vb_points either */ + emit_op2(p, OPCODE_MUL, ut, 0, ut, state_size); + emit_op2(p, OPCODE_MAX, ut, 0, ut, swizzle1(state_size, Y)); + emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); +#endif release_temp(p, ut); } -- cgit v1.2.3