From 62bccd6df0c963a14e801bcac95dc8046b978a7f Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 5 Jul 2008 22:21:24 +0200 Subject: r300: Allow adding parameters during fragprog transform, share LIT code --- src/mesa/drivers/dri/r300/r300_context.h | 24 ++--- src/mesa/drivers/dri/r300/r300_fragprog.c | 18 +++- src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 143 ++++--------------------- src/mesa/drivers/dri/r300/r300_state.c | 41 +++++-- src/mesa/drivers/dri/r300/r500_fragprog.c | 12 ++- src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 112 +++---------------- src/mesa/drivers/dri/r300/radeon_program_alu.c | 124 +++++++++++++++++++-- 7 files changed, 216 insertions(+), 258 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index a24ab0cad7..a69beba9a7 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -716,14 +716,11 @@ struct r300_fragment_program_code { int tex_offset; int tex_end; - /* Hardware constants. - * Contains a pointer to the value. The destination of the pointer - * is supposed to be updated when GL state changes. - * Typically, this is either a pointer into - * gl_program_parameter_list::ParameterValues, or a pointer to a - * global constant (e.g. for sin/cos-approximation) + /** + * Remember which program register a given hardware constant + * belongs to. */ - const GLfloat *constant[PFS_NUM_CONST_REGS]; + struct prog_src_register constant[PFS_NUM_CONST_REGS]; int const_nr; int max_temp_idx; @@ -787,14 +784,11 @@ struct r500_fragment_program_code { int inst_offset; int inst_end; - /* Hardware constants. - * Contains a pointer to the value. The destination of the pointer - * is supposed to be updated when GL state changes. - * Typically, this is either a pointer into - * gl_program_parameter_list::ParameterValues, or a pointer to a - * global constant (e.g. 
for sin/cos-approximation) - */ - const GLfloat *constant[PFS_NUM_CONST_REGS]; + /** + * Remember which program register a given hardware constant + * belongs to. + */ + struct prog_src_register constant[PFS_NUM_CONST_REGS]; int const_nr; int max_temp_idx; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 6a8ef0ef5f..57987f5d0f 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -117,9 +117,7 @@ static GLboolean transform_TEX( int factor_index; tokens[2] = inst.TexSrcUnit; - factor_index = - _mesa_add_state_reference( - compiler->fp->mesa_program.Base.Parameters, tokens); + factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens); tgt = radeonAppendInstructions(t->Program, 1); @@ -303,7 +301,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) i++; /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->fp->mesa_program.Base.Parameters, tokens); + window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); fpi[i].Opcode = OPCODE_MAD; @@ -401,6 +399,11 @@ void r300TranslateFragmentShader(r300ContextPtr r300, compiler.code = &fp->code; compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Fragment Program: Initial program:\n"); + _mesa_print_program(compiler.program); + } + insert_WPOS_trailer(&compiler); struct radeon_program_transformation transformations[] = { @@ -413,13 +416,18 @@ void r300TranslateFragmentShader(r300ContextPtr r300, 2, transformations); if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Program after transformations:\n"); + _mesa_printf("Fragment Program: After transformations:\n"); _mesa_print_program(compiler.program); } if (!r300FragmentProgramEmit(&compiler)) fp->error = GL_TRUE; + /* Subtle: Rescue any parameters that have been added during transformations */ 
+ _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); + fp->mesa_program.Base.Parameters = compiler.program->Parameters; + compiler.program->Parameters = 0; + _mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL); if (!fp->error) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index 889631f705..d95008edc0 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -549,22 +549,17 @@ static void free_temp(struct r300_pfs_compile_state *cs, GLuint r) /** * Emit a hardware constant/parameter. - * - * \p cp Stable pointer to an array of 4 floats. - * The pointer must be stable in the sense that it remains to be valid - * and hold the contents of the constant/parameter throughout the lifetime - * of the fragment program (actually, up until the next time the fragment - * program is translated). */ static GLuint emit_const4fv(struct r300_pfs_compile_state *cs, - const GLfloat * cp) + struct prog_src_register srcreg) { COMPILE_STATE; GLuint reg = undef; int index; for (index = 0; index < code->const_nr; ++index) { - if (code->constant[index] == cp) + if (code->constant[index].File == srcreg.File && + code->constant[index].Index == srcreg.Index) break; } @@ -575,7 +570,7 @@ static GLuint emit_const4fv(struct r300_pfs_compile_state *cs, } code->const_nr++; - code->constant[index] = cp; + code->constant[index] = srcreg; } REG_SET_TYPE(reg, REG_TYPE_CONST); @@ -806,20 +801,11 @@ static GLuint t_src(struct r300_pfs_compile_state *cs, REG_SET_TYPE(r, REG_TYPE_INPUT); break; case PROGRAM_LOCAL_PARAM: - r = emit_const4fv(cs, - fp->mesa_program.Base.LocalParams[fpsrc. 
- Index]); - break; case PROGRAM_ENV_PARAM: - r = emit_const4fv(cs, - cs->compiler->r300->radeon.glCtx->FragmentProgram.Parameters[fpsrc.Index]); - break; case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_CONSTANT: - r = emit_const4fv(cs, - fp->mesa_program.Base.Parameters-> - ParameterValues[fpsrc.Index]); + r = emit_const4fv(cs, fpsrc); break; case PROGRAM_BUILTIN: switch(fpsrc.Swizzle) { @@ -1452,100 +1438,17 @@ static GLfloat SinCosConsts[2][4] = { } }; -/** - * Emit a LIT instruction. - * \p flags may be PFS_FLAG_SAT - * - * Definition of LIT (from ARB_fragment_program): - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - * - * The longest path of computation is the one leading to result.z, - * consisting of 5 operations. This implementation of LIT takes - * 5 slots. So unless there's some special undocumented opcode, - * this implementation is potentially optimal. Unfortunately, - * emit_arith is a bit too conservative because it doesn't understand - * partial writes to the vector component. 
- */ -static const GLfloat LitConst[4] = - { 127.999999, 127.999999, 127.999999, -127.999999 }; - -static void emit_lit(struct r300_pfs_compile_state *cs, - GLuint dest, int mask, GLuint src, int flags) +static GLuint emit_sincosconsts(struct r300_pfs_compile_state *cs, int i) { - COMPILE_STATE; - GLuint cnst; - int needTemporary; - GLuint temp; - - cnst = emit_const4fv(cs, LitConst); - - needTemporary = 0; - if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { - needTemporary = 1; - } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - // LIT is typically followed by DP3/DP4, so there's no point - // in creating special code for this case - needTemporary = 1; - } + struct prog_src_register srcreg; + GLuint constant_swizzle; - if (needTemporary) { - temp = keep(get_temp_reg(cs)); - } else { - temp = keep(dest); - } + srcreg.File = PROGRAM_CONSTANT; + srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters, + SinCosConsts[i], 4, &constant_swizzle); + srcreg.Swizzle = constant_swizzle; - // Note: The order of emit_arith inside the slots is relevant, - // because emit_arith only looks at scalar vs. vector when resolving - // dependencies, and it does not consider individual vector components, - // so swizzling between the two parts can create fake dependencies. - - // First slot - emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_XY, - keep(src), pfs_zero, undef, 0); - emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); - - // Second slot - emit_arith(cs, PFS_OP_MIN, temp, WRITEMASK_Z, - swizzle(temp, W, W, W, W), cnst, undef, 0); - emit_arith(cs, PFS_OP_LG2, temp, WRITEMASK_W, - swizzle(temp, Y, Y, Y, Y), undef, undef, 0); - - // Third slot - // If desired, we saturate the y result here. 
- // This does not affect the use as a condition variable in the CMP later - emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, - temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); - emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_Y, - swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); - - // Fourth slot - emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_X, - pfs_one, pfs_one, pfs_zero, 0); - emit_arith(cs, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); - - // Fifth slot - emit_arith(cs, PFS_OP_CMP, temp, WRITEMASK_Z, - pfs_zero, swizzle(temp, W, W, W, W), - negate(swizzle(temp, Y, Y, Y, Y)), flags); - emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, - pfs_zero, 0); - - if (needTemporary) { - emit_arith(cs, PFS_OP_MAD, dest, mask, - temp, pfs_one, pfs_zero, flags); - free_temp(cs, temp); - } else { - // Decrease refcount of the destination - t_hw_dst(cs, dest, GL_FALSE, cs->nrslots); - } + return emit_const4fv(cs, srcreg); } static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_instruction *fpi) @@ -1577,8 +1480,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst src[1] = t_src(cs, fpi->SrcReg[1]); src[2] = t_src(cs, fpi->SrcReg[2]); /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c - * r300 - if src2.c < 0.0 ? src1.c : src0.c - */ + * r300 - if src2.c < 0.0 ? 
src1.c : src0.c + */ emit_arith(cs, PFS_OP_CMP, dest, mask, src[2], src[1], src[0], flags); break; @@ -1592,8 +1495,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst * result = sin(x) */ temp[0] = get_temp_reg(cs); - const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); - const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); + const_sin[0] = emit_sincosconsts(cs, 0); + const_sin[1] = emit_sincosconsts(cs, 1); src[0] = t_scalar_src(cs, fpi->SrcReg[0]); /* add 0.5*PI and do range reduction */ @@ -1687,10 +1590,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst emit_arith(cs, PFS_OP_LG2, dest, mask, src[0], undef, undef, flags); break; - case OPCODE_LIT: - src[0] = t_src(cs, fpi->SrcReg[0]); - emit_lit(cs, dest, mask, src[0], flags); - break; case OPCODE_LRP: src[0] = t_src(cs, fpi->SrcReg[0]); src[1] = t_src(cs, fpi->SrcReg[1]); @@ -1758,8 +1657,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst */ temp[0] = get_temp_reg(cs); temp[1] = get_temp_reg(cs); - const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); - const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); + const_sin[0] = emit_sincosconsts(cs, 0); + const_sin[1] = emit_sincosconsts(cs, 1); src[0] = t_scalar_src(cs, fpi->SrcReg[0]); /* x = -abs(x)+0.5*PI */ @@ -1825,8 +1724,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst */ temp[0] = get_temp_reg(cs); - const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); - const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); + const_sin[0] = emit_sincosconsts(cs, 0); + const_sin[1] = emit_sincosconsts(cs, 1); src[0] = t_scalar_src(cs, fpi->SrcReg[0]); /* do range reduction */ diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 0f7c179de8..d7a6962acc 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2453,6 +2453,27 @@ void r300UpdateShaders(r300ContextPtr 
rmesa) r300UpdateStateParameters(ctx, _NEW_PROGRAM); } +static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, + struct gl_program *program, struct prog_src_register srcreg) +{ + static const GLfloat dummy[4] = { 0, 0, 0, 0 }; + + switch(srcreg.File) { + case PROGRAM_LOCAL_PARAM: + return program->LocalParams[srcreg.Index]; + case PROGRAM_ENV_PARAM: + return ctx->FragmentProgram.Parameters[srcreg.Index]; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + return program->Parameters->ParameterValues[srcreg.Index]; + default: + _mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n"); + return dummy; + } +} + + static void r300SetupPixelShader(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; @@ -2523,10 +2544,12 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, fpp); rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4); for (i = 0; i < code->const_nr; i++) { - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(code->constant[i][0]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(code->constant[i][1]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(code->constant[i][2]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(code->constant[i][3]); + const GLfloat *constant = get_fragmentprogram_constant(ctx, + &fp->mesa_program.Base, code->constant[i]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]); } } @@ -2595,10 +2618,12 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < code->const_nr; i++) { - 
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(code->constant[i][0]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(code->constant[i][1]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(code->constant[i][2]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(code->constant[i][3]); + const GLfloat *constant = get_fragmentprogram_constant(ctx, + &fp->mesa_program.Base, code->constant[i]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]); } bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 7ee8494722..1cdb065354 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -212,7 +212,7 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) i++; /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->fp->mesa_program.Base.Parameters, tokens); + window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); fpi[i].Opcode = OPCODE_MAD; @@ -332,6 +332,11 @@ void r500TranslateFragmentShader(r300ContextPtr r300, fp->translated = r500FragmentProgramEmit(&compiler); + /* Subtle: Rescue any parameters that have been added during transformations */ + _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); + fp->mesa_program.Base.Parameters = compiler.program->Parameters; + compiler.program->Parameters = 0; + _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0); 
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); @@ -461,9 +466,8 @@ static void dump_program(struct r500_fragment_program_code *code) if (code->const_nr) { fprintf(stderr, "--------\nConstants:\n"); for (n = 0; n < code->const_nr; n++) { - fprintf(stderr, "Constant %d: %f %f\n\t %f %f\n", n, - code->constant[n][0], code->constant[n][1], code->constant[n][2], - code->constant[n][3]); + fprintf(stderr, "Constant %d: %i[%i]\n", n, + code->constant[n].File, code->constant[n].Index); } fprintf(stderr, "--------\n"); } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 0e95c81e48..c79bff96bd 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -266,7 +266,7 @@ static int get_temp(struct r500_pfs_compile_state *cs, int slot) { /* Borrowed verbatim from r300_fragprog since it hasn't changed. */ static GLuint emit_const4fv(struct r500_pfs_compile_state *cs, - const GLfloat * cp) + struct prog_src_register srcreg) { PROG_CODE; @@ -274,7 +274,8 @@ static GLuint emit_const4fv(struct r500_pfs_compile_state *cs, int index; for (index = 0; index < code->const_nr; ++index) { - if (code->constant[index] == cp) + if (code->constant[index].File == srcreg.File && + code->constant[index].Index == srcreg.Index) break; } @@ -285,7 +286,7 @@ static GLuint emit_const4fv(struct r500_pfs_compile_state *cs, } code->const_nr++; - code->constant[index] = cp; + code->constant[index] = srcreg; } reg = index | REG_CONSTANT; @@ -303,18 +304,11 @@ static GLuint make_src(struct r500_pfs_compile_state *cs, struct prog_src_regist reg = cs->inputs[src.Index].reg; break; case PROGRAM_LOCAL_PARAM: - reg = emit_const4fv(cs, - cs->compiler->fp->mesa_program.Base.LocalParams[src.Index]); - break; case PROGRAM_ENV_PARAM: - reg = emit_const4fv(cs, - cs->compiler->r300->radeon.glCtx->FragmentProgram.Parameters[src.Index]); - break; case PROGRAM_STATE_VAR: case 
PROGRAM_NAMED_PARAM: case PROGRAM_CONSTANT: - reg = emit_const4fv(cs, - cs->compiler->fp->mesa_program.Base.Parameters->ParameterValues[src.Index]); + reg = emit_const4fv(cs, src); break; case PROGRAM_BUILTIN: reg = 0x0; @@ -628,12 +622,20 @@ static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction temp.Index = get_temp(cs, 0); temp.WriteMask = WRITEMASK_W; + struct prog_src_register srcreg; + GLuint constant_swizzle; + + srcreg.File = PROGRAM_CONSTANT; + srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters, + RCP_2PI, 4, &constant_swizzle); + srcreg.Swizzle = constant_swizzle; + /* temp = Input*(1/2pi) */ ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp); set_src0(cs, ip, fpi->SrcReg[0]); - set_src1_direct(cs, ip, emit_const4fv(cs, RCP_2PI)); + set_src1(cs, ip, srcreg); set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, make_sop_swizzle(fpi->SrcReg[0])); - set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W); + set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, make_alpha_swizzle(srcreg)); set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); /* temp = frac(dst) */ @@ -660,87 +662,6 @@ static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction } } -/** - * Emit a LIT instruction. - * - * Definition of LIT (from ARB_fragment_program): - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? 
RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - */ -static void emit_lit(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) -{ - GLuint cnst; - int needTemporary; - GLuint temp; - int ip; - - cnst = emit_const4fv(cs, LIT); - - needTemporary = 0; - if (fpi->DstReg.WriteMask != WRITEMASK_XYZW || fpi->DstReg.File == PROGRAM_OUTPUT) - needTemporary = 1; - - if (needTemporary) { - temp = get_temp(cs, 0); - } else { - temp = fpi->DstReg.Index; - } - - // MAX tmp.xyw, op0, { 0, 0, 0, -128+eps } - ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAX, R500_ALPHA_OP_MAX, temp, WRITEMASK_XYW); - set_src0(cs, ip, fpi->SrcReg[0]); - set_src1_direct(cs, ip, cnst); - set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); - set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W); - - // MIN tmp.z, tmp.w, { 128-eps } - // LG2 tmp.w, tmp.y - ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MIN, R500_ALPHA_OP_LN2, temp, WRITEMASK_ZW); - set_src0_direct(cs, ip, temp); - set_src1_direct(cs, ip, cnst); - set_argA(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), SWIZZLE_Y); - set_argB(cs, ip, 1, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_X); - - // MOV tmp.y, tmp.x - // MUL tmp.w, tmp.z, tmp.w - ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp, WRITEMASK_YW); - set_src0_direct(cs, ip, temp); - set_argA(cs, ip, 0, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_Z); - set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W); - set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); - - // MOV tmp.x, 1.0 - // EX2 tmp.w, tmp.w - ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_EX2, temp, WRITEMASK_XW); - set_src0_direct(cs, ip, temp); - set_argA(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W); - set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ZERO); - set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); - - // tmp.z := (-tmp.x >= 0) ? 
tmp.y : 0.0 - // MOV tmp.w, 1.0 - ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, temp, WRITEMASK_ZW); - set_src0_direct(cs, ip, temp); - set_argA(cs, ip, 0, R500_SWIZZLE_ZERO, R500_SWIZZLE_ONE); - set_argB(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), R500_SWIZZLE_ONE); - set_argC(cs, ip, 0, SWIZZLE_Y | (SWIZZLE_Y<<3) | (SWIZZLE_Y<<6) | (R500_SWIZ_MOD_NEG<<9), R500_SWIZZLE_ZERO); - - if (needTemporary) { - ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg); - set_src0_direct(cs, ip, temp); - set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W); - set_argB(cs, ip, 1, R500_SWIZ_RGB_RGB, SWIZZLE_W); - set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); - } -} - static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) { PROG_CODE; GLuint src[3], dest = 0; @@ -830,9 +751,6 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction * src[0] = make_src(cs, fpi->SrcReg[0]); emit_sop(cs, R500_ALPHA_OP_LN2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0])); break; - case OPCODE_LIT: - emit_lit(cs, fpi); - break; case OPCODE_LRP: /* result = src0*src1 + (1-src0)*src2 * = src0*src1 + src2 + (-src0)*src2 diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c index d6d016d7c1..85ea810523 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c @@ -35,6 +35,8 @@ #include "radeon_program_alu.h" +#include "shader/prog_parameter.h" + static struct prog_instruction *emit1(struct gl_program* p, gl_inst_opcode Opcode, struct prog_dst_register DstReg, @@ -101,6 +103,19 @@ static struct prog_dst_register dstreg(int file, int index) return dst; } +static struct prog_dst_register dstregtmpmask(int index, int mask) +{ + struct prog_dst_register dst; + dst.File = PROGRAM_TEMPORARY; + dst.Index = index; + dst.WriteMask = mask; + dst.CondMask = COND_TR; + dst.CondSwizzle = 
SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + static const struct prog_src_register builtin_zero = { .File = PROGRAM_BUILTIN, .Index = 0, @@ -125,6 +140,15 @@ static struct prog_src_register srcreg(int file, int index) return src; } +static struct prog_src_register srcregswz(int file, int index, int swz) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + src.Swizzle = swz; + return src; +} + static struct prog_src_register negate(struct prog_src_register reg) { struct prog_src_register newreg = reg; @@ -136,10 +160,10 @@ static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, { struct prog_src_register swizzled = reg; swizzled.Swizzle = MAKE_SWIZZLE4( - GET_SWZ(reg.Swizzle, x), - GET_SWZ(reg.Swizzle, y), - GET_SWZ(reg.Swizzle, z), - GET_SWZ(reg.Swizzle, w)); + x >= 4 ? x : GET_SWZ(reg.Swizzle, x), + y >= 4 ? y : GET_SWZ(reg.Swizzle, y), + z >= 4 ? z : GET_SWZ(reg.Swizzle, z), + w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); return swizzled; } @@ -185,6 +209,93 @@ static void transform_FLR(struct radeon_transform_context* t, emit2(t->Program, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); } +/** + * Definition of LIT (from ARB_fragment_program): + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots, if the subsequent optimization passes are clever enough + * to pair instructions correctly. 
+ */ +static void transform_LIT(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + static const GLfloat LitConst[4] = { -127.999999 }; + + GLuint constant; + GLuint constant_swizzle; + GLuint temp; + int needTemporary = 0; + struct prog_src_register srctemp; + + constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); + + if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { + needTemporary = 1; + } else if (inst->DstReg.File != PROGRAM_TEMPORARY) { + // LIT is typically followed by DP3/DP4, so there's no point + // in creating special code for this case + needTemporary = 1; + } + + if (needTemporary) { + temp = radeonFindFreeTemporary(t); + } else { + temp = inst->DstReg.Index; + } + srctemp = srcreg(PROGRAM_TEMPORARY, temp); + + // tmp.x = max(0.0, Src.x); + // tmp.y = max(0.0, Src.y); + // tmp.z = clamp(Src.w, -128+eps, 128-eps); + emit2(t->Program, OPCODE_MAX, + dstregtmpmask(temp, WRITEMASK_XYW), + inst->SrcReg[0], + swizzle(srcreg(PROGRAM_CONSTANT, constant), + SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); + emit2(t->Program, OPCODE_MIN, + dstregtmpmask(temp, WRITEMASK_Z), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); + + // tmp.w = Pow(tmp.y, tmp.z) + emit1(t->Program, OPCODE_LG2, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit2(t->Program, OPCODE_MUL, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); + emit1(t->Program, OPCODE_EX2, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + + // tmp.z = (tmp.x > 0) ? 
tmp.w : 0.0 + emit3(t->Program, OPCODE_CMP, + dstregtmpmask(temp, WRITEMASK_Z), + negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + builtin_zero); + + // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 + emit1(t->Program, OPCODE_MOV, + dstregtmpmask(temp, WRITEMASK_XYW), + swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); + + if (needTemporary) + emit1(t->Program, OPCODE_MOV, inst->DstReg, srctemp); +} + static void transform_POW(struct radeon_transform_context* t, struct prog_instruction* inst) { @@ -249,13 +360,11 @@ static void transform_XPD(struct radeon_transform_context* t, * no userData necessary. * * Eliminates the following ALU instructions: - * ABS, DPH, FLR, POW, SGE, SLT, SUB, SWZ, XPD + * ABS, DPH, FLR, LIT, POW, SGE, SLT, SUB, SWZ, XPD * using: * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP * * @note should be applicable to R300 and R500 fragment programs. - * - * @todo add LIT here as well? */ GLboolean radeonTransformALU(struct radeon_transform_context* t, struct prog_instruction* inst, @@ -265,6 +374,7 @@ GLboolean radeonTransformALU(struct radeon_transform_context* t, case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; + case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; -- cgit v1.2.3