summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
diff options
context:
space:
mode:
author Nicolai Haehnle <nhaehnle@gmail.com> 2008-07-05 22:21:24 +0200
committer Nicolai Haehnle <nhaehnle@gmail.com> 2008-07-06 09:59:43 +0200
commit 62bccd6df0c963a14e801bcac95dc8046b978a7f (patch)
tree bef7225f252c272272cc445c24c1935967554d37 /src/mesa/drivers/dri/r300/r300_fragprog_emit.c
parent 77fdfaa23adeaaf6a217ef1ee751410c6a5b0d21 (diff)
r300: Allow adding parameters during fragprog transform, share LIT code
Diffstat (limited to 'src/mesa/drivers/dri/r300/r300_fragprog_emit.c')
-rw-r--r-- src/mesa/drivers/dri/r300/r300_fragprog_emit.c 143
1 files changed, 21 insertions, 122 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
index 889631f705..d95008edc0 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
@@ -549,22 +549,17 @@ static void free_temp(struct r300_pfs_compile_state *cs, GLuint r)
/**
* Emit a hardware constant/parameter.
- *
- * \p cp Stable pointer to an array of 4 floats.
- * The pointer must be stable in the sense that it remains to be valid
- * and hold the contents of the constant/parameter throughout the lifetime
- * of the fragment program (actually, up until the next time the fragment
- * program is translated).
*/
static GLuint emit_const4fv(struct r300_pfs_compile_state *cs,
- const GLfloat * cp)
+ struct prog_src_register srcreg)
{
COMPILE_STATE;
GLuint reg = undef;
int index;
for (index = 0; index < code->const_nr; ++index) {
- if (code->constant[index] == cp)
+ if (code->constant[index].File == srcreg.File &&
+ code->constant[index].Index == srcreg.Index)
break;
}
@@ -575,7 +570,7 @@ static GLuint emit_const4fv(struct r300_pfs_compile_state *cs,
}
code->const_nr++;
- code->constant[index] = cp;
+ code->constant[index] = srcreg;
}
REG_SET_TYPE(reg, REG_TYPE_CONST);
@@ -806,20 +801,11 @@ static GLuint t_src(struct r300_pfs_compile_state *cs,
REG_SET_TYPE(r, REG_TYPE_INPUT);
break;
case PROGRAM_LOCAL_PARAM:
- r = emit_const4fv(cs,
- fp->mesa_program.Base.LocalParams[fpsrc.
- Index]);
- break;
case PROGRAM_ENV_PARAM:
- r = emit_const4fv(cs,
- cs->compiler->r300->radeon.glCtx->FragmentProgram.Parameters[fpsrc.Index]);
- break;
case PROGRAM_STATE_VAR:
case PROGRAM_NAMED_PARAM:
case PROGRAM_CONSTANT:
- r = emit_const4fv(cs,
- fp->mesa_program.Base.Parameters->
- ParameterValues[fpsrc.Index]);
+ r = emit_const4fv(cs, fpsrc);
break;
case PROGRAM_BUILTIN:
switch(fpsrc.Swizzle) {
@@ -1452,100 +1438,17 @@ static GLfloat SinCosConsts[2][4] = {
}
};
-/**
- * Emit a LIT instruction.
- * \p flags may be PFS_FLAG_SAT
- *
- * Definition of LIT (from ARB_fragment_program):
- * tmp = VectorLoad(op0);
- * if (tmp.x < 0) tmp.x = 0;
- * if (tmp.y < 0) tmp.y = 0;
- * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
- * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
- * result.x = 1.0;
- * result.y = tmp.x;
- * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
- * result.w = 1.0;
- *
- * The longest path of computation is the one leading to result.z,
- * consisting of 5 operations. This implementation of LIT takes
- * 5 slots. So unless there's some special undocumented opcode,
- * this implementation is potentially optimal. Unfortunately,
- * emit_arith is a bit too conservative because it doesn't understand
- * partial writes to the vector component.
- */
-static const GLfloat LitConst[4] =
- { 127.999999, 127.999999, 127.999999, -127.999999 };
-
-static void emit_lit(struct r300_pfs_compile_state *cs,
- GLuint dest, int mask, GLuint src, int flags)
+static GLuint emit_sincosconsts(struct r300_pfs_compile_state *cs, int i)
{
- COMPILE_STATE;
- GLuint cnst;
- int needTemporary;
- GLuint temp;
-
- cnst = emit_const4fv(cs, LitConst);
-
- needTemporary = 0;
- if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) {
- needTemporary = 1;
- } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
- // LIT is typically followed by DP3/DP4, so there's no point
- // in creating special code for this case
- needTemporary = 1;
- }
+ struct prog_src_register srcreg;
+ GLuint constant_swizzle;
- if (needTemporary) {
- temp = keep(get_temp_reg(cs));
- } else {
- temp = keep(dest);
- }
+ srcreg.File = PROGRAM_CONSTANT;
+ srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters,
+ SinCosConsts[i], 4, &constant_swizzle);
+ srcreg.Swizzle = constant_swizzle;
- // Note: The order of emit_arith inside the slots is relevant,
- // because emit_arith only looks at scalar vs. vector when resolving
- // dependencies, and it does not consider individual vector components,
- // so swizzling between the two parts can create fake dependencies.
-
- // First slot
- emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_XY,
- keep(src), pfs_zero, undef, 0);
- emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0);
-
- // Second slot
- emit_arith(cs, PFS_OP_MIN, temp, WRITEMASK_Z,
- swizzle(temp, W, W, W, W), cnst, undef, 0);
- emit_arith(cs, PFS_OP_LG2, temp, WRITEMASK_W,
- swizzle(temp, Y, Y, Y, Y), undef, undef, 0);
-
- // Third slot
- // If desired, we saturate the y result here.
- // This does not affect the use as a condition variable in the CMP later
- emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W,
- temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0);
- emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_Y,
- swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags);
-
- // Fourth slot
- emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_X,
- pfs_one, pfs_one, pfs_zero, 0);
- emit_arith(cs, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0);
-
- // Fifth slot
- emit_arith(cs, PFS_OP_CMP, temp, WRITEMASK_Z,
- pfs_zero, swizzle(temp, W, W, W, W),
- negate(swizzle(temp, Y, Y, Y, Y)), flags);
- emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one,
- pfs_zero, 0);
-
- if (needTemporary) {
- emit_arith(cs, PFS_OP_MAD, dest, mask,
- temp, pfs_one, pfs_zero, flags);
- free_temp(cs, temp);
- } else {
- // Decrease refcount of the destination
- t_hw_dst(cs, dest, GL_FALSE, cs->nrslots);
- }
+ return emit_const4fv(cs, srcreg);
}
static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_instruction *fpi)
@@ -1577,8 +1480,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
src[1] = t_src(cs, fpi->SrcReg[1]);
src[2] = t_src(cs, fpi->SrcReg[2]);
/* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c
- * r300 - if src2.c < 0.0 ? src1.c : src0.c
- */
+ * r300 - if src2.c < 0.0 ? src1.c : src0.c
+ */
emit_arith(cs, PFS_OP_CMP, dest, mask,
src[2], src[1], src[0], flags);
break;
@@ -1592,8 +1495,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
* result = sin(x)
*/
temp[0] = get_temp_reg(cs);
- const_sin[0] = emit_const4fv(cs, SinCosConsts[0]);
- const_sin[1] = emit_const4fv(cs, SinCosConsts[1]);
+ const_sin[0] = emit_sincosconsts(cs, 0);
+ const_sin[1] = emit_sincosconsts(cs, 1);
src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
/* add 0.5*PI and do range reduction */
@@ -1687,10 +1590,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
emit_arith(cs, PFS_OP_LG2, dest, mask,
src[0], undef, undef, flags);
break;
- case OPCODE_LIT:
- src[0] = t_src(cs, fpi->SrcReg[0]);
- emit_lit(cs, dest, mask, src[0], flags);
- break;
case OPCODE_LRP:
src[0] = t_src(cs, fpi->SrcReg[0]);
src[1] = t_src(cs, fpi->SrcReg[1]);
@@ -1758,8 +1657,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
*/
temp[0] = get_temp_reg(cs);
temp[1] = get_temp_reg(cs);
- const_sin[0] = emit_const4fv(cs, SinCosConsts[0]);
- const_sin[1] = emit_const4fv(cs, SinCosConsts[1]);
+ const_sin[0] = emit_sincosconsts(cs, 0);
+ const_sin[1] = emit_sincosconsts(cs, 1);
src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
/* x = -abs(x)+0.5*PI */
@@ -1825,8 +1724,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
*/
temp[0] = get_temp_reg(cs);
- const_sin[0] = emit_const4fv(cs, SinCosConsts[0]);
- const_sin[1] = emit_const4fv(cs, SinCosConsts[1]);
+ const_sin[0] = emit_sincosconsts(cs, 0);
+ const_sin[1] = emit_sincosconsts(cs, 1);
src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
/* do range reduction */