summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marek Olšák <maraeo@gmail.com> 2010-04-18 20:49:50 +0200
committer: Marek Olšák <maraeo@gmail.com> 2010-04-19 00:35:26 +0200
commit: ebd05a798e34f99bfa35c18803de47662e9e4840 (patch)
tree: a52609cc8cffced68a7e8e273ff970abf6dd7700
parent: 65fd6fb2044521511b867c76e270f285d0b15f06 (diff)
r300/compiler: optimize CMP for vertex shaders a bit
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c 29
1 files changed, 9 insertions, 20 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index f5b7d57eab..fced31d6cb 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -511,37 +511,26 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
{
/* There is no decent CMP available, so let's rig one up.
* CMP is defined as dst = src0 < 0.0 ? src1 : src2
- * The following sequence consumes two temps and three extra slots,
+ * The following sequence consumes two temps and two extra slots
+ * (the second temp and the second slot is consumed by transform_LRP),
* but should be equivalent:
*
* SLT tmp0, src0, 0.0
- * SGE tmp1, src0, 0.0
- * MUL tmp0, tmp0, src1
- * MAD dst, src2, tmp1, tmp0
+ * LRP dst, tmp0, src1, src2
*
- * Yes, I know, I'm a mad scientist. ~ C. */
+ * Yes, I know, I'm a mad scientist. ~ C. & M. */
int tempreg0 = rc_find_free_temporary(c);
- int tempreg1 = rc_find_free_temporary(c);
/* SLT tmp0, src0, 0.0 */
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
dstreg(RC_FILE_TEMPORARY, tempreg0),
inst->U.I.SrcReg[0], builtin_zero);
- /* SGE tmp1, src0, 0.0 */
- emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
- dstreg(RC_FILE_TEMPORARY, tempreg1),
- inst->U.I.SrcReg[0], builtin_zero);
-
- /* MUL tmp0, tmp0, src1 */
- emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
- dstreg(RC_FILE_TEMPORARY, tempreg0),
- srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1]);
-
- /* MAD dst, src2, tmp1, tmp0 */
- emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
- inst->U.I.DstReg,
- inst->U.I.SrcReg[2], srcreg(RC_FILE_TEMPORARY, tempreg1), srcreg(RC_FILE_TEMPORARY, tempreg0));
+ /* LRP dst, tmp0, src1, src2 */
+ transform_LRP(c,
+ emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
rc_remove_instruction(inst);
}