summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Corbin Simpson <MostAwesomeDude@gmail.com>, 2010-03-26 05:24:44 -0700
committer: Corbin Simpson <MostAwesomeDude@gmail.com>, 2010-03-26 05:24:44 -0700
commit: b6df7aed60189d5f28a139c6fe351022ca2907a4 (patch)
tree: 66071ce190cddf714c5fbe5dcaf56a6d92bd462d /src
parent: 38c7a01b6c220ad04c5754602673ad3cf36ad508 (diff)
r300/compiler: Lower CMP for vertex programs.
I think my maths is right?
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c41
1 files changed, 41 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index b5c08aea49..f5b7d57eab 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -506,6 +506,46 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c,
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
}
+/**
+ * Lower a CMP instruction for vertex programs into SLT/SGE/MUL/MAD.
+ *
+ * The replacement instructions are emitted through emit2()/emit3() anchored
+ * at inst->Prev, the final MAD inherits the original destination register and
+ * saturate mode, and the original instruction is removed before returning.
+ *
+ * \param c    compiler context handed to the emit and temp-allocation helpers
+ * \param inst the CMP instruction to replace; it is removed by this function
+ */
+static void transform_r300_vertex_CMP(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* There is no decent CMP available, so let's rig one up.
+	 * CMP is defined as dst = src0 < 0.0 ? src1 : src2
+	 * The following sequence consumes two temps and three extra slots,
+	 * but should be equivalent:
+	 *
+	 * SLT tmp0, src0, 0.0
+	 * SGE tmp1, src0, 0.0
+	 * MUL tmp0, tmp0, src1
+	 * MAD dst, src2, tmp1, tmp0
+	 *
+	 * i.e. dst = src2 * (src0 >= 0) + src1 * (src0 < 0), assuming SLT/SGE
+	 * produce 1.0/0.0 masks.
+	 *
+	 * Yes, I know, I'm a mad scientist. ~ C. */
+	int tempreg0 = rc_find_free_temporary(c);
+	int tempreg1;
+
+	/* SLT tmp0, src0, 0.0 */
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+		dstreg(RC_FILE_TEMPORARY, tempreg0),
+		inst->U.I.SrcReg[0], builtin_zero);
+
+	/* Only ask for the second temp once an instruction that writes tmp0
+	 * exists in the program. NOTE(review): rc_find_free_temporary() is
+	 * understood to pick a register by scanning the instructions already
+	 * emitted, so two back-to-back calls would return the same index and
+	 * SGE would clobber the SLT result -- confirm against its definition.
+	 * Allocating here is correct either way. */
+	tempreg1 = rc_find_free_temporary(c);
+
+	/* SGE tmp1, src0, 0.0 */
+	emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+		dstreg(RC_FILE_TEMPORARY, tempreg1),
+		inst->U.I.SrcReg[0], builtin_zero);
+
+	/* MUL tmp0, tmp0, src1 */
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+		dstreg(RC_FILE_TEMPORARY, tempreg0),
+		srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1]);
+
+	/* MAD dst, src2, tmp1, tmp0
+	 * Only this last instruction writes the real destination, so the
+	 * original saturate mode is applied here and nowhere else. */
+	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
+		inst->U.I.DstReg,
+		inst->U.I.SrcReg[2], srcreg(RC_FILE_TEMPORARY, tempreg1), srcreg(RC_FILE_TEMPORARY, tempreg0));
+
+	rc_remove_instruction(inst);
+}
+
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
@@ -517,6 +557,7 @@ int r300_transform_vertex_alu(
{
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
+ case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;