diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c')
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c | 53 |
1 files changed, 32 insertions, 21 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index f5b7d57eab..05b874ba7c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -175,6 +175,26 @@ static void transform_ABS(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_CEIL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Assuming: + * ceil(x) = -floor(-x) + * + * After inlining floor: + * ceil(x) = -(-x-frac(-x)) + * + * After simplification: + * ceil(x) = x+frac(-x) + */ + + int tempreg = rc_find_free_temporary(c); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); + rc_remove_instruction(inst); +} + static void transform_DP3(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -458,7 +478,7 @@ static void transform_XPD(struct radeon_compiler* c, * no userData necessary. * * Eliminates the following ALU instructions: - * ABS, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD + * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD * using: * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP * @@ -474,6 +494,7 @@ int radeonTransformALU( { switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; @@ -511,37 +532,26 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, { /* There is no decent CMP available, so let's rig one up. * CMP is defined as dst = src0 < 0.0 ? src1 : src2 - * The following sequence consumes two temps and three extra slots, + * The following sequence consumes two temps and two extra slots + * (the second temp and the second slot is consumed by transform_LRP), * but should be equivalent: * * SLT tmp0, src0, 0.0 - * SGE tmp1, src0, 0.0 - * MUL tmp0, tmp0, src1 - * MAD dst, src2, tmp1, tmp0 + * LRP dst, tmp0, src1, src2 * - * Yes, I know, I'm a mad scientist. ~ C. */ + * Yes, I know, I'm a mad scientist. ~ C. & M. */ int tempreg0 = rc_find_free_temporary(c); - int tempreg1 = rc_find_free_temporary(c); /* SLT tmp0, src0, 0.0 */ emit2(c, inst->Prev, RC_OPCODE_SLT, 0, dstreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[0], builtin_zero); - /* SGE tmp1, src0, 0.0 */ - emit2(c, inst->Prev, RC_OPCODE_SGE, 0, - dstreg(RC_FILE_TEMPORARY, tempreg1), - inst->U.I.SrcReg[0], builtin_zero); - - /* MUL tmp0, tmp0, src1 */ - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, - dstreg(RC_FILE_TEMPORARY, tempreg0), - srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1]); - - /* MAD dst, src2, tmp1, tmp0 */ - emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, - inst->U.I.DstReg, - inst->U.I.SrcReg[2], srcreg(RC_FILE_TEMPORARY, tempreg1), srcreg(RC_FILE_TEMPORARY, tempreg0)); + /* LRP dst, tmp0, src1, src2 */ + transform_LRP(c, + emit3(c, inst->Prev, RC_OPCODE_LRP, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); rc_remove_instruction(inst); } @@ -557,6 +567,7 @@ int r300_transform_vertex_alu( { switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; |