From e6e6fcd3a674429886aed499f9a63594aa5a0f58 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 25 Nov 2010 15:07:02 +0100 Subject: r300/compiler: move util functions to radeon_compiler_util The compiler seriously needs a cleanup as far as the arrangement of functions is concerned. It's hard to know whether some function was implemented or not because there are so many places to search in and it can be anywhere and named anyhow. --- src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 39408845d5..106e03495d 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -36,6 +36,7 @@ #include "radeon_program_alu.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" static struct rc_instruction *emit1( -- cgit v1.2.3 From e6d798948e00e255b80a69562a7d262257f77ee5 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 1 Dec 2010 16:44:22 +0100 Subject: r300/compiler: implement and lower OPCODE_CLAMP Needed for st/vega. --- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 2 +- src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c | 7 +++++++ src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h | 3 +++ .../drivers/dri/r300/compiler/radeon_program_alu.c | 18 ++++++++++++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c') diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 33448bf0de..15a323989b 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ /* gap */ case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; - /* case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; */ + case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */ case TGSI_OPCODE_EX2: return RC_OPCODE_EX2; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index da495a3afa..113b27632a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -66,6 +66,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .HasDstReg = 1, .IsComponentwise = 1 }, + { + .Opcode = RC_OPCODE_CLAMP, + .Name = "CLAMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, { .Opcode = RC_OPCODE_CMP, .Name = "CMP", diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index d3f639c870..7e66610127 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -50,6 +50,9 @@ typedef enum { /** vec4 instruction: dst.c = ceil(src0.c) */ RC_OPCODE_CEIL, + /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ + RC_OPCODE_CLAMP, + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ RC_OPCODE_CMP, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 106e03495d..01c2e74e7b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -217,6 +217,22 @@ static void transform_CEIL(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_CLAMP(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* CLAMP dst, src, min, max + * into: + * MIN tmp, src, max + * MAX dst, tmp, min + */ + int tempreg = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[1]); + rc_remove_instruction(inst); +} + static void transform_DP2(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -554,6 +570,7 @@ int radeonTransformALU( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; @@ -782,6 +799,7 @@ int r300_transform_vertex_alu( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; -- cgit v1.2.3 From 2ff9d4474bdf5f05852ad4963d0b597d20743678 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 7 Dec 2010 21:57:18 +0100 Subject: r300/compiler: make lowering passes possibly use up to two less temps CMP may now use two less temps, other non-native instructions may end up using one less temp, except for SIN/COS/SCS, which I am leaving unchanged for now. This may reduce register pressure inside loops, because the register allocator doesn't do a very good job there. --- .../drivers/dri/r300/compiler/radeon_program_alu.c | 149 ++++++++++++--------- 1 file changed, 86 insertions(+), 63 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 01c2e74e7b..58977a40c7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -85,16 +85,6 @@ static struct rc_instruction *emit3( return fpi; } -static struct rc_dst_register dstreg(int file, int index) -{ - struct rc_dst_register dst; - dst.File = file; - dst.Index = index; - dst.WriteMask = RC_MASK_XYZW; - dst.RelAddr = 0; - return dst; -} - static struct rc_dst_register dstregtmpmask(int index, int mask) { struct rc_dst_register dst = {0}; @@ -187,6 +177,38 @@ static struct rc_src_register swizzle_wwww(struct rc_src_register reg) return swizzle_smear(reg, RC_SWIZZLE_W); } +static int is_dst_safe_to_reuse(struct rc_instruction *inst) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + assert(info->HasDstReg); + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + return 0; + + for (i = 0; i < info->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) + return 0; + } + + return 1; +} + +static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + unsigned tmp; + + if (is_dst_safe_to_reuse(inst)) + tmp = inst->U.I.DstReg.Index; + else + tmp = rc_find_free_temporary(c); + + return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); +} + static void transform_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -210,10 +232,10 @@ static void transform_CEIL(struct radeon_compiler* c, * ceil(x) = x+frac(-x) */ - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0])); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); rc_remove_instruction(inst); } @@ -225,11 +247,11 @@ static void transform_CLAMP(struct radeon_compiler *c, * MIN tmp, src, max * MAX dst, tmp, min */ - int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[1]); + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); rc_remove_instruction(inst); } @@ -275,10 +297,10 @@ static void transform_DST(struct radeon_compiler* c, static void transform_FLR(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -368,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c, static void transform_LRP(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); rc_remove_instruction(inst); } @@ -383,9 +405,8 @@ static void transform_LRP(struct radeon_compiler* c, static void transform_POW(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg); - struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg); + struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); tempdst.WriteMask = RC_MASK_W; tempsrc.Swizzle = RC_SWIZZLE_WWWW; @@ -405,11 +426,11 @@ static void transform_RSQ(struct radeon_compiler* c, static void transform_SEQ(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -424,11 +445,11 @@ static void transform_SFL(struct radeon_compiler* c, static void transform_SGE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -436,11 +457,11 @@ static void transform_SGE(struct radeon_compiler* c, static void transform_SGT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -448,11 +469,11 @@ static void transform_SGT(struct radeon_compiler* c, static void transform_SLE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -460,11 +481,11 @@ static void transform_SLE(struct radeon_compiler* c, static void transform_SLT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -472,11 +493,11 @@ static void transform_SLT(struct radeon_compiler* c, static void transform_SNE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -490,12 +511,13 @@ static void transform_SSG(struct radeon_compiler* c, * CMP tmp1, x, 1, 0 * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0; + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit3(c, inst->Prev, RC_OPCODE_CMP, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, negate(inst->U.I.SrcReg[0]), builtin_one, builtin_zero); @@ -512,7 +534,7 @@ static void transform_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); @@ -534,15 +556,15 @@ static void transform_SWZ(struct radeon_compiler* c, static void transform_XPD(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), - negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -610,7 +632,7 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, { /* There is no decent CMP available, so let's rig one up. * CMP is defined as dst = src0 < 0.0 ? src1 : src2 - * The following sequence consumes two temps and two extra slots + * The following sequence consumes zero to two temps and two extra slots * (the second temp and the second slot is consumed by transform_LRP), * but should be equivalent: * @@ -618,18 +640,18 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, * LRP dst, tmp0, src1, src2 * * Yes, I know, I'm a mad scientist. ~ C. & M. */ - int tempreg0 = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); /* SLT tmp0, src0, 0.0 */ emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstreg(RC_FILE_TEMPORARY, tempreg0), + dst, inst->U.I.SrcReg[0], builtin_zero); /* LRP dst, tmp0, src1, src2 */ transform_LRP(c, emit3(c, inst->Prev, RC_OPCODE_LRP, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); rc_remove_instruction(inst); } @@ -660,7 +682,7 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c, static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); unsigned constant_swizzle; int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 0.0000000000000000001, @@ -668,16 +690,16 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, /* MOV dst, src */ emit1(c, inst->Prev, RC_OPCODE_MOV, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[0]); /* MAX dst.z, src, 0.00...001 */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, - dstregtmpmask(tempreg, RC_MASK_Y), - srcreg(RC_FILE_TEMPORARY, tempreg), + dstregtmpmask(dst.Index, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, dst.Index), srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); - inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg); + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); } static void transform_r300_vertex_SEQ(struct radeon_compiler *c, @@ -761,12 +783,13 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, * SLT tmp1, x, 0; * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, builtin_zero, inst->U.I.SrcReg[0]); @@ -781,7 +804,7 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); -- cgit v1.2.3 From 9e1fbd3d6e2a8accd0ebdc5937dd6eb7a8ef9b10 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 11 Dec 2010 07:32:24 +0100 Subject: r300/compiler: fix LIT in VS This fixes broken rendering of trees in ETQW. The trees still disappear for an unknown reason when they are close. Broken since: 2ff9d4474bdf5f05852ad4963d0b597d20743678 r300/compiler: make lowering passes possibly use up to two less temps NOTE: This is a candidate for the 7.10 branch. --- src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 58977a40c7..c8063171b8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -689,11 +689,12 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, &constant_swizzle); /* MOV dst, src */ + dst.WriteMask = RC_MASK_XYZW; emit1(c, inst->Prev, RC_OPCODE_MOV, 0, dst, inst->U.I.SrcReg[0]); - /* MAX dst.z, src, 0.00...001 */ + /* MAX dst.y, src, 0.00...001 */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, dstregtmpmask(dst.Index, RC_MASK_Y), srcreg(RC_FILE_TEMPORARY, dst.Index), -- cgit v1.2.3