diff options
author | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-12-19 21:46:33 +0100 |
---|---|---|
committer | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-12-19 21:46:33 +0100 |
commit | 0f68236a2487dbeb0396b996debcda595b0b54a1 (patch) | |
tree | 938ae3b779349b6dba6f5a891550604f9a9ca895 /src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c | |
parent | d047168d81cfeb39a98f3ae16416872facc6237c (diff) | |
parent | 237880463d5168cad8df0bae6018b5fd76617777 (diff) |
Merge remote branch 'origin/master' into nvc0-new
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c')
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c | 165 |
1 files changed, 104 insertions, 61 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 39408845d5..c8063171b8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -36,6 +36,7 @@ #include "radeon_program_alu.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" static struct rc_instruction *emit1( @@ -84,16 +85,6 @@ static struct rc_instruction *emit3( return fpi; } -static struct rc_dst_register dstreg(int file, int index) -{ - struct rc_dst_register dst; - dst.File = file; - dst.Index = index; - dst.WriteMask = RC_MASK_XYZW; - dst.RelAddr = 0; - return dst; -} - static struct rc_dst_register dstregtmpmask(int index, int mask) { struct rc_dst_register dst = {0}; @@ -186,6 +177,38 @@ static struct rc_src_register swizzle_wwww(struct rc_src_register reg) return swizzle_smear(reg, RC_SWIZZLE_W); } +static int is_dst_safe_to_reuse(struct rc_instruction *inst) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + assert(info->HasDstReg); + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + return 0; + + for (i = 0; i < info->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) + return 0; + } + + return 1; +} + +static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + unsigned tmp; + + if (is_dst_safe_to_reuse(inst)) + tmp = inst->U.I.DstReg.Index; + else + tmp = rc_find_free_temporary(c); + + return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); +} + static void transform_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -209,10 +232,26 @@ static void transform_CEIL(struct radeon_compiler* c, * ceil(x) = x+frac(-x) */ - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0])); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); + rc_remove_instruction(inst); +} + +static void transform_CLAMP(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* CLAMP dst, src, min, max + * into: + * MIN tmp, src, max + * MAX dst, tmp, min + */ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, + inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); rc_remove_instruction(inst); } @@ -258,10 +297,10 @@ static void transform_DST(struct radeon_compiler* c, static void transform_FLR(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -351,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c, static void transform_LRP(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); rc_remove_instruction(inst); } @@ -366,9 +405,8 @@ static void transform_LRP(struct radeon_compiler* c, static void transform_POW(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg); - struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg); + struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); tempdst.WriteMask = RC_MASK_W; tempsrc.Swizzle = RC_SWIZZLE_WWWW; @@ -388,11 +426,11 @@ static void transform_RSQ(struct radeon_compiler* c, static void transform_SEQ(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -407,11 +445,11 @@ static void transform_SFL(struct radeon_compiler* c, static void transform_SGE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -419,11 +457,11 @@ static void transform_SGE(struct radeon_compiler* c, static void transform_SGT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -431,11 +469,11 @@ static void transform_SGT(struct radeon_compiler* c, static void transform_SLE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -443,11 +481,11 @@ static void transform_SLE(struct radeon_compiler* c, static void transform_SLT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -455,11 +493,11 @@ static void transform_SLT(struct radeon_compiler* c, static void transform_SNE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -473,12 +511,13 @@ static void transform_SSG(struct radeon_compiler* c, * CMP tmp1, x, 1, 0 * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0; + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit3(c, inst->Prev, RC_OPCODE_CMP, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, negate(inst->U.I.SrcReg[0]), builtin_one, builtin_zero); @@ -495,7 +534,7 @@ static void transform_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); @@ -517,15 +556,15 @@ static void transform_SWZ(struct radeon_compiler* c, static void transform_XPD(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), - negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -553,6 +592,7 @@ int radeonTransformALU( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; @@ -592,7 +632,7 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, { /* There is no decent CMP available, so let's rig one up. * CMP is defined as dst = src0 < 0.0 ? src1 : src2 - * The following sequence consumes two temps and two extra slots + * The following sequence consumes zero to two temps and two extra slots * (the second temp and the second slot is consumed by transform_LRP), * but should be equivalent: * @@ -600,18 +640,18 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, * LRP dst, tmp0, src1, src2 * * Yes, I know, I'm a mad scientist. ~ C. & M. */ - int tempreg0 = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); /* SLT tmp0, src0, 0.0 */ emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstreg(RC_FILE_TEMPORARY, tempreg0), + dst, inst->U.I.SrcReg[0], builtin_zero); /* LRP dst, tmp0, src1, src2 */ transform_LRP(c, emit3(c, inst->Prev, RC_OPCODE_LRP, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); rc_remove_instruction(inst); } @@ -642,24 +682,25 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c, static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); unsigned constant_swizzle; int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 0.0000000000000000001, &constant_swizzle); /* MOV dst, src */ + dst.WriteMask = RC_MASK_XYZW; emit1(c, inst->Prev, RC_OPCODE_MOV, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[0]); - /* MAX dst.z, src, 0.00...001 */ + /* MAX dst.y, src, 0.00...001 */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, - dstregtmpmask(tempreg, RC_MASK_Y), - srcreg(RC_FILE_TEMPORARY, tempreg), + dstregtmpmask(dst.Index, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, dst.Index), srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); - inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg); + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); } static void transform_r300_vertex_SEQ(struct radeon_compiler *c, @@ -743,12 +784,13 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, * SLT tmp1, x, 0; * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, builtin_zero, inst->U.I.SrcReg[0]); @@ -763,7 +805,7 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); @@ -781,6 +823,7 @@ int r300_transform_vertex_alu( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; |