From 63432ecfce5415fbf07f1781ec77b5ea3efff599 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 13 Jul 2010 21:25:27 -0700 Subject: r300/compiler: Enable presubtract sources The r300 compiler can now emit instructions that select from the presubtract source. A peephole optimization has been added to convert instructions like: ADD Temp[0].x, none.1, -Temp[1].x into the INV (1 - src0) presubtract operation. --- src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 76 ++++++- .../drivers/dri/r300/compiler/r300_fragprog_emit.c | 49 +++++ .../dri/r300/compiler/r300_fragprog_swizzle.c | 33 +-- .../drivers/dri/r300/compiler/r500_fragprog_emit.c | 37 ++++ .../drivers/dri/r300/compiler/radeon_compiler.h | 1 + .../drivers/dri/r300/compiler/radeon_dataflow.c | 128 ++++++++--- .../drivers/dri/r300/compiler/radeon_optimize.c | 245 +++++++++++++++++++-- .../dri/r300/compiler/radeon_pair_schedule.c | 158 ++++++++++++- .../dri/r300/compiler/radeon_pair_translate.c | 53 +++++ .../drivers/dri/r300/compiler/radeon_program.h | 11 +- .../dri/r300/compiler/radeon_program_constants.h | 36 ++- .../dri/r300/compiler/radeon_program_pair.c | 185 +++++++++++++++- .../dri/r300/compiler/radeon_program_pair.h | 33 ++- .../dri/r300/compiler/radeon_program_print.c | 87 +++++++- .../dri/r300/compiler/radeon_remove_constants.c | 34 ++- 15 files changed, 1072 insertions(+), 94 deletions(-) (limited to 'src/mesa/drivers/dri/r300') diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index 3c83deffcb..782671bac0 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -31,6 +31,24 @@ #include "../r300_reg.h" +static void presub_string(char out[10], unsigned int inst) +{ + switch(inst & 0x600000){ + case R300_ALU_SRCP_1_MINUS_2_SRC0: + sprintf(out, "bias"); + break; + case R300_ALU_SRCP_SRC1_MINUS_SRC0: + sprintf(out, "sub"); + break; + case R300_ALU_SRCP_SRC1_PLUS_SRC0: + sprintf(out, "add"); + break; + case R300_ALU_SRCP_1_MINUS_SRC0: + sprintf(out, "inv "); + break; + } +} + /* just some random things... */ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) { @@ -98,8 +116,8 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) for (i = alu_offset; i <= alu_offset + alu_end; ++i) { - char srcc[3][10], dstc[20]; - char srca[3][10], dsta[20]; + char srcc[4][10], dstc[20]; + char srca[4][10], dsta[20]; char argc[3][20]; char arga[3][20]; char flags[5], tmp[10]; @@ -142,6 +160,9 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) flags); strcat(dstc, tmp); } + /* Presub */ + presub_string(srcc[3], code->alu.inst[i].rgb_inst); + presub_string(srca[3], code->alu.inst[i].alpha_inst); dsta[0] = 0; if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { @@ -160,11 +181,12 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) } fprintf(stderr, - "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" - " w: %3s %3s %3s -> %-20s (%08x)\n", i, - srcc[0], srcc[1], srcc[2], dstc, + "%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n" + " w: %3s %3s %3s %5s-> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], srcc[3], dstc, code->alu.inst[i].rgb_addr, srca[0], srca[1], - srca[2], dsta, code->alu.inst[i].alpha_addr); + srca[2], srca[3], dsta, + code->alu.inst[i].alpha_addr); for (j = 0; j < 3; ++j) { int regc = code->alu.inst[i].rgb_inst >> (j * 7); @@ -194,6 +216,24 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) } } else if (d < 15) { sprintf(buf, "%s.www", srca[d - 12]); + } else if (d < 20 ) { + switch(d) { + case R300_ALU_ARGC_SRCP_XYZ: + sprintf(buf, "srcp.xyz"); + break; + case R300_ALU_ARGC_SRCP_XXX: + sprintf(buf, "srcp.xxx"); + break; + case R300_ALU_ARGC_SRCP_YYY: + sprintf(buf, "srcp.yyy"); + break; + case R300_ALU_ARGC_SRCP_ZZZ: + sprintf(buf, "srcp.zzz"); + break; + case R300_ALU_ARGC_SRCP_WWW: + sprintf(buf, "srcp.www"); + break; + } } else if (d == 20) { sprintf(buf, "0.0"); } else if (d == 21) { @@ -231,6 +271,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) 'x' + (char)(d % 3)); } else if (d < 12) { sprintf(buf, "%s.w", srca[d - 9]); + } else if (d < 16) { + switch(d) { + case R300_ALU_ARGA_SRCP_X: + sprintf(buf, "srcp.x"); + break; + case R300_ALU_ARGA_SRCP_Y: + sprintf(buf, "srcp.y"); + break; + case R300_ALU_ARGA_SRCP_Z: + sprintf(buf, "srcp.z"); + break; + case R300_ALU_ARGA_SRCP_W: + sprintf(buf, "srcp.w"); + break; + } } else if (d == 16) { sprintf(buf, "0.0"); } else if (d == 17) { @@ -247,11 +302,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) buf, (rega & 64) ? "|" : ""); } - fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n" " w: %8s %8s %8s op: %08x\n", argc[0], argc[1], argc[2], - code->alu.inst[i].rgb_inst, arga[0], arga[1], - arga[2], code->alu.inst[i].alpha_inst); + code->alu.inst[i].rgb_inst, + code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ? + "NOP" : "", + arga[0], arga[1],arga[2], + code->alu.inst[i].alpha_inst); } } } diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index eead2ea426..3b2b06fc2b 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -164,6 +164,53 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i code->alu.inst[ip].alpha_inst |= arg << (7*j); } + /* Presubtract */ + if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].rgb_inst |= + R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } + + if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_1_MINUS_2_SRC0; + break; + case RC_PRESUB_ADD: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_SRC1_PLUS_SRC0; + break; + case RC_PRESUB_SUB: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_SRC1_MINUS_SRC0; + break; + case RC_PRESUB_INV: + code->alu.inst[ip].alpha_inst |= + R300_ALU_SRCP_1_MINUS_SRC0; + break; + default: + break; + } + } + if (inst->RGB.Saturate) code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; if (inst->Alpha.Saturate) @@ -198,6 +245,8 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i emit->node_flags |= R300_W_OUT; c->code->writes_depth = 1; } + if (inst->Nop) + code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP; return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index 5d5de2f1b2..caa48fe478 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -44,25 +44,25 @@ struct swizzle_data { unsigned int hash; /**< swizzle value this matches */ unsigned int base; /**< base value for hw swizzle */ unsigned int stride; /**< difference in base between arg0/1/2 */ + unsigned int srcp_stride; /**< difference in base between arg0/scrp */ }; static const struct swizzle_data native_swizzles[] = { - {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4}, - {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4}, - {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4}, - {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4}, - {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1}, - {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1}, - {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, - {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, - {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}, - {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0} + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0} }; static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); - /** * Find a native RGB swizzle that matches the given swizzle. * Returns 0 if none found. @@ -205,7 +205,11 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) return 0; } - return sd->base + src*sd->stride; + if (src == RC_PAIR_PRESUB_SRC) { + return sd->base + sd->srcp_stride; + } else { + return sd->base + src*sd->stride; + } } @@ -215,6 +219,9 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) */ unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) { + if (src == RC_PAIR_PRESUB_SRC) { + return R300_ALU_ARGA_SRCP_X + swizzle; + } if (swizzle < 3) return swizzle + 3*src; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 3220349f26..54cff9169a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -260,6 +260,9 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Nop) { + code->inst[ip].inst0 |= R500_INST_NOP; + } if (inst->Alpha.DepthWriteMask) { code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; c->code->writes_depth = 1; @@ -275,6 +278,40 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair if (inst->Alpha.Saturate) code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + /* Set the presubtract operation. */ + switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; + break; + default: + break; + } + switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; + break; + default: + break; + } + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 85c2e43ad6..6d96ac9fdd 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -42,6 +42,7 @@ struct radeon_compiler { /* Hardware specification. */ unsigned is_r500:1; unsigned has_half_swizzles:1; + unsigned has_presub:1; unsigned disable_optimizations:1; unsigned max_temp_regs; unsigned max_constants; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 0e6c62541f..e73700f84a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -29,6 +29,25 @@ #include "radeon_program.h" +static void reads_normal_callback( + rc_read_write_chan_fn cb, + struct rc_instruction * fullinst, + struct rc_src_register src, + void * userdata) +{ + unsigned int refmask = 0; + unsigned int chan; + for(chan = 0; chan < 4; chan++) { + refmask |= 1 << GET_SWZ(src.Swizzle, chan); + } + refmask &= RC_MASK_XYZW; + + if (refmask) + cb(userdata, fullinst, src.File, src.Index, refmask); + + if (refmask && src.RelAddr) + cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X); +} static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata) { @@ -36,47 +55,81 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - unsigned int refmask = 0; if (inst->SrcReg[src].File == RC_FILE_NONE) return; - for(unsigned int chan = 0; chan < 4; ++chan) - refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan); - - refmask &= RC_MASK_XYZW; - - if (refmask) - cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask); - - if (refmask && inst->SrcReg[src].RelAddr) - cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X); + if (inst->SrcReg[src].File == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_regs = rc_presubtract_src_reg_count( + inst->PreSub.Opcode); + for( i = 0; i < srcp_regs; i++) { + reads_normal_callback(cb, fullinst, + inst->PreSub.SrcReg[i], + userdata); + } + } else { + reads_normal_callback(cb, fullinst, + inst->SrcReg[src], userdata); + } } } -static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +static void pair_get_src_refmasks(unsigned int * refmasks, + struct rc_pair_instruction * inst, + unsigned int swz, unsigned int src) { - struct rc_pair_instruction * inst = &fullinst->U.P; - unsigned int refmasks[3] = { 0, 0, 0 }; - - if (inst->RGB.Opcode != RC_OPCODE_NOP) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + if (swz >= 4) + return; + + if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) { + if(src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = + rc_presubtract_src_reg_count( + inst->RGB.Src[src].Index); + for(i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; + } + } + else { + refmasks[src] |= 1 << swz; + } + } - for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { - for(unsigned int chan = 0; chan < 3; ++chan) { - unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); - if (swz < 4) - refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz; + if (swz == RC_SWIZZLE_W) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + int srcp_regs = rc_presubtract_src_reg_count( + inst->Alpha.Src[src].Index); + for(i = 0; i < srcp_regs; i++) { + refmasks[i] |= 1 << swz; } } + else { + refmasks[src] |= 1 << swz; + } } +} - if (inst->Alpha.Opcode != RC_OPCODE_NOP) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); +static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + unsigned int refmasks[3] = { 0, 0, 0 }; - for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { - if (inst->Alpha.Arg[arg].Swizzle < 4) - refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle; + unsigned int arg; + + for(arg = 0; arg < 3; ++arg) { + unsigned int chan; + for(chan = 0; chan < 3; ++chan) { + unsigned int swz_rgb = + GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); + unsigned int swz_alpha = + GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan); + pair_get_src_refmasks(refmasks, inst, swz_rgb, + inst->RGB.Arg[arg].Source); + pair_get_src_refmasks(refmasks, inst, swz_alpha, + inst->Alpha.Arg[arg].Source); } } @@ -212,10 +265,25 @@ static void remap_normal_instruction(struct rc_instruction * fullinst, rc_register_file file = inst->SrcReg[src].File; unsigned int index = inst->SrcReg[src].Index; - cb(userdata, fullinst, &file, &index); + if (file == RC_FILE_PRESUB) { + unsigned int i; + unsigned int srcp_srcs = rc_presubtract_src_reg_count( + inst->PreSub.Opcode); + for(i = 0; i < srcp_srcs; i++) { + file = inst->PreSub.SrcReg[i].File; + index = inst->PreSub.SrcReg[i].Index; + cb(userdata, fullinst, &file, &index); + inst->PreSub.SrcReg[i].File = file; + inst->PreSub.SrcReg[i].Index = index; + } - inst->SrcReg[src].File = file; - inst->SrcReg[src].Index = index; + } + else { + cb(userdata, fullinst, &file, &index); + + inst->SrcReg[src].File = file; + inst->SrcReg[src].Index = index; + } } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 8e99467106..e01ba85aa5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -30,6 +30,13 @@ #include "radeon_compiler.h" #include "radeon_swizzle.h" +struct peephole_state { + struct rc_instruction * Inst; + /** Stores a bitmask of the components that are still "alive" (i.e. + * they have not been written to since Inst was executed.) + */ + unsigned int WriteMask; +}; static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) { @@ -54,7 +61,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct return combine; } -struct peephole_state { +struct copy_propagate_state { struct radeon_compiler * C; struct rc_instruction * Mov; unsigned int Conflict:1; @@ -84,10 +91,10 @@ struct peephole_state { * @param index The index of the source register. * @param mask The components of the source register that are being read from. */ -static void peephole_scan_read(void * data, struct rc_instruction * inst, +static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { - struct peephole_state * s = data; + struct copy_propagate_state * s = data; /* XXX This could probably be handled better. */ if (file == RC_FILE_ADDRESS) { @@ -123,10 +130,10 @@ static void peephole_scan_read(void * data, struct rc_instruction * inst, } } -static void peephole_scan_write(void * data, struct rc_instruction * inst, +static void copy_propagate_scan_write(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { - struct peephole_state * s = data; + struct copy_propagate_state * s = data; if (s->BranchDepth < 0) return; @@ -146,9 +153,9 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst, } } -static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov) +static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) { - struct peephole_state s; + struct copy_propagate_state s; if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.DstReg.RelAddr || @@ -170,14 +177,23 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo for(struct rc_instruction * inst = inst_mov->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); /* XXX In the future we might be able to make the optimizer * smart enough to handle loops. */ if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){ return; } - rc_for_all_reads_mask(inst, peephole_scan_read, &s); - rc_for_all_writes_mask(inst, peephole_scan_write, &s); + + /* It is possible to do copy propigation in this situation, + * just not right now, see peephole_add_presub_inv() */ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE && + info->NumSrcRegs > 2) { + return; + } + + rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s); + rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s); if (s.Conflict) return; @@ -206,7 +222,6 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY && inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) { @@ -217,8 +232,11 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo refmask |= (1 << swz) & RC_MASK_XYZW; } - if ((refmask & s.MovMask) == refmask) + if ((refmask & s.MovMask) == refmask) { inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]); + if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) + inst->U.I.PreSub = s.Mov->U.I.PreSub; + } } } @@ -283,7 +301,6 @@ static int is_src_uniform_constant(struct rc_src_register src, return 1; } - static void constant_folding_mad(struct rc_instruction * inst) { rc_swizzle swz; @@ -379,7 +396,6 @@ static void constant_folding_add(struct rc_instruction * inst) } } - /** * Replace 0.0, 1.0 and 0.5 immediate constants by their * respective swizzles. Simplify instructions like ADD dst, src, 0; @@ -454,6 +470,204 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction * constant_folding_add(inst); } +/** + * This function returns a writemask that indicates wich components are + * read by src and also written by dst. + */ +static unsigned int src_reads_dst_mask(struct rc_src_register src, + struct rc_dst_register dst) +{ + unsigned int mask = 0; + unsigned int i; + if (dst.File != src.File || dst.Index != src.Index) { + return 0; + } + + for(i = 0; i < 4; i++) { + mask |= 1 << GET_SWZ(src.Swizzle, i); + } + mask &= RC_MASK_XYZW; + + return mask; +} + +/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) + * in any of its channels. Return 0 otherwise. */ +static int src_has_const_swz(struct rc_src_register src) { + int chan; + for(chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF + || swz == RC_SWIZZLE_ONE) { + return 1; + } + } + return 0; +} + +static void peephole_scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct peephole_state * s = data; + if(s->Inst->U.I.DstReg.File == file + && s->Inst->U.I.DstReg.Index == index) { + unsigned int common_mask = s->WriteMask & mask; + s->WriteMask &= ~common_mask; + } +} + +/** + * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] + * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source + * of the add instruction must have the constatnt 1 swizzle. This function + * does not check const registers to see if their value is 1.0, so it should + * be called after the constant_folding optimization. + * @return + * 0 if the ADD instruction is still part of the program. + * 1 if the ADD instruction is no longer part of the program. + */ +static int peephole_add_presub_inv( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + unsigned int i, swz, mask; + unsigned int can_remove = 0; + unsigned int cant_sub = 0; + struct rc_instruction * inst; + struct peephole_state s; + + if (inst_add->U.I.SaturateMode) + return 0; + + mask = inst_add->U.I.DstReg.WriteMask; + + /* Check if src0 is 1. */ + /* XXX It would be nice to use is_src_uniform_constant here, but that + * function only works if the register's file is RC_FILE_NONE */ + for(i = 0; i < 4; i++ ) { + swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); + if(((1 << i) & inst_add->U.I.DstReg.WriteMask) + && swz != RC_SWIZZLE_ONE) { + return 0; + } + } + + /* Check src1. */ + if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != + inst_add->U.I.DstReg.WriteMask + || inst_add->U.I.SrcReg[1].Abs + || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY + && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) + || src_has_const_swz(inst_add->U.I.SrcReg[1])) { + + return 0; + } + + /* Setup the peephole_state information. */ + s.Inst = inst_add; + s.WriteMask = inst_add->U.I.DstReg.WriteMask; + + /* For all instructions that read inst_add->U.I.DstReg before it is + * written again, use the 1 - src0 presubtact instead. */ + for(inst = inst_add->Next; inst != &c->Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + + for(i = 0; i < info->NumSrcRegs; i++) { + if(inst_add->U.I.DstReg.WriteMask != + src_reads_dst_mask(inst->U.I.SrcReg[i], + inst_add->U.I.DstReg)) { + continue; + } + if (cant_sub) { + can_remove = 0; + break; + } + /* XXX: There are some situations where instructions + * with more than 2 src registers can use the + * presubtract select, but to keep things simple we + * will disable presubtract on these instructions for + * now. Note: This if statement should not be pulled + * outside of the loop, because it only applies to + * instructions that could potentially use the + * presubtract source. */ + if (info->NumSrcRegs > 2) { + can_remove = 0; + break; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. */ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV + || inst->U.I.PreSub.SrcReg[0].File != + inst_add->U.I.SrcReg[1].File + || inst->U.I.PreSub.SrcReg[0].Index != + inst_add->U.I.SrcReg[1].Index) { + + can_remove = 0; + break; + } + } + /* We must be careful not to modify inst_add, since it + * is possible it will remain part of the program. */ + inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst->U.I.PreSub.SrcReg[0].Negate = 0; + inst->U.I.PreSub.Opcode = RC_PRESUB_INV; + inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i], + inst->U.I.PreSub.SrcReg[0]); + + inst->U.I.SrcReg[i].File = RC_FILE_PRESUB; + inst->U.I.SrcReg[i].Index = RC_PRESUB_INV; + can_remove = 1; + } + if(!can_remove) + break; + rc_for_all_writes_mask(inst, peephole_scan_write, &s); + /* If all components of inst_add's destination register have + * been written to by subsequent instructions, the original + * value of the destination register is no longer valid and + * we can't keep doing substitutions. */ + if (!s.WriteMask){ + break; + } + /* Make this instruction doesn't write to the presubtract source. */ + if (inst->U.I.DstReg.WriteMask & + src_reads_dst_mask(inst_add->U.I.SrcReg[1], + inst->U.I.DstReg) + || info->IsFlowControl) { + cant_sub = 1; + } + } + if(can_remove) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +/** + * @return + * 0 if inst is still part of the program. + * 1 if inst is no longer part of the program. + */ +static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) +{ + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + if (c->has_presub) { + if(peephole_add_presub_inv(c, inst)) + return 1; + } + break; + default: + break; + } + return 0; +} + void rc_optimize(struct radeon_compiler * c, void *user) { struct rc_instruction * inst = c->Program.Instructions.Next; @@ -463,8 +677,11 @@ void rc_optimize(struct radeon_compiler * c, void *user) constant_folding(c, cur); + if(peephole(c, cur)) + continue; + if (cur->U.I.Opcode == RC_OPCODE_MOV) { - peephole(c, cur); + copy_propagate(c, cur); /* cur may no longer be part of the program */ } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 8e232bb243..32c54fd74b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -279,11 +279,118 @@ static int destructive_merge_instructions( struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) { + const struct rc_opcode_info * opcode; assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); assert(alpha->RGB.Opcode == RC_OPCODE_NOP); + /* Presubtract registers need to be merged first so that registers + * needed by the presubtract operation can be placed in src0 and/or + * src1. */ + + /* Merge the rgb presubtract registers. */ + const struct rc_opcode_info * rgb_info = + rc_get_opcode_info(rgb->RGB.Opcode); + if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + unsigned int srcp_src; + unsigned int srcp_regs; + if (rgb->RGB.Src[RC_PAIR_PRESUB_SRC].Used) + return 0; + srcp_regs = rc_presubtract_src_reg_count( + alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { + unsigned int arg; + int free_source; + struct radeon_pair_instruction_source srcp = + alpha->RGB.Src[srcp_src]; + struct radeon_pair_instruction_source temp; + /* 2nd arg of 1 means this is an rgb source. + * 3rd arg of 0 means this is not an alpha source. */ + free_source = rc_pair_alloc_source(rgb, 1, 0, + srcp.File, srcp.Index); + /* If free_source == srcp_src, then either the + * presubtract source is already in the correct place. */ + if (free_source == srcp_src) + continue; + /* If free_source < 0 then there are no free source + * slots. */ + if (free_source < 0) + return 0; + /* Shuffle the sources, so we can put the + * presubtract source in the correct place. */ + for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) { + /*If this arg does not read from an rgb source, + * do nothing. */ + if (rc_source_type_that_arg_reads( + rgb->RGB.Arg[arg].Source, + rgb->RGB.Arg[arg].Swizzle, 3) + != RC_PAIR_SOURCE_RGB) { + continue; + } + if (rgb->RGB.Arg[arg].Source == srcp_src) + rgb->RGB.Arg[arg].Source = free_source; + /* We need to do this just in case register + * is one of the sources already, but in the + * wrong spot. */ + else if(rgb->RGB.Arg[arg].Source == free_source) + rgb->RGB.Arg[arg].Source = srcp_src; + } + temp = rgb->RGB.Src[srcp_src]; + rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source]; + rgb->RGB.Src[free_source] = temp; + } + } + + /* Merge the alpha presubtract registers */ + if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + unsigned int srcp_src; + unsigned int srcp_regs; + if(rgb->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + return 0; + + srcp_regs = rc_presubtract_src_reg_count( + alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { + unsigned int arg; + int free_source; + struct radeon_pair_instruction_source srcp = + alpha->Alpha.Src[srcp_src]; + struct radeon_pair_instruction_source temp; + /* 2nd arg of 0 means this is not an rgb source. + * 3rd arg of 1 means this is an alpha source. */ + free_source = rc_pair_alloc_source(rgb, 0, 1, + srcp.File, srcp.Index); + /* If free_source == srcp_src, then either the + * presubtract source is already in the correct place. */ + if (free_source == srcp_src) + continue; + /* If free_source < 0 then there are no free source + * slots. */ + if (free_source < 0) + return 0; + /* Shuffle the sources, so we can put the + * presubtract source in the correct place. */ + for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) { + /*If this arg does not read from an alpha + * source, do nothing. */ + if (rc_source_type_that_arg_reads( + rgb->RGB.Arg[arg].Source, + rgb->RGB.Arg[arg].Swizzle, 3) + != RC_PAIR_SOURCE_ALPHA) { + continue; + } + if (rgb->RGB.Arg[arg].Source == srcp_src) + rgb->RGB.Arg[arg].Source = free_source; + else if (rgb->RGB.Arg[arg].Source == free_source) + rgb->RGB.Arg[arg].Source = srcp_src; + } + temp = rgb->Alpha.Src[srcp_src]; + rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source]; + rgb->Alpha.Src[free_source] = temp; + } + } + /* Copy alpha args into rgb */ - const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode); + opcode = rc_get_opcode_info(alpha->Alpha.Opcode); for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { unsigned int srcrgb = 0; @@ -351,7 +458,52 @@ static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_i return 0; } +static void presub_nop(struct rc_instruction * emitted) { + int prev_rgb_index, prev_alpha_index, i, num_src; + /* We don't need a nop if the previous instruction is a TEX. */ + if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) { + return; + } + if (emitted->Prev->U.P.RGB.WriteMask) + prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex; + else + prev_rgb_index = -1; + if (emitted->Prev->U.P.Alpha.WriteMask) + prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex; + else + prev_alpha_index = 1; + + /* Check the previous rgb instruction */ + if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + num_src = rc_presubtract_src_reg_count( + emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.RGB.Src[i].Index; + if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY + && (index == prev_rgb_index + || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } + } + + /* Check the previous alpha instruction. */ + if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + return; + + num_src = rc_presubtract_src_reg_count( + emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for (i = 0; i < num_src; i++) { + unsigned int index = emitted->U.P.Alpha.Src[i].Index; + if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY + && (index == prev_rgb_index || index == prev_alpha_index)) { + emitted->Prev->U.P.Nop = 1; + return; + } + } +} /** * Find a good ALU instruction or pair of ALU instruction and emit it. * @@ -408,6 +560,10 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor commit_alu_instruction(s, sinst); success: ; } + /* If the instruction we just emitted uses a presubtract value, and + * the presubtract sources were written by the previous intstruction, + * the previous instruction needs a nop. */ + presub_nop(before->Prev); } static void scan_read(void * data, struct rc_instruction * inst, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index 9fe39344f8..4cdb7ea748 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -127,6 +127,18 @@ static void classify_instruction(struct rc_sub_instruction * inst, } } +static void src_uses(struct rc_src_register src, unsigned int * rgb, + unsigned int * alpha) +{ + int j; + for(j = 0; j < 4; ++j) { + unsigned int swz = GET_SWZ(src.Swizzle, j); + if (swz < 3) + *rgb = 1; + else if (swz < 4) + *alpha = 1; + } +} /** * Fill the given ALU instruction's opcodes and source operands into the given pair, @@ -158,12 +170,51 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); int i; + /* Presubtract handling: + * We need to make sure that the values used by the presubtract + * operation end up in src0 or src1. */ + if(inst->PreSub.Opcode != RC_PRESUB_NONE) { + /* rc_pair_alloc_source() will fill in data for + * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */ + int j; + for(j = 0; j < 3; j++) { + int src_regs; + if(inst->SrcReg[j].File != RC_FILE_PRESUB) + continue; + + src_regs = rc_presubtract_src_reg_count( + inst->PreSub.Opcode); + for(i = 0; i < src_regs; i++) { + unsigned int rgb = 0; + unsigned int alpha = 0; + src_uses(inst->SrcReg[j], &rgb, &alpha); + if(rgb) { + pair->RGB.Src[i].File = + inst->PreSub.SrcReg[i].File; + pair->RGB.Src[i].Index = + inst->PreSub.SrcReg[i].Index; + pair->RGB.Src[i].Used = 1; + } + if(alpha) { + pair->Alpha.Src[i].File = + inst->PreSub.SrcReg[i].File; + pair->Alpha.Src[i].Index = + inst->PreSub.SrcReg[i].Index; + pair->Alpha.Src[i].Used = 1; + } + } + } + } + for(i = 0; i < opcode->NumSrcRegs; ++i) { int source; if (needrgb && !istranscendent) { unsigned int srcrgb = 0; unsigned int srcalpha = 0; int j; + /* We don't care about the alpha channel here. We only + * want the part of the swizzle that writes to rgb, + * since we are creating an rgb instruction. */ for(j = 0; j < 3; ++j) { unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); if (swz < 3) @@ -173,6 +224,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, } source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File, inst->SrcReg[i].Index); + assert(source != -1); pair->RGB.Arg[i].Source = source; pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; @@ -188,6 +240,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, srcalpha = 1; source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File, inst->SrcReg[i].Index); + assert(source != -1); pair->Alpha.Arg[i].Source = source; pair->Alpha.Arg[i].Swizzle = swz; pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index 5582f56d92..f0a77d7b53 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -39,7 +39,7 @@ struct radeon_compiler; struct rc_src_register { - unsigned int File:3; + unsigned int File:4; /** Negative values may be used for relative addressing. */ signed int Index:(RC_REGISTER_INDEX_BITS+1); @@ -64,6 +64,11 @@ struct rc_dst_register { unsigned int WriteMask:4; }; +struct rc_presub_instruction { + rc_presubtract_op Opcode; + struct rc_src_register SrcReg[2]; +}; + /** * Instructions are maintained by the compiler in a doubly linked list * of these structures. @@ -108,6 +113,10 @@ struct rc_sub_instruction { /** True if tex instruction should do shadow comparison */ unsigned int TexShadow:1; /*@}*/ + + /** This holds information about the presubtract operation used by + * this instruction. */ + struct rc_presub_instruction PreSub; }; typedef enum { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h index 2ddf60b677..9dcd44c522 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -79,7 +79,13 @@ typedef enum { /** * Indicates a special register, see RC_SPECIAL_xxx. */ - RC_FILE_SPECIAL + RC_FILE_SPECIAL, + + /** + * Indicates this register should use the result of the presubtract + * operation. + */ + RC_FILE_PRESUB } rc_register_file; enum { @@ -147,4 +153,32 @@ typedef enum { RC_ALURESULT_W } rc_write_aluresult; +typedef enum { + RC_PRESUB_NONE = 0, + + /** 1 - 2 * src0 */ + RC_PRESUB_BIAS, + + /** src1 - src0 */ + RC_PRESUB_SUB, + + /** src1 + src0 */ + RC_PRESUB_ADD, + + /** 1 - src0 */ + RC_PRESUB_INV +} rc_presubtract_op; + +static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ + switch(op){ + case RC_PRESUB_BIAS: + case RC_PRESUB_INV: + return 1; + case RC_PRESUB_ADD: + case RC_PRESUB_SUB: + return 2; + default: + return 0; + } +} #endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index ee839596aa..5a50584b72 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -38,26 +38,52 @@ int rc_pair_alloc_source(struct rc_pair_instruction *pair, { int candidate = -1; int candidate_quality = -1; + unsigned int alpha_used = 0; + unsigned int rgb_used = 0; int i; if ((!rgb && !alpha) || file == RC_FILE_NONE) return 0; + if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + if (file == RC_FILE_PRESUB) { + if (index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + return -1; + } + } else { + rgb_used++; + } + } + + if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + if (file == RC_FILE_PRESUB) { + if (index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + return -1; + } + } else { + alpha_used++; + } + } + for(i = 0; i < 3; ++i) { int q = 0; if (rgb) { if (pair->RGB.Src[i].Used) { if (pair->RGB.Src[i].File != file || - pair->RGB.Src[i].Index != index) + pair->RGB.Src[i].Index != index) { + rgb_used++; continue; + } q++; } } if (alpha) { if (pair->Alpha.Src[i].Used) { if (pair->Alpha.Src[i].File != file || - pair->Alpha.Src[i].Index != index) + pair->Alpha.Src[i].Index != index) { + alpha_used++; continue; + } q++; } } @@ -66,19 +92,156 @@ int rc_pair_alloc_source(struct rc_pair_instruction *pair, candidate = i; } } + if (candidate < 0 || (rgb && rgb_used > 2) || (alpha && alpha_used > 2)) + return -1; - if (candidate >= 0) { - if (rgb) { - pair->RGB.Src[candidate].Used = 1; - pair->RGB.Src[candidate].File = file; - pair->RGB.Src[candidate].Index = index; + /* candidate >= 0 */ + + /* Even if we have a presub src, the above loop needs to run, + * because we still need to make sure there is a free source. + */ + if (file == RC_FILE_PRESUB) + candidate = RC_PAIR_PRESUB_SRC; + + if (rgb) { + pair->RGB.Src[candidate].Used = 1; + pair->RGB.Src[candidate].File = file; + pair->RGB.Src[candidate].Index = index; + if (candidate == RC_PAIR_PRESUB_SRC) { + /* For registers with the RC_FILE_PRESUB file, + * the index stores the presubtract op. */ + int src_regs = rc_presubtract_src_reg_count(index); + for(i = 0; i < src_regs; i++) { + pair->RGB.Src[i].Used = 1; + } } - if (alpha) { - pair->Alpha.Src[candidate].Used = 1; - pair->Alpha.Src[candidate].File = file; - pair->Alpha.Src[candidate].Index = index; + } + if (alpha) { + pair->Alpha.Src[candidate].Used = 1; + pair->Alpha.Src[candidate].File = file; + pair->Alpha.Src[candidate].Index = index; + if (candidate == RC_PAIR_PRESUB_SRC) { + /* For registers with the RC_FILE_PRESUB file, + * the index stores the presubtract op. */ + int src_regs = rc_presubtract_src_reg_count(index); + for(i=0; i < src_regs; i++) { + pair->Alpha.Src[i].Used = 1; + } } } return candidate; } + +static void pair_foreach_source_callback( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb, + unsigned int swz, + unsigned int src) +{ + /* swz > 3 means that the swizzle is either not used, or a constant + * swizzle (e.g. 0, 1, 0.5). */ + if(swz > 3) + return; + + if(swz == RC_SWIZZLE_W) { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + unsigned int src_count = rc_presubtract_src_reg_count( + pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index); + for(i = 0; i < src_count; i++) { + cb(data, &pair->Alpha.Src[i]); + } + } else { + cb(data, &pair->Alpha.Src[src]); + } + } else { + if (src == RC_PAIR_PRESUB_SRC) { + unsigned int i; + unsigned int src_count = rc_presubtract_src_reg_count( + pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index); + for(i = 0; i < src_count; i++) { + cb(data, &pair->RGB.Src[i]); + } + } + else { + cb(data, &pair->RGB.Src[src]); + } + } +} + +void rc_pair_foreach_source_that_alpha_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb) +{ + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(pair->Alpha.Opcode); + for(i = 0; i < info->NumSrcRegs; i++) { + pair_foreach_source_callback(pair, data, cb, + GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0), + pair->Alpha.Arg[i].Source); + } +} + +void rc_pair_foreach_source_that_rgb_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb) +{ + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(pair->RGB.Opcode); + for(i = 0; i < info->NumSrcRegs; i++) { + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + /* Find a swizzle that is either X,Y,Z,or W. We assume here + * that if one channel swizzles X,Y, or Z, then none of the + * other channels swizzle W, and vice-versa. */ + for(chan = 0; chan < 4; chan++) { + swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan); + if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W) + continue; + } + pair_foreach_source_callback(pair, data, cb, + swz, + pair->RGB.Arg[i].Source); + } +} + +/*return 0 for rgb, 1 for alpha -1 for error. */ + +rc_pair_source_type rc_source_type_that_arg_reads( + unsigned int source, + unsigned int swizzle, + unsigned int channels) +{ + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + int isRGB = 0; + int isAlpha = 0; + /* Find a swizzle that is either X,Y,Z,or W. We assume here + * that if one channel swizzles X,Y, or Z, then none of the + * other channels swizzle W, and vice-versa. */ + for(chan = 0; chan < channels; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz == RC_SWIZZLE_W) { + isAlpha = 1; + } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z) { + isRGB = 1; + } + } + assert(!isRGB || !isAlpha); + + if(!isRGB && !isAlpha) + return RC_PAIR_SOURCE_NONE; + + if (isRGB) + return RC_PAIR_SOURCE_RGB; + /*isAlpha*/ + return RC_PAIR_SOURCE_ALPHA; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index ef5a034700..e0061e454b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -49,6 +49,11 @@ struct radeon_compiler; * see \ref rc_pair_translate */ +/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then + * the presubtract value will be used, and + * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB. + */ +#define RC_PAIR_PRESUB_SRC 3 struct radeon_pair_instruction_source { unsigned int Used:1; @@ -64,7 +69,7 @@ struct radeon_pair_instruction_rgb { unsigned int OutputWriteMask:3; unsigned int Saturate:1; - struct radeon_pair_instruction_source Src[3]; + struct radeon_pair_instruction_source Src[4]; struct { unsigned int Source:2; @@ -83,7 +88,7 @@ struct radeon_pair_instruction_alpha { unsigned int DepthWriteMask:1; unsigned int Saturate:1; - struct radeon_pair_instruction_source Src[3]; + struct radeon_pair_instruction_source Src[4]; struct { unsigned int Source:2; @@ -99,8 +104,17 @@ struct rc_pair_instruction { unsigned int WriteALUResult:2; unsigned int ALUResultCompare:3; + unsigned int Nop:1; }; +typedef void (*rc_pair_foreach_src_fn) + (void *, struct radeon_pair_instruction_source *); + +typedef enum { + RC_PAIR_SOURCE_NONE = 0, + RC_PAIR_SOURCE_RGB, + RC_PAIR_SOURCE_ALPHA +} rc_pair_source_type; /** * General helper functions for dealing with the paired instruction format. @@ -109,6 +123,21 @@ struct rc_pair_instruction { int rc_pair_alloc_source(struct rc_pair_instruction *pair, unsigned int rgb, unsigned int alpha, rc_register_file file, unsigned int index); + +void rc_pair_foreach_source_that_alpha_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb); + +void rc_pair_foreach_source_that_rgb_reads( + struct rc_pair_instruction * pair, + void * data, + rc_pair_foreach_src_fn cb); + +rc_pair_source_type rc_source_type_that_arg_reads( + unsigned int source, + unsigned int swizzle, + unsigned int channels); /*@}*/ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index a356e94e03..0161219581 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -38,6 +38,24 @@ static const char * textarget_to_string(rc_texture_target target) } } +static const char * presubtract_op_to_string(rc_presubtract_op op) +{ + switch(op) { + case RC_PRESUB_NONE: + return "NONE"; + case RC_PRESUB_BIAS: + return "(1 - 2 * src0)"; + case RC_PRESUB_SUB: + return "(src1 - src0)"; + case RC_PRESUB_ADD: + return "(src1 + src0)"; + case RC_PRESUB_INV: + return "(1 - src0)"; + default: + return "BAD_PRESUBTRACT_OP"; + } +} + static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs) { if (func == RC_COMPARE_FUNC_NEVER) { @@ -125,7 +143,43 @@ static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate } } -static void rc_print_src_register(FILE * f, struct rc_src_register src) +static void rc_print_presub_instruction(FILE * f, + struct rc_presub_instruction inst) +{ + fprintf(f,"("); + switch(inst.Opcode){ + case RC_PRESUB_BIAS: + fprintf(f, "1 - 2 * "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + case RC_PRESUB_SUB: + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + fprintf(f, " - "); + rc_print_register(f, inst.SrcReg[1].File, + inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); + break; + case RC_PRESUB_ADD: + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + fprintf(f, " + "); + rc_print_register(f, inst.SrcReg[1].File, + inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); + break; + case RC_PRESUB_INV: + fprintf(f, "1 - "); + rc_print_register(f, inst.SrcReg[0].File, + inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); + break; + default: + break; + } + fprintf(f, ")"); +} + +static void rc_print_src_register(FILE * f, struct rc_instruction * inst, + struct rc_src_register src) { int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); @@ -134,7 +188,10 @@ static void rc_print_src_register(FILE * f, struct rc_src_register src) if (src.Abs) fprintf(f, "|"); - rc_print_register(f, src.File, src.Index, src.RelAddr); + if(src.File == RC_FILE_PRESUB) + rc_print_presub_instruction(f, inst->U.I.PreSub); + else + rc_print_register(f, src.File, src.Index, src.RelAddr); if (src.Abs && !trivial_negate) fprintf(f, "|"); @@ -198,7 +255,7 @@ static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, if (reg > 0) fprintf(f, ","); fprintf(f, " "); - rc_print_src_register(f, inst->U.I.SrcReg[reg]); + rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]); } if (opcode->HasTexture) { @@ -247,6 +304,16 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst printedsrc = 1; } } + if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { + fprintf(f, ", srcp.xyz = %s", + presubtract_op_to_string( + inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index)); + } + if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { + fprintf(f, ", srcp.w = %s", + presubtract_op_to_string( + inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index)); + } fprintf(f, "\n"); if (inst->RGB.Opcode != RC_OPCODE_NOP) { @@ -272,7 +339,12 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; const char* neg = inst->RGB.Arg[arg].Negate ? "-" : ""; - fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source, + fprintf(f, ", %s%ssrc", neg, abs); + if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC) + fprintf(f,"p"); + else + fprintf(f,"%d", inst->RGB.Arg[arg].Source); + fprintf(f,".%c%c%c%s", rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), @@ -300,7 +372,12 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; - fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source, + fprintf(f, ", %s%ssrc", neg, abs); + if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC) + fprintf(f,"p"); + else + fprintf(f,"%d", inst->Alpha.Arg[arg].Source); + fprintf(f,".%c%s", rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs); } fprintf(f, "\n"); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c index 9281feecfa..facea382f4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c @@ -27,6 +27,16 @@ #include "radeon_remove_constants.h" +static void remap_regs(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + unsigned *inv_remap_table = userdata; + + if (*pfile == RC_FILE_CONSTANT) { + *pindex = inv_remap_table[*pindex]; + } +} + void rc_remove_unused_constants(struct radeon_compiler *c, void *user) { unsigned **out_remap_table = (unsigned**)user; @@ -51,6 +61,10 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + /* XXX: This loop and the if statement after it should be + * replaced by a call to one of the rc_for_all_reads_* functions. + * The reason it does not use one of those functions now is + * because none of them have RelAddr as an argument. */ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) { if (inst->U.I.SrcReg[i].RelAddr) { @@ -60,6 +74,18 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) } } } + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + unsigned int i; + unsigned int srcp_regs = rc_presubtract_src_reg_count( + inst->U.I.PreSub.Opcode); + for( i = 0; i < srcp_regs; i++) { + if (inst->U.I.PreSub.SrcReg[i].File == + RC_FILE_CONSTANT) { + const_used[ + inst->U.I.PreSub.SrcReg[i].Index] = 1; + } + } + } } /* Pass 2: If there is relative addressing, mark all externals as used. */ @@ -100,13 +126,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) if (!is_identity) { for (struct rc_instruction *inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - - for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) { - inst->U.I.SrcReg[i].Index = inv_remap_table[inst->U.I.SrcReg[i].Index]; - } - } + rc_remap_registers(inst, remap_regs, inv_remap_table); } } -- cgit v1.2.3