diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300')
10 files changed, 539 insertions, 49 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index e432afc3d4..34d22b4559 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -21,6 +21,7 @@ C_SOURCES = \ radeon_dataflow.c \ radeon_dataflow_deadcode.c \ radeon_dataflow_swizzles.c \ + radeon_optimize.c \ r3xx_fragprog.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index cfa48a59e3..5d5de2f1b2 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -56,7 +56,8 @@ static const struct swizzle_data native_swizzles[] = { {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0} + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0} }; static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); @@ -221,6 +222,7 @@ unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; default: return R300_ALU_ARGA_ONE; } } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 25bf373b6f..3e88ccbc46 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -152,6 +152,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after deadcode"); + rc_optimize(&c->Base); + + debug_program_log(c, "after dataflow optimize"); + rc_dataflow_swizzles(&c->Base); if (c->Base.Error) return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 16e2f3a218..0e6c62541f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -30,7 +30,7 @@ #include "radeon_program.h" -static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata) { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); @@ -46,18 +46,15 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, refmask &= RC_MASK_XYZW; - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(refmask, chan)) { - cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan); - } - } + if (refmask) + cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask); if (refmask && inst->SrcReg[src].RelAddr) cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X); } } -static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_pair_instruction * inst = &fullinst->U.P; unsigned int refmasks[3] = { 0, 0, 0 }; @@ -84,27 +81,23 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, v } for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used) { - for(unsigned int chan = 0; chan < 3; ++chan) { - if (GET_BIT(refmasks[src], chan)) - cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan); - } - } + if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) + cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, + refmasks[src] & RC_MASK_XYZ); - if (inst->Alpha.Src[src].Used) { - if (GET_BIT(refmasks[src], 3)) - cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3); - } + if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); } } /** - * Calls a callback function for all sourced register channels. + * Calls a callback function for all register reads. * - * This is conservative, i.e. channels may be called multiple times, - * and the writemask of the instruction is not taken into account. + * This is conservative, i.e. if the same register is referenced multiple times, + * the callback may also be called multiple times. + * Also, the writemask of the instruction is not taken into account. */ -void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) { if (inst->Type == RC_INSTRUCTION_NORMAL) { reads_normal(inst, cb, userdata); @@ -115,44 +108,39 @@ void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * -static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - if (opcode->HasDstReg) { - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(inst->DstReg.WriteMask, chan)) - cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan); - } - } + if (opcode->HasDstReg && inst->DstReg.WriteMask) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); if (inst->WriteALUResult) - cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); } -static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_pair_instruction * inst = &fullinst->U.P; - for(unsigned int chan = 0; chan < 3; ++chan) { - if (GET_BIT(inst->RGB.WriteMask, chan)) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan); - } + if (inst->RGB.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); if (inst->Alpha.WriteMask) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3); + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); if (inst->WriteALUResult) - cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); } /** - * Calls a callback function for all written register channels. + * Calls a callback function for all register writes in the instruction, + * reporting writemasks to the callback function. * * \warning Does not report output registers for paired instructions! */ -void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) { if (inst->Type == RC_INSTRUCTION_NORMAL) { writes_normal(inst, cb, userdata); @@ -162,6 +150,48 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * } +struct mask_to_chan_data { + void * UserData; + rc_read_write_chan_fn Fn; +}; + +static void mask_to_chan_cb(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct mask_to_chan_data * d = data; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(mask, chan)) + d->Fn(d->UserData, inst, file, index, chan); + } +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); +} + static void remap_normal_instruction(struct rc_instruction * fullinst, rc_remap_register_fn cb, void * userdata) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 62cda20eea..60a6e192a9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -39,10 +39,15 @@ struct rc_swizzle_caps; * Help analyze and modify the register accesses of instructions. */ /*@{*/ -typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst, +typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int chan); -void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); -void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); + +typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask); +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, rc_register_file * pfile, unsigned int * pindex); @@ -60,4 +65,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f void rc_dataflow_swizzles(struct radeon_compiler * c); /*@}*/ +void rc_optimize(struct radeon_compiler * c); + #endif /* RADEON_DATAFLOW_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c index d889612f4f..863654cf68 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c @@ -150,7 +150,7 @@ static void allocate_and_insert_proxies(struct emulate_branch_state * s, sap.Proxies = proxies; for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { - rc_for_all_writes(inst, scan_write, &sap); + rc_for_all_writes_mask(inst, scan_write, &sap); rc_remap_registers(inst, remap_proxy_function, &sap); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c new file mode 100644 index 0000000000..21d7210888 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -0,0 +1,446 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +{ + struct rc_src_register combine; + combine.File = inner.File; + combine.Index = inner.Index; + combine.RelAddr = inner.RelAddr; + if (outer.Abs) { + combine.Abs = 1; + combine.Negate = outer.Negate; + } else { + combine.Abs = inner.Abs; + combine.Negate = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(outer.Swizzle, chan); + if (swz < 4) + combine.Negate |= GET_BIT(inner.Negate, swz) << chan; + } + combine.Negate ^= outer.Negate; + } + combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); + return combine; +} + +struct peephole_state { + struct radeon_compiler * C; + struct rc_instruction * Mov; + unsigned int Conflict:1; + + /** Whether Mov's source has been clobbered */ + unsigned int SourceClobbered:1; + + /** Which components of Mov's destination register are still from that Mov? */ + unsigned int MovMask:4; + + /** Which components of Mov's destination register are clearly *not* from that Mov */ + unsigned int DefinedMask:4; + + /** Which components of Mov's source register are sourced */ + unsigned int SourcedMask:4; + + /** Branch depth beyond Mov; negative value indicates we left the Mov's block */ + int BranchDepth; +}; + +static void peephole_scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct peephole_state * s = data; + + if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index) + return; + + /* These instructions cannot read from the constants file. + * see radeonTransformTEX() + */ + if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT && + (inst->U.I.Opcode == RC_OPCODE_TEX || + inst->U.I.Opcode == RC_OPCODE_TXB || + inst->U.I.Opcode == RC_OPCODE_TXP || + inst->U.I.Opcode == RC_OPCODE_KIL)){ + s->Conflict = 1; + return; + } + if ((mask & s->MovMask) == mask) { + if (s->SourceClobbered) { + s->Conflict = 1; + } + } else if ((mask & s->DefinedMask) == mask) { + /* read from something entirely written by other instruction: this is okay */ + } else { + /* read from component combination that is not well-defined without + * the MOV: cannot remove it */ + s->Conflict = 1; + } +} + +static void peephole_scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct peephole_state * s = data; + + if (s->BranchDepth < 0) + return; + + if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) { + s->MovMask &= ~mask; + if (s->BranchDepth == 0) + s->DefinedMask |= mask; + else + s->DefinedMask &= ~mask; + } + if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) { + if (mask & s->SourcedMask) + s->SourceClobbered = 1; + } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) { + s->SourceClobbered = 1; + } +} + +static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov) +{ + struct peephole_state s; + + if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult) + return; + + memset(&s, 0, sizeof(s)); + s.C = c; + s.Mov = inst_mov; + s.MovMask = inst_mov->U.I.DstReg.WriteMask; + s.DefinedMask = RC_MASK_XYZW & ~s.MovMask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan); + s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; + } + + /* 1st pass: Check whether all subsequent readers can be changed */ + for(struct rc_instruction * inst = inst_mov->Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, peephole_scan_read, &s); + rc_for_all_writes_mask(inst, peephole_scan_write, &s); + if (s.Conflict) + return; + + if (s.BranchDepth >= 0) { + if (inst->U.I.Opcode == RC_OPCODE_IF) { + s.BranchDepth++; + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + s.BranchDepth--; + if (s.BranchDepth < 0) { + s.DefinedMask &= ~s.MovMask; + s.MovMask = 0; + } + } + } + } + + if (s.Conflict) + return; + + /* 2nd pass: We can satisfy all readers, so switch them over all at once */ + s.MovMask = inst_mov->U.I.DstReg.WriteMask; + s.BranchDepth = 0; + + for(struct rc_instruction * inst = inst_mov->Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) { + unsigned int refmask = 0; + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + refmask |= (1 << swz) & RC_MASK_XYZW; + } + + if ((refmask & s.MovMask) == refmask) + inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]); + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY && + inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) { + s.MovMask &= ~inst->U.I.DstReg.WriteMask; + } + } + + if (s.BranchDepth >= 0) { + if (inst->U.I.Opcode == RC_OPCODE_IF) { + s.BranchDepth++; + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + s.BranchDepth--; + if (s.BranchDepth < 0) + break; /* no more readers after this point */ + } + } + } + + /* Finally, remove the original MOV instruction */ + rc_remove_instruction(inst_mov); +} + +/** + * Check if a source register is actually always the same + * swizzle constant. + */ +static int is_src_uniform_constant(struct rc_src_register src, + rc_swizzle * pswz, unsigned int * pnegate) +{ + int have_used = 0; + + if (src.File != RC_FILE_NONE) { + *pswz = 0; + return 0; + } + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz < 4) { + *pswz = 0; + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) + continue; + + if (!have_used) { + *pswz = swz; + *pnegate = GET_BIT(src.Negate, chan); + have_used = 1; + } else { + if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { + *pswz = 0; + return 0; + } + } + } + + return 1; +} + + +static void constant_folding_mad(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MUL; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } +} + +static void constant_folding_mul(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } +} + +static void constant_folding_add(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + return; + } + } +} + + +/** + * Replace 0.0, 1.0 and 0.5 immediate constants by their + * respective swizzles. Simplify instructions like ADD dst, src, 0; + */ +static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || + inst->U.I.SrcReg[src].RelAddr || + inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) + continue; + + struct rc_constant * constant = + &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; + + if (constant->Type != RC_CONSTANT_IMMEDIATE) + continue; + + struct rc_src_register newsrc = inst->U.I.SrcReg[src]; + int have_real_reference = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + if (swz >= 4) + continue; + + unsigned int newswz; + float imm = constant->u.Immediate[swz]; + float baseimm = imm; + if (imm < 0.0) + baseimm = -baseimm; + + if (baseimm == 0.0) { + newswz = RC_SWIZZLE_ZERO; + } else if (baseimm == 1.0) { + newswz = RC_SWIZZLE_ONE; + } else if (baseimm == 0.5) { + newswz = RC_SWIZZLE_HALF; + } else { + have_real_reference = 1; + continue; + } + + SET_SWZ(newsrc.Swizzle, chan, newswz); + if (imm < 0.0 && !newsrc.Abs) + newsrc.Negate ^= 1 << chan; + } + + if (!have_real_reference) { + newsrc.File = RC_FILE_NONE; + newsrc.Index = 0; + } + + /* don't make the swizzle worse */ + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && + c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + continue; + + inst->U.I.SrcReg[src] = newsrc; + } + + /* Simplify instructions based on constants */ + if (inst->U.I.Opcode == RC_OPCODE_MAD) + constant_folding_mad(inst); + + /* note: MAD can simplify to MUL or ADD */ + if (inst->U.I.Opcode == RC_OPCODE_MUL) + constant_folding_mul(inst); + else if (inst->U.I.Opcode == RC_OPCODE_ADD) + constant_folding_add(inst); +} + +void rc_optimize(struct radeon_compiler * c) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + while(inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + + constant_folding(c, cur); + + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + peephole(c, cur); + /* cur may no longer be part of the program */ + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index fdfee86701..8a912da461 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -159,7 +159,7 @@ static int try_add_live_intervals(struct regalloc_state * s, } static void scan_callback(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int chan) + rc_register_file file, unsigned int index, unsigned int mask) { struct regalloc_state * s = data; struct register_info * reg; @@ -191,8 +191,8 @@ static void compute_live_intervals(struct regalloc_state * s) for(struct rc_instruction * inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; inst = inst->Next) { - rc_for_all_reads(inst, scan_callback, s); - rc_for_all_writes(inst, scan_callback, s); + rc_for_all_reads_mask(inst, scan_callback, s); + rc_for_all_writes_mask(inst, scan_callback, s); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index df67aafe02..a279549ff8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -448,8 +448,8 @@ static void schedule_block(struct r300_fragment_program_compiler * c, * counter-intuitive, to account for the case where an * instruction writes to the same register as it reads * from. */ - rc_for_all_writes(inst, &scan_write, &s); - rc_for_all_reads(inst, &scan_read, &s); + rc_for_all_writes_chan(inst, &scan_write, &s); + rc_for_all_reads_chan(inst, &scan_read, &s); DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index 42c08cd550..8336e58d55 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -119,7 +119,7 @@ int radeonTransformTEX( struct rc_instruction * inst_cmp; unsigned tmp_texsample = rc_find_free_temporary(c); unsigned tmp_sum = rc_find_free_temporary(c); - unsigned tmp_recip_w; + unsigned tmp_recip_w = 0; int pass, fail, tex; /* Save the output register. */ |