diff options
author | Nicolai Haehnle <nhaehnle@gmail.com> | 2008-07-06 19:48:50 +0200 |
---|---|---|
committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2008-07-12 09:36:02 +0200 |
commit | d8d086c20b5a43353c4980cf234d8329900585f5 (patch) | |
tree | 6d88f83ba0763080a16c36a4e028e520a7d34848 /src/mesa/drivers/dri/r300 | |
parent | 7904c9fad4c2cb2a4153258a9e86e530a0330a78 (diff) |
r500: Add "Not quite SSA" and dead code elimination pass
In addition, this pass fixes non-native swizzles.
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r-- | src/mesa/drivers/dri/r300/Makefile | 1 |
-rw-r--r-- | src/mesa/drivers/dri/r300/r500_fragprog.c | 67 |
-rw-r--r-- | src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 28 |
-rw-r--r-- | src/mesa/drivers/dri/r300/radeon_nqssadce.c | 282 |
-rw-r--r-- | src/mesa/drivers/dri/r300/radeon_nqssadce.h | 96 |
5 files changed, 462 insertions, 12 deletions
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index d52b2b4c36..1dc75a3062 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -38,6 +38,7 @@ DRIVER_SOURCES = \ r300_texstate.c \ radeon_program.c \ radeon_program_alu.c \ + radeon_nqssadce.c \ r300_vertprog.c \ r300_fragprog.c \ r300_fragprog_emit.c \ diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 9bb92d3ba4..c92ea8f5e6 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -27,6 +27,7 @@ #include "r500_fragprog.h" +#include "radeon_nqssadce.h" #include "radeon_program_alu.h" @@ -250,6 +251,57 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) } +static void nqssadce_init(struct nqssadce_state* s) +{ + s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W; +} + +static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) +{ + GLuint relevant; + int i; + + if (reg.Abs) + return GL_TRUE; + + relevant = 0; + for(i = 0; i < 3; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant)) + return GL_FALSE; + + return GL_TRUE; +} + +/** + * Implement a non-native swizzle. This function assumes that + * is_native_swizzle returned false. 
+ */ +static void nqssadce_build_swizzle(struct nqssadce_state *s, + struct prog_dst_register dst, struct prog_src_register src) +{ + struct prog_instruction *inst; + + _mesa_insert_instructions(s->Program, s->IP, 2); + inst = s->Program->Instructions + s->IP; + + inst[0].Opcode = OPCODE_MOV; + inst[0].DstReg = dst; + inst[0].DstReg.WriteMask &= src.NegateBase; + inst[0].SrcReg[0] = src; + + inst[1].Opcode = OPCODE_MOV; + inst[1].DstReg = dst; + inst[1].DstReg.WriteMask &= ~src.NegateBase; + inst[1].SrcReg[0] = src; + + s->IP += 2; +} + static GLuint build_dtm(GLuint depthmode) { switch(depthmode) { @@ -327,7 +379,20 @@ void r500TranslateFragmentShader(r300ContextPtr r300, 3, transformations); if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after all transformations:\n"); + _mesa_printf("Compiler: after native rewrite:\n"); + _mesa_print_program(compiler.program); + } + + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &is_native_swizzle, + .BuildSwizzle = &nqssadce_build_swizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); _mesa_print_program(compiler.program); } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 4f65803953..275911679d 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -163,23 +163,30 @@ static const struct prog_dst_register dstreg_template = { .WriteMask = WRITEMASK_XYZW }; +static INLINE GLuint fix_hw_swizzle(GLuint swz) +{ + if (swz == 5) swz = 6; + if (swz == SWIZZLE_NIL) swz = 4; + return swz; +} + static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) { GLuint swiz = 0x0; GLuint temp; /* This could be optimized, but it should be plenty fast already. 
*/ int i; + int negatebase = 0; for (i = 0; i < 3; i++) { - temp = GET_SWZ(src.Swizzle, i); - /* Fix SWIZZLE_ONE */ - if (temp == 5) temp++; + temp = GET_SWZ(src.Swizzle, i); + if (temp != SWIZZLE_NIL && GET_BIT(src.NegateBase, i)) + negatebase = 1; + temp = fix_hw_swizzle(temp); swiz |= temp << i*3; } - if (src.Abs) { + if (src.Abs) swiz |= R500_SWIZ_MOD_ABS << 9; - } else if (src.NegateBase & 7) { - ASSERT((src.NegateBase & 7) == 7); + else if (negatebase) swiz |= R500_SWIZ_MOD_NEG << 9; - } if (src.NegateAbs) swiz ^= R500_SWIZ_MOD_NEG << 9; return swiz; @@ -191,8 +198,7 @@ static INLINE GLuint make_rgba_swizzle(GLuint src) { int i; for (i = 0; i < 4; i++) { temp = GET_SWZ(src, i); - /* Fix SWIZZLE_ONE */ - if (temp == 5) temp++; + temp = fix_hw_swizzle(temp); swiz |= temp << i*3; } return swiz; @@ -201,7 +207,7 @@ static INLINE GLuint make_rgba_swizzle(GLuint src) { static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) { GLuint swiz = GET_SWZ(src.Swizzle, 3); - if (swiz == 5) swiz++; + swiz = fix_hw_swizzle(swiz); if (src.Abs) { swiz |= R500_SWIZ_MOD_ABS << 3; @@ -217,7 +223,7 @@ static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) { static INLINE GLuint make_sop_swizzle(struct prog_src_register src) { GLuint swiz = GET_SWZ(src.Swizzle, 0); - if (swiz == 5) swiz++; + swiz = fix_hw_swizzle(swiz); if (src.Abs) { swiz |= R500_SWIZ_MOD_ABS << 3; diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c new file mode 100644 index 0000000000..f10ba4004a --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -0,0 +1,282 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * "Not-quite SSA" and Dead-Code Elimination. + * + * @note This code uses SWIZZLE_NIL in a source register to indicate that + * the corresponding component is ignored by the corresponding instruction. + */ + +#include "radeon_nqssadce.h" + + +/** + * Return the @ref register_state for the given register (or 0 for untracked + * registers, i.e. constants). + */ +static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) +{ + switch(file) { + case PROGRAM_TEMPORARY: return &s->Temps[index]; + case PROGRAM_OUTPUT: return &s->Outputs[index]; + default: return 0; + } +} + + +/** + * Left multiplication of a register with a swizzle + * + * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. 
+ */ +static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) +{ + struct prog_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.NegateBase = 0; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + + +static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, + struct prog_instruction *inst, GLint src, GLuint sourced) +{ + int i; + GLuint deswz_source = 0; + + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) { + GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); + deswz_source |= 1 << swz; + } else { + inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + } + + if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { + struct prog_dst_register dstreg = inst->DstReg; + dstreg.File = PROGRAM_TEMPORARY; + dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + dstreg.WriteMask = sourced; + + s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); + + inst = s->Program->Instructions + s->IP; + inst->SrcReg[src].File = PROGRAM_TEMPORARY; + inst->SrcReg[src].Index = dstreg.Index; + inst->SrcReg[src].Swizzle = 0; + inst->SrcReg[src].NegateBase = 0; + inst->SrcReg[src].Abs = 0; + inst->SrcReg[src].NegateAbs = 0; + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) + inst->SrcReg[src].Swizzle |= i << (3*i); + else + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + deswz_source = sourced; + } + + struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); + if (regstate) + regstate->Sourced |= deswz_source & 0xf; + + return inst; +} + + +static void rewrite_depth_out(struct prog_instruction *inst) +{ + if (inst->DstReg.WriteMask & WRITEMASK_Z) { + inst->DstReg.WriteMask = 
WRITEMASK_W; + } else { + inst->DstReg.WriteMask = 0; + return; + } + + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; + } +} + +static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) +{ + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int i; + for(i = 0; i < nsrc; ++i) + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) + inst->SrcReg[i].Index = newindex; +} + +static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) +{ + GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + int ip; + for(ip = 0; ip < s->IP; ++ip) { + struct prog_instruction* inst = s->Program->Instructions + ip; + if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) + inst->DstReg.Index = newindex; + unalias_srcregs(inst, oldindex, newindex); + } + unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); +} + + +/** + * Handle one instruction. 
+ */ +static void process_instruction(struct nqssadce_state* s) +{ + struct prog_instruction *inst = s->Program->Instructions + s->IP; + + if (inst->Opcode == OPCODE_END) + return; + + if (inst->Opcode != OPCODE_KIL) { + if (s->Descr->RewriteDepthOut) { + if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPR) + rewrite_depth_out(inst); + } + + struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); + if (!regstate) { + _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", + inst->DstReg.File, inst->DstReg.Index); + return; + } + + inst->DstReg.WriteMask &= regstate->Sourced; + regstate->Sourced &= ~inst->DstReg.WriteMask; + + if (inst->DstReg.WriteMask == 0) { + _mesa_delete_instructions(s->Program, s->IP, 1); + return; + } + + if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) + unalias_temporary(s, inst->DstReg.Index); + } + + /* Attention: Due to swizzle emulation code, the following + * might change the instruction stream under us, so we have + * to be careful with the inst pointer. 
*/ + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); + break; + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + inst = track_used_srcreg(s, inst, 0, 0x1); + break; + case OPCODE_DP3: + inst = track_used_srcreg(s, inst, 0, 0x7); + inst = track_used_srcreg(s, inst, 1, 0x7); + break; + case OPCODE_DP4: + inst = track_used_srcreg(s, inst, 0, 0xf); + inst = track_used_srcreg(s, inst, 1, 0xf); + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + inst = track_used_srcreg(s, inst, 0, 0xf); + break; + default: + _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); + return; + } +} + + +void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) +{ + struct nqssadce_state s; + + _mesa_bzero(&s, sizeof(s)); + s.Ctx = ctx; + s.Program = p; + s.Descr = descr; + s.Descr->Init(&s); + s.IP = p->NumInstructions; + + while(s.IP > 0) { + s.IP--; + process_instruction(&s); + } +} diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/radeon_nqssadce.h new file mode 100644 index 0000000000..a4f94abcb6 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_NQSSADCE_H_ +#define __RADEON_PROGRAM_NQSSADCE_H_ + +#include "radeon_program.h" + + +struct register_state { + /** + * Bitmask indicating which components of the register are sourced + * by later instructions. + */ + GLuint Sourced : 4; +}; + +/** + * Maintain state such as which registers are used, which registers are + * read from, etc. + */ +struct nqssadce_state { + GLcontext *Ctx; + struct gl_program *Program; + struct radeon_nqssadce_descr *Descr; + + /** + * All instructions after this instruction pointer have been dealt with. + */ + int IP; + + /** + * Which registers are read by subsequent instructions? 
+ */ + struct register_state Temps[MAX_PROGRAM_TEMPS]; + struct register_state Outputs[VERT_RESULT_MAX]; +}; + + +/** + * This structure contains a description of the hardware in-so-far as + * it is required for the NqSSA-DCE pass. + */ +struct radeon_nqssadce_descr { + /** + * Fill in which outputs (and which of their components) are sourced at the end of the program. + */ + void (*Init)(struct nqssadce_state *); + + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + */ + GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); + + /** + * Emit (at the current IP) the instruction MOV dst, src; + * The transformation will work recursively on the emitted instruction(s). + */ + void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); + + /** + * Rewrite instructions that write to DEPR.z to write to DEPR.w + * instead (rewriting is done *before* the WriteMask test). + */ + GLboolean RewriteDepthOut; + void *Data; +}; + +void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); + +#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ |