summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300
diff options
context:
space:
mode:
authorNicolai Haehnle <nhaehnle@gmail.com>2008-07-06 19:48:50 +0200
committerNicolai Haehnle <nhaehnle@gmail.com>2008-07-12 09:36:02 +0200
commitd8d086c20b5a43353c4980cf234d8329900585f5 (patch)
tree6d88f83ba0763080a16c36a4e028e520a7d34848 /src/mesa/drivers/dri/r300
parent7904c9fad4c2cb2a4153258a9e86e530a0330a78 (diff)
r500: Add "Not quite SSA" and dead code elimination pass
In addition, this pass fixes non-native swizzles.
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r--src/mesa/drivers/dri/r300/Makefile1
-rw-r--r--src/mesa/drivers/dri/r300/r500_fragprog.c67
-rw-r--r--src/mesa/drivers/dri/r300/r500_fragprog_emit.c28
-rw-r--r--src/mesa/drivers/dri/r300/radeon_nqssadce.c282
-rw-r--r--src/mesa/drivers/dri/r300/radeon_nqssadce.h96
5 files changed, 462 insertions, 12 deletions
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index d52b2b4c36..1dc75a3062 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -38,6 +38,7 @@ DRIVER_SOURCES = \
r300_texstate.c \
radeon_program.c \
radeon_program_alu.c \
+ radeon_nqssadce.c \
r300_vertprog.c \
r300_fragprog.c \
r300_fragprog_emit.c \
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
index 9bb92d3ba4..c92ea8f5e6 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
@@ -27,6 +27,7 @@
#include "r500_fragprog.h"
+#include "radeon_nqssadce.h"
#include "radeon_program_alu.h"
@@ -250,6 +251,57 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
}
+static void nqssadce_init(struct nqssadce_state* s)
+{
+ s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW;
+ s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W;
+}
+
+static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
+{
+ GLuint relevant;
+ int i;
+
+ if (reg.Abs)
+ return GL_TRUE;
+
+ relevant = 0;
+ for(i = 0; i < 3; ++i) {
+ GLuint swz = GET_SWZ(reg.Swizzle, i);
+ if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+ relevant |= 1 << i;
+ }
+ if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+/**
+ * Implement a non-native swizzle. This function assumes that
+ * is_native_swizzle returned true.
+ */
+static void nqssadce_build_swizzle(struct nqssadce_state *s,
+ struct prog_dst_register dst, struct prog_src_register src)
+{
+ struct prog_instruction *inst;
+
+ _mesa_insert_instructions(s->Program, s->IP, 2);
+ inst = s->Program->Instructions + s->IP;
+
+ inst[0].Opcode = OPCODE_MOV;
+ inst[0].DstReg = dst;
+ inst[0].DstReg.WriteMask &= src.NegateBase;
+ inst[0].SrcReg[0] = src;
+
+ inst[1].Opcode = OPCODE_MOV;
+ inst[1].DstReg = dst;
+ inst[1].DstReg.WriteMask &= ~src.NegateBase;
+ inst[1].SrcReg[0] = src;
+
+ s->IP += 2;
+}
+
static GLuint build_dtm(GLuint depthmode)
{
switch(depthmode) {
@@ -327,7 +379,20 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
3, transformations);
if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Compiler: after all transformations:\n");
+ _mesa_printf("Compiler: after native rewrite:\n");
+ _mesa_print_program(compiler.program);
+ }
+
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &is_native_swizzle,
+ .BuildSwizzle = &nqssadce_build_swizzle,
+ .RewriteDepthOut = GL_TRUE
+ };
+ radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Compiler: after NqSSA-DCE:\n");
_mesa_print_program(compiler.program);
}
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
index 4f65803953..275911679d 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
@@ -163,23 +163,30 @@ static const struct prog_dst_register dstreg_template = {
.WriteMask = WRITEMASK_XYZW
};
+static INLINE GLuint fix_hw_swizzle(GLuint swz)
+{
+ if (swz == 5) swz = 6;
+ if (swz == SWIZZLE_NIL) swz = 4;
+ return swz;
+}
+
static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) {
GLuint swiz = 0x0;
GLuint temp;
/* This could be optimized, but it should be plenty fast already. */
int i;
+ int negatebase = 0;
for (i = 0; i < 3; i++) {
- temp = GET_SWZ(src.Swizzle, i);
- /* Fix SWIZZLE_ONE */
- if (temp == 5) temp++;
+ temp = GET_SWZ(src.Swizzle, i);
+ if (temp != SWIZZLE_NIL && GET_BIT(src.NegateBase, i))
+ negatebase = 1;
+ temp = fix_hw_swizzle(temp);
swiz |= temp << i*3;
}
- if (src.Abs) {
+ if (src.Abs)
swiz |= R500_SWIZ_MOD_ABS << 9;
- } else if (src.NegateBase & 7) {
- ASSERT((src.NegateBase & 7) == 7);
+ else if (negatebase)
swiz |= R500_SWIZ_MOD_NEG << 9;
- }
if (src.NegateAbs)
swiz ^= R500_SWIZ_MOD_NEG << 9;
return swiz;
@@ -191,8 +198,7 @@ static INLINE GLuint make_rgba_swizzle(GLuint src) {
int i;
for (i = 0; i < 4; i++) {
temp = GET_SWZ(src, i);
- /* Fix SWIZZLE_ONE */
- if (temp == 5) temp++;
+ temp = fix_hw_swizzle(temp);
swiz |= temp << i*3;
}
return swiz;
@@ -201,7 +207,7 @@ static INLINE GLuint make_rgba_swizzle(GLuint src) {
static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {
GLuint swiz = GET_SWZ(src.Swizzle, 3);
- if (swiz == 5) swiz++;
+ swiz = fix_hw_swizzle(swiz);
if (src.Abs) {
swiz |= R500_SWIZ_MOD_ABS << 3;
@@ -217,7 +223,7 @@ static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {
static INLINE GLuint make_sop_swizzle(struct prog_src_register src) {
GLuint swiz = GET_SWZ(src.Swizzle, 0);
- if (swiz == 5) swiz++;
+ swiz = fix_hw_swizzle(swiz);
if (src.Abs) {
swiz |= R500_SWIZ_MOD_ABS << 3;
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c
new file mode 100644
index 0000000000..f10ba4004a
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * "Not-quite SSA" and Dead-Code Elimination.
+ *
+ * @note This code uses SWIZZLE_NIL in a source register to indicate that
+ * the corresponding component is ignored by the corresponding instruction.
+ */
+
+#include "radeon_nqssadce.h"
+
+
+/**
+ * Return the @ref register_state for the given register (or 0 for untracked
+ * registers, i.e. constants).
+ */
+static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
+{
+ switch(file) {
+ case PROGRAM_TEMPORARY: return &s->Temps[index];
+ case PROGRAM_OUTPUT: return &s->Outputs[index];
+ default: return 0;
+ }
+}
+
+
+/**
+ * Left multiplication of a register with a swizzle
+ *
+ * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
+ */
+static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
+{
+ struct prog_src_register tmp = srcreg;
+ int i;
+ tmp.Swizzle = 0;
+ tmp.NegateBase = 0;
+ for(i = 0; i < 4; ++i) {
+ GLuint swz = GET_SWZ(swizzle, i);
+ if (swz < 4) {
+ tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+ tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i;
+ } else {
+ tmp.Swizzle |= swz << (i*3);
+ }
+ }
+ return tmp;
+}
+
+
+static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
+ struct prog_instruction *inst, GLint src, GLuint sourced)
+{
+ int i;
+ GLuint deswz_source = 0;
+
+ for(i = 0; i < 4; ++i) {
+ if (GET_BIT(sourced, i)) {
+ GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
+ deswz_source |= 1 << swz;
+ } else {
+ inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
+ inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
+ }
+ }
+
+ if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
+ struct prog_dst_register dstreg = inst->DstReg;
+ dstreg.File = PROGRAM_TEMPORARY;
+ dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
+ dstreg.WriteMask = sourced;
+
+ s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
+
+ inst = s->Program->Instructions + s->IP;
+ inst->SrcReg[src].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[src].Index = dstreg.Index;
+ inst->SrcReg[src].Swizzle = 0;
+ inst->SrcReg[src].NegateBase = 0;
+ inst->SrcReg[src].Abs = 0;
+ inst->SrcReg[src].NegateAbs = 0;
+ for(i = 0; i < 4; ++i) {
+ if (GET_BIT(sourced, i))
+ inst->SrcReg[src].Swizzle |= i << (3*i);
+ else
+ inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
+ }
+ deswz_source = sourced;
+ }
+
+ struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
+ if (regstate)
+ regstate->Sourced |= deswz_source & 0xf;
+
+ return inst;
+}
+
+
+static void rewrite_depth_out(struct prog_instruction *inst)
+{
+ if (inst->DstReg.WriteMask & WRITEMASK_Z) {
+ inst->DstReg.WriteMask = WRITEMASK_W;
+ } else {
+ inst->DstReg.WriteMask = 0;
+ return;
+ }
+
+ switch (inst->Opcode) {
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ break;
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ break;
+ case OPCODE_CMP:
+ case OPCODE_MAD:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
+ break;
+ default:
+ // Scalar instructions needn't be reswizzled
+ break;
+ }
+}
+
+static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
+{
+ int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int i;
+ for(i = 0; i < nsrc; ++i)
+ if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
+ inst->SrcReg[i].Index = newindex;
+}
+
+static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
+{
+ GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
+ int ip;
+ for(ip = 0; ip < s->IP; ++ip) {
+ struct prog_instruction* inst = s->Program->Instructions + ip;
+ if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
+ inst->DstReg.Index = newindex;
+ unalias_srcregs(inst, oldindex, newindex);
+ }
+ unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
+}
+
+
+/**
+ * Handle one instruction.
+ */
+static void process_instruction(struct nqssadce_state* s)
+{
+ struct prog_instruction *inst = s->Program->Instructions + s->IP;
+
+ if (inst->Opcode == OPCODE_END)
+ return;
+
+ if (inst->Opcode != OPCODE_KIL) {
+ if (s->Descr->RewriteDepthOut) {
+ if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPR)
+ rewrite_depth_out(inst);
+ }
+
+ struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
+ if (!regstate) {
+ _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
+ inst->DstReg.File, inst->DstReg.Index);
+ return;
+ }
+
+ inst->DstReg.WriteMask &= regstate->Sourced;
+ regstate->Sourced &= ~inst->DstReg.WriteMask;
+
+ if (inst->DstReg.WriteMask == 0) {
+ _mesa_delete_instructions(s->Program, s->IP, 1);
+ return;
+ }
+
+ if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
+ unalias_temporary(s, inst->DstReg.Index);
+ }
+
+ /* Attention: Due to swizzle emulation code, the following
+ * might change the instruction stream under us, so we have
+ * to be careful with the inst pointer. */
+ switch (inst->Opcode) {
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ break;
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
+ break;
+ case OPCODE_CMP:
+ case OPCODE_MAD:
+ inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
+ inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
+ break;
+ case OPCODE_COS:
+ case OPCODE_EX2:
+ case OPCODE_LG2:
+ case OPCODE_RCP:
+ case OPCODE_RSQ:
+ case OPCODE_SIN:
+ inst = track_used_srcreg(s, inst, 0, 0x1);
+ break;
+ case OPCODE_DP3:
+ inst = track_used_srcreg(s, inst, 0, 0x7);
+ inst = track_used_srcreg(s, inst, 1, 0x7);
+ break;
+ case OPCODE_DP4:
+ inst = track_used_srcreg(s, inst, 0, 0xf);
+ inst = track_used_srcreg(s, inst, 1, 0xf);
+ break;
+ case OPCODE_KIL:
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ case OPCODE_TXP:
+ inst = track_used_srcreg(s, inst, 0, 0xf);
+ break;
+ default:
+ _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
+ return;
+ }
+}
+
+
+void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
+{
+ struct nqssadce_state s;
+
+ _mesa_bzero(&s, sizeof(s));
+ s.Ctx = ctx;
+ s.Program = p;
+ s.Descr = descr;
+ s.Descr->Init(&s);
+ s.IP = p->NumInstructions;
+
+ while(s.IP > 0) {
+ s.IP--;
+ process_instruction(&s);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/radeon_nqssadce.h
new file mode 100644
index 0000000000..a4f94abcb6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_NQSSADCE_H_
+#define __RADEON_PROGRAM_NQSSADCE_H_
+
+#include "radeon_program.h"
+
+
+struct register_state {
+ /**
+ * Bitmask indicating which components of the register are sourced
+ * by later instructions.
+ */
+ GLuint Sourced : 4;
+};
+
+/**
+ * Maintain state such as which registers are used, which registers are
+ * read from, etc.
+ */
+struct nqssadce_state {
+ GLcontext *Ctx;
+ struct gl_program *Program;
+ struct radeon_nqssadce_descr *Descr;
+
+ /**
+ * All instructions after this instruction pointer have been dealt with.
+ */
+ int IP;
+
+ /**
+ * Which registers are read by subsequent instructions?
+ */
+ struct register_state Temps[MAX_PROGRAM_TEMPS];
+ struct register_state Outputs[VERT_RESULT_MAX];
+};
+
+
+/**
+ * This structure contains a description of the hardware in-so-far as
+ * it is required for the NqSSA-DCE pass.
+ */
+struct radeon_nqssadce_descr {
+ /**
+ * Fill in which outputs
+ */
+ void (*Init)(struct nqssadce_state *);
+
+ /**
+ * Check whether the given swizzle, absolute and negate combination
+ * can be implemented natively by the hardware for this opcode.
+ */
+ GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg);
+
+ /**
+ * Emit (at the current IP) the instruction MOV dst, src;
+ * The transformation will work recursively on the emitted instruction(s).
+ */
+ void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
+
+ /**
+ * Rewrite instructions that write to DEPR.z to write to DEPR.w
+ * instead (rewriting is done *before* the WriteMask test).
+ */
+ GLboolean RewriteDepthOut;
+ void *Data;
+};
+
+void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
+
+#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */