summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp115
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h37
2 files changed, 152 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 76794fa414..edb02fabb2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -177,6 +177,46 @@ type_size(const struct glsl_type *type)
}
}
+/**
+ * Returns how many MRFs an FS opcode will write over.
+ *
+ * Note that this is not the 0 or 1 implied writes in an actual gen
+ * instruction -- the FS opcodes often generate MOVs in addition.
+ */
+int
+fs_visitor::implied_mrf_writes(fs_inst *inst)
+{
+ if (inst->mlen == 0)
+ return 0;
+
+ switch (inst->opcode) {
+ case FS_OPCODE_RCP:
+ case FS_OPCODE_RSQ:
+ case FS_OPCODE_SQRT:
+ case FS_OPCODE_EXP2:
+ case FS_OPCODE_LOG2:
+ case FS_OPCODE_SIN:
+ case FS_OPCODE_COS:
+ return 1;
+ case FS_OPCODE_POW:
+ return 2;
+ case FS_OPCODE_TEX:
+ case FS_OPCODE_TXB:
+ case FS_OPCODE_TXL:
+ return 1;
+ case FS_OPCODE_FB_WRITE:
+ return 2;
+ case FS_OPCODE_PULL_CONSTANT_LOAD:
+ case FS_OPCODE_UNSPILL:
+ return 1;
+ case FS_OPCODE_SPILL:
+ return 2;
+ default:
+ assert(!"not reached");
+ return inst->mlen;
+ }
+}
+
int
fs_visitor::virtual_grf_alloc(int size)
{
@@ -3066,6 +3106,78 @@ fs_visitor::compute_to_mrf()
return progress;
}
+/**
+ * Walks through basic blocks, locking for repeated MRF writes and
+ * removing the later ones.
+ */
+bool
+fs_visitor::remove_duplicate_mrf_writes()
+{
+ fs_inst *last_mrf_move[16];
+ bool progress = false;
+
+ memset(last_mrf_move, 0, sizeof(last_mrf_move));
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_DO:
+ case BRW_OPCODE_WHILE:
+ case BRW_OPCODE_IF:
+ case BRW_OPCODE_ELSE:
+ case BRW_OPCODE_ENDIF:
+ memset(last_mrf_move, 0, sizeof(last_mrf_move));
+ continue;
+ default:
+ break;
+ }
+
+ if (inst->opcode == BRW_OPCODE_MOV &&
+ inst->dst.file == MRF) {
+ fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg];
+ if (prev_inst && inst->equals(prev_inst)) {
+ inst->remove();
+ progress = true;
+ continue;
+ }
+ }
+
+ /* Clear out the last-write records for MRFs that were overwritten. */
+ if (inst->dst.file == MRF) {
+ last_mrf_move[inst->dst.hw_reg] = NULL;
+ }
+
+ if (inst->mlen > 0) {
+ /* Found a SEND instruction, which will include two of fewer
+ * implied MRF writes. We could do better here.
+ */
+ for (int i = 0; i < implied_mrf_writes(inst); i++) {
+ last_mrf_move[inst->base_mrf + i] = NULL;
+ }
+ }
+
+ /* Clear out any MRF move records whose sources got overwritten. */
+ if (inst->dst.file == GRF) {
+ for (unsigned int i = 0; i < Elements(last_mrf_move); i++) {
+ if (last_mrf_move[i] &&
+ last_mrf_move[i]->src[0].reg == inst->dst.reg) {
+ last_mrf_move[i] = NULL;
+ }
+ }
+ }
+
+ if (inst->opcode == BRW_OPCODE_MOV &&
+ inst->dst.file == MRF &&
+ inst->src[0].file == GRF &&
+ !inst->predicated) {
+ last_mrf_move[inst->dst.hw_reg] = inst;
+ }
+ }
+
+ return progress;
+}
+
bool
fs_visitor::virtual_grf_interferes(int a, int b)
{
@@ -3438,6 +3550,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
bool progress;
do {
progress = false;
+
+ progress = v.remove_duplicate_mrf_writes() || progress;
+
v.calculate_live_intervals();
progress = v.propagate_constants() || progress;
progress = v.register_coalesce() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index f546fabd55..de7b15312a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -148,6 +148,21 @@ public:
fs_reg(enum register_file file, int hw_reg, uint32_t type);
fs_reg(class fs_visitor *v, const struct glsl_type *type);
+ bool equals(fs_reg *r)
+ {
+ return (file == r->file &&
+ reg == r->reg &&
+ reg_offset == r->reg_offset &&
+ hw_reg == r->hw_reg &&
+ type == r->type &&
+ negate == r->negate &&
+ abs == r->abs &&
+ memcmp(&fixed_hw_reg, &r->fixed_hw_reg,
+ sizeof(fixed_hw_reg)) == 0 &&
+ smear == r->smear &&
+ imm.u == r->imm.u);
+ }
+
/** Register file: ARF, GRF, MRF, IMM. */
enum register_file file;
/** virtual register number. 0 = fixed hw reg */
@@ -270,6 +285,26 @@ public:
assert(src[2].reg_offset >= 0);
}
+ bool equals(fs_inst *inst)
+ {
+ return (opcode == inst->opcode &&
+ dst.equals(&inst->dst) &&
+ src[0].equals(&inst->src[0]) &&
+ src[1].equals(&inst->src[1]) &&
+ src[2].equals(&inst->src[2]) &&
+ saturate == inst->saturate &&
+ predicated == inst->predicated &&
+ conditional_mod == inst->conditional_mod &&
+ mlen == inst->mlen &&
+ base_mrf == inst->base_mrf &&
+ sampler == inst->sampler &&
+ target == inst->target &&
+ eot == inst->eot &&
+ header_present == inst->header_present &&
+ shadow_compare == inst->shadow_compare &&
+ offset == inst->offset);
+ }
+
int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
fs_reg dst;
fs_reg src[3];
@@ -372,6 +407,7 @@ public:
bool register_coalesce();
bool compute_to_mrf();
bool dead_code_eliminate();
+ bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
void generate_code();
void generate_fb_write(fs_inst *inst);
@@ -409,6 +445,7 @@ public:
struct brw_reg interp_reg(int location, int channel);
int setup_uniform_values(int loc, const glsl_type *type);
void setup_builtin_uniform_values(ir_variable *ir);
+ int implied_mrf_writes(fs_inst *inst);
struct brw_context *brw;
const struct gl_fragment_program *fp;