diff options
author | Eric Anholt <eric@anholt.net> | 2010-10-08 14:00:14 -0700 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2010-10-11 12:08:13 -0700 |
commit | c6dbf253d284f68b0d0e4a3c145583880855324b (patch) | |
tree | 76d4ca3432ac56bfddd71ad563bcb9981003bd96 /src/mesa/drivers | |
parent | 06fd639c519214b6ebcbf29127b6d9ed429f8641 (diff) |
i965: Compute to MRF in the new FS backend.
This didn't produce a statistically significant performance difference
in my demo (n=4) or nexuiz (n=3), but it still seems like a good idea
and is recommended by the HW team.
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 123 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 1 |
2 files changed, 124 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 836faaef3f..645145d3af 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2492,6 +2492,128 @@ fs_visitor::register_coalesce() return progress; } + +bool +fs_visitor::compute_to_mrf() +{ + bool progress = false; + int next_ip = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + + int ip = next_ip; + next_ip++; + + if (inst->opcode != BRW_OPCODE_MOV || + inst->predicated || + inst->dst.file != MRF || inst->src[0].file != GRF || + inst->dst.type != inst->src[0].type || + inst->src[0].abs || inst->src[0].negate) + continue; + + /* Can't compute-to-MRF this GRF if someone else was going to + * read it later. + */ + if (this->virtual_grf_use[inst->src[0].reg] > ip) + continue; + + /* Found a move of a GRF to a MRF. Let's see if we can go + * rewrite the thing that made this GRF to write into the MRF. + */ + bool found = false; + fs_inst *scan_inst; + for (scan_inst = (fs_inst *)inst->prev; + scan_inst->prev != NULL; + scan_inst = (fs_inst *)scan_inst->prev) { + /* We don't handle flow control here. Most computation of + * values that end up in MRFs are shortly before the MRF + * write anyway. + */ + if (scan_inst->opcode == BRW_OPCODE_DO || + scan_inst->opcode == BRW_OPCODE_WHILE || + scan_inst->opcode == BRW_OPCODE_ENDIF) { + break; + } + + /* You can't read from an MRF, so if someone else reads our + * MRF's source GRF that we wanted to rewrite, that stops us. + */ + bool interfered = false; + for (int i = 0; i < 3; i++) { + if (scan_inst->src[i].file == GRF && + scan_inst->src[i].reg == inst->src[0].reg && + scan_inst->src[i].reg_offset == inst->src[0].reg_offset) { + interfered = true; + } + } + if (interfered) + break; + + if (scan_inst->dst.file == MRF && + scan_inst->dst.hw_reg == inst->dst.hw_reg) { + /* Somebody else wrote our MRF here, so we can't can't + * compute-to-MRF before that. + */ + break; + } + + if (scan_inst->mlen > 0) { + /* Found a SEND instruction, which will do some amount of + * implied write that may overwrite our MRF that we were + * hoping to compute-to-MRF somewhere above it. Nothing + * we have implied-writes more than 2 MRFs from base_mrf, + * though. + */ + int implied_write_len = MIN2(scan_inst->mlen, 2); + if (inst->dst.hw_reg >= scan_inst->base_mrf && + inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) { + break; + } + } + + if (scan_inst->dst.file == GRF && + scan_inst->dst.reg == inst->src[0].reg) { + /* Found the last thing to write our reg we want to turn + * into a compute-to-MRF. + */ + + if (scan_inst->opcode == FS_OPCODE_TEX) { + /* texturing writes several continuous regs, so we can't + * compute-to-mrf that. + */ + break; + } + + /* If it's predicated, it (probably) didn't populate all + * the channels. + */ + if (scan_inst->predicated) + break; + + /* SEND instructions can't have MRF as a destination. */ + if (scan_inst->mlen) + break; + + if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { + /* Found the creator of our MRF's source value. */ + found = true; + break; + } + } + } + if (found) { + scan_inst->dst.file = MRF; + scan_inst->dst.hw_reg = inst->dst.hw_reg; + scan_inst->saturate |= inst->saturate; + inst->remove(); + progress = true; + } + } + + return progress; +} + bool fs_visitor::virtual_grf_interferes(int a, int b) { @@ -2825,6 +2947,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) v.calculate_live_intervals(); progress = v.propagate_constants() || progress; progress = v.register_coalesce() || progress; + progress = v.compute_to_mrf() || progress; progress = v.dead_code_eliminate() || progress; } while (progress); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 0d4c078fda..93cde7f5ed 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -339,6 +339,7 @@ public: void calculate_live_intervals(); bool propagate_constants(); bool register_coalesce(); + bool compute_to_mrf(); bool dead_code_eliminate(); bool virtual_grf_interferes(int a, int b); void generate_code(); |