diff options
| author | Eric Anholt <eric@anholt.net> | 2010-10-08 14:00:14 -0700 | 
|---|---|---|
| committer | Eric Anholt <eric@anholt.net> | 2010-10-11 12:08:13 -0700 | 
| commit | c6dbf253d284f68b0d0e4a3c145583880855324b (patch) | |
| tree | 76d4ca3432ac56bfddd71ad563bcb9981003bd96 | |
| parent | 06fd639c519214b6ebcbf29127b6d9ed429f8641 (diff) | |
i965: Compute to MRF in the new FS backend.
This didn't produce a statistically significant performance difference
in my demo (n=4) or nexuiz (n=3), but it still seems like a good idea
and is recommended by the HW team.
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 123 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 1 | 
2 files changed, 124 insertions, 0 deletions
| diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 836faaef3f..645145d3af 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2492,6 +2492,128 @@ fs_visitor::register_coalesce()     return progress;  } + +bool +fs_visitor::compute_to_mrf() +{ +   bool progress = false; +   int next_ip = 0; + +   foreach_iter(exec_list_iterator, iter, this->instructions) { +      fs_inst *inst = (fs_inst *)iter.get(); + +      int ip = next_ip; +      next_ip++; + +      if (inst->opcode != BRW_OPCODE_MOV || +	  inst->predicated || +	  inst->dst.file != MRF || inst->src[0].file != GRF || +	  inst->dst.type != inst->src[0].type || +	  inst->src[0].abs || inst->src[0].negate) +	 continue; + +      /* Can't compute-to-MRF this GRF if someone else was going to +       * read it later. +       */ +      if (this->virtual_grf_use[inst->src[0].reg] > ip) +	 continue; + +      /* Found a move of a GRF to a MRF.  Let's see if we can go +       * rewrite the thing that made this GRF to write into the MRF. +       */ +      bool found = false; +      fs_inst *scan_inst; +      for (scan_inst = (fs_inst *)inst->prev; +	   scan_inst->prev != NULL; +	   scan_inst = (fs_inst *)scan_inst->prev) { +	 /* We don't handle flow control here.  Most computation of +	  * values that end up in MRFs are shortly before the MRF +	  * write anyway. +	  */ +	 if (scan_inst->opcode == BRW_OPCODE_DO || +	     scan_inst->opcode == BRW_OPCODE_WHILE || +	     scan_inst->opcode == BRW_OPCODE_ENDIF) { +	    break; +	 } + +	 /* You can't read from an MRF, so if someone else reads our +	  * MRF's source GRF that we wanted to rewrite, that stops us. +	  */ +	 bool interfered = false; +	 for (int i = 0; i < 3; i++) { +	    if (scan_inst->src[i].file == GRF && +		scan_inst->src[i].reg == inst->src[0].reg && +		scan_inst->src[i].reg_offset == inst->src[0].reg_offset) { +	       interfered = true; +	    } +	 } +	 if (interfered) +	    break; + +	 if (scan_inst->dst.file == MRF && +	     scan_inst->dst.hw_reg == inst->dst.hw_reg) { +	    /* Somebody else wrote our MRF here, so we can't can't +	     * compute-to-MRF before that. +	     */ +	    break; +	 } + +	 if (scan_inst->mlen > 0) { +	    /* Found a SEND instruction, which will do some amount of +	     * implied write that may overwrite our MRF that we were +	     * hoping to compute-to-MRF somewhere above it.  Nothing +	     * we have implied-writes more than 2 MRFs from base_mrf, +	     * though. +	     */ +	    int implied_write_len = MIN2(scan_inst->mlen, 2); +	    if (inst->dst.hw_reg >= scan_inst->base_mrf && +		inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) { +	       break; +	    } +	 } + +	 if (scan_inst->dst.file == GRF && +	     scan_inst->dst.reg == inst->src[0].reg) { +	    /* Found the last thing to write our reg we want to turn +	     * into a compute-to-MRF. +	     */ + +	    if (scan_inst->opcode == FS_OPCODE_TEX) { +	       /* texturing writes several continuous regs, so we can't +		* compute-to-mrf that. +		*/ +	       break; +	    } + +	    /* If it's predicated, it (probably) didn't populate all +	     * the channels. +	     */ +	    if (scan_inst->predicated) +	       break; + +	    /* SEND instructions can't have MRF as a destination. */ +	    if (scan_inst->mlen) +	       break; + +	    if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { +	       /* Found the creator of our MRF's source value. */ +	       found = true; +	       break; +	    } +	 } +      } +      if (found) { +	 scan_inst->dst.file = MRF; +	 scan_inst->dst.hw_reg = inst->dst.hw_reg; +	 scan_inst->saturate |= inst->saturate; +	 inst->remove(); +	 progress = true; +      } +   } + +   return progress; +} +  bool  fs_visitor::virtual_grf_interferes(int a, int b)  { @@ -2825,6 +2947,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)  	 v.calculate_live_intervals();  	 progress = v.propagate_constants() || progress;  	 progress = v.register_coalesce() || progress; +	 progress = v.compute_to_mrf() || progress;  	 progress = v.dead_code_eliminate() || progress;        } while (progress); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 0d4c078fda..93cde7f5ed 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -339,6 +339,7 @@ public:     void calculate_live_intervals();     bool propagate_constants();     bool register_coalesce(); +   bool compute_to_mrf();     bool dead_code_eliminate();     bool virtual_grf_interferes(int a, int b);     void generate_code(); | 
