diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 93 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 25 | 
2 files changed, 116 insertions, 2 deletions
| diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index e2c7dbde6a..41081c3b63 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2111,6 +2111,7 @@ static void  assign_reg(int *reg_hw_locations, fs_reg *reg)  {     if (reg->file == GRF && reg->reg != 0) { +      assert(reg->reg_offset >= 0);        reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;        reg->reg = 0;     } @@ -2302,7 +2303,7 @@ fs_visitor::assign_regs()  	 }        } -      assert(hw_reg != -1); +      assert(hw_reg >= 0);        hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;        last_grf = MAX2(last_grf,  		      hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); @@ -2322,6 +2323,92 @@ fs_visitor::assign_regs()     talloc_free(regs);  } +/** + * Split large virtual GRFs into separate components if we can. + * + * This is mostly duplicated with what brw_fs_vector_splitting does, + * but that's really conservative because it's afraid of doing + * splitting that doesn't result in real progress after the rest of + * the optimization phases, which would cause infinite looping in + * optimization.  We can do it once here, safely.  This also has the + * opportunity to split interpolated values, or maybe even uniforms, + * which we don't have at the IR level. + * + * We want to split, because virtual GRFs are what we register + * allocate and spill (due to contiguousness requirements for some + * instructions), and they're what we naturally generate in the + * codegen process, but most virtual GRFs don't actually need to be + * contiguous sets of GRFs.  If we split, we'll end up with reduced + * live intervals and better dead code elimination and coalescing. + */ +void +fs_visitor::split_virtual_grfs() +{ +   int num_vars = this->virtual_grf_next; +   bool split_grf[num_vars]; +   int new_virtual_grf[num_vars]; + +   /* Try to split anything > 0 sized. */ +   for (int i = 0; i < num_vars; i++) { +      if (this->virtual_grf_sizes[i] != 1) +	 split_grf[i] = true; +      else +	 split_grf[i] = false; +   } + +   if (brw->has_pln) { +      /* PLN opcodes rely on the delta_xy being contiguous. */ +      split_grf[this->delta_x.reg] = false; +   } + +   foreach_iter(exec_list_iterator, iter, this->instructions) { +      fs_inst *inst = (fs_inst *)iter.get(); + +      /* Texturing produces 4 contiguous registers, so no splitting. */ +      if ((inst->opcode == FS_OPCODE_TEX || +	   inst->opcode == FS_OPCODE_TXB || +	   inst->opcode == FS_OPCODE_TXL) && +	  inst->dst.file == GRF) { +	 split_grf[inst->dst.reg] = false; +      } +   } + +   /* Allocate new space for split regs.  Note that the virtual +    * numbers will be contiguous. +    */ +   for (int i = 0; i < num_vars; i++) { +      if (split_grf[i]) { +	 new_virtual_grf[i] = virtual_grf_alloc(1); +	 for (int j = 2; j < this->virtual_grf_sizes[i]; j++) { +	    int reg = virtual_grf_alloc(1); +	    assert(reg == new_virtual_grf[i] + j - 1); +	 } +	 this->virtual_grf_sizes[i] = 1; +      } +   } + +   foreach_iter(exec_list_iterator, iter, this->instructions) { +      fs_inst *inst = (fs_inst *)iter.get(); + +      if (inst->dst.file == GRF && +	  split_grf[inst->dst.reg] && +	  inst->dst.reg_offset != 0) { +	 inst->dst.reg = (new_virtual_grf[inst->dst.reg] + +			  inst->dst.reg_offset - 1); +	 inst->dst.reg_offset = 0; +      } +      for (int i = 0; i < 3; i++) { +	 if (inst->src[i].file == GRF && +	     split_grf[inst->src[i].reg] && +	     inst->src[i].reg_offset != 0) { +	    inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] + +				inst->src[i].reg_offset - 1); +	    inst->src[i].reg_offset = 0; +	 } +      } +   } +} +  void  fs_visitor::calculate_live_intervals()  { @@ -3054,13 +3141,15 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)        }        v.emit_fb_writes(); + +      v.split_virtual_grfs(); +        v.assign_curb_setup();        v.assign_urb_setup();        bool progress;        do {  	 progress = false; -  	 v.calculate_live_intervals();  	 progress = v.propagate_constants() || progress;  	 progress = v.register_coalesce() || progress; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 929ac682b0..d0e84da1aa 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -213,6 +213,9 @@ public:        init();        this->opcode = opcode;        this->dst = dst; + +      if (dst.file == GRF) +	 assert(dst.reg_offset >= 0);     }     fs_inst(int opcode, fs_reg dst, fs_reg src0) @@ -221,6 +224,11 @@ public:        this->opcode = opcode;        this->dst = dst;        this->src[0] = src0; + +      if (dst.file == GRF) +	 assert(dst.reg_offset >= 0); +      if (src[0].file == GRF) +	 assert(src[0].reg_offset >= 0);     }     fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) @@ -230,6 +238,13 @@ public:        this->dst = dst;        this->src[0] = src0;        this->src[1] = src1; + +      if (dst.file == GRF) +	 assert(dst.reg_offset >= 0); +      if (src[0].file == GRF) +	 assert(src[0].reg_offset >= 0); +      if (src[1].file == GRF) +	 assert(src[1].reg_offset >= 0);     }     fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) @@ -240,6 +255,15 @@ public:        this->src[0] = src0;        this->src[1] = src1;        this->src[2] = src2; + +      if (dst.file == GRF) +	 assert(dst.reg_offset >= 0); +      if (src[0].file == GRF) +	 assert(src[0].reg_offset >= 0); +      if (src[1].file == GRF) +	 assert(src[1].reg_offset >= 0); +      if (src[2].file == GRF) +	 assert(src[2].reg_offset >= 0);     }     int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ @@ -336,6 +360,7 @@ public:     void assign_urb_setup();     void assign_regs();     void assign_regs_trivial(); +   void split_virtual_grfs();     void calculate_live_intervals();     bool propagate_constants();     bool register_coalesce(); | 
