diff options
| author | Eric Anholt <eric@anholt.net> | 2009-11-11 11:58:12 -0800 | 
|---|---|---|
| committer | Eric Anholt <eric@anholt.net> | 2009-11-13 13:18:56 -0800 | 
| commit | 91bd593109c71310fb7e101c5f73a14f1bbd5f93 (patch) | |
| tree | cad0b0e49e78fbe49fa836cc961e01c0180e224f /src/mesa | |
| parent | d6690ce15fb8c7c6abf1bc0d847c1d2da2c33904 (diff) | |
i965: Avoid moving the current value back into the accumulator for MAD.
This is a 2.9% (+/-.3%) performance win for my GL demo, which hits MAD
sequences for matrix transforms.
Diffstat (limited to 'src/mesa')
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_emit.c | 35 | 
1 file changed, 34 insertions, 1 deletion
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 15154c3b8e..f7b0726636 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1271,6 +1271,38 @@ post_vs_emit( struct brw_vs_compile *c,
    }
 }
 
+static GLboolean
+accumulator_contains(struct brw_vs_compile *c, struct brw_reg val)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *prev_insn = &p->store[p->nr_insn - 1];
+
+   if (p->nr_insn == 0)
+      return GL_FALSE;
+
+   if (val.address_mode != BRW_ADDRESS_DIRECT)
+      return GL_FALSE;
+
+   switch (prev_insn->header.opcode) {
+   case BRW_OPCODE_MOV:
+   case BRW_OPCODE_MAC:
+   case BRW_OPCODE_MUL:
+      if (prev_insn->header.access_mode == BRW_ALIGN_16 &&
+	  prev_insn->header.execution_size == val.width &&
+	  prev_insn->bits1.da1.dest_reg_file == val.file &&
+	  prev_insn->bits1.da1.dest_reg_type == val.type &&
+	  prev_insn->bits1.da1.dest_address_mode == val.address_mode &&
+	  prev_insn->bits1.da1.dest_reg_nr == val.nr &&
+	  prev_insn->bits1.da16.dest_subreg_nr == val.subnr / 16 &&
+	  prev_insn->bits1.da16.dest_writemask == 0xf)
+	 return GL_TRUE;
+      else
+	 return GL_FALSE;
+   default:
+      return GL_FALSE;
+   }
+}
+
 static uint32_t
 get_predicate(const struct prog_instruction *inst)
 {
@@ -1449,7 +1481,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
 	 unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
 	 break;
       case OPCODE_MAD:
-	 brw_MOV(p, brw_acc_reg(), args[2]);
+	 if (!accumulator_contains(c, args[2]))
+	    brw_MOV(p, brw_acc_reg(), args[2]);
 	 brw_MAC(p, dst, args[0], args[1]);
 	 break;
       case OPCODE_MAX:
```
