diff options
| author | Eric Anholt <eric@anholt.net> | 2010-07-26 12:41:39 -0700 | 
|---|---|---|
| committer | Eric Anholt <eric@anholt.net> | 2010-07-26 13:08:25 -0700 | 
| commit | 22f839292f48a47601e1b97a7f4679018c42d0ed (patch) | |
| tree | 3bc43839b6f2150f69fdb2eee34aed11b1e407d3 /src | |
| parent | a64def5f2ae1336cafff64a782ec5314d31c310f (diff) | |
i965: Move the GRF-to-MRF optimizations to brw_optimize.c.
Diffstat (limited to 'src')
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 2 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_optimize.c | 613 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_emit.c | 622 | 
3 files changed, 618 insertions, 619 deletions
| diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 31ff86cf73..bc151738f6 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -984,5 +984,7 @@ void brw_set_src1( struct brw_instruction *insn,  /* brw_optimize.c */  void brw_optimize(struct brw_compile *p); +void brw_remove_duplicate_mrf_moves(struct brw_compile *p); +void brw_remove_mrf_to_grf_moves(struct brw_compile *p);  #endif diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c index a364b15820..136dbbd73a 100644 --- a/src/mesa/drivers/dri/i965/brw_optimize.c +++ b/src/mesa/drivers/dri/i965/brw_optimize.c @@ -32,6 +32,619 @@  #include "brw_defines.h"  #include "brw_eu.h" +#define BRW_MRF_NUM 16 +#define BRW_SIZE_OF_REG 32 + +static INLINE +GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst) +{ +   switch (inst->header.opcode) { +      case BRW_OPCODE_MOV: +      case BRW_OPCODE_SEL: +      case BRW_OPCODE_NOT: +      case BRW_OPCODE_AND: +      case BRW_OPCODE_OR: +      case BRW_OPCODE_XOR: +      case BRW_OPCODE_SHR: +      case BRW_OPCODE_SHL: +      case BRW_OPCODE_RSR: +      case BRW_OPCODE_RSL: +      case BRW_OPCODE_ADD: +      case BRW_OPCODE_MUL: +      case BRW_OPCODE_AVG: +      case BRW_OPCODE_FRC: +      case BRW_OPCODE_RNDU: +      case BRW_OPCODE_RNDD: +      case BRW_OPCODE_RNDE: +      case BRW_OPCODE_RNDZ: +      case BRW_OPCODE_MAC: +      case BRW_OPCODE_MACH: +      case BRW_OPCODE_LINE: +         return GL_TRUE; +      default: +         return GL_FALSE; +   } +} + +static const struct { +    char    *name; +    int	    nsrc; +    int	    ndst; +} inst_opcode[128] = { +    [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, +    [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, +    [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, +    [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, +    [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, +    [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, +    [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, +    [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + +    [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, + +    [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, + +    [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, +    [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, +    [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, +    [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, +    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, +    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, +    [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, +    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, +    [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, +    [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, +    [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, +    [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, +    [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, +    [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, +    [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, +    [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, +    [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, +}; + +static const GLuint inst_stride[7] = { +    [0] = 0, +    [1] = 1, +    [2] = 2, +    [3] = 4, +    [4] = 8, +    [5] = 16, +    [6] = 32 +}; + +static const GLuint inst_type_size[8] = { +    [0] = 4, +    [1] = 4, +    [2] = 2, +    [3] = 2, +    [4] = 1, +    [5] = 1, +    [7] = 4 +}; + +#define BRW_MAX_OFFSET(x0,x1) ((x0) > (x1) ? (x0) : (x1)) +#define BRW_MIN_OFFSET(x0,x1) ((x0) < (x1) ? (x0) : (x1)); + +static INLINE GLboolean +brw_is_grf_written(const struct brw_instruction *inst, +                   int reg_index, int size, +                   int gen) +{ +   if (inst_opcode[inst->header.opcode].ndst == 0) +      return GL_FALSE; + +   if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) +      if (inst->bits1.ia1.dest_reg_file == BRW_GENERAL_REGISTER_FILE) +         return GL_TRUE; + +   if (inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE) +      return GL_FALSE; + +   const int reg_start = reg_index * BRW_SIZE_OF_REG; +   const int reg_end = reg_start + size; + +   const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type]; +   const int write_start = inst->bits1.da1.dest_reg_nr*BRW_SIZE_OF_REG +                         + inst->bits1.da1.dest_subreg_nr; +   int length, write_end; + +   /* SEND is specific */ +   if (inst->header.opcode == BRW_OPCODE_SEND) { +      if (gen >= 5) +         length = inst->bits3.generic_gen5.response_length*BRW_SIZE_OF_REG; +      else  +         length = inst->bits3.generic.response_length*BRW_SIZE_OF_REG; +   } +   else { +      length = 1 << inst->header.execution_size; +      length *= type_size; +      length *= inst->bits1.da1.dest_horiz_stride; +   } + +   /* If the two intervals intersect, we overwrite the register */ +   write_end = write_start + length; +   const int left = BRW_MAX_OFFSET(write_start, reg_start); +   const int right = BRW_MIN_OFFSET(write_end, reg_end); + +   return left < right; +} + +/* Specific path for message register since we need to handle the compr4 case */ +static INLINE GLboolean +brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size) +{ +   if (inst_opcode[inst->header.opcode].ndst == 0) +      return GL_FALSE; + +   if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) +      if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE) +         return GL_TRUE; + +   if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE) +      return GL_FALSE; + +   const int reg_start = reg_index * BRW_SIZE_OF_REG; +   const int reg_end = reg_start + size; + +   const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f; +   const int is_compr4 = inst->bits1.da1.dest_reg_nr & 0xf0; +   const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type]; + +   /* We use compr4 with a size != 16 elements. Strange, we conservatively +    * consider that we are writing the register. +    */ +   if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16) +      return GL_TRUE; + +   GLboolean is_written = GL_FALSE; + +   /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */ +   if (is_compr4) { +      const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride; + +      /* First 8-way register */ +      const int write_start0 = mrf_index*BRW_SIZE_OF_REG +                             + inst->bits1.da1.dest_subreg_nr; +      const int write_end0 = write_start0 + length; + +      /* Second 8-way register */ +      const int write_start1 = (mrf_index+4)*BRW_SIZE_OF_REG +                             + inst->bits1.da1.dest_subreg_nr; +      const int write_end1 = write_start1 + length; + +      /* If the two intervals intersect, we overwrite the register */ +      const int left0 = BRW_MAX_OFFSET(write_start0, reg_start); +      const int right0 = BRW_MIN_OFFSET(write_end0, reg_end); +      const int left1 = BRW_MAX_OFFSET(write_start1, reg_start); +      const int right1 = BRW_MIN_OFFSET(write_end1, reg_end); + +      is_written = left0 < right0 || left1 < right1; +   } +   else { +      int length; +      length = 1 << inst->header.execution_size; +      length *= type_size; +      length *= inst->bits1.da1.dest_horiz_stride; + +      /* If the two intervals intersect, we write into the register */ +      const int write_start = inst->bits1.da1.dest_reg_nr*BRW_SIZE_OF_REG +                            + inst->bits1.da1.dest_subreg_nr; +      const int write_end = write_start + length; +      const int left = BRW_MAX_OFFSET(write_start, reg_start); +      const int right = BRW_MIN_OFFSET(write_end, reg_end);; + +      is_written = left < right; +   } + +   /* SEND may perform an implicit mov to a mrf register */ +   if (is_written == GL_FALSE && +       inst->header.opcode == BRW_OPCODE_SEND && +       inst->bits1.da1.src0_reg_file != 0) { + +      const int mrf_start = inst->header.destreg__conditionalmod; +      const int write_start = mrf_start * BRW_SIZE_OF_REG; +      const int write_end = write_start + BRW_SIZE_OF_REG; +      const int left = BRW_MAX_OFFSET(write_start, reg_start); +      const int right = BRW_MIN_OFFSET(write_end, reg_end);; +      is_written = left < right; +   } + +   return is_written; +} + +static INLINE GLboolean +brw_is_mrf_read(const struct brw_instruction *inst, +                int reg_index, int size, int gen) +{ +   if (inst->header.opcode != BRW_OPCODE_SEND) +      return GL_FALSE; +   if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) +      return GL_TRUE; + +   const int reg_start = reg_index*BRW_SIZE_OF_REG; +   const int reg_end = reg_start + size; + +   int length, read_start, read_end; +   if (gen >= 5) +      length = inst->bits3.generic_gen5.msg_length*BRW_SIZE_OF_REG; +   else  +      length = inst->bits3.generic.msg_length*BRW_SIZE_OF_REG; + +   /* Look if SEND uses an implicit mov. In that case, we read one less register +    * (but we write it) +    */ +   if (inst->bits1.da1.src0_reg_file != 0) +      read_start = inst->header.destreg__conditionalmod; +   else { +      length--; +      read_start = inst->header.destreg__conditionalmod + 1; +   } +   read_start *= BRW_SIZE_OF_REG; +   read_end = read_start + length; + +   const int left = BRW_MAX_OFFSET(read_start, reg_start); +   const int right = BRW_MIN_OFFSET(read_end, reg_end); + +   return left < right; +} + +static INLINE GLboolean +brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size) +{ +   int i, j; +   if (inst_opcode[inst->header.opcode].nsrc == 0) +      return GL_FALSE; + +   /* Look at first source. We must take into account register regions to +    * monitor carefully the read. Note that we are a bit too conservative here +    * since we do not take into account the fact that some complete registers +    * may be skipped +    */ +   if (inst_opcode[inst->header.opcode].nsrc >= 1) { + +      if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) +         if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE) +            return GL_TRUE; +      if (inst->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE) +         return GL_FALSE; + +      const int reg_start = reg_index*BRW_SIZE_OF_REG; +      const int reg_end = reg_start + size; + +      /* See if at least one of this element intersects the interval */ +      const int type_size = inst_type_size[inst->bits1.da1.src0_reg_type]; +      const int elem_num = 1 << inst->header.execution_size; +      const int width = 1 << inst->bits2.da1.src0_width; +      const int row_num = elem_num >> inst->bits2.da1.src0_width; +      const int hs = type_size*inst_stride[inst->bits2.da1.src0_horiz_stride]; +      const int vs = type_size*inst_stride[inst->bits2.da1.src0_vert_stride]; +      int row_start = inst->bits2.da1.src0_reg_nr*BRW_SIZE_OF_REG +                    + inst->bits2.da1.src0_subreg_nr; +      for (j = 0; j < row_num; ++j) { +         int write_start = row_start; +         for (i = 0; i < width; ++i) { +            const int write_end = write_start + type_size; +            const int left = write_start > reg_start ? write_start : reg_start; +            const int right = write_end < reg_end ? write_end : reg_end; +            if (left < right) +               return GL_TRUE; +            write_start += hs; +         } +         row_start += vs; +      } +   } + +   /* Second src register */ +   if (inst_opcode[inst->header.opcode].nsrc >= 2) { + +      if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT) +         if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE) +            return GL_TRUE; +      if (inst->bits1.da1.src1_reg_file != BRW_GENERAL_REGISTER_FILE) +         return GL_FALSE; + +      const int reg_start = reg_index*BRW_SIZE_OF_REG; +      const int reg_end = reg_start + size; + +      /* See if at least one of this element intersects the interval */ +      const int type_size = inst_type_size[inst->bits1.da1.src1_reg_type]; +      const int elem_num = 1 << inst->header.execution_size; +      const int width = 1 << inst->bits3.da1.src1_width; +      const int row_num = elem_num >> inst->bits3.da1.src1_width; +      const int hs = type_size*inst_stride[inst->bits3.da1.src1_horiz_stride]; +      const int vs = type_size*inst_stride[inst->bits3.da1.src1_vert_stride]; +      int row_start = inst->bits3.da1.src1_reg_nr*BRW_SIZE_OF_REG +                    + inst->bits3.da1.src1_subreg_nr; +      for (j = 0; j < row_num; ++j) { +         int write_start = row_start; +         for (i = 0; i < width; ++i) { +            const int write_end = write_start + type_size; +            const int left = write_start > reg_start ? write_start : reg_start; +            const int right = write_end < reg_end ? write_end : reg_end; +            if (left < right) +               return GL_TRUE; +            write_start += hs; +         } +         row_start += vs; +      } +   } + +   return GL_FALSE; +} + +static INLINE GLboolean +brw_is_control_done(const struct brw_instruction *mov) { +   return +       mov->header.dependency_control != 0 || +       mov->header.thread_control != 0 || +       mov->header.mask_control != 0 || +       mov->header.saturate != 0 || +       mov->header.debug_control != 0; +} + +static INLINE GLboolean +brw_is_predicated(const struct brw_instruction *mov) { +   return mov->header.predicate_control != 0; +} + +static INLINE GLboolean +brw_is_grf_to_mrf_mov(const struct brw_instruction *mov, +                      int *mrf_index, +                      int *grf_index, +                      GLboolean *is_compr4) +{ +   if (brw_is_predicated(mov) || +       brw_is_control_done(mov) || +       mov->header.debug_control != 0) +      return GL_FALSE; + +   if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT || +       mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE || +       mov->bits1.da1.dest_reg_type != 7 || +       mov->bits1.da1.dest_horiz_stride != 1 || +       mov->bits1.da1.dest_subreg_nr != 0) +      return GL_FALSE; + +   if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT || +       mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE || +       mov->bits1.da1.src0_reg_type != 7 || +       mov->bits2.da1.src0_width != 3 || +       mov->bits2.da1.src0_horiz_stride != 1 || +       mov->bits2.da1.src0_vert_stride != 4 || +       mov->bits2.da1.src0_subreg_nr != 0 || +       mov->bits2.da1.src0_abs != 0 || +       mov->bits2.da1.src0_negate != 0) +      return GL_FALSE; + +   *grf_index = mov->bits2.da1.src0_reg_nr; +   *mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f; +   *is_compr4 = (mov->bits1.da1.dest_reg_nr & 0xf0) != 0; +   return GL_TRUE; +} + +static INLINE GLboolean +brw_is_grf_straight_write(const struct brw_instruction *inst, int grf_index) +{ +   /* remark: no problem to predicate a SEL instruction */ +   if ((!brw_is_predicated(inst) || inst->header.opcode == BRW_OPCODE_SEL) && +       brw_is_control_done(inst) == GL_FALSE && +       inst->header.execution_size == 4 && +       inst->header.access_mode == BRW_ALIGN_1 && +       inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT && +       inst->bits1.da1.dest_reg_file == BRW_GENERAL_REGISTER_FILE && +       inst->bits1.da1.dest_reg_type == 7 && +       inst->bits1.da1.dest_horiz_stride == 1 && +       inst->bits1.da1.dest_reg_nr == grf_index && +       inst->bits1.da1.dest_subreg_nr == 0 && +       brw_is_arithmetic_inst(inst)) +      return GL_TRUE; + +   return GL_FALSE; +} + +static INLINE GLboolean +brw_inst_are_equal(const struct brw_instruction *src0, +                   const struct brw_instruction *src1) +{ +   const GLuint *field0 = (GLuint *) src0; +   const GLuint *field1 = (GLuint *) src1; +   return field0[0] == field1[0] && +          field0[1] == field1[1] && +          field0[2] == field1[2] && +          field0[3] == field1[3]; +} + +static INLINE void +brw_inst_copy(struct brw_instruction *dst, +              const struct brw_instruction *src) +{ +   GLuint *field_dst = (GLuint *) dst; +   const GLuint *field_src = (GLuint *) src; +   field_dst[0] = field_src[0]; +   field_dst[1] = field_src[1]; +   field_dst[2] = field_src[2]; +   field_dst[3] = field_src[3]; +} + +static void brw_remove_inst(struct brw_compile *p, const GLboolean *removeInst) +{ +   int i, nr_insn = 0, to = 0, from = 0; + +   for (from = 0; from < p->nr_insn; ++from) { +      if (removeInst[from]) +         continue; +      if(to != from) +         brw_inst_copy(p->store + to, p->store + from); +      to++; +   } + +   for (i = 0; i < p->nr_insn; ++i) +      if (removeInst[i] == GL_FALSE) +         nr_insn++; +   p->nr_insn = nr_insn; +} + +/* The gen code emitter generates a lot of duplications in the mrf-to-grf moves. + * Here, we monitor same mov mrf-to-grf instrutions and remove them as soon as + * none of the two operands have been written + */ +void brw_remove_duplicate_mrf_moves(struct brw_compile *p) +{ +   const int gen = p->brw->intel.gen; +   int i, j; + +   GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn); +   for (i = 0; i < p->nr_insn; i++) { +      if (removeInst[i]) +         continue; + +      const struct brw_instruction *mov = p->store + i; +      int mrf_index, grf_index; +      GLboolean is_compr4; + +      /* Only consider _straight_ grf-to-mrf moves */ +      if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4)) +         continue; + +      const int mrf_index0 = mrf_index; +      const int mrf_index1 = is_compr4 ? mrf_index0+4 : mrf_index0+1; +      const int simd16_size = 2 * BRW_SIZE_OF_REG; + +      for (j = i + 1; j < p->nr_insn; j++) { +         const struct brw_instruction *inst = p->store + j; + +         if (brw_inst_are_equal(mov, inst)) { +            removeInst[j] = GL_TRUE; +            continue; +         } + +         if (brw_is_grf_written(inst, grf_index, simd16_size, gen) || +             brw_is_mrf_written(inst, mrf_index0, BRW_SIZE_OF_REG) || +             brw_is_mrf_written(inst, mrf_index1, BRW_SIZE_OF_REG)) +            break; +      } +   } + +   brw_remove_inst(p, removeInst); +   free(removeInst); +} + +void brw_remove_mrf_to_grf_moves(struct brw_compile *p) +{ +   int i, j, prev; +   struct brw_context *brw = p->brw; +   const int gen = brw->intel.gen; +   const int simd16_size = 2*BRW_SIZE_OF_REG; + +   GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn); +   assert(removeInst); + +   for (i = 0; i < p->nr_insn; i++) { +      if (removeInst[i]) +         continue; + +      struct brw_instruction *grf_inst = NULL; +      const struct brw_instruction *mov = p->store + i; +      int mrf_index, grf_index; +      GLboolean is_compr4; + +      /* Only consider _straight_ grf-to-mrf moves */ +      if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4)) +         continue; + +      /* Using comp4 enables a stride of 4 for this instruction */ +      const int mrf_index0 = mrf_index; +      const int mrf_index1 = is_compr4 ? mrf_index+4 : mrf_index+1; + +      /* Look where the register has been set */ +      prev = i; +      GLboolean potential_remove = GL_FALSE; +      while (prev--) { + +         /* If _one_ instruction writes the grf, we try to remove the mov */ +         struct brw_instruction *inst = p->store + prev; +         if (brw_is_grf_straight_write(inst, grf_index)) { +            potential_remove = GL_TRUE; +            grf_inst = inst; +            break; +         } + +      } + +      if (potential_remove == GL_FALSE) +         continue; +      removeInst[i] = GL_TRUE; + +      /* Monitor first the section of code between the grf computation and the +       * mov. Here we cannot read or write both mrf and grf register +       */ +      for (j = prev + 1; j < i; ++j) { +         struct brw_instruction *inst = p->store + j; +         if (removeInst[j]) +            continue; +         if (brw_is_grf_written(inst, grf_index, simd16_size, gen)   || +             brw_is_grf_read(inst, grf_index, simd16_size)           || +             brw_is_mrf_written(inst, mrf_index0, BRW_SIZE_OF_REG)   || +             brw_is_mrf_written(inst, mrf_index1, BRW_SIZE_OF_REG)   || +             brw_is_mrf_read(inst, mrf_index0, BRW_SIZE_OF_REG, gen) || +             brw_is_mrf_read(inst, mrf_index1, BRW_SIZE_OF_REG, gen)) { +            removeInst[i] = GL_FALSE; +            break; +         } +      } + +      /* After the mov, we can read or write the mrf. If the grf is overwritten, +       * we are done +       */ +      for (j = i + 1; j < p->nr_insn; ++j) { +         struct brw_instruction *inst = p->store + j; +         if (removeInst[j]) +            continue; + +         if (brw_is_grf_read(inst, grf_index, simd16_size)) { +            removeInst[i] = GL_FALSE; +            break; +         } + +         if (brw_is_grf_straight_write(inst, grf_index)) +            break; +      } + +      /* Note that with the top down traversal, we can safely pacth the mov +       * instruction +       */ +      if (removeInst[i]) { +         grf_inst->bits1.da1.dest_reg_file = mov->bits1.da1.dest_reg_file; +         grf_inst->bits1.da1.dest_reg_nr = mov->bits1.da1.dest_reg_nr; +      } +   } + +   brw_remove_inst(p, removeInst); +   free(removeInst); +} +  static GLboolean  is_single_channel_dp4(struct brw_instruction *insn)  { diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index d10e1c70d2..b09071fe97 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1459,623 +1459,6 @@ static void spill_values( struct brw_wm_compile *c,  	 emit_spill(c, values[i].hw_reg, values[i].spill_slot);  } -#define BRW_MRF_NUM 16 -#define BRW_SIZE_OF_REG 32 - -static INLINE -GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst) -{ -   switch (inst->header.opcode) { -      case BRW_OPCODE_MOV: -      case BRW_OPCODE_SEL: -      case BRW_OPCODE_NOT: -      case BRW_OPCODE_AND: -      case BRW_OPCODE_OR: -      case BRW_OPCODE_XOR: -      case BRW_OPCODE_SHR: -      case BRW_OPCODE_SHL: -      case BRW_OPCODE_RSR: -      case BRW_OPCODE_RSL: -      case BRW_OPCODE_ADD: -      case BRW_OPCODE_MUL: -      case BRW_OPCODE_AVG: -      case BRW_OPCODE_FRC: -      case BRW_OPCODE_RNDU: -      case BRW_OPCODE_RNDD: -      case BRW_OPCODE_RNDE: -      case BRW_OPCODE_RNDZ: -      case BRW_OPCODE_MAC: -      case BRW_OPCODE_MACH: -      case BRW_OPCODE_LINE: -         return GL_TRUE; -      default: -         return GL_FALSE; -   } -} - -static const struct { -    char    *name; -    int	    nsrc; -    int	    ndst; -} inst_opcode[128] = { -    [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, -    [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, -    [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, -    [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, -    [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, -    [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, -    [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, -    [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, - -    [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, - -    [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, - -    [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, -    [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, -    [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, -    [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, -    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, -    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, -    [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, -    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, -    [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, -    [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, -    [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, -    [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, -    [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, -    [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, -    [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, -    [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, -    [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, -}; - -static const GLuint inst_stride[7] = { -    [0] = 0, -    [1] = 1, -    [2] = 2, -    [3] = 4, -    [4] = 8, -    [5] = 16, -    [6] = 32 -}; - -static const GLuint inst_type_size[8] = { -    [0] = 4, -    [1] = 4, -    [2] = 2, -    [3] = 2, -    [4] = 1, -    [5] = 1, -    [7] = 4 -}; - -#define BRW_MAX_OFFSET(x0,x1) ((x0) > (x1) ? (x0) : (x1)) -#define BRW_MIN_OFFSET(x0,x1) ((x0) < (x1) ? (x0) : (x1)); - -static INLINE GLboolean -brw_is_grf_written(const struct brw_instruction *inst, -                   int reg_index, int size, -                   int gen) -{ -   if (inst_opcode[inst->header.opcode].ndst == 0) -      return GL_FALSE; - -   if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) -      if (inst->bits1.ia1.dest_reg_file == BRW_GENERAL_REGISTER_FILE) -         return GL_TRUE; - -   if (inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE) -      return GL_FALSE; - -   const int reg_start = reg_index * BRW_SIZE_OF_REG; -   const int reg_end = reg_start + size; - -   const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type]; -   const int write_start = inst->bits1.da1.dest_reg_nr*BRW_SIZE_OF_REG -                         + inst->bits1.da1.dest_subreg_nr; -   int length, write_end; - -   /* SEND is specific */ -   if (inst->header.opcode == BRW_OPCODE_SEND) { -      if (gen >= 5) -         length = inst->bits3.generic_gen5.response_length*BRW_SIZE_OF_REG; -      else  -         length = inst->bits3.generic.response_length*BRW_SIZE_OF_REG; -   } -   else { -      length = 1 << inst->header.execution_size; -      length *= type_size; -      length *= inst->bits1.da1.dest_horiz_stride; -   } - -   /* If the two intervals intersect, we overwrite the register */ -   write_end = write_start + length; -   const int left = BRW_MAX_OFFSET(write_start, reg_start); -   const int right = BRW_MIN_OFFSET(write_end, reg_end); - -   return left < right; -} - -/* Specific path for message register since we need to handle the compr4 case */ -static INLINE GLboolean -brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size) -{ -   if (inst_opcode[inst->header.opcode].ndst == 0) -      return GL_FALSE; - -   if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) -      if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE) -         return GL_TRUE; - -   if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE) -      return GL_FALSE; - -   const int reg_start = reg_index * BRW_SIZE_OF_REG; -   const int reg_end = reg_start + size; - -   const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f; -   const int is_compr4 = inst->bits1.da1.dest_reg_nr & 0xf0; -   const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type]; - -   /* We use compr4 with a size != 16 elements. Strange, we conservatively -    * consider that we are writing the register. -    */ -   if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16) -      return GL_TRUE; - -   GLboolean is_written = GL_FALSE; - -   /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */ -   if (is_compr4) { -      const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride; - -      /* First 8-way register */ -      const int write_start0 = mrf_index*BRW_SIZE_OF_REG -                             + inst->bits1.da1.dest_subreg_nr; -      const int write_end0 = write_start0 + length; - -      /* Second 8-way register */ -      const int write_start1 = (mrf_index+4)*BRW_SIZE_OF_REG -                             + inst->bits1.da1.dest_subreg_nr; -      const int write_end1 = write_start1 + length; - -      /* If the two intervals intersect, we overwrite the register */ -      const int left0 = BRW_MAX_OFFSET(write_start0, reg_start); -      const int right0 = BRW_MIN_OFFSET(write_end0, reg_end); -      const int left1 = BRW_MAX_OFFSET(write_start1, reg_start); -      const int right1 = BRW_MIN_OFFSET(write_end1, reg_end); - -      is_written = left0 < right0 || left1 < right1; -   } -   else { -      int length; -      length = 1 << inst->header.execution_size; -      length *= type_size; -      length *= inst->bits1.da1.dest_horiz_stride; - -      /* If the two intervals intersect, we write into the register */ -      const int write_start = inst->bits1.da1.dest_reg_nr*BRW_SIZE_OF_REG -                            + inst->bits1.da1.dest_subreg_nr; -      const int write_end = write_start + length; -      const int left = BRW_MAX_OFFSET(write_start, reg_start); -      const int right = BRW_MIN_OFFSET(write_end, reg_end);; - -      is_written = left < right; -   } - -   /* SEND may perform an implicit mov to a mrf register */ -   if (is_written == GL_FALSE && -       inst->header.opcode == BRW_OPCODE_SEND && -       inst->bits1.da1.src0_reg_file != 0) { - -      const int mrf_start = inst->header.destreg__conditionalmod; -      const int write_start = mrf_start * BRW_SIZE_OF_REG; -      const int write_end = write_start + BRW_SIZE_OF_REG; -      const int left = BRW_MAX_OFFSET(write_start, reg_start); -      const int right = BRW_MIN_OFFSET(write_end, reg_end);; -      is_written = left < right; -   } - -   return is_written; -} - -static INLINE GLboolean -brw_is_mrf_read(const struct brw_instruction *inst, -                int reg_index, int size, int gen) -{ -   if (inst->header.opcode != BRW_OPCODE_SEND) -      return GL_FALSE; -   if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) -      return GL_TRUE; - -   const int reg_start = reg_index*BRW_SIZE_OF_REG; -   const int reg_end = reg_start + size; - -   int length, read_start, read_end; -   if (gen >= 5) -      length = inst->bits3.generic_gen5.msg_length*BRW_SIZE_OF_REG; -   else  -      length = inst->bits3.generic.msg_length*BRW_SIZE_OF_REG; - -   /* Look if SEND uses an implicit mov. In that case, we read one less register -    * (but we write it) -    */ -   if (inst->bits1.da1.src0_reg_file != 0) -      read_start = inst->header.destreg__conditionalmod; -   else { -      length--; -      read_start = inst->header.destreg__conditionalmod + 1; -   } -   read_start *= BRW_SIZE_OF_REG; -   read_end = read_start + length; - -   const int left = BRW_MAX_OFFSET(read_start, reg_start); -   const int right = BRW_MIN_OFFSET(read_end, reg_end); - -   return left < right; -} - -static INLINE GLboolean -brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size) -{ -   int i, j; -   if (inst_opcode[inst->header.opcode].nsrc == 0) -      return GL_FALSE; - -   /* Look at first source. We must take into account register regions to -    * monitor carefully the read. Note that we are a bit too conservative here -    * since we do not take into account the fact that some complete registers -    * may be skipped -    */ -   if (inst_opcode[inst->header.opcode].nsrc >= 1) { - -      if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) -         if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE) -            return GL_TRUE; -      if (inst->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE) -         return GL_FALSE; - -      const int reg_start = reg_index*BRW_SIZE_OF_REG; -      const int reg_end = reg_start + size; - -      /* See if at least one of this element intersects the interval */ -      const int type_size = inst_type_size[inst->bits1.da1.src0_reg_type]; -      const int elem_num = 1 << inst->header.execution_size; -      const int width = 1 << inst->bits2.da1.src0_width; -      const int row_num = elem_num >> inst->bits2.da1.src0_width; -      const int hs = type_size*inst_stride[inst->bits2.da1.src0_horiz_stride]; -      const int vs = type_size*inst_stride[inst->bits2.da1.src0_vert_stride]; -      int row_start = inst->bits2.da1.src0_reg_nr*BRW_SIZE_OF_REG -                    + inst->bits2.da1.src0_subreg_nr; -      for (j = 0; j < row_num; ++j) { -         int write_start = row_start; -         for (i = 0; i < width; ++i) { -            const int write_end = write_start + type_size; -            const int left = write_start > reg_start ? write_start : reg_start; -            const int right = write_end < reg_end ? write_end : reg_end; -            if (left < right) -               return GL_TRUE; -            write_start += hs; -         } -         row_start += vs; -      } -   } - -   /* Second src register */ -   if (inst_opcode[inst->header.opcode].nsrc >= 2) { - -      if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT) -         if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE) -            return GL_TRUE; -      if (inst->bits1.da1.src1_reg_file != BRW_GENERAL_REGISTER_FILE) -         return GL_FALSE; - -      const int reg_start = reg_index*BRW_SIZE_OF_REG; -      const int reg_end = reg_start + size; - -      /* See if at least one of this element intersects the interval */ -      const int type_size = inst_type_size[inst->bits1.da1.src1_reg_type]; -      const int elem_num = 1 << inst->header.execution_size; -      const int width = 1 << inst->bits3.da1.src1_width; -      const int row_num = elem_num >> inst->bits3.da1.src1_width; -      const int hs = type_size*inst_stride[inst->bits3.da1.src1_horiz_stride]; -      const int vs = type_size*inst_stride[inst->bits3.da1.src1_vert_stride]; -      int row_start = inst->bits3.da1.src1_reg_nr*BRW_SIZE_OF_REG -                    + inst->bits3.da1.src1_subreg_nr; -      for (j = 0; j < row_num; ++j) { -         int write_start = row_start; -         for (i = 0; i < width; ++i) { -            const int write_end = write_start + type_size; -            const int left = write_start > reg_start ? write_start : reg_start; -            const int right = write_end < reg_end ? write_end : reg_end; -            if (left < right) -               return GL_TRUE; -            write_start += hs; -         } -         row_start += vs; -      } -   } - -   return GL_FALSE; -} - -static INLINE GLboolean -brw_is_control_done(const struct brw_instruction *mov) { -   return -       mov->header.dependency_control != 0 || -       mov->header.thread_control != 0 || -       mov->header.mask_control != 0 || -       mov->header.saturate != 0 || -       mov->header.debug_control != 0; -} - -static INLINE GLboolean -brw_is_predicated(const struct brw_instruction *mov) { -   return mov->header.predicate_control != 0; -} - -static INLINE GLboolean -brw_is_grf_to_mrf_mov(const struct brw_instruction *mov, -                      int *mrf_index, -                      int *grf_index, -                      GLboolean *is_compr4) -{ -   if (brw_is_predicated(mov) || -       brw_is_control_done(mov) || -       mov->header.debug_control != 0) -      return GL_FALSE; - -   if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT || -       mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE || -       mov->bits1.da1.dest_reg_type != 7 || -       mov->bits1.da1.dest_horiz_stride != 1 || -       mov->bits1.da1.dest_subreg_nr != 0) -      return GL_FALSE; - -   if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT || -       mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE || -       mov->bits1.da1.src0_reg_type != 7 || -       mov->bits2.da1.src0_width != 3 || -       mov->bits2.da1.src0_horiz_stride != 1 || -       mov->bits2.da1.src0_vert_stride != 4 || -       mov->bits2.da1.src0_subreg_nr != 0 || -       mov->bits2.da1.src0_abs != 0 || -       mov->bits2.da1.src0_negate != 0) -      return GL_FALSE; - -   *grf_index = mov->bits2.da1.src0_reg_nr; -   *mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f; -   *is_compr4 = (mov->bits1.da1.dest_reg_nr & 0xf0) != 0; -   return GL_TRUE; -} - -static INLINE GLboolean -brw_is_grf_straight_write(const struct brw_instruction *inst, int grf_index) -{ -   /* remark: no problem to predicate a SEL instruction */ -   if ((!brw_is_predicated(inst) || inst->header.opcode == BRW_OPCODE_SEL) && -       brw_is_control_done(inst) == GL_FALSE && -       inst->header.execution_size == 4 && -       inst->header.access_mode == BRW_ALIGN_1 && -       inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT && -       inst->bits1.da1.dest_reg_file == BRW_GENERAL_REGISTER_FILE && -       inst->bits1.da1.dest_reg_type == 7 && -       inst->bits1.da1.dest_horiz_stride == 1 && -       inst->bits1.da1.dest_reg_nr == grf_index && -       inst->bits1.da1.dest_subreg_nr == 0 && -       brw_is_arithmetic_inst(inst)) -      return GL_TRUE; - -   return GL_FALSE; -} - -static INLINE GLboolean -brw_inst_are_equal(const struct brw_instruction *src0, -                   const struct brw_instruction *src1) -{ -   const GLuint *field0 = (GLuint *) src0; -   const GLuint *field1 = (GLuint *) src1; -   return field0[0] == field1[0] && -          field0[1] == field1[1] && -          field0[2] == field1[2] && -          field0[3] == field1[3]; -} - -static INLINE void -brw_inst_copy(struct brw_instruction *dst, -              const struct brw_instruction *src) -{ -   GLuint *field_dst = (GLuint *) dst; -   const GLuint *field_src = (GLuint *) src; -   field_dst[0] = field_src[0]; -   field_dst[1] = field_src[1]; -   field_dst[2] = field_src[2]; -   field_dst[3] = field_src[3]; -} - -static void brw_remove_inst(struct brw_compile *p, const GLboolean *removeInst) -{ -   int i, nr_insn = 0, to = 0, from = 0; - -   for (from = 0; from < p->nr_insn; ++from) { -      if (removeInst[from]) -         continue; -      if(to != from) -         brw_inst_copy(p->store + to, p->store + from); -      to++; -   } - -   for (i = 0; i < p->nr_insn; ++i) -      if (removeInst[i] == GL_FALSE) -         nr_insn++; -   p->nr_insn = nr_insn; -} - -/* The gen code emitter generates a lot of duplications in the mrf-to-grf moves. - * Here, we monitor same mov mrf-to-grf instrutions and remove them as soon as - * none of the two operands have been written - */ -static void brw_remove_duplicate_mrf_moves(struct brw_wm_compile *c) -{ -   struct brw_compile *p = &c->func; -   const int gen = p->brw->intel.gen; -   int i, j; - -   GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn); -   for (i = 0; i < p->nr_insn; i++) { -      if (removeInst[i]) -         continue; - -      const struct brw_instruction *mov = p->store + i; -      int mrf_index, grf_index; -      GLboolean is_compr4; - -      /* Only consider _straight_ grf-to-mrf moves */ -      if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4)) -         continue; - -      const int mrf_index0 = mrf_index; -      const int mrf_index1 = is_compr4 ? mrf_index0+4 : mrf_index0+1; -      const int simd16_size = 2 * BRW_SIZE_OF_REG; - -      for (j = i + 1; j < p->nr_insn; j++) { -         const struct brw_instruction *inst = p->store + j; - -         if (brw_inst_are_equal(mov, inst)) { -            removeInst[j] = GL_TRUE; -            continue; -         } - -         if (brw_is_grf_written(inst, grf_index, simd16_size, gen) || -             brw_is_mrf_written(inst, mrf_index0, BRW_SIZE_OF_REG) || -             brw_is_mrf_written(inst, mrf_index1, BRW_SIZE_OF_REG)) -            break; -      } -   } - -   brw_remove_inst(p, removeInst); -   free(removeInst); -} - -static void brw_remove_mrf_to_grf_moves(struct brw_wm_compile *c) -{ -   int i, j, prev; -   struct brw_compile *p = &c->func; -   struct brw_context *brw = p->brw; -   const int gen = brw->intel.gen; -   const int simd16_size = 2*BRW_SIZE_OF_REG; - -   if (c->dispatch_width != 16 || brw->has_compr4 == GL_FALSE) -      return; - -   GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn); -   assert(removeInst); - -   for (i = 0; i < p->nr_insn; i++) { -      if (removeInst[i]) -         continue; - -      struct brw_instruction *grf_inst = NULL; -      const struct brw_instruction *mov = p->store + i; -      int mrf_index, grf_index; -      GLboolean is_compr4; - -      /* Only consider _straight_ grf-to-mrf moves */ -      if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4)) -         continue; - -      /* Using comp4 enables a stride of 4 for this instruction */ -      const int mrf_index0 = mrf_index; -      const int mrf_index1 = is_compr4 ? mrf_index+4 : mrf_index+1; - -      /* Look where the register has been set */ -      prev = i; -      GLboolean potential_remove = GL_FALSE; -      while (prev--) { - -         /* If _one_ instruction writes the grf, we try to remove the mov */ -         struct brw_instruction *inst = p->store + prev; -         if (brw_is_grf_straight_write(inst, grf_index)) { -            potential_remove = GL_TRUE; -            grf_inst = inst; -            break; -         } - -      } - -      if (potential_remove == GL_FALSE) -         continue; -      removeInst[i] = GL_TRUE; - -      /* Monitor first the section of code between the grf computation and the -       * mov. Here we cannot read or write both mrf and grf register -       */ -      for (j = prev + 1; j < i; ++j) { -         struct brw_instruction *inst = p->store + j; -         if (removeInst[j]) -            continue; -         if (brw_is_grf_written(inst, grf_index, simd16_size, gen)   || -             brw_is_grf_read(inst, grf_index, simd16_size)           || -             brw_is_mrf_written(inst, mrf_index0, BRW_SIZE_OF_REG)   || -             brw_is_mrf_written(inst, mrf_index1, BRW_SIZE_OF_REG)   || -             brw_is_mrf_read(inst, mrf_index0, BRW_SIZE_OF_REG, gen) || -             brw_is_mrf_read(inst, mrf_index1, BRW_SIZE_OF_REG, gen)) { -            removeInst[i] = GL_FALSE; -            break; -         } -      } - -      /* After the mov, we can read or write the mrf. If the grf is overwritten, -       * we are done -       */ -      for (j = i + 1; j < p->nr_insn; ++j) { -         struct brw_instruction *inst = p->store + j; -         if (removeInst[j]) -            continue; - -         if (brw_is_grf_read(inst, grf_index, simd16_size)) { -            removeInst[i] = GL_FALSE; -            break; -         } - -         if (brw_is_grf_straight_write(inst, grf_index)) -            break; -      } - -      /* Note that with the top down traversal, we can safely pacth the mov -       * instruction -       */ -      if (removeInst[i]) { -         grf_inst->bits1.da1.dest_reg_file = mov->bits1.da1.dest_reg_file; -         grf_inst->bits1.da1.dest_reg_nr = mov->bits1.da1.dest_reg_nr; -      } -   } - -   brw_remove_inst(p, removeInst); -   free(removeInst); -}  /* Emit the fragment program instructions here.   */ @@ -2331,8 +1714,9 @@ void brw_wm_emit( struct brw_wm_compile *c )     /* Only properly tested on ILK */     if (p->brw->intel.gen == 5) { -     brw_remove_duplicate_mrf_moves(c); -     brw_remove_mrf_to_grf_moves(c); +     brw_remove_duplicate_mrf_moves(p); +     if (c->dispatch_width == 16) +	brw_remove_mrf_to_grf_moves(p);     }     if (INTEL_DEBUG & DEBUG_WM) { | 
