From 22f839292f48a47601e1b97a7f4679018c42d0ed Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 26 Jul 2010 12:41:39 -0700 Subject: i965: Move the GRF-to-MRF optimizations to brw_optimize.c. --- src/mesa/drivers/dri/i965/brw_eu.h | 2 + src/mesa/drivers/dri/i965/brw_optimize.c | 613 ++++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 622 +------------------------------ 3 files changed, 618 insertions(+), 619 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 31ff86cf73..bc151738f6 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -984,5 +984,7 @@ void brw_set_src1( struct brw_instruction *insn, /* brw_optimize.c */ void brw_optimize(struct brw_compile *p); +void brw_remove_duplicate_mrf_moves(struct brw_compile *p); +void brw_remove_mrf_to_grf_moves(struct brw_compile *p); #endif diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c index a364b15820..136dbbd73a 100644 --- a/src/mesa/drivers/dri/i965/brw_optimize.c +++ b/src/mesa/drivers/dri/i965/brw_optimize.c @@ -32,6 +32,619 @@ #include "brw_defines.h" #include "brw_eu.h" +#define BRW_MRF_NUM 16 +#define BRW_SIZE_OF_REG 32 + +static INLINE +GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst) +{ + switch (inst->header.opcode) { + case BRW_OPCODE_MOV: + case BRW_OPCODE_SEL: + case BRW_OPCODE_NOT: + case BRW_OPCODE_AND: + case BRW_OPCODE_OR: + case BRW_OPCODE_XOR: + case BRW_OPCODE_SHR: + case BRW_OPCODE_SHL: + case BRW_OPCODE_RSR: + case BRW_OPCODE_RSL: + case BRW_OPCODE_ADD: + case BRW_OPCODE_MUL: + case BRW_OPCODE_AVG: + case BRW_OPCODE_FRC: + case BRW_OPCODE_RNDU: + case BRW_OPCODE_RNDD: + case BRW_OPCODE_RNDE: + case BRW_OPCODE_RNDZ: + case BRW_OPCODE_MAC: + case BRW_OPCODE_MACH: + case BRW_OPCODE_LINE: + return GL_TRUE; + default: + return GL_FALSE; + } +} + +static const struct { + char *name; + int nsrc; + int ndst; +} inst_opcode[128] = { + [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + + [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, +}; + +static const GLuint inst_stride[7] = { + [0] = 0, + [1] = 1, + [2] = 2, + [3] = 4, + [4] = 8, + [5] = 16, + [6] = 32 +}; + +static const GLuint inst_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 +}; + +#define BRW_MAX_OFFSET(x0,x1) ((x0) > (x1) ? (x0) : (x1)) +#define BRW_MIN_OFFSET(x0,x1) ((x0) < (x1) ? (x0) : (x1)); + +static INLINE GLboolean +brw_is_grf_written(const struct brw_instruction *inst, + int reg_index, int size, + int gen) +{ + if (inst_opcode[inst->header.opcode].ndst == 0) + return GL_FALSE; + + if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) + if (inst->bits1.ia1.dest_reg_file == BRW_GENERAL_REGISTER_FILE) + return GL_TRUE; + + if (inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE) + return GL_FALSE; + + const int reg_start = reg_index * BRW_SIZE_OF_REG; + const int reg_end = reg_start + size; + + const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type]; + const int write_start = inst->bits1.da1.dest_reg_nr*BRW_SIZE_OF_REG + + inst->bits1.da1.dest_subreg_nr; + int length, write_end; + + /* SEND is specific */ + if (inst->header.opcode == BRW_OPCODE_SEND) { + if (gen >= 5) + length = inst->bits3.generic_gen5.response_length*BRW_SIZE_OF_REG; + else + length = inst->bits3.generic.response_length*BRW_SIZE_OF_REG; + } + else { + length = 1 << inst->header.execution_size; + length *= type_size; + length *= inst->bits1.da1.dest_horiz_stride; + } + + /* If the two intervals intersect, we overwrite the register */ + write_end = write_start + length; + const int left = BRW_MAX_OFFSET(write_start, reg_start); + const int right = BRW_MIN_OFFSET(write_end, reg_end); + + return left < right; +} + +/* Specific path for message register since we need to handle the compr4 case */ +static INLINE GLboolean +brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size) +{ + if (inst_opcode[inst->header.opcode].ndst == 0) + return GL_FALSE; + + if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) + if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE) + return GL_TRUE; + + if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE) + return GL_FALSE; + + const int reg_start = reg_index * BRW_SIZE_OF_REG; + const int reg_end = reg_start + size; + + const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f; + const int is_compr4 = inst->bits1.da1.dest_reg_nr & 0xf0; + const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type]; + + /* We use compr4 with a size != 16 elements. Strange, we conservatively + * consider that we are writing the register. + */ + if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16) + return GL_TRUE; + + GLboolean is_written = GL_FALSE; + + /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */ + if (is_compr4) { + const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride; + + /* First 8-way register */ + const int write_start0 = mrf_index*BRW_SIZE_OF_REG + + inst->bits1.da1.dest_subreg_nr; + const int write_end0 = write_start0 + length; + + /* Second 8-way register */ + const int write_start1 = (mrf_index+4)*BRW_SIZE_OF_REG + + inst->bits1.da1.dest_subreg_nr; + const int write_end1 = write_start1 + length; + + /* If the two intervals intersect, we overwrite the register */ + const int left0 = BRW_MAX_OFFSET(write_start0, reg_start); + const int right0 = BRW_MIN_OFFSET(write_end0, reg_end); + const int left1 = BRW_MAX_OFFSET(write_start1, reg_start); + const int right1 = BRW_MIN_OFFSET(write_end1, reg_end); + + is_written = left0 < right0 || left1 < right1; + } + else { + int length; + length = 1 << inst->header.execution_size; + length *= type_size; + length *= inst->bits1.da1.dest_horiz_stride; + + /* If the two intervals intersect, we write into the register */ + const int write_start = inst->bits1.da1.dest_reg_nr*BRW_SIZE_OF_REG + + inst->bits1.da1.dest_subreg_nr; + const int write_end = write_start + length; + const int left = BRW_MAX_OFFSET(write_start, reg_start); + const int right = BRW_MIN_OFFSET(write_end, reg_end);; + + is_written = left < right; + } + + /* SEND may perform an implicit mov to a mrf register */ + if (is_written == GL_FALSE && + inst->header.opcode == BRW_OPCODE_SEND && + inst->bits1.da1.src0_reg_file != 0) { + + const int mrf_start = inst->header.destreg__conditionalmod; + const int write_start = mrf_start * BRW_SIZE_OF_REG; + const int write_end = write_start + BRW_SIZE_OF_REG; + const int left = BRW_MAX_OFFSET(write_start, reg_start); + const int right = BRW_MIN_OFFSET(write_end, reg_end);; + is_written = left < right; + } + + return is_written; +} + +static INLINE GLboolean +brw_is_mrf_read(const struct brw_instruction *inst, + int reg_index, int size, int gen) +{ + if (inst->header.opcode != BRW_OPCODE_SEND) + return GL_FALSE; + if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) + return GL_TRUE; + + const int reg_start = reg_index*BRW_SIZE_OF_REG; + const int reg_end = reg_start + size; + + int length, read_start, read_end; + if (gen >= 5) + length = inst->bits3.generic_gen5.msg_length*BRW_SIZE_OF_REG; + else + length = inst->bits3.generic.msg_length*BRW_SIZE_OF_REG; + + /* Look if SEND uses an implicit mov. In that case, we read one less register + * (but we write it) + */ + if (inst->bits1.da1.src0_reg_file != 0) + read_start = inst->header.destreg__conditionalmod; + else { + length--; + read_start = inst->header.destreg__conditionalmod + 1; + } + read_start *= BRW_SIZE_OF_REG; + read_end = read_start + length; + + const int left = BRW_MAX_OFFSET(read_start, reg_start); + const int right = BRW_MIN_OFFSET(read_end, reg_end); + + return left < right; +} + +static INLINE GLboolean +brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size) +{ + int i, j; + if (inst_opcode[inst->header.opcode].nsrc == 0) + return GL_FALSE; + + /* Look at first source. We must take into account register regions to + * monitor carefully the read. Note that we are a bit too conservative here + * since we do not take into account the fact that some complete registers + * may be skipped + */ + if (inst_opcode[inst->header.opcode].nsrc >= 1) { + + if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) + if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE) + return GL_TRUE; + if (inst->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE) + return GL_FALSE; + + const int reg_start = reg_index*BRW_SIZE_OF_REG; + const int reg_end = reg_start + size; + + /* See if at least one of this element intersects the interval */ + const int type_size = inst_type_size[inst->bits1.da1.src0_reg_type]; + const int elem_num = 1 << inst->header.execution_size; + const int width = 1 << inst->bits2.da1.src0_width; + const int row_num = elem_num >> inst->bits2.da1.src0_width; + const int hs = type_size*inst_stride[inst->bits2.da1.src0_horiz_stride]; + const int vs = type_size*inst_stride[inst->bits2.da1.src0_vert_stride]; + int row_start = inst->bits2.da1.src0_reg_nr*BRW_SIZE_OF_REG + + inst->bits2.da1.src0_subreg_nr; + for (j = 0; j < row_num; ++j) { + int write_start = row_start; + for (i = 0; i < width; ++i) { + const int write_end = write_start + type_size; + const int left = write_start > reg_start ? write_start : reg_start; + const int right = write_end < reg_end ? write_end : reg_end; + if (left < right) + return GL_TRUE; + write_start += hs; + } + row_start += vs; + } + } + + /* Second src register */ + if (inst_opcode[inst->header.opcode].nsrc >= 2) { + + if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT) + if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE) + return GL_TRUE; + if (inst->bits1.da1.src1_reg_file != BRW_GENERAL_REGISTER_FILE) + return GL_FALSE; + + const int reg_start = reg_index*BRW_SIZE_OF_REG; + const int reg_end = reg_start + size; + + /* See if at least one of this element intersects the interval */ + const int type_size = inst_type_size[inst->bits1.da1.src1_reg_type]; + const int elem_num = 1 << inst->header.execution_size; + const int width = 1 << inst->bits3.da1.src1_width; + const int row_num = elem_num >> inst->bits3.da1.src1_width; + const int hs = type_size*inst_stride[inst->bits3.da1.src1_horiz_stride]; + const int vs = type_size*inst_stride[inst->bits3.da1.src1_vert_stride]; + int row_start = inst->bits3.da1.src1_reg_nr*BRW_SIZE_OF_REG + + inst->bits3.da1.src1_subreg_nr; + for (j = 0; j < row_num; ++j) { + int write_start = row_start; + for (i = 0; i < width; ++i) { + const int write_end = write_start + type_size; + const int left = write_start > reg_start ? write_start : reg_start; + const int right = write_end < reg_end ? write_end : reg_end; + if (left < right) + return GL_TRUE; + write_start += hs; + } + row_start += vs; + } + } + + return GL_FALSE; +} + +static INLINE GLboolean +brw_is_control_done(const struct brw_instruction *mov) { + return + mov->header.dependency_control != 0 || + mov->header.thread_control != 0 || + mov->header.mask_control != 0 || + mov->header.saturate != 0 || + mov->header.debug_control != 0; +} + +static INLINE GLboolean +brw_is_predicated(const struct brw_instruction *mov) { + return mov->header.predicate_control != 0; +} + +static INLINE GLboolean +brw_is_grf_to_mrf_mov(const struct brw_instruction *mov, + int *mrf_index, + int *grf_index, + GLboolean *is_compr4) +{ + if (brw_is_predicated(mov) || + brw_is_control_done(mov) || + mov->header.debug_control != 0) + return GL_FALSE; + + if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT || + mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE || + mov->bits1.da1.dest_reg_type != 7 || + mov->bits1.da1.dest_horiz_stride != 1 || + mov->bits1.da1.dest_subreg_nr != 0) + return GL_FALSE; + + if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT || + mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE || + mov->bits1.da1.src0_reg_type != 7 || + mov->bits2.da1.src0_width != 3 || + mov->bits2.da1.src0_horiz_stride != 1 || + mov->bits2.da1.src0_vert_stride != 4 || + mov->bits2.da1.src0_subreg_nr != 0 || + mov->bits2.da1.src0_abs != 0 || + mov->bits2.da1.src0_negate != 0) + return GL_FALSE; + + *grf_index = mov->bits2.da1.src0_reg_nr; + *mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f; + *is_compr4 = (mov->bits1.da1.dest_reg_nr & 0xf0) != 0; + return GL_TRUE; +} + +static INLINE GLboolean +brw_is_grf_straight_write(const struct brw_instruction *inst, int grf_index) +{ + /* remark: no problem to predicate a SEL instruction */ + if ((!brw_is_predicated(inst) || inst->header.opcode == BRW_OPCODE_SEL) && + brw_is_control_done(inst) == GL_FALSE && + inst->header.execution_size == 4 && + inst->header.access_mode == BRW_ALIGN_1 && + inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT && + inst->bits1.da1.dest_reg_file == BRW_GENERAL_REGISTER_FILE && + inst->bits1.da1.dest_reg_type == 7 && + inst->bits1.da1.dest_horiz_stride == 1 && + inst->bits1.da1.dest_reg_nr == grf_index && + inst->bits1.da1.dest_subreg_nr == 0 && + brw_is_arithmetic_inst(inst)) + return GL_TRUE; + + return GL_FALSE; +} + +static INLINE GLboolean +brw_inst_are_equal(const struct brw_instruction *src0, + const struct brw_instruction *src1) +{ + const GLuint *field0 = (GLuint *) src0; + const GLuint *field1 = (GLuint *) src1; + return field0[0] == field1[0] && + field0[1] == field1[1] && + field0[2] == field1[2] && + field0[3] == field1[3]; +} + +static INLINE void +brw_inst_copy(struct brw_instruction *dst, + const struct brw_instruction *src) +{ + GLuint *field_dst = (GLuint *) dst; + const GLuint *field_src = (GLuint *) src; + field_dst[0] = field_src[0]; + field_dst[1] = field_src[1]; + field_dst[2] = field_src[2]; + field_dst[3] = field_src[3]; +} + +static void brw_remove_inst(struct brw_compile *p, const GLboolean *removeInst) +{ + int i, nr_insn = 0, to = 0, from = 0; + + for (from = 0; from < p->nr_insn; ++from) { + if (removeInst[from]) + continue; + if(to != from) + brw_inst_copy(p->store + to, p->store + from); + to++; + } + + for (i = 0; i < p->nr_insn; ++i) + if (removeInst[i] == GL_FALSE) + nr_insn++; + p->nr_insn = nr_insn; +} + +/* The gen code emitter generates a lot of duplications in the mrf-to-grf moves. + * Here, we monitor same mov mrf-to-grf instrutions and remove them as soon as + * none of the two operands have been written + */ +void brw_remove_duplicate_mrf_moves(struct brw_compile *p) +{ + const int gen = p->brw->intel.gen; + int i, j; + + GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn); + for (i = 0; i < p->nr_insn; i++) { + if (removeInst[i]) + continue; + + const struct brw_instruction *mov = p->store + i; + int mrf_index, grf_index; + GLboolean is_compr4; + + /* Only consider _straight_ grf-to-mrf moves */ + if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4)) + continue; + + const int mrf_index0 = mrf_index; + const int mrf_index1 = is_compr4 ? mrf_index0+4 : mrf_index0+1; + const int simd16_size = 2 * BRW_SIZE_OF_REG; + + for (j = i + 1; j < p->nr_insn; j++) { + const struct brw_instruction *inst = p->store + j; + + if (brw_inst_are_equal(mov, inst)) { + removeInst[j] = GL_TRUE; + continue; + } + + if (brw_is_grf_written(inst, grf_index, simd16_size, gen) || + brw_is_mrf_written(inst, mrf_index0, BRW_SIZE_OF_REG) || + brw_is_mrf_written(inst, mrf_index1, BRW_SIZE_OF_REG)) + break; + } + } + + brw_remove_inst(p, removeInst); + free(removeInst); +} + +void brw_remove_mrf_to_grf_moves(struct brw_compile *p) +{ + int i, j, prev; + struct brw_context *brw = p->brw; + const int gen = brw->intel.gen; + const int simd16_size = 2*BRW_SIZE_OF_REG; + + GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn); + assert(removeInst); + + for (i = 0; i < p->nr_insn; i++) { + if (removeInst[i]) + continue; + + struct brw_instruction *grf_inst = NULL; + const struct brw_instruction *mov = p->store + i; + int mrf_index, grf_index; + GLboolean is_compr4; + + /* Only consider _straight_ grf-to-mrf moves */ + if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4)) + continue; + + /* Using comp4 enables a stride of 4 for this instruction */ + const int mrf_index0 = mrf_index; + const int mrf_index1 = is_compr4 ? mrf_index+4 : mrf_index+1; + + /* Look where the register has been set */ + prev = i; + GLboolean potential_remove = GL_FALSE; + while (prev--) { + + /* If _one_ instruction writes the grf, we try to remove the mov */ + struct brw_instruction *inst = p->store + prev; + if (brw_is_grf_straight_write(inst, grf_index)) { + potential_remove = GL_TRUE; + grf_inst = inst; + break; + } + + } + + if (potential_remove == GL_FALSE) + continue; + removeInst[i] = GL_TRUE; + + /* Monitor first the section of code between the grf computation and the + * mov. Here we cannot read or write both mrf and grf register + */ + for (j = prev + 1; j < i; ++j) { + struct brw_instruction *inst = p->store + j; + if (removeInst[j]) + continue; + if (brw_is_grf_written(inst, grf_index, simd16_size, gen) || + brw_is_grf_read(inst, grf_index, simd16_size) || + brw_is_mrf_written(inst, mrf_index0, BRW_SIZE_OF_REG) || + brw_is_mrf_written(inst, mrf_index1, BRW_SIZE_OF_REG) || + brw_is_mrf_read(inst, mrf_index0, BRW_SIZE_OF_REG, gen) || + brw_is_mrf_read(inst, mrf_index1, BRW_SIZE_OF_REG, gen)) { + removeInst[i] = GL_FALSE; + break; + } + } + + /* After the mov, we can read or write the mrf. If the grf is overwritten, + * we are done + */ + for (j = i + 1; j < p->nr_insn; ++j) { + struct brw_instruction *inst = p->store + j; + if (removeInst[j]) + continue; + + if (brw_is_grf_read(inst, grf_index, simd16_size)) { + removeInst[i] = GL_FALSE; + break; + } + + if (brw_is_grf_straight_write(inst, grf_index)) + break; + } + + /* Note that with the top down traversal, we can safely pacth the mov + * instruction + */ + if (removeInst[i]) { + grf_inst->bits1.da1.dest_reg_file = mov->bits1.da1.dest_reg_file; + grf_inst->bits1.da1.dest_reg_nr = mov->bits1.da1.dest_reg_nr; + } + } + + brw_remove_inst(p, removeInst); + free(removeInst); +} + static GLboolean is_single_channel_dp4(struct brw_instruction *insn) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index d10e1c70d2..b09071fe97 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1459,623 +1459,6 @@ static void spill_values( struct brw_wm_compile *c, emit_spill(c, values[i].hw_reg, values[i].spill_slot); } -#define BRW_MRF_NUM 16 -#define BRW_SIZE_OF_REG 32 - -static INLINE -GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst) -{ - switch (inst->header.opcode) { - case BRW_OPCODE_MOV: - case BRW_OPCODE_SEL: - case BRW_OPCODE_NOT: - case BRW_OPCODE_AND: - case BRW_OPCODE_OR: - case BRW_OPCODE_XOR: - case BRW_OPCODE_SHR: - case BRW_OPCODE_SHL: - case BRW_OPCODE_RSR: - case BRW_OPCODE_RSL: - case BRW_OPCODE_ADD: - case BRW_OPCODE_MUL: - case BRW_OPCODE_AVG: - case BRW_OPCODE_FRC: - case BRW_OPCODE_RNDU: - case BRW_OPCODE_RNDD: - case BRW_OPCODE_RNDE: - case BRW_OPCODE_RNDZ: - case BRW_OPCODE_MAC: - case BRW_OPCODE_MACH: - case BRW_OPCODE_LINE: - return GL_TRUE; - default: - return GL_FALSE; - } -} - -static const struct { - char *name; - int nsrc; - int ndst; -} inst_opcode[128] = { - [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, - [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, - [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, - [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, - [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, - [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, - [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, - [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, - - [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, - - [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, - - [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, - [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, - [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, - [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, - [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, - [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, - [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, - [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, - [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, - [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, - [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, -}; - -static const GLuint inst_stride[7] = { - [0] = 0, - [1] = 1, - [2] = 2, - [3] = 4, - [4] = 8, - [5] = 16, - [6] = 32 -}; - -static const GLuint inst_type_size[8] = { - [0] = 4, - [1] = 4, - [2] = 2, - [3] = 2, - [4] = 1, - [5] = 1, - [7] = 4 -}; - -#define BRW_MAX_OFFSET(x0,x1) ((x0) > (x1) ? (x0) : (x1)) -#define BRW_MIN_OFFSET(x0,x1) ((x0) < (x1) ? (x0) : (x1)); - -static INLINE GLboolean -brw_is_grf_written(const struct brw_instruction *inst, - int reg_index, int size, - int gen) -{ - if (inst_opcode[inst->header.opcode].ndst == 0) - return GL_FALSE; - - if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) - if (inst->bits1.ia1.dest_reg_file == BRW_GENERAL_REGISTER_FILE) - return GL_TRUE; - - if (inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE) - return GL_FALSE; - - const int reg_start = reg_index * BRW_SIZE_OF_REG; - const int reg_end = reg_start + size; - - const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type]; - const int write_start = inst->bits1.da1.dest_reg_nr*BRW_SIZE_OF_REG - + inst->bits1.da1.dest_subreg_nr; - int length, write_end; - - /* SEND is specific */ - if (inst->header.opcode == BRW_OPCODE_SEND) { - if (gen >= 5) - length = inst->bits3.generic_gen5.response_length*BRW_SIZE_OF_REG; - else - length = inst->bits3.generic.response_length*BRW_SIZE_OF_REG; - } - else { - length = 1 << inst->header.execution_size; - length *= type_size; - length *= inst->bits1.da1.dest_horiz_stride; - } - - /* If the two intervals intersect, we overwrite the register */ - write_end = write_start + length; - const int left = BRW_MAX_OFFSET(write_start, reg_start); - const int right = BRW_MIN_OFFSET(write_end, reg_end); - - return left < right; -} - -/* Specific path for message register since we need to handle the compr4 case */ -static INLINE GLboolean -brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size) -{ - if (inst_opcode[inst->header.opcode].ndst == 0) - return GL_FALSE; - - if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT) - if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE) - return GL_TRUE; - - if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE) - return GL_FALSE; - - const int reg_start = reg_index * BRW_SIZE_OF_REG; - const int reg_end = reg_start + size; - - const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f; - const int is_compr4 = inst->bits1.da1.dest_reg_nr & 0xf0; - const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type]; - - /* We use compr4 with a size != 16 elements. Strange, we conservatively - * consider that we are writing the register. - */ - if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16) - return GL_TRUE; - - GLboolean is_written = GL_FALSE; - - /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */ - if (is_compr4) { - const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride; - - /* First 8-way register */ - const int write_start0 = mrf_index*BRW_SIZE_OF_REG - + inst->bits1.da1.dest_subreg_nr; - const int write_end0 = write_start0 + length; - - /* Second 8-way register */ - const int write_start1 = (mrf_index+4)*BRW_SIZE_OF_REG - + inst->bits1.da1.dest_subreg_nr; - const int write_end1 = write_start1 + length; - - /* If the two intervals intersect, we overwrite the register */ - const int left0 = BRW_MAX_OFFSET(write_start0, reg_start); - const int right0 = BRW_MIN_OFFSET(write_end0, reg_end); - const int left1 = BRW_MAX_OFFSET(write_start1, reg_start); - const int right1 = BRW_MIN_OFFSET(write_end1, reg_end); - - is_written = left0 < right0 || left1 < right1; - } - else { - int length; - length = 1 << inst->header.execution_size; - length *= type_size; - length *= inst->bits1.da1.dest_horiz_stride; - - /* If the two intervals intersect, we write into the register */ - const int write_start = inst->bits1.da1.dest_reg_nr*BRW_SIZE_OF_REG - + inst->bits1.da1.dest_subreg_nr; - const int write_end = write_start + length; - const int left = BRW_MAX_OFFSET(write_start, reg_start); - const int right = BRW_MIN_OFFSET(write_end, reg_end);; - - is_written = left < right; - } - - /* SEND may perform an implicit mov to a mrf register */ - if (is_written == GL_FALSE && - inst->header.opcode == BRW_OPCODE_SEND && - inst->bits1.da1.src0_reg_file != 0) { - - const int mrf_start = inst->header.destreg__conditionalmod; - const int write_start = mrf_start * BRW_SIZE_OF_REG; - const int write_end = write_start + BRW_SIZE_OF_REG; - const int left = BRW_MAX_OFFSET(write_start, reg_start); - const int right = BRW_MIN_OFFSET(write_end, reg_end);; - is_written = left < right; - } - - return is_written; -} - -static INLINE GLboolean -brw_is_mrf_read(const struct brw_instruction *inst, - int reg_index, int size, int gen) -{ - if (inst->header.opcode != BRW_OPCODE_SEND) - return GL_FALSE; - if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) - return GL_TRUE; - - const int reg_start = reg_index*BRW_SIZE_OF_REG; - const int reg_end = reg_start + size; - - int length, read_start, read_end; - if (gen >= 5) - length = inst->bits3.generic_gen5.msg_length*BRW_SIZE_OF_REG; - else - length = inst->bits3.generic.msg_length*BRW_SIZE_OF_REG; - - /* Look if SEND uses an implicit mov. In that case, we read one less register - * (but we write it) - */ - if (inst->bits1.da1.src0_reg_file != 0) - read_start = inst->header.destreg__conditionalmod; - else { - length--; - read_start = inst->header.destreg__conditionalmod + 1; - } - read_start *= BRW_SIZE_OF_REG; - read_end = read_start + length; - - const int left = BRW_MAX_OFFSET(read_start, reg_start); - const int right = BRW_MIN_OFFSET(read_end, reg_end); - - return left < right; -} - -static INLINE GLboolean -brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size) -{ - int i, j; - if (inst_opcode[inst->header.opcode].nsrc == 0) - return GL_FALSE; - - /* Look at first source. We must take into account register regions to - * monitor carefully the read. Note that we are a bit too conservative here - * since we do not take into account the fact that some complete registers - * may be skipped - */ - if (inst_opcode[inst->header.opcode].nsrc >= 1) { - - if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) - if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE) - return GL_TRUE; - if (inst->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE) - return GL_FALSE; - - const int reg_start = reg_index*BRW_SIZE_OF_REG; - const int reg_end = reg_start + size; - - /* See if at least one of this element intersects the interval */ - const int type_size = inst_type_size[inst->bits1.da1.src0_reg_type]; - const int elem_num = 1 << inst->header.execution_size; - const int width = 1 << inst->bits2.da1.src0_width; - const int row_num = elem_num >> inst->bits2.da1.src0_width; - const int hs = type_size*inst_stride[inst->bits2.da1.src0_horiz_stride]; - const int vs = type_size*inst_stride[inst->bits2.da1.src0_vert_stride]; - int row_start = inst->bits2.da1.src0_reg_nr*BRW_SIZE_OF_REG - + inst->bits2.da1.src0_subreg_nr; - for (j = 0; j < row_num; ++j) { - int write_start = row_start; - for (i = 0; i < width; ++i) { - const int write_end = write_start + type_size; - const int left = write_start > reg_start ? write_start : reg_start; - const int right = write_end < reg_end ? write_end : reg_end; - if (left < right) - return GL_TRUE; - write_start += hs; - } - row_start += vs; - } - } - - /* Second src register */ - if (inst_opcode[inst->header.opcode].nsrc >= 2) { - - if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT) - if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE) - return GL_TRUE; - if (inst->bits1.da1.src1_reg_file != BRW_GENERAL_REGISTER_FILE) - return GL_FALSE; - - const int reg_start = reg_index*BRW_SIZE_OF_REG; - const int reg_end = reg_start + size; - - /* See if at least one of this element intersects the interval */ - const int type_size = inst_type_size[inst->bits1.da1.src1_reg_type]; - const int elem_num = 1 << inst->header.execution_size; - const int width = 1 << inst->bits3.da1.src1_width; - const int row_num = elem_num >> inst->bits3.da1.src1_width; - const int hs = type_size*inst_stride[inst->bits3.da1.src1_horiz_stride]; - const int vs = type_size*inst_stride[inst->bits3.da1.src1_vert_stride]; - int row_start = inst->bits3.da1.src1_reg_nr*BRW_SIZE_OF_REG - + inst->bits3.da1.src1_subreg_nr; - for (j = 0; j < row_num; ++j) { - int write_start = row_start; - for (i = 0; i < width; ++i) { - const int write_end = write_start + type_size; - const int left = write_start > reg_start ? write_start : reg_start; - const int right = write_end < reg_end ? write_end : reg_end; - if (left < right) - return GL_TRUE; - write_start += hs; - } - row_start += vs; - } - } - - return GL_FALSE; -} - -static INLINE GLboolean -brw_is_control_done(const struct brw_instruction *mov) { - return - mov->header.dependency_control != 0 || - mov->header.thread_control != 0 || - mov->header.mask_control != 0 || - mov->header.saturate != 0 || - mov->header.debug_control != 0; -} - -static INLINE GLboolean -brw_is_predicated(const struct brw_instruction *mov) { - return mov->header.predicate_control != 0; -} - -static INLINE GLboolean -brw_is_grf_to_mrf_mov(const struct brw_instruction *mov, - int *mrf_index, - int *grf_index, - GLboolean *is_compr4) -{ - if (brw_is_predicated(mov) || - brw_is_control_done(mov) || - mov->header.debug_control != 0) - return GL_FALSE; - - if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT || - mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE || - mov->bits1.da1.dest_reg_type != 7 || - mov->bits1.da1.dest_horiz_stride != 1 || - mov->bits1.da1.dest_subreg_nr != 0) - return GL_FALSE; - - if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT || - mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE || - mov->bits1.da1.src0_reg_type != 7 || - mov->bits2.da1.src0_width != 3 || - mov->bits2.da1.src0_horiz_stride != 1 || - mov->bits2.da1.src0_vert_stride != 4 || - mov->bits2.da1.src0_subreg_nr != 0 || - mov->bits2.da1.src0_abs != 0 || - mov->bits2.da1.src0_negate != 0) - return GL_FALSE; - - *grf_index = mov->bits2.da1.src0_reg_nr; - *mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f; - *is_compr4 = (mov->bits1.da1.dest_reg_nr & 0xf0) != 0; - return GL_TRUE; -} - -static INLINE GLboolean -brw_is_grf_straight_write(const struct brw_instruction *inst, int grf_index) -{ - /* remark: no problem to predicate a SEL instruction */ - if ((!brw_is_predicated(inst) || inst->header.opcode == BRW_OPCODE_SEL) && - brw_is_control_done(inst) == GL_FALSE && - inst->header.execution_size == 4 && - inst->header.access_mode == BRW_ALIGN_1 && - inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT && - inst->bits1.da1.dest_reg_file == BRW_GENERAL_REGISTER_FILE && - inst->bits1.da1.dest_reg_type == 7 && - inst->bits1.da1.dest_horiz_stride == 1 && - inst->bits1.da1.dest_reg_nr == grf_index && - inst->bits1.da1.dest_subreg_nr == 0 && - brw_is_arithmetic_inst(inst)) - return GL_TRUE; - - return GL_FALSE; -} - -static INLINE GLboolean -brw_inst_are_equal(const struct brw_instruction *src0, - const struct brw_instruction *src1) -{ - const GLuint *field0 = (GLuint *) src0; - const GLuint *field1 = (GLuint *) src1; - return field0[0] == field1[0] && - field0[1] == field1[1] && - field0[2] == field1[2] && - field0[3] == field1[3]; -} - -static INLINE void -brw_inst_copy(struct brw_instruction *dst, - const struct brw_instruction *src) -{ - GLuint *field_dst = (GLuint *) dst; - const GLuint *field_src = (GLuint *) src; - field_dst[0] = field_src[0]; - field_dst[1] = field_src[1]; - field_dst[2] = field_src[2]; - field_dst[3] = field_src[3]; -} - -static void brw_remove_inst(struct brw_compile *p, const GLboolean *removeInst) -{ - int i, nr_insn = 0, to = 0, from = 0; - - for (from = 0; from < p->nr_insn; ++from) { - if (removeInst[from]) - continue; - if(to != from) - brw_inst_copy(p->store + to, p->store + from); - to++; - } - - for (i = 0; i < p->nr_insn; ++i) - if (removeInst[i] == GL_FALSE) - nr_insn++; - p->nr_insn = nr_insn; -} - -/* The gen code emitter generates a lot of duplications in the mrf-to-grf moves. - * Here, we monitor same mov mrf-to-grf instrutions and remove them as soon as - * none of the two operands have been written - */ -static void brw_remove_duplicate_mrf_moves(struct brw_wm_compile *c) -{ - struct brw_compile *p = &c->func; - const int gen = p->brw->intel.gen; - int i, j; - - GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn); - for (i = 0; i < p->nr_insn; i++) { - if (removeInst[i]) - continue; - - const struct brw_instruction *mov = p->store + i; - int mrf_index, grf_index; - GLboolean is_compr4; - - /* Only consider _straight_ grf-to-mrf moves */ - if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4)) - continue; - - const int mrf_index0 = mrf_index; - const int mrf_index1 = is_compr4 ? mrf_index0+4 : mrf_index0+1; - const int simd16_size = 2 * BRW_SIZE_OF_REG; - - for (j = i + 1; j < p->nr_insn; j++) { - const struct brw_instruction *inst = p->store + j; - - if (brw_inst_are_equal(mov, inst)) { - removeInst[j] = GL_TRUE; - continue; - } - - if (brw_is_grf_written(inst, grf_index, simd16_size, gen) || - brw_is_mrf_written(inst, mrf_index0, BRW_SIZE_OF_REG) || - brw_is_mrf_written(inst, mrf_index1, BRW_SIZE_OF_REG)) - break; - } - } - - brw_remove_inst(p, removeInst); - free(removeInst); -} - -static void brw_remove_mrf_to_grf_moves(struct brw_wm_compile *c) -{ - int i, j, prev; - struct brw_compile *p = &c->func; - struct brw_context *brw = p->brw; - const int gen = brw->intel.gen; - const int simd16_size = 2*BRW_SIZE_OF_REG; - - if (c->dispatch_width != 16 || brw->has_compr4 == GL_FALSE) - return; - - GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn); - assert(removeInst); - - for (i = 0; i < p->nr_insn; i++) { - if (removeInst[i]) - continue; - - struct brw_instruction *grf_inst = NULL; - const struct brw_instruction *mov = p->store + i; - int mrf_index, grf_index; - GLboolean is_compr4; - - /* Only consider _straight_ grf-to-mrf moves */ - if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4)) - continue; - - /* Using comp4 enables a stride of 4 for this instruction */ - const int mrf_index0 = mrf_index; - const int mrf_index1 = is_compr4 ? mrf_index+4 : mrf_index+1; - - /* Look where the register has been set */ - prev = i; - GLboolean potential_remove = GL_FALSE; - while (prev--) { - - /* If _one_ instruction writes the grf, we try to remove the mov */ - struct brw_instruction *inst = p->store + prev; - if (brw_is_grf_straight_write(inst, grf_index)) { - potential_remove = GL_TRUE; - grf_inst = inst; - break; - } - - } - - if (potential_remove == GL_FALSE) - continue; - removeInst[i] = GL_TRUE; - - /* Monitor first the section of code between the grf computation and the - * mov. Here we cannot read or write both mrf and grf register - */ - for (j = prev + 1; j < i; ++j) { - struct brw_instruction *inst = p->store + j; - if (removeInst[j]) - continue; - if (brw_is_grf_written(inst, grf_index, simd16_size, gen) || - brw_is_grf_read(inst, grf_index, simd16_size) || - brw_is_mrf_written(inst, mrf_index0, BRW_SIZE_OF_REG) || - brw_is_mrf_written(inst, mrf_index1, BRW_SIZE_OF_REG) || - brw_is_mrf_read(inst, mrf_index0, BRW_SIZE_OF_REG, gen) || - brw_is_mrf_read(inst, mrf_index1, BRW_SIZE_OF_REG, gen)) { - removeInst[i] = GL_FALSE; - break; - } - } - - /* After the mov, we can read or write the mrf. If the grf is overwritten, - * we are done - */ - for (j = i + 1; j < p->nr_insn; ++j) { - struct brw_instruction *inst = p->store + j; - if (removeInst[j]) - continue; - - if (brw_is_grf_read(inst, grf_index, simd16_size)) { - removeInst[i] = GL_FALSE; - break; - } - - if (brw_is_grf_straight_write(inst, grf_index)) - break; - } - - /* Note that with the top down traversal, we can safely pacth the mov - * instruction - */ - if (removeInst[i]) { - grf_inst->bits1.da1.dest_reg_file = mov->bits1.da1.dest_reg_file; - grf_inst->bits1.da1.dest_reg_nr = mov->bits1.da1.dest_reg_nr; - } - } - - brw_remove_inst(p, removeInst); - free(removeInst); -} /* Emit the fragment program instructions here. */ @@ -2331,8 +1714,9 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Only properly tested on ILK */ if (p->brw->intel.gen == 5) { - brw_remove_duplicate_mrf_moves(c); - brw_remove_mrf_to_grf_moves(c); + brw_remove_duplicate_mrf_moves(p); + if (c->dispatch_width == 16) + brw_remove_mrf_to_grf_moves(p); } if (INTEL_DEBUG & DEBUG_WM) { -- cgit v1.2.3