diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 472 |
1 files changed, 294 insertions, 178 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index edb02fabb2..a35687d599 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -48,6 +48,7 @@ extern "C" { #include "../glsl/ir_optimization.h" #include "../glsl/ir_print_visitor.h" +#define MAX_INSTRUCTION (1 << 30) static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); struct gl_shader * @@ -89,6 +90,9 @@ brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { + struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; + struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; if (shader != NULL) { @@ -107,7 +111,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) SUB_TO_ADD_NEG | EXP_TO_EXP2 | LOG_TO_LOG2); + + /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, + * if-statements need to be flattened. + */ + if (intel->gen < 6) + lower_if_to_cond_assign(shader->ir, 16); + do_lower_texture_projection(shader->ir); + do_vec_index_to_cond_assign(shader->ir); brw_do_cubemap_normalize(shader->ir); do { @@ -474,8 +486,13 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) wpos.reg_offset++; /* gl_FragCoord.z */ - emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, - interp_reg(FRAG_ATTRIB_WPOS, 2))); + if (intel->gen >= 6) { + emit(fs_inst(BRW_OPCODE_MOV, wpos, + fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); + } else { + emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, + interp_reg(FRAG_ATTRIB_WPOS, 2))); + } wpos.reg_offset++; /* gl_FragCoord.w: Already set up in emit_interpolation */ @@ -518,25 +535,40 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) continue; } - for (unsigned int c = 0; c < type->vector_elements; c++) { - struct brw_reg interp = interp_reg(location, c); - emit(fs_inst(FS_OPCODE_LINTERP, - attr, - this->delta_x, - this->delta_y, - fs_reg(interp))); - attr.reg_offset++; - } - - if (intel->gen < 6) { - attr.reg_offset -= type->vector_elements; + if (c->key.flat_shade && (location == FRAG_ATTRIB_COL0 || + location == FRAG_ATTRIB_COL1)) { + /* Constant interpolation (flat shading) case. The SF has + * handed us defined values in only the constant offset + * field of the setup reg. + */ for (unsigned int c = 0; c < type->vector_elements; c++) { - emit(fs_inst(BRW_OPCODE_MUL, - attr, + struct brw_reg interp = interp_reg(location, c); + interp = suboffset(interp, 3); + emit(fs_inst(FS_OPCODE_CINTERP, attr, fs_reg(interp))); + attr.reg_offset++; + } + } else { + /* Perspective interpolation case. */ + for (unsigned int c = 0; c < type->vector_elements; c++) { + struct brw_reg interp = interp_reg(location, c); + emit(fs_inst(FS_OPCODE_LINTERP, attr, - this->pixel_w)); + this->delta_x, + this->delta_y, + fs_reg(interp))); attr.reg_offset++; } + + if (intel->gen < 6) { + attr.reg_offset -= type->vector_elements; + for (unsigned int c = 0; c < type->vector_elements; c++) { + emit(fs_inst(BRW_OPCODE_MUL, + attr, + attr, + this->pixel_w)); + attr.reg_offset++; + } + } } location++; } @@ -600,8 +632,13 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) * might be able to do better by doing execsize = 1 math and then * expanding that result out, but we would need to be careful with * masking. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. */ - if (intel->gen >= 6 && src.file == UNIFORM) { + if (intel->gen >= 6 && (src.file == UNIFORM || + src.abs || + src.negate)) { fs_reg expanded = fs_reg(this, glsl_type::float_type); emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); src = expanded; @@ -765,6 +802,30 @@ fs_visitor::try_emit_saturate(ir_expression *ir) return true; } +static uint32_t +brw_conditional_for_comparison(unsigned int op) +{ + switch (op) { + case ir_binop_less: + return BRW_CONDITIONAL_L; + case ir_binop_greater: + return BRW_CONDITIONAL_G; + case ir_binop_lequal: + return BRW_CONDITIONAL_LE; + case ir_binop_gequal: + return BRW_CONDITIONAL_GE; + case ir_binop_equal: + case ir_binop_all_equal: /* same as equal for scalars */ + return BRW_CONDITIONAL_Z; + case ir_binop_nequal: + case ir_binop_any_nequal: /* same as nequal for scalars */ + return BRW_CONDITIONAL_NZ; + default: + assert(!"not reached: bad operation for comparison"); + return BRW_CONDITIONAL_NZ; + } +} + void fs_visitor::visit(ir_expression *ir) { @@ -814,6 +875,7 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_abs: op[0].abs = true; + op[0].negate = false; this->result = op[0]; break; case ir_unop_sign: @@ -880,35 +942,20 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_equal: - case ir_binop_all_equal: /* same as nequal for scalars */ - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; + case ir_binop_all_equal: case ir_binop_nequal: - case ir_binop_any_nequal: /* same as nequal for scalars */ - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + case ir_binop_any_nequal: + temp = this->result; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1])); + inst->conditional_mod = brw_conditional_for_comparison(ir->operation); emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); break; @@ -933,6 +980,10 @@ fs_visitor::visit(ir_expression *ir) assert(!"not reached: should be handled by lower_noise"); break; + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + break; + case ir_unop_sqrt: emit_math(FS_OPCODE_SQRT, this->result, op[0]); break; @@ -949,7 +1000,12 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_f2b: case ir_unop_i2b: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); + temp = this->result; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f))); inst->conditional_mod = BRW_CONDITIONAL_NZ; inst = emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(1))); @@ -1423,28 +1479,70 @@ fs_visitor::visit(ir_discard *ir) void fs_visitor::visit(ir_constant *ir) { - fs_reg reg(this, ir->type); - this->result = reg; + /* Set this->result to reg at the bottom of the function because some code + * paths will cause this visitor to be applied to other fields. This will + * cause the value stored in this->result to be modified. + * + * Make reg constant so that it doesn't get accidentally modified along the + * way. Yes, I actually had this problem. :( + */ + const fs_reg reg(this, ir->type); + fs_reg dst_reg = reg; - for (unsigned int i = 0; i < ir->type->vector_elements; i++) { - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); - break; - case GLSL_TYPE_UINT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); - break; - case GLSL_TYPE_INT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); - break; - case GLSL_TYPE_BOOL: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); - break; - default: - assert(!"Non-float/uint/int/bool constant"); + if (ir->type->is_array()) { + const unsigned size = type_size(ir->type->fields.array); + + for (unsigned i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else if (ir->type->is_record()) { + foreach_list(node, &ir->components) { + ir_instruction *const field = (ir_instruction *) node; + const unsigned size = type_size(field->type); + + field->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else { + const unsigned size = type_size(ir->type); + + for (unsigned i = 0; i < size; i++) { + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]))); + break; + case GLSL_TYPE_UINT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]))); + break; + case GLSL_TYPE_INT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]))); + break; + case GLSL_TYPE_BOOL: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]))); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + dst_reg.reg_offset++; } - reg.reg_offset++; } + + this->result = reg; } void @@ -1490,7 +1588,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f))); } else { - inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); + inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0])); } inst->conditional_mod = BRW_CONDITIONAL_NZ; break; @@ -1505,31 +1603,18 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) break; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - break; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - break; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - break; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - break; case ir_binop_equal: case ir_binop_all_equal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; case ir_binop_nequal: case ir_binop_any_nequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1])); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); break; + default: assert(!"not reached"); this->fail = true; @@ -1574,7 +1659,7 @@ fs_visitor::emit_if_gen6(ir_if *ir) switch (expr->operation) { case ir_unop_logic_not: - inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1))); + inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0))); inst->conditional_mod = BRW_CONDITIONAL_Z; return; @@ -1608,30 +1693,16 @@ fs_visitor::emit_if_gen6(ir_if *ir) return; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - return; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - return; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - return; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - return; case ir_binop_equal: case ir_binop_all_equal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - return; case ir_binop_nequal: case ir_binop_any_nequal: inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); return; default: assert(!"not reached"); @@ -1713,32 +1784,9 @@ fs_visitor::visit(ir_loop *ir) this->base_ir = ir->to; ir->to->accept(this); - fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, + fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result)); - switch (ir->cmp) { - case ir_binop_equal: - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; - case ir_binop_nequal: - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - case ir_binop_gequal: - inst->conditional_mod = BRW_CONDITIONAL_GE; - break; - case ir_binop_lequal: - inst->conditional_mod = BRW_CONDITIONAL_LE; - break; - case ir_binop_greater: - inst->conditional_mod = BRW_CONDITIONAL_G; - break; - case ir_binop_less: - inst->conditional_mod = BRW_CONDITIONAL_L; - break; - default: - assert(!"not reached: unknown loop condition"); - this->fail = true; - break; - } + inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); inst = emit(fs_inst(BRW_OPCODE_BREAK)); inst->predicated = true; @@ -1951,7 +1999,7 @@ fs_visitor::emit_interpolation_setup_gen6() emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); this->current_annotation = "compute 1/pos.w"; - this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0)); + this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); this->pixel_w = fs_reg(this, glsl_type::float_type); emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); @@ -1979,17 +2027,17 @@ fs_visitor::emit_fb_writes() nr += 2; } - if (c->key.aa_dest_stencil_reg) { + if (c->aa_dest_stencil_reg) { emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); + fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); } /* Reserve space for color. It'll be filled in per MRT below. */ int color_mrf = nr; nr += 4; - if (c->key.source_depth_to_render_target) { - if (c->key.computes_depth) { + if (c->source_depth_to_render_target) { + if (c->computes_depth) { /* Hand over gl_FragDepth. */ assert(this->frag_depth); fs_reg depth = *(variable_storage(this->frag_depth)); @@ -1998,20 +2046,22 @@ fs_visitor::emit_fb_writes() } else { /* Pass through the payload depth. */ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); + fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); } } - if (c->key.dest_depth_reg) { + if (c->dest_depth_reg) { emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); + fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); } fs_reg color = reg_undef; if (this->frag_color) color = *(variable_storage(this->frag_color)); - else if (this->frag_data) + else if (this->frag_data) { color = *(variable_storage(this->frag_data)); + color.type = BRW_REGISTER_TYPE_F; + } for (int target = 0; target < c->key.nr_color_regions; target++) { this->current_annotation = talloc_asprintf(this->mem_ctx, @@ -2105,7 +2155,8 @@ fs_visitor::generate_fb_write(fs_inst *inst) inst->target, inst->mlen, 0, - eot); + eot, + inst->header_present); } void @@ -2452,7 +2503,7 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) void fs_visitor::assign_curb_setup() { - c->prog_data.first_curbe_grf = c->key.nr_payload_regs; + c->prog_data.first_curbe_grf = c->nr_payload_regs; c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; /* Map the offsets in the UNIFORM file to fixed HW regs. */ @@ -2522,12 +2573,15 @@ fs_visitor::assign_urb_setup() foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); - if (inst->opcode != FS_OPCODE_LINTERP) - continue; - - assert(inst->src[2].file == FIXED_HW_REG); + if (inst->opcode == FS_OPCODE_LINTERP) { + assert(inst->src[2].file == FIXED_HW_REG); + inst->src[2].fixed_hw_reg.nr += urb_start; + } - inst->src[2].fixed_hw_reg.nr += urb_start; + if (inst->opcode == FS_OPCODE_CINTERP) { + assert(inst->src[0].file == FIXED_HW_REG); + inst->src[0].fixed_hw_reg.nr += urb_start; + } } this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; @@ -2618,6 +2672,7 @@ fs_visitor::split_virtual_grfs() } } } + this->live_intervals_valid = false; } /** @@ -2692,8 +2747,11 @@ fs_visitor::calculate_live_intervals() int loop_start = 0; int bb_header_ip = 0; + if (this->live_intervals_valid) + return; + for (int i = 0; i < num_vars; i++) { - def[i] = 1 << 30; + def[i] = MAX_INSTRUCTION; use[i] = -1; } @@ -2771,6 +2829,8 @@ fs_visitor::calculate_live_intervals() talloc_free(this->virtual_grf_use); this->virtual_grf_def = def; this->virtual_grf_use = use; + + this->live_intervals_valid = true; } /** @@ -2786,6 +2846,8 @@ fs_visitor::propagate_constants() { bool progress = false; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -2843,6 +2905,7 @@ fs_visitor::propagate_constants() /* Fit this constant in by commuting the operands */ scan_inst->src[0] = scan_inst->src[1]; scan_inst->src[1] = inst->src[0]; + progress = true; } break; case BRW_OPCODE_CMP: @@ -2863,6 +2926,9 @@ fs_visitor::propagate_constants() } } + if (progress) + this->live_intervals_valid = false; + return progress; } /** @@ -2877,6 +2943,8 @@ fs_visitor::dead_code_eliminate() bool progress = false; int pc = 0; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -2888,6 +2956,9 @@ fs_visitor::dead_code_eliminate() pc++; } + if (progress) + live_intervals_valid = false; + return progress; } @@ -2895,10 +2966,35 @@ bool fs_visitor::register_coalesce() { bool progress = false; + int if_depth = 0; + int loop_depth = 0; foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); + /* Make sure that we dominate the instructions we're going to + * scan for interfering with our coalescing, or we won't have + * scanned enough to see if anything interferes with our + * coalescing. We don't dominate the following instructions if + * we're in a loop or an if block. + */ + switch (inst->opcode) { + case BRW_OPCODE_DO: + loop_depth++; + break; + case BRW_OPCODE_WHILE: + loop_depth--; + break; + case BRW_OPCODE_IF: + if_depth++; + break; + case BRW_OPCODE_ENDIF: + if_depth--; + break; + } + if (loop_depth || if_depth) + continue; + if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || inst->saturate || @@ -2916,14 +3012,6 @@ fs_visitor::register_coalesce() for (; scan_iter.has_next(); scan_iter.next()) { fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - if (scan_inst->opcode == BRW_OPCODE_DO || - scan_inst->opcode == BRW_OPCODE_WHILE || - scan_inst->opcode == BRW_OPCODE_ENDIF) { - interfered = true; - iter = scan_iter; - break; - } - if (scan_inst->dst.file == GRF) { if (scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || @@ -2943,10 +3031,6 @@ fs_visitor::register_coalesce() continue; } - /* Update live interval so we don't have to recalculate. */ - this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg], - virtual_grf_use[inst->dst.reg]); - /* Rewrite the later usage to point at the source of the move to * be removed. */ @@ -2971,6 +3055,9 @@ fs_visitor::register_coalesce() progress = true; } + if (progress) + live_intervals_valid = false; + return progress; } @@ -2981,6 +3068,8 @@ fs_visitor::compute_to_mrf() bool progress = false; int next_ip = 0; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -3184,15 +3273,16 @@ fs_visitor::virtual_grf_interferes(int a, int b) int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); - /* For dead code, just check if the def interferes with the other range. */ - if (this->virtual_grf_use[a] == -1) { - return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] && - this->virtual_grf_def[a] < this->virtual_grf_use[b]); - } - if (this->virtual_grf_use[b] == -1) { - return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] && - this->virtual_grf_def[b] < this->virtual_grf_use[a]); - } + /* We can't handle dead register writes here, without iterating + * over the whole instruction stream to find every single dead + * write to that register to compare to the live interval of the + * other register. Just assert that dead_code_eliminate() has been + * called. + */ + assert((this->virtual_grf_use[a] != -1 || + this->virtual_grf_def[a] == MAX_INSTRUCTION) && + (this->virtual_grf_use[b] != -1 || + this->virtual_grf_def[b] == MAX_INSTRUCTION)); return start < end; } @@ -3227,6 +3317,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) break; default: assert(!"not reached"); + brw_reg = brw_null_reg(); break; } break; @@ -3241,6 +3332,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) assert(!"not reached"); brw_reg = brw_null_reg(); break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; } if (reg->abs) brw_reg = brw_abs(brw_reg); @@ -3373,10 +3468,6 @@ fs_visitor::generate_code() break; case BRW_OPCODE_DO: - /* FINISHME: We need to write the loop instruction support still. */ - if (intel->gen >= 6) - this->fail = true; - loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); if_depth_in_loop[loop_stack_depth] = 0; break; @@ -3386,7 +3477,11 @@ fs_visitor::generate_code() brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: - brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + /* FINISHME: We need to write the loop instruction support still. */ + if (intel->gen >= 6) + brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; @@ -3400,16 +3495,18 @@ fs_visitor::generate_code() assert(loop_stack_depth > 0); loop_stack_depth--; inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_stack[loop_stack_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } } } } @@ -3425,6 +3522,9 @@ fs_visitor::generate_code() case FS_OPCODE_COS: generate_math(inst, dst, src); break; + case FS_OPCODE_CINTERP: + brw_MOV(p, dst, src[0]); + break; case FS_OPCODE_LINTERP: generate_linterp(inst, dst, src); break; @@ -3486,6 +3586,26 @@ fs_visitor::generate_code() last_native_inst = p->nr_insn; } + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. + */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } } GLboolean @@ -3553,7 +3673,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) progress = v.remove_duplicate_mrf_writes() || progress; - v.calculate_live_intervals(); progress = v.propagate_constants() || progress; progress = v.register_coalesce() || progress; progress = v.compute_to_mrf() || progress; @@ -3566,7 +3685,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) for (int i = 1; i < virtual_grf_count; i++) { v.spill_reg(i); } - v.calculate_live_intervals(); } if (0) @@ -3575,8 +3693,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) while (!v.assign_regs()) { if (v.fail) break; - - v.calculate_live_intervals(); } } } |