diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 567 |
1 files changed, 367 insertions, 200 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 6bb195b487..2c997b4eb3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -41,13 +41,13 @@ extern "C" { #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" -#include "talloc.h" } #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" #include "../glsl/ir_print_visitor.h" +#define MAX_INSTRUCTION (1 << 30) static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); struct gl_shader * @@ -55,7 +55,7 @@ brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) { struct brw_shader *shader; - shader = talloc_zero(NULL, struct brw_shader); + shader = rzalloc(NULL, struct brw_shader); if (shader) { shader->base.Type = type; shader->base.Name = name; @@ -69,7 +69,7 @@ struct gl_shader_program * brw_new_shader_program(struct gl_context *ctx, GLuint name) { struct brw_shader_program *prog; - prog = talloc_zero(NULL, struct brw_shader_program); + prog = rzalloc(NULL, struct brw_shader_program); if (prog) { prog->base.Name = name; _mesa_init_shader_program(ctx, &prog->base); @@ -89,14 +89,17 @@ brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { + struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; + struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; if (shader != NULL) { - void *mem_ctx = talloc_new(NULL); + void *mem_ctx = ralloc_context(NULL); bool progress; if (shader->ir) - talloc_free(shader->ir); + ralloc_free(shader->ir); shader->ir = new(shader) exec_list; clone_ir_list(mem_ctx, shader->ir, shader->base.ir); @@ -107,8 +110,24 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) SUB_TO_ADD_NEG | EXP_TO_EXP2 | LOG_TO_LOG2); + + /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, + * if-statements need to be flattened. + */ + if (intel->gen < 6) + lower_if_to_cond_assign(shader->ir, 16); + do_lower_texture_projection(shader->ir); + do_vec_index_to_cond_assign(shader->ir); brw_do_cubemap_normalize(shader->ir); + lower_noise(shader->ir); + lower_quadop_vector(shader->ir, false); + lower_variable_index_to_cond_assign(shader->ir, + GL_TRUE, /* input */ + GL_TRUE, /* output */ + GL_TRUE, /* temp */ + GL_TRUE /* uniform */ + ); do { progress = false; @@ -123,22 +142,12 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) ) || progress; progress = do_common_optimization(shader->ir, true, 32) || progress; - - progress = lower_noise(shader->ir) || progress; - progress = - lower_variable_index_to_cond_assign(shader->ir, - GL_TRUE, /* input */ - GL_TRUE, /* output */ - GL_TRUE, /* temp */ - GL_TRUE /* uniform */ - ) || progress; - progress = lower_quadop_vector(shader->ir, false) || progress; } while (progress); validate_ir_tree(shader->ir); reparent_ir(shader->ir, shader->ir); - talloc_free(mem_ctx); + ralloc_free(mem_ctx); } if (!_mesa_ir_link_shader(ctx, prog)) @@ -202,6 +211,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) return 2; case FS_OPCODE_TEX: case FS_OPCODE_TXB: + case FS_OPCODE_TXD: case FS_OPCODE_TXL: return 1; case FS_OPCODE_FB_WRITE: @@ -225,8 +235,8 @@ fs_visitor::virtual_grf_alloc(int size) virtual_grf_array_size = 16; else virtual_grf_array_size *= 2; - virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes, - int, virtual_grf_array_size); + virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, + virtual_grf_array_size); /* This slot is always unused. */ virtual_grf_sizes[0] = 0; @@ -304,7 +314,6 @@ int fs_visitor::setup_uniform_values(int loc, const glsl_type *type) { unsigned int offset = 0; - float *vec_values; if (type->is_matrix()) { const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, @@ -323,7 +332,6 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_BOOL: - vec_values = fp->Base.Parameters->ParameterValues[loc]; for (unsigned int i = 0; i < type->vector_elements; i++) { unsigned int param = c->prog_data.nr_params++; @@ -347,8 +355,8 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) c->prog_data.param_convert[param] = PARAM_NO_CONVERT; break; } - - c->prog_data.param[param] = &vec_values[i]; + this->param_index[param] = loc; + this->param_offset[param] = i; } return 1; @@ -419,7 +427,6 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir) */ int index = _mesa_add_state_reference(this->fp->Base.Parameters, (gl_state_index *)tokens); - float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; /* Add each of the unique swizzles of the element as a * parameter. This'll end up matching the expected layout of @@ -434,7 +441,9 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir) c->prog_data.param_convert[c->prog_data.nr_params] = PARAM_NO_CONVERT; - c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = swiz; + c->prog_data.nr_params++; } } } @@ -474,8 +483,13 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) wpos.reg_offset++; /* gl_FragCoord.z */ - emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, - interp_reg(FRAG_ATTRIB_WPOS, 2))); + if (intel->gen >= 6) { + emit(fs_inst(BRW_OPCODE_MOV, wpos, + fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); + } else { + emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, + interp_reg(FRAG_ATTRIB_WPOS, 2))); + } wpos.reg_offset++; /* gl_FragCoord.w: Already set up in emit_interpolation */ @@ -518,25 +532,40 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) continue; } - for (unsigned int c = 0; c < type->vector_elements; c++) { - struct brw_reg interp = interp_reg(location, c); - emit(fs_inst(FS_OPCODE_LINTERP, - attr, - this->delta_x, - this->delta_y, - fs_reg(interp))); - attr.reg_offset++; - } - - if (intel->gen < 6) { - attr.reg_offset -= type->vector_elements; + if (c->key.flat_shade && (location == FRAG_ATTRIB_COL0 || + location == FRAG_ATTRIB_COL1)) { + /* Constant interpolation (flat shading) case. The SF has + * handed us defined values in only the constant offset + * field of the setup reg. + */ for (unsigned int c = 0; c < type->vector_elements; c++) { - emit(fs_inst(BRW_OPCODE_MUL, - attr, + struct brw_reg interp = interp_reg(location, c); + interp = suboffset(interp, 3); + emit(fs_inst(FS_OPCODE_CINTERP, attr, fs_reg(interp))); + attr.reg_offset++; + } + } else { + /* Perspective interpolation case. */ + for (unsigned int c = 0; c < type->vector_elements; c++) { + struct brw_reg interp = interp_reg(location, c); + emit(fs_inst(FS_OPCODE_LINTERP, attr, - this->pixel_w)); + this->delta_x, + this->delta_y, + fs_reg(interp))); attr.reg_offset++; } + + if (intel->gen < 6) { + attr.reg_offset -= type->vector_elements; + for (unsigned int c = 0; c < type->vector_elements; c++) { + emit(fs_inst(BRW_OPCODE_MUL, + attr, + attr, + this->pixel_w)); + attr.reg_offset++; + } + } } location++; } @@ -631,14 +660,18 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) assert(opcode == FS_OPCODE_POW); if (intel->gen >= 6) { - /* Can't do hstride == 0 args to gen6 math, so expand it out. */ - if (src0.file == UNIFORM) { + /* Can't do hstride == 0 args to gen6 math, so expand it out. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. + */ + if (src0.file == UNIFORM || src0.abs || src0.negate) { fs_reg expanded = fs_reg(this, glsl_type::float_type); emit(fs_inst(BRW_OPCODE_MOV, expanded, src0)); src0 = expanded; } - if (src1.file == UNIFORM) { + if (src1.file == UNIFORM || src1.abs || src1.negate) { fs_reg expanded = fs_reg(this, glsl_type::float_type); emit(fs_inst(BRW_OPCODE_MOV, expanded, src1)); src1 = expanded; @@ -770,6 +803,30 @@ fs_visitor::try_emit_saturate(ir_expression *ir) return true; } +static uint32_t +brw_conditional_for_comparison(unsigned int op) +{ + switch (op) { + case ir_binop_less: + return BRW_CONDITIONAL_L; + case ir_binop_greater: + return BRW_CONDITIONAL_G; + case ir_binop_lequal: + return BRW_CONDITIONAL_LE; + case ir_binop_gequal: + return BRW_CONDITIONAL_GE; + case ir_binop_equal: + case ir_binop_all_equal: /* same as equal for scalars */ + return BRW_CONDITIONAL_Z; + case ir_binop_nequal: + case ir_binop_any_nequal: /* same as nequal for scalars */ + return BRW_CONDITIONAL_NZ; + default: + assert(!"not reached: bad operation for comparison"); + return BRW_CONDITIONAL_NZ; + } +} + void fs_visitor::visit(ir_expression *ir) { @@ -819,6 +876,7 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_abs: op[0].abs = true; + op[0].negate = false; this->result = op[0]; break; case ir_unop_sign: @@ -885,35 +943,20 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_equal: - case ir_binop_all_equal: /* same as nequal for scalars */ - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; + case ir_binop_all_equal: case ir_binop_nequal: - case ir_binop_any_nequal: /* same as nequal for scalars */ - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + case ir_binop_any_nequal: + temp = this->result; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1])); + inst->conditional_mod = brw_conditional_for_comparison(ir->operation); emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); break; @@ -958,7 +1001,12 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_f2b: case ir_unop_i2b: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); + temp = this->result; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f))); inst->conditional_mod = BRW_CONDITIONAL_NZ; inst = emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(1))); @@ -1151,6 +1199,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) } /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ mlen += 3; + } else if (ir->op == ir_txd) { + assert(!"TXD isn't supported on gen4 yet."); } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod * instructions. We'll need to do SIMD16 here. @@ -1204,6 +1254,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) inst = emit(fs_inst(FS_OPCODE_TXL, dst)); break; case ir_txd: + inst = emit(fs_inst(FS_OPCODE_TXD, dst)); + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -1292,6 +1344,37 @@ fs_visitor::visit(ir_texture *ir) ir->coordinate->accept(this); fs_reg coordinate = this->result; + if (ir->offset != NULL) { + ir_constant *offset = ir->offset->as_constant(); + assert(offset != NULL); + + signed char offsets[3]; + for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) + offsets[i] = (signed char) offset->value.i[i]; + + /* Combine all three offsets into a single unsigned dword: + * + * bits 11:8 - U Offset (X component) + * bits 7:4 - V Offset (Y component) + * bits 3:0 - R Offset (Z component) + */ + unsigned offset_bits = 0; + for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) { + const unsigned shift = 4 * (2 - i); + offset_bits |= (offsets[i] << shift) & (0xF << shift); + } + + /* Explicitly set up the message header by copying g0 to msg reg m1. */ + emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), + fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD))); + + /* Then set the offset bits in DWord 2 of the message header. */ + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), + BRW_REGISTER_TYPE_UD)), + fs_reg(brw_imm_uw(offset_bits)))); + } + /* Should be lowered by do_lower_texture_projection */ assert(!ir->projector); @@ -1323,10 +1406,13 @@ fs_visitor::visit(ir_texture *ir) fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); GLuint index = _mesa_add_state_reference(params, (gl_state_index *)tokens); - float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; - c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0]; - c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1]; + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = 0; + c->prog_data.nr_params++; + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = 1; + c->prog_data.nr_params++; fs_reg dst = fs_reg(this, ir->coordinate->type); fs_reg src = coordinate; @@ -1349,6 +1435,14 @@ fs_visitor::visit(ir_texture *ir) inst = emit_texture_gen5(ir, dst, coordinate); } + /* If there's an offset, we already set up m1. To avoid the implied move, + * use the null register. Otherwise, we want an implied move from g0. + */ + if (ir->offset != NULL) + inst->src[0] = fs_reg(brw_null_reg()); + else + inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + inst->sampler = sampler; this->result = dst; @@ -1356,7 +1450,10 @@ fs_visitor::visit(ir_texture *ir) if (ir->shadow_comparitor) inst->shadow_compare = true; - if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { + if (ir->type == glsl_type::float_type) { + /* Ignore DEPTH_TEXTURE_MODE swizzling. */ + assert(ir->sampler->type->sampler_shadow); + } else if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); for (int i = 0; i < 4; i++) { @@ -1541,7 +1638,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f))); } else { - inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); + inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0])); } inst->conditional_mod = BRW_CONDITIONAL_NZ; break; @@ -1556,31 +1653,18 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) break; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - break; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - break; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - break; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - break; case ir_binop_equal: case ir_binop_all_equal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; case ir_binop_nequal: case ir_binop_any_nequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1])); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); break; + default: assert(!"not reached"); this->fail = true; @@ -1659,30 +1743,16 @@ fs_visitor::emit_if_gen6(ir_if *ir) return; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - return; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - return; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - return; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - return; case ir_binop_equal: case ir_binop_all_equal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - return; case ir_binop_nequal: case ir_binop_any_nequal: inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); return; default: assert(!"not reached"); @@ -1764,32 +1834,9 @@ fs_visitor::visit(ir_loop *ir) this->base_ir = ir->to; ir->to->accept(this); - fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, + fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result)); - switch (ir->cmp) { - case ir_binop_equal: - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; - case ir_binop_nequal: - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - case ir_binop_gequal: - inst->conditional_mod = BRW_CONDITIONAL_GE; - break; - case ir_binop_lequal: - inst->conditional_mod = BRW_CONDITIONAL_LE; - break; - case ir_binop_greater: - inst->conditional_mod = BRW_CONDITIONAL_G; - break; - case ir_binop_less: - inst->conditional_mod = BRW_CONDITIONAL_L; - break; - default: - assert(!"not reached: unknown loop condition"); - this->fail = true; - break; - } + inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); inst = emit(fs_inst(BRW_OPCODE_BREAK)); inst->predicated = true; @@ -2067,7 +2114,7 @@ fs_visitor::emit_fb_writes() } for (int target = 0; target < c->key.nr_color_regions; target++) { - this->current_annotation = talloc_asprintf(this->mem_ctx, + this->current_annotation = ralloc_asprintf(this->mem_ctx, "FB write target %d", target); if (this->frag_color || this->frag_data) { @@ -2093,6 +2140,17 @@ fs_visitor::emit_fb_writes() } if (c->key.nr_color_regions == 0) { + if (c->key.alpha_test && (this->frag_color || this->frag_data)) { + /* If the alpha test is enabled but there's no color buffer, + * we still need to send alpha out the pipeline to our null + * renderbuffer. + */ + color.reg_offset += 3; + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(MRF, color_mrf + 3), + color)); + } + fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, reg_undef, reg_undef)); inst->base_mrf = 0; @@ -2158,7 +2216,8 @@ fs_visitor::generate_fb_write(fs_inst *inst) inst->target, inst->mlen, 0, - eot); + eot, + inst->header_present); } void @@ -2244,7 +2303,7 @@ fs_visitor::generate_math(fs_inst *inst, } void -fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) +fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) { int msg_type = -1; int rlen = 4; @@ -2266,6 +2325,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; } break; + case FS_OPCODE_TXL: + if (inst->shadow_compare) { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5; + } + break; + case FS_OPCODE_TXD: + assert(!"TXD isn't supported on gen5+ yet."); + break; } } else { switch (inst->opcode) { @@ -2283,13 +2352,26 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) case FS_OPCODE_TXB: if (inst->shadow_compare) { assert(inst->mlen == 6); - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; } else { assert(inst->mlen == 9); msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; } break; + case FS_OPCODE_TXL: + if (inst->shadow_compare) { + assert(inst->mlen == 6); + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; + } else { + assert(inst->mlen == 9); + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD; + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } + break; + case FS_OPCODE_TXD: + assert(!"TXD isn't supported on gen4 yet."); + break; } } assert(msg_type != -1); @@ -2302,7 +2384,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) brw_SAMPLE(p, retype(dst, BRW_REGISTER_TYPE_UW), inst->base_mrf, - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + src, SURF_INDEX_TEXTURE(inst->sampler), inst->sampler, WRITEMASK_XYZW, @@ -2502,6 +2584,22 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) } } +/** + * To be called after the last _mesa_add_state_reference() call, to + * set up prog_data.param[] for assign_curb_setup() and + * setup_pull_constants(). + */ +void +fs_visitor::setup_paramvalues_refs() +{ + /* Set up the pointers to ParamValues now that that array is finalized. */ + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + c->prog_data.param[i] = + fp->Base.Parameters->ParameterValues[this->param_index[i]] + + this->param_offset[i]; + } +} + void fs_visitor::assign_curb_setup() { @@ -2575,12 +2673,15 @@ fs_visitor::assign_urb_setup() foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); - if (inst->opcode != FS_OPCODE_LINTERP) - continue; - - assert(inst->src[2].file == FIXED_HW_REG); + if (inst->opcode == FS_OPCODE_LINTERP) { + assert(inst->src[2].file == FIXED_HW_REG); + inst->src[2].fixed_hw_reg.nr += urb_start; + } - inst->src[2].fixed_hw_reg.nr += urb_start; + if (inst->opcode == FS_OPCODE_CINTERP) { + assert(inst->src[0].file == FIXED_HW_REG); + inst->src[0].fixed_hw_reg.nr += urb_start; + } } this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; @@ -2628,10 +2729,7 @@ fs_visitor::split_virtual_grfs() fs_inst *inst = (fs_inst *)iter.get(); /* Texturing produces 4 contiguous registers, so no splitting. */ - if ((inst->opcode == FS_OPCODE_TEX || - inst->opcode == FS_OPCODE_TXB || - inst->opcode == FS_OPCODE_TXL) && - inst->dst.file == GRF) { + if (inst->is_tex()) { split_grf[inst->dst.reg] = false; } } @@ -2671,6 +2769,7 @@ fs_visitor::split_virtual_grfs() } } } + this->live_intervals_valid = false; } /** @@ -2739,14 +2838,17 @@ void fs_visitor::calculate_live_intervals() { int num_vars = this->virtual_grf_next; - int *def = talloc_array(mem_ctx, int, num_vars); - int *use = talloc_array(mem_ctx, int, num_vars); + int *def = ralloc_array(mem_ctx, int, num_vars); + int *use = ralloc_array(mem_ctx, int, num_vars); int loop_depth = 0; int loop_start = 0; int bb_header_ip = 0; + if (this->live_intervals_valid) + return; + for (int i = 0; i < num_vars; i++) { - def[i] = 1 << 30; + def[i] = MAX_INSTRUCTION; use[i] = -1; } @@ -2820,10 +2922,12 @@ fs_visitor::calculate_live_intervals() } } - talloc_free(this->virtual_grf_def); - talloc_free(this->virtual_grf_use); + ralloc_free(this->virtual_grf_def); + ralloc_free(this->virtual_grf_use); this->virtual_grf_def = def; this->virtual_grf_use = use; + + this->live_intervals_valid = true; } /** @@ -2839,6 +2943,8 @@ fs_visitor::propagate_constants() { bool progress = false; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -2896,6 +3002,7 @@ fs_visitor::propagate_constants() /* Fit this constant in by commuting the operands */ scan_inst->src[0] = scan_inst->src[1]; scan_inst->src[1] = inst->src[0]; + progress = true; } break; case BRW_OPCODE_CMP: @@ -2910,12 +3017,15 @@ fs_visitor::propagate_constants() if (scan_inst->dst.file == GRF && scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || - scan_inst->opcode == FS_OPCODE_TEX)) { + scan_inst->is_tex())) { break; } } } + if (progress) + this->live_intervals_valid = false; + return progress; } /** @@ -2930,6 +3040,8 @@ fs_visitor::dead_code_eliminate() bool progress = false; int pc = 0; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -2941,6 +3053,9 @@ fs_visitor::dead_code_eliminate() pc++; } + if (progress) + live_intervals_valid = false; + return progress; } @@ -2948,10 +3063,35 @@ bool fs_visitor::register_coalesce() { bool progress = false; + int if_depth = 0; + int loop_depth = 0; foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); + /* Make sure that we dominate the instructions we're going to + * scan for interfering with our coalescing, or we won't have + * scanned enough to see if anything interferes with our + * coalescing. We don't dominate the following instructions if + * we're in a loop or an if block. + */ + switch (inst->opcode) { + case BRW_OPCODE_DO: + loop_depth++; + break; + case BRW_OPCODE_WHILE: + loop_depth--; + break; + case BRW_OPCODE_IF: + if_depth++; + break; + case BRW_OPCODE_ENDIF: + if_depth--; + break; + } + if (loop_depth || if_depth) + continue; + if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || inst->saturate || @@ -2959,6 +3099,8 @@ fs_visitor::register_coalesce() inst->dst.type != inst->src[0].type) continue; + bool has_source_modifiers = inst->src[0].abs || inst->src[0].negate; + /* Found a move of a GRF to a GRF. Let's see if we can coalesce * them: check for no writes to either one until the exit of the * program. @@ -2969,37 +3111,33 @@ fs_visitor::register_coalesce() for (; scan_iter.has_next(); scan_iter.next()) { fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - if (scan_inst->opcode == BRW_OPCODE_DO || - scan_inst->opcode == BRW_OPCODE_WHILE || - scan_inst->opcode == BRW_OPCODE_ENDIF) { - interfered = true; - iter = scan_iter; - break; - } - if (scan_inst->dst.file == GRF) { if (scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || - scan_inst->opcode == FS_OPCODE_TEX)) { + scan_inst->is_tex())) { interfered = true; break; } if (scan_inst->dst.reg == inst->src[0].reg && (scan_inst->dst.reg_offset == inst->src[0].reg_offset || - scan_inst->opcode == FS_OPCODE_TEX)) { + scan_inst->is_tex())) { interfered = true; break; } } + + /* The gen6 MATH instruction can't handle source modifiers, so avoid + * coalescing those for now. We should do something more specific. + */ + if (intel->gen == 6 && scan_inst->is_math() && has_source_modifiers) { + interfered = true; + break; + } } if (interfered) { continue; } - /* Update live interval so we don't have to recalculate. */ - this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg], - virtual_grf_use[inst->dst.reg]); - /* Rewrite the later usage to point at the source of the move to * be removed. */ @@ -3024,6 +3162,9 @@ fs_visitor::register_coalesce() progress = true; } + if (progress) + live_intervals_valid = false; + return progress; } @@ -3034,6 +3175,8 @@ fs_visitor::compute_to_mrf() bool progress = false; int next_ip = 0; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -3066,7 +3209,7 @@ fs_visitor::compute_to_mrf() * into a compute-to-MRF. */ - if (scan_inst->opcode == FS_OPCODE_TEX) { + if (scan_inst->is_tex()) { /* texturing writes several continuous regs, so we can't * compute-to-mrf that. */ @@ -3087,14 +3230,7 @@ fs_visitor::compute_to_mrf() /* gen6 math instructions must have the destination be * GRF, so no compute-to-MRF for them. */ - if (scan_inst->opcode == FS_OPCODE_RCP || - scan_inst->opcode == FS_OPCODE_RSQ || - scan_inst->opcode == FS_OPCODE_SQRT || - scan_inst->opcode == FS_OPCODE_EXP2 || - scan_inst->opcode == FS_OPCODE_LOG2 || - scan_inst->opcode == FS_OPCODE_SIN || - scan_inst->opcode == FS_OPCODE_COS || - scan_inst->opcode == FS_OPCODE_POW) { + if (scan_inst->is_math()) { break; } } @@ -3116,6 +3252,7 @@ fs_visitor::compute_to_mrf() */ if (scan_inst->opcode == BRW_OPCODE_DO || scan_inst->opcode == BRW_OPCODE_WHILE || + scan_inst->opcode == BRW_OPCODE_ELSE || scan_inst->opcode == BRW_OPCODE_ENDIF) { break; } @@ -3202,7 +3339,7 @@ fs_visitor::remove_duplicate_mrf_writes() } if (inst->mlen > 0) { - /* Found a SEND instruction, which will include two of fewer + /* Found a SEND instruction, which will include two or fewer * implied MRF writes. We could do better here. */ for (int i = 0; i < implied_mrf_writes(inst); i++) { @@ -3237,15 +3374,16 @@ fs_visitor::virtual_grf_interferes(int a, int b) int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); - /* For dead code, just check if the def interferes with the other range. */ - if (this->virtual_grf_use[a] == -1) { - return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] && - this->virtual_grf_def[a] < this->virtual_grf_use[b]); - } - if (this->virtual_grf_use[b] == -1) { - return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] && - this->virtual_grf_def[b] < this->virtual_grf_use[a]); - } + /* We can't handle dead register writes here, without iterating + * over the whole instruction stream to find every single dead + * write to that register to compare to the live interval of the + * other register. Just assert that dead_code_eliminate() has been + * called. + */ + assert((this->virtual_grf_use[a] != -1 || + this->virtual_grf_def[a] == MAX_INSTRUCTION) && + (this->virtual_grf_use[b] != -1 || + this->virtual_grf_def[b] == MAX_INSTRUCTION)); return start < end; } @@ -3280,6 +3418,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) break; default: assert(!"not reached"); + brw_reg = brw_null_reg(); break; } break; @@ -3294,6 +3433,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) assert(!"not reached"); brw_reg = brw_null_reg(); break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; } if (reg->abs) brw_reg = brw_abs(brw_reg); @@ -3307,20 +3450,25 @@ void fs_visitor::generate_code() { int last_native_inst = 0; - struct brw_instruction *if_stack[16], *loop_stack[16]; - int if_stack_depth = 0, loop_stack_depth = 0; - int if_depth_in_loop[16]; const char *last_annotation_string = NULL; ir_instruction *last_annotation_ir = NULL; + int if_stack_array_size = 16; + int loop_stack_array_size = 16; + int if_stack_depth = 0, loop_stack_depth = 0; + brw_instruction **if_stack = + rzalloc_array(this->mem_ctx, brw_instruction *, if_stack_array_size); + brw_instruction **loop_stack = + rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); + int *if_depth_in_loop = + rzalloc_array(this->mem_ctx, int, loop_stack_array_size); + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { printf("Native code for fragment shader %d:\n", ctx->Shader.CurrentFragmentProgram->Name); } - if_depth_in_loop[loop_stack_depth] = 0; - - memset(&if_stack, 0, sizeof(if_stack)); foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); struct brw_reg src[3], dst; @@ -3404,7 +3552,6 @@ fs_visitor::generate_code() break; case BRW_OPCODE_IF: - assert(if_stack_depth < 16); if (inst->src[0].file != BAD_FILE) { assert(intel->gen >= 6); if_stack[if_stack_depth] = brw_IF_gen6(p, inst->conditional_mod, src[0], src[1]); @@ -3413,6 +3560,11 @@ fs_visitor::generate_code() } if_depth_in_loop[loop_stack_depth]++; if_stack_depth++; + if (if_stack_array_size <= if_stack_depth) { + if_stack_array_size *= 2; + if_stack = reralloc(this->mem_ctx, if_stack, brw_instruction *, + if_stack_array_size); + } break; case BRW_OPCODE_ELSE: @@ -3427,6 +3579,13 @@ fs_visitor::generate_code() case BRW_OPCODE_DO: loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); + if (loop_stack_array_size <= loop_stack_depth) { + loop_stack_array_size *= 2; + loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, + loop_stack_array_size); + if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, + loop_stack_array_size); + } if_depth_in_loop[loop_stack_depth] = 0; break; @@ -3480,13 +3639,17 @@ fs_visitor::generate_code() case FS_OPCODE_COS: generate_math(inst, dst, src); break; + case FS_OPCODE_CINTERP: + brw_MOV(p, dst, src[0]); + break; case FS_OPCODE_LINTERP: generate_linterp(inst, dst, src); break; case FS_OPCODE_TEX: case FS_OPCODE_TXB: + case FS_OPCODE_TXD: case FS_OPCODE_TXL: - generate_tex(inst, dst); + generate_tex(inst, dst, src[0]); break; case FS_OPCODE_DISCARD_NOT: generate_discard_not(inst, dst); @@ -3542,6 +3705,10 @@ fs_visitor::generate_code() last_native_inst = p->nr_insn; } + ralloc_free(if_stack); + ralloc_free(loop_stack); + ralloc_free(if_depth_in_loop); + brw_set_uip_jip(p); /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS @@ -3617,10 +3784,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) v.emit_fb_writes(); v.split_virtual_grfs(); - v.setup_pull_constants(); - v.assign_curb_setup(); - v.assign_urb_setup(); + v.setup_paramvalues_refs(); + v.setup_pull_constants(); bool progress; do { @@ -3628,20 +3794,23 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) progress = v.remove_duplicate_mrf_writes() || progress; - v.calculate_live_intervals(); progress = v.propagate_constants() || progress; progress = v.register_coalesce() || progress; progress = v.compute_to_mrf() || progress; progress = v.dead_code_eliminate() || progress; } while (progress); + v.schedule_instructions(); + + v.assign_curb_setup(); + v.assign_urb_setup(); + if (0) { /* Debug of register spilling: Go spill everything. */ int virtual_grf_count = v.virtual_grf_next; for (int i = 1; i < virtual_grf_count; i++) { v.spill_reg(i); } - v.calculate_live_intervals(); } if (0) @@ -3650,8 +3819,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) while (!v.assign_regs()) { if (v.fail) break; - - v.calculate_live_intervals(); } } } |