From 1d073cb2d920d1c0b8c6d598055b14048fedc96e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 30 Sep 2010 19:18:25 -0700 Subject: i965: Split the gen4 and gen5 sampler handling apart. Trying to track the insanity of the different argument layouts for normal/shadow crossed with normal/lod/bias one generation at a time is enough. Fixes: glsl1-texture2D() with bias. (first test passing in this code that doesn't pass without it!) --- src/mesa/drivers/dri/i965/brw_fs.cpp | 126 ++++++++++++++++++++++++++++------- 1 file changed, 103 insertions(+), 23 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index c49b27b0b5..78cdfed3fb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -469,6 +469,8 @@ public: void emit_fragcoord_interpolation(ir_variable *ir); void emit_general_interpolation(ir_variable *ir); void emit_interpolation_setup(); + fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, int base_mrf); + fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, int base_mrf); void emit_fb_writes(); void emit_assignment_writes(fs_reg &l, fs_reg &r, const glsl_type *type, bool predicated); @@ -1220,48 +1222,93 @@ fs_visitor::visit(ir_assignment *ir) } } -void -fs_visitor::visit(ir_texture *ir) +fs_inst * +fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, int base_mrf) { - int base_mrf = 2; - fs_inst *inst = NULL; - unsigned int mlen = 0; + /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ + int mlen = 3; - ir->coordinate->accept(this); - fs_reg coordinate = this->result; + if (ir->shadow_comparitor) { + if (ir->op == ir_tex) { + /* There's no plain shadow compare message, so we use shadow + * compare with a bias of 0.0. + */ + emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), + fs_reg(0.0f))); + mlen++; + } else if (ir->op == ir_txb) { + ir->lod_info.bias->accept(this); + emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), + this->result)); + mlen++; + } else { + assert(ir->op == ir_txl); + ir->lod_info.lod->accept(this); + emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), + this->result)); + mlen++; + } - /* Should be lowered by do_lower_texture_projection */ - assert(!ir->projector); + ir->shadow_comparitor->accept(this); + emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); + mlen++; + } else { + /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare sampler + * instructions. We'll need to do SIMD16 here. + */ + abort(); + } - for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate)); - coordinate.reg_offset++; + fs_inst *inst = NULL; + switch (ir->op) { + case ir_tex: + inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf))); + break; + case ir_txb: + inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf))); + break; + case ir_txl: + inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf))); + break; + case ir_txd: + case ir_txf: + assert(!"GLSL 1.30 features unsupported"); + break; } + inst->mlen = mlen; - /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ - if (intel->gen < 5) - mlen = 3; + return inst; +} + +fs_inst * +fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, int base_mrf) +{ + /* gen5's SIMD8 sampler has slots for u, v, r, array index, then + * optional parameters like shadow comparitor or LOD bias. If + * optional parameters aren't present, those base slots are + * optional and don't need to be included in the message. + * + * We don't fill in the unnecessary slots regardless, which may + * look surprising in the disassembly. + */ + int mlen = ir->coordinate->type->vector_elements; if (ir->shadow_comparitor) { - /* For shadow comparisons, we have to supply u,v,r. */ - mlen = 3; + mlen = MAX2(mlen, 4); ir->shadow_comparitor->accept(this); emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); mlen++; } - /* Do we ever want to handle writemasking on texture samples? Is it - * performance relevant? - */ - fs_reg dst = fs_reg(this, glsl_type::vec4_type); - + fs_inst *inst = NULL; switch (ir->op) { case ir_tex: inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf))); break; case ir_txb: ir->lod_info.bias->accept(this); + mlen = MAX2(mlen, 4); emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); mlen++; @@ -1269,6 +1316,7 @@ fs_visitor::visit(ir_texture *ir) break; case ir_txl: ir->lod_info.lod->accept(this); + mlen = MAX2(mlen, 4); emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); mlen++; @@ -1279,6 +1327,39 @@ fs_visitor::visit(ir_texture *ir) assert(!"GLSL 1.30 features unsupported"); break; } + inst->mlen = mlen; + + return inst; +} + +void +fs_visitor::visit(ir_texture *ir) +{ + int base_mrf = 2; + fs_inst *inst = NULL; + unsigned int mlen = 0; + + ir->coordinate->accept(this); + fs_reg coordinate = this->result; + + /* Should be lowered by do_lower_texture_projection */ + assert(!ir->projector); + + for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { + emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate)); + coordinate.reg_offset++; + } + + /* Writemasking doesn't eliminate channels on SIMD8 texture + * samples, so don't worry about them. + */ + fs_reg dst = fs_reg(this, glsl_type::vec4_type); + + if (intel->gen < 5) { + inst = emit_texture_gen4(ir, dst, base_mrf); + } else { + inst = emit_texture_gen5(ir, dst, base_mrf); + } inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, @@ -1290,7 +1371,6 @@ fs_visitor::visit(ir_texture *ir) if (ir->shadow_comparitor) inst->shadow_compare = true; - inst->mlen = mlen; } void -- cgit v1.2.3