From 1d073cb2d920d1c0b8c6d598055b14048fedc96e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 30 Sep 2010 19:18:25 -0700
Subject: i965: Split the gen4 and gen5 sampler handling apart.

Tracking the insanity of the different argument layouts for
normal/shadow crossed with normal/lod/bias is hard enough one
generation at a time.

Fixes: glsl1-texture2D() with bias.
(first test passing in this code that doesn't pass without it!)
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 126 ++++++++++++++++++++++++++++-------
 1 file changed, 103 insertions(+), 23 deletions(-)

(limited to 'src/mesa/drivers/dri/i965')

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c49b27b0b5..78cdfed3fb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -469,6 +469,8 @@ public:
    void emit_fragcoord_interpolation(ir_variable *ir);
    void emit_general_interpolation(ir_variable *ir);
    void emit_interpolation_setup();
+   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, int base_mrf);
+   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, int base_mrf);
    void emit_fb_writes();
    void emit_assignment_writes(fs_reg &l, fs_reg &r,
 			       const glsl_type *type, bool predicated);
@@ -1220,48 +1222,93 @@ fs_visitor::visit(ir_assignment *ir)
    }
 }
 
-void
-fs_visitor::visit(ir_texture *ir)
+fs_inst *
+fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, int base_mrf)
 {
-   int base_mrf = 2;
-   fs_inst *inst = NULL;
-   unsigned int mlen = 0;
+   /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
+   int mlen = 3;
 
-   ir->coordinate->accept(this);
-   fs_reg coordinate = this->result;
+   if (ir->shadow_comparitor) {
+      if (ir->op == ir_tex) {
+	 /* There's no plain shadow compare message, so we use shadow
+	  * compare with a bias of 0.0.
+	  */
+	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
+		      fs_reg(0.0f)));
+	 mlen++;
+      } else if (ir->op == ir_txb) {
+	 ir->lod_info.bias->accept(this);
+	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
+		      this->result));
+	 mlen++;
+      } else {
+	 assert(ir->op == ir_txl);
+	 ir->lod_info.lod->accept(this);
+	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
+		      this->result));
+	 mlen++;
+      }
 
-   /* Should be lowered by do_lower_texture_projection */
-   assert(!ir->projector);
+      ir->shadow_comparitor->accept(this);
+      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+      mlen++;
+   } else {
+      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare sampler
+       * instructions.  We'll need to do SIMD16 here.
+       */
+      abort();
+   }
 
-   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
-      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
-      coordinate.reg_offset++;
+   fs_inst *inst = NULL;
+   switch (ir->op) {
+   case ir_tex:
+      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
+      break;
+   case ir_txb:
+      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
+      break;
+   case ir_txl:
+      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
+      break;
+   case ir_txd:
+   case ir_txf:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
    }
+   inst->mlen = mlen;
 
-   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
-   if (intel->gen < 5)
-      mlen = 3;
+   return inst;
+}
+
+fs_inst *
+fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, int base_mrf)
+{
+   /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
+    * optional parameters like shadow comparitor or LOD bias.  If
+    * optional parameters aren't present, those base slots are
+    * optional and don't need to be included in the message.
+    *
+    * We don't fill in the unnecessary slots regardless, which may
+    * look surprising in the disassembly.
+    */
+   int mlen = ir->coordinate->type->vector_elements;
 
    if (ir->shadow_comparitor) {
-      /* For shadow comparisons, we have to supply u,v,r. */
-      mlen = 3;
+      mlen = MAX2(mlen, 4);
 
       ir->shadow_comparitor->accept(this);
       emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
       mlen++;
    }
 
-   /* Do we ever want to handle writemasking on texture samples?  Is it
-    * performance relevant?
-    */
-   fs_reg dst = fs_reg(this, glsl_type::vec4_type);
-
+   fs_inst *inst = NULL;
    switch (ir->op) {
    case ir_tex:
       inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
       break;
    case ir_txb:
       ir->lod_info.bias->accept(this);
+      mlen = MAX2(mlen, 4);
       emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
       mlen++;
 
@@ -1269,6 +1316,7 @@ fs_visitor::visit(ir_texture *ir)
       break;
    case ir_txl:
       ir->lod_info.lod->accept(this);
+      mlen = MAX2(mlen, 4);
       emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
       mlen++;
 
@@ -1279,6 +1327,39 @@ fs_visitor::visit(ir_texture *ir)
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
+   inst->mlen = mlen;
+
+   return inst;
+}
+
+void
+fs_visitor::visit(ir_texture *ir)
+{
+   int base_mrf = 2;
+   fs_inst *inst = NULL;
+   unsigned int mlen = 0;
+
+   ir->coordinate->accept(this);
+   fs_reg coordinate = this->result;
+
+   /* Should be lowered by do_lower_texture_projection */
+   assert(!ir->projector);
+
+   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
+      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
+      coordinate.reg_offset++;
+   }
+
+   /* Writemasking doesn't eliminate channels on SIMD8 texture
+    * samples, so don't worry about them.
+    */
+   fs_reg dst = fs_reg(this, glsl_type::vec4_type);
+
+   if (intel->gen < 5) {
+      inst = emit_texture_gen4(ir, dst, base_mrf);
+   } else {
+      inst = emit_texture_gen5(ir, dst, base_mrf);
+   }
 
    inst->sampler =
       _mesa_get_sampler_uniform_value(ir->sampler,
@@ -1290,7 +1371,6 @@ fs_visitor::visit(ir_texture *ir)
 
    if (ir->shadow_comparitor)
       inst->shadow_compare = true;
-   inst->mlen = mlen;
 }
 
 void
-- 
cgit v1.2.3