From 9935fe705df44bb633039ca74332cc0c126ccc30 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 17 Nov 2010 13:59:17 -0800
Subject: glsl: Remove the ir_binop_cross opcode.

---
 src/mesa/program/ir_to_mesa.cpp | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index f45bbf5582..0458625ab0 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1058,10 +1058,6 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 			 ir->operands[0]->type->vector_elements);
       break;
 
-   case ir_binop_cross:
-      ir_to_mesa_emit_op2(ir, OPCODE_XPD, result_dst, op[0], op[1]);
-      break;
-
    case ir_unop_sqrt:
       /* sqrt(x) = x * rsq(x). */
       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
-- 
cgit v1.2.3


From c05ccc1ebde177646ac09c1bd6d1b4719e745f82 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 18 Nov 2010 16:11:25 -0800
Subject: ir_to_mesa: Generate smarter code for some conditional moves

Condiation moves with a condition of (a < 0), (a > 0), (a <= 0), or (a
>= 0) can be generated with "a" directly as an operand of the CMP
instruction.  This doesn't help much now, but it will help with
assembly shaders that use the CMP instruction.
---
 src/mesa/program/ir_to_mesa.cpp | 111 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 99 insertions(+), 12 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 0458625ab0..870fd6f25e 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -285,6 +285,8 @@ public:
    GLboolean try_emit_mad(ir_expression *ir,
 			  int mul_operand);
 
+   bool process_move_condition(ir_rvalue *ir);
+
    void *mem_ctx;
 };
 
@@ -1326,6 +1328,93 @@ get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
    return ir_to_mesa_dst_reg_from_src(v->result);
 }
 
+/**
+ * Process the condition of a conditional assignment
+ *
+ * Examines the condition of a conditional assignment to generate the optimal
+ * first operand of a \c CMP instruction.  If the condition is a relational
+ * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
+ * used as the source for the \c CMP instruction.  Otherwise the comparison
+ * is processed to a boolean result, and the boolean result is used as the
+ * operand to the CMP instruction.
+ */
+bool
+ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
+{
+   ir_rvalue *src_ir = ir;
+   bool negate = true;
+   bool switch_order = false;
+
+   ir_expression *const expr = ir->as_expression();
+   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
+      bool zero_on_left = false;
+
+      if (expr->operands[0]->is_zero()) {
+	 src_ir = expr->operands[1];
+	 zero_on_left = true;
+      } else if (expr->operands[1]->is_zero()) {
+	 src_ir = expr->operands[0];
+	 zero_on_left = false;
+      }
+
+      /*      a is -  0  +            -  0  +
+       * (a <  0)  T  F  F  ( a < 0)  T  F  F
+       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
+       * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (a >  0)  F  F  T  (-a < 0)  F  F  T
+       * (0 >  a)  T  F  F  ( a < 0)  T  F  F
+       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       *
+       * Note that exchanging the order of 0 and 'a' in the comparison simply
+       * means that the value of 'a' should be negated.
+       */
+      if (src_ir != ir) {
+	 switch (expr->operation) {
+	 case ir_binop_less:
+	    switch_order = false;
+	    negate = zero_on_left;
+	    break;
+
+	 case ir_binop_greater:
+	    switch_order = false;
+	    negate = !zero_on_left;
+	    break;
+
+	 case ir_binop_lequal:
+	    switch_order = true;
+	    negate = !zero_on_left;
+	    break;
+
+	 case ir_binop_gequal:
+	    switch_order = true;
+	    negate = zero_on_left;
+	    break;
+
+	 default:
+	    /* This isn't the right kind of comparison afterall, so make sure
+	     * the whole condition is visited.
+	     */
+	    src_ir = ir;
+	    break;
+	 }
+      }
+   }
+
+   src_ir->accept(this);
+
+   /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
+    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
+    * choose which value OPCODE_CMP produces without an extra instruction
+    * computing the condition.
+    */
+   if (negate)
+      this->result.negate = ~this->result.negate;
+
+   return switch_order;
+}
+
 void
 ir_to_mesa_visitor::visit(ir_assignment *ir)
 {
@@ -1385,20 +1474,18 @@ ir_to_mesa_visitor::visit(ir_assignment *ir)
    assert(r.file != PROGRAM_UNDEFINED);
 
    if (ir->condition) {
-      ir_to_mesa_src_reg condition;
-
-      ir->condition->accept(this);
-      condition = this->result;
+      const bool switch_order = this->process_move_condition(ir->condition);
+      ir_to_mesa_src_reg condition = this->result;
 
-      /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves,
-       * and the condition we produced is 0.0 or 1.0.  By flipping the
-       * sign, we can choose which value OPCODE_CMP produces without
-       * an extra computing the condition.
-       */
-      condition.negate = ~condition.negate;
       for (i = 0; i < type_size(ir->lhs->type); i++) {
-	 ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
-			     condition, r, ir_to_mesa_src_reg_from_dst(l));
+	 if (switch_order) {
+	    ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
+				condition, ir_to_mesa_src_reg_from_dst(l), r);
+	 } else {
+	    ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
+				condition, r, ir_to_mesa_src_reg_from_dst(l));
+	 }
+
 	 l.index++;
 	 r.index++;
       }
-- 
cgit v1.2.3


From f2616e56de8a48360cae8f269727b58490555f4d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 18 Nov 2010 11:05:32 -0800
Subject: glsl: Add ir_unop_sin_reduced and ir_unop_cos_reduced

The operate just like ir_unop_sin and ir_unop_cos except that they
expect their inputs to be limited to the range [-pi, pi].  Several
GPUs require this limited range for their sine and cosine
instructions, so having these as operations (along with a to-be-written
lowering pass) helps this architectures.

These new operations also matche the semantics of the
GL_ARB_fragment_program SCS instruction.  Having these as operations
helps in generating GLSL IR directly from assembly fragment programs.
---
 src/glsl/ir.cpp                      |   2 +
 src/glsl/ir.h                        |   2 +
 src/glsl/ir_constant_expression.cpp  |   2 +
 src/glsl/ir_validate.cpp             |   2 +
 src/mesa/drivers/dri/i965/brw_fs.cpp |   2 +
 src/mesa/program/ir_to_mesa.cpp      | 110 +++++++++++++++++++++++++++++++++++
 6 files changed, 120 insertions(+)

(limited to 'src/mesa/program')

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 574ef3e183..714826343c 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -239,6 +239,8 @@ static const char *const operator_strs[] = {
    "round_even",
    "sin",
    "cos",
+   "sin_reduced",
+   "cos_reduced",
    "dFdx",
    "dFdy",
    "noise",
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 3ea7301f47..2b94e63cc2 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -729,6 +729,8 @@ enum ir_expression_operation {
    /*@{*/
    ir_unop_sin,
    ir_unop_cos,
+   ir_unop_sin_reduced,    /**< Reduced range sin. [-pi, pi] */
+   ir_unop_cos_reduced,    /**< Reduced range cos. [-pi, pi] */
    /*@}*/
 
    /**
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index 8a54fc78cc..45860b279f 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -216,6 +216,7 @@ ir_expression::constant_expression_value()
       break;
 
    case ir_unop_sin:
+   case ir_unop_sin_reduced:
       assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
 	 data.f[c] = sinf(op[0]->value.f[c]);
@@ -223,6 +224,7 @@ ir_expression::constant_expression_value()
       break;
 
    case ir_unop_cos:
+   case ir_unop_cos_reduced:
       assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
 	 data.f[c] = cosf(op[0]->value.f[c]);
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 77f48968b8..2a066c1a27 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -273,6 +273,8 @@ ir_validate::visit_leave(ir_expression *ir)
    case ir_unop_fract:
    case ir_unop_sin:
    case ir_unop_cos:
+   case ir_unop_sin_reduced:
+   case ir_unop_cos_reduced:
    case ir_unop_dFdx:
    case ir_unop_dFdy:
       assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ac795e0bda..164f89eace 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -778,9 +778,11 @@ fs_visitor::visit(ir_expression *ir)
       assert(!"not reached: should be handled by ir_explog_to_explog2");
       break;
    case ir_unop_sin:
+   case ir_unop_sin_reduced:
       emit_math(FS_OPCODE_SIN, this->result, op[0]);
       break;
    case ir_unop_cos:
+   case ir_unop_cos_reduced:
       emit_math(FS_OPCODE_COS, this->result, op[0]);
       break;
 
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 870fd6f25e..ef9f692f94 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -282,6 +282,10 @@ public:
 				   ir_to_mesa_src_reg src0,
 				   ir_to_mesa_src_reg src1);
 
+   void emit_scs(ir_instruction *ir, enum prog_opcode op,
+		 ir_to_mesa_dst_reg dst,
+		 const ir_to_mesa_src_reg &src);
+
    GLboolean try_emit_mad(ir_expression *ir,
 			  int mul_operand);
 
@@ -475,6 +479,10 @@ ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
       for (j = i + 1; j < 4; j++) {
+	 /* If there is another enabled component in the destination that is
+	  * derived from the same inputs, generate its value on this pass as
+	  * well.
+	  */
 	 if (!(done_mask & (1 << j)) &&
 	     GET_SWZ(src0.swizzle, j) == src0_swiz &&
 	     GET_SWZ(src1.swizzle, j) == src1_swiz) {
@@ -508,6 +516,102 @@ ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
    ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
 }
 
+/**
+ * Emit an OPCODE_SCS instruction
+ *
+ * The \c SCS opcode functions a bit differently than the other Mesa (or
+ * ARB_fragment_program) opcodes.  Instead of splatting its result across all
+ * four components of the destination, it writes one value to the \c x
+ * component and another value to the \c y component.
+ *
+ * \param ir        IR instruction being processed
+ * \param op        Either \c OPCODE_SIN or \c OPCODE_COS depending on which
+ *                  value is desired.
+ * \param dst       Destination register
+ * \param src       Source register
+ */
+void
+ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
+			     ir_to_mesa_dst_reg dst,
+			     const ir_to_mesa_src_reg &src)
+{
+   /* Vertex programs cannot use the SCS opcode.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      ir_to_mesa_emit_scalar_op1(ir, op, dst, src);
+      return;
+   }
+
+   const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
+   const unsigned scs_mask = (1U << component);
+   int done_mask = ~dst.writemask;
+   ir_to_mesa_src_reg tmp;
+
+   assert(op == OPCODE_SIN || op == OPCODE_COS);
+
+   /* If there are compnents in the destination that differ from the component
+    * that will be written by the SCS instrution, we'll need a temporary.
+    */
+   if (scs_mask != unsigned(dst.writemask)) {
+      tmp = get_temp(glsl_type::vec4_type);
+   }
+
+   for (unsigned i = 0; i < 4; i++) {
+      unsigned this_mask = (1U << i);
+      ir_to_mesa_src_reg src0 = src;
+
+      if ((done_mask & this_mask) != 0)
+	 continue;
+
+      /* The source swizzle specified which component of the source generates
+       * sine / cosine for the current component in the destination.  The SCS
+       * instruction requires that this value be swizzle to the X component.
+       * Replace the current swizzle with a swizzle that puts the source in
+       * the X component.
+       */
+      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
+
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+				   src0_swiz, src0_swiz);
+      for (unsigned j = i + 1; j < 4; j++) {
+	 /* If there is another enabled component in the destination that is
+	  * derived from the same inputs, generate its value on this pass as
+	  * well.
+	  */
+	 if (!(done_mask & (1 << j)) &&
+	     GET_SWZ(src0.swizzle, j) == src0_swiz) {
+	    this_mask |= (1 << j);
+	 }
+      }
+
+      if (this_mask != scs_mask) {
+	 ir_to_mesa_instruction *inst;
+	 ir_to_mesa_dst_reg tmp_dst = ir_to_mesa_dst_reg_from_src(tmp);
+
+	 /* Emit the SCS instruction.
+	  */
+	 inst = ir_to_mesa_emit_op1(ir, OPCODE_SCS, tmp_dst, src0);
+	 inst->dst_reg.writemask = scs_mask;
+
+	 /* Move the result of the SCS instruction to the desired location in
+	  * the destination.
+	  */
+	 tmp.swizzle = MAKE_SWIZZLE4(component, component,
+				     component, component);
+	 inst = ir_to_mesa_emit_op1(ir, OPCODE_SCS, dst, tmp);
+	 inst->dst_reg.writemask = this_mask;
+      } else {
+	 /* Emit the SCS instruction to write directly to the destination.
+	  */
+	 ir_to_mesa_instruction *inst =
+	    ir_to_mesa_emit_op1(ir, OPCODE_SCS, dst, src0);
+	 inst->dst_reg.writemask = scs_mask;
+      }
+
+      done_mask |= this_mask;
+   }
+}
+
 struct ir_to_mesa_src_reg
 ir_to_mesa_visitor::src_reg_for_float(float val)
 {
@@ -942,6 +1046,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
    case ir_unop_cos:
       ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
       break;
+   case ir_unop_sin_reduced:
+      emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos_reduced:
+      emit_scs(ir, OPCODE_COS, result_dst, op[0]);
+      break;
 
    case ir_unop_dFdx:
       ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
-- 
cgit v1.2.3


From fc92e87b9757eda01caf0bb3e2c31b1dbbd73aa0 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 10 Nov 2010 16:33:10 -0800
Subject: glsl: Eliminate assumptions about size of ir_expression::operands

This may grow in the near future.
---
 src/glsl/ir_clone.cpp                | 3 ++-
 src/glsl/ir_constant_expression.cpp  | 2 +-
 src/glsl/ir_print_visitor.cpp        | 7 +++----
 src/glsl/lower_mat_op_to_vec.cpp     | 2 ++
 src/glsl/opt_algebraic.cpp           | 1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp | 3 +++
 src/mesa/program/ir_to_mesa.cpp      | 2 +-
 7 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index a3cc8dbc97..4032647c3a 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -22,6 +22,7 @@
  */
 
 #include <string.h>
+#include "main/compiler.h"
 #include "ir.h"
 #include "glsl_types.h"
 extern "C" {
@@ -160,7 +161,7 @@ ir_call::clone(void *mem_ctx, struct hash_table *ht) const
 ir_expression *
 ir_expression::clone(void *mem_ctx, struct hash_table *ht) const
 {
-   ir_rvalue *op[2] = {NULL, NULL};
+   ir_rvalue *op[Elements(this->operands)] = { NULL, };
    unsigned int i;
 
    for (i = 0; i < get_num_operands(); i++) {
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index 45860b279f..1fe1505047 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -57,7 +57,7 @@ ir_expression::constant_expression_value()
    if (this->type->is_error())
       return NULL;
 
-   ir_constant *op[2] = { NULL, NULL };
+   ir_constant *op[Elements(this->operands)] = { NULL, };
    ir_constant_data data;
 
    memset(&data, 0, sizeof(data));
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 5c19db1326..e5067bfdad 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -182,11 +182,10 @@ void ir_print_visitor::visit(ir_expression *ir)
 
    printf(" %s ", ir->operator_string());
 
-   if (ir->operands[0])
-      ir->operands[0]->accept(this);
+   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
+      ir->operands[i]->accept(this);
+   }
 
-   if (ir->operands[1])
-      ir->operands[1]->accept(this);
    printf(") ");
 }
 
diff --git a/src/glsl/lower_mat_op_to_vec.cpp b/src/glsl/lower_mat_op_to_vec.cpp
index 4965df8976..7065fdec35 100644
--- a/src/glsl/lower_mat_op_to_vec.cpp
+++ b/src/glsl/lower_mat_op_to_vec.cpp
@@ -366,6 +366,8 @@ ir_mat_op_to_vec_visitor::visit_leave(ir_assignment *orig_assign)
    if (!has_matrix_operand(orig_expr, matrix_columns))
       return visit_continue;
 
+   assert(orig_expr->get_num_operands() <= 2);
+
    mem_ctx = talloc_parent(orig_assign);
 
    ir_dereference_variable *lhs_deref =
diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
index 88b6c485d3..9a8080bff3 100644
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -181,6 +181,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
    ir_expression *temp;
    unsigned int i;
 
+   assert(ir->get_num_operands() <= 2);
    for (i = 0; i < ir->get_num_operands(); i++) {
       if (ir->operands[i]->type->is_matrix())
 	 return ir;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 164f89eace..105327c7fe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -708,6 +708,7 @@ fs_visitor::visit(ir_expression *ir)
    fs_reg op[2], temp;
    fs_inst *inst;
 
+   assert(ir->get_num_operands() <= 2);
    for (operand = 0; operand < ir->get_num_operands(); operand++) {
       ir->operands[operand]->accept(this);
       if (this->result.file == BAD_FILE) {
@@ -1387,6 +1388,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
       fs_reg op[2];
       fs_inst *inst;
 
+      assert(expr->get_num_operands() <= 2);
       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 	 assert(expr->operands[i]->type->is_scalar());
 
@@ -1494,6 +1496,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)
       fs_inst *inst;
       fs_reg temp;
 
+      assert(expr->get_num_operands() <= 2);
       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 	 assert(expr->operands[i]->type->is_scalar());
 
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index ef9f692f94..1b5337e92e 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -961,7 +961,7 @@ void
 ir_to_mesa_visitor::visit(ir_expression *ir)
 {
    unsigned int operand;
-   struct ir_to_mesa_src_reg op[2];
+   struct ir_to_mesa_src_reg op[Elements(ir->operands)];
    struct ir_to_mesa_src_reg result_src;
    struct ir_to_mesa_dst_reg result_dst;
 
-- 
cgit v1.2.3


From 11d6f1c69871d0b7edc28f639256460839fccd2d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 16 Nov 2010 12:01:42 -0800
Subject: glsl: Add ir_quadop_vector expression

The vector operator collects 2, 3, or 4 scalar components into a
vector.  Doing this has several advantages.  First, it will make
ud-chain tracking for components of vectors much easier.  Second, a
later optimization pass could collect scalars into vectors to allow
generation of SWZ instructions (or similar as operands to other
instructions on R200 and i915).  It also enables an easy way to
generate IR for SWZ instructions in the ARB_vertex_program assembler.
---
 src/glsl/Makefile                    |   1 +
 src/glsl/ir.cpp                      |  25 +++-
 src/glsl/ir.h                        |  14 ++-
 src/glsl/ir_clone.cpp                |   3 +-
 src/glsl/ir_constant_expression.cpp  |  18 +++
 src/glsl/ir_optimization.h           |   1 +
 src/glsl/ir_validate.cpp             |  45 +++++++
 src/glsl/lower_vector.cpp            | 224 +++++++++++++++++++++++++++++++++++
 src/glsl/opt_algebraic.cpp           |   2 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp |   1 +
 src/mesa/program/ir_to_mesa.cpp      | 132 +++++++++++++++++++++
 11 files changed, 460 insertions(+), 6 deletions(-)
 create mode 100644 src/glsl/lower_vector.cpp

(limited to 'src/mesa/program')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 62984f81c0..ea4d6be5a4 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -64,6 +64,7 @@ CXX_SOURCES = \
 	lower_variable_index_to_cond_assign.cpp \
 	lower_vec_index_to_cond_assign.cpp \
 	lower_vec_index_to_swizzle.cpp \
+	lower_vector.cpp \
 	opt_algebraic.cpp \
 	opt_constant_folding.cpp \
 	opt_constant_propagation.cpp \
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 1f5e2ebdcb..741e3cb177 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -200,18 +200,35 @@ ir_expression::ir_expression(int op, const struct glsl_type *type,
    this->operation = ir_expression_operation(op);
    this->operands[0] = op0;
    this->operands[1] = NULL;
+   this->operands[2] = NULL;
+   this->operands[3] = NULL;
 }
 
 ir_expression::ir_expression(int op, const struct glsl_type *type,
 			     ir_rvalue *op0, ir_rvalue *op1)
 {
-   assert((op1 == NULL) && (get_num_operands(ir_expression_operation(op)) == 1)
+   assert(((op1 == NULL) && (get_num_operands(ir_expression_operation(op)) == 1))
 	  || (get_num_operands(ir_expression_operation(op)) == 2));
    this->ir_type = ir_type_expression;
    this->type = type;
    this->operation = ir_expression_operation(op);
    this->operands[0] = op0;
    this->operands[1] = op1;
+   this->operands[2] = NULL;
+   this->operands[3] = NULL;
+}
+
+ir_expression::ir_expression(int op, const struct glsl_type *type,
+			     ir_rvalue *op0, ir_rvalue *op1,
+			     ir_rvalue *op2, ir_rvalue *op3)
+{
+   this->ir_type = ir_type_expression;
+   this->type = type;
+   this->operation = ir_expression_operation(op);
+   this->operands[0] = op0;
+   this->operands[1] = op1;
+   this->operands[2] = op2;
+   this->operands[3] = op3;
 }
 
 unsigned int
@@ -225,6 +242,9 @@ ir_expression::get_num_operands(ir_expression_operation op)
    if (op <= ir_last_binop)
       return 2;
 
+   if (op == ir_quadop_vector)
+      return 4;
+
    assert(false);
    return 0;
 }
@@ -287,12 +307,13 @@ static const char *const operator_strs[] = {
    "min",
    "max",
    "pow",
+   "vector",
 };
 
 const char *ir_expression::operator_string(ir_expression_operation op)
 {
    assert((unsigned int) op < Elements(operator_strs));
-   assert(Elements(operator_strs) == (ir_binop_pow + 1));
+   assert(Elements(operator_strs) == (ir_quadop_vector + 1));
    return operator_strs[op];
 }
 
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 99fdaa3b09..be0da07b3b 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -33,6 +33,7 @@ extern "C" {
 #include <talloc.h>
 }
 
+#include "glsl_types.h"
 #include "list.h"
 #include "ir_visitor.h"
 #include "ir_hierarchical_visitor.h"
@@ -824,6 +825,8 @@ enum ir_expression_operation {
     */
    ir_last_binop = ir_binop_pow,
 
+   ir_quadop_vector,
+
    /**
     * A sentinel marking the last of all operations.
     */
@@ -843,6 +846,12 @@ public:
    ir_expression(int op, const struct glsl_type *type,
 		 ir_rvalue *, ir_rvalue *);
 
+   /**
+    * Constructor for quad operator expressions
+    */
+   ir_expression(int op, const struct glsl_type *type,
+		 ir_rvalue *, ir_rvalue *, ir_rvalue *, ir_rvalue *);
+
    virtual ir_expression *as_expression()
    {
       return this;
@@ -868,7 +877,8 @@ public:
     */
    unsigned int get_num_operands() const
    {
-      return get_num_operands(operation);
+      return (this->operation == ir_quadop_vector)
+	 ? this->type->vector_elements : get_num_operands(operation);
    }
 
    /**
@@ -895,7 +905,7 @@ public:
    virtual ir_visitor_status accept(ir_hierarchical_visitor *);
 
    ir_expression_operation operation;
-   ir_rvalue *operands[2];
+   ir_rvalue *operands[4];
 };
 
 
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index 4032647c3a..325f606615 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -168,7 +168,8 @@ ir_expression::clone(void *mem_ctx, struct hash_table *ht) const
       op[i] = this->operands[i]->clone(mem_ctx, ht);
    }
 
-   return new(mem_ctx) ir_expression(this->operation, this->type, op[0], op[1]);
+   return new(mem_ctx) ir_expression(this->operation, this->type,
+				     op[0], op[1], op[2], op[3]);
 }
 
 ir_dereference_variable *
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index 1fe1505047..4fd6d09a3a 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -788,6 +788,24 @@ ir_expression::constant_expression_value()
       }
       break;
 
+   case ir_quadop_vector:
+      for (unsigned c = 0; c < this->type->vector_elements; c++) {
+	 switch (this->type->base_type) {
+	 case GLSL_TYPE_INT:
+	    data.i[c] = op[c]->value.i[0];
+	    break;
+	 case GLSL_TYPE_UINT:
+	    data.u[c] = op[c]->value.u[0];
+	    break;
+	 case GLSL_TYPE_FLOAT:
+	    data.f[c] = op[c]->value.f[0];
+	    break;
+	 default:
+	    assert(0);
+	 }
+      }
+      break;
+
    default:
       /* FINISHME: Should handle all expression types. */
       return NULL;
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index ffdc66b9f7..1f8da16bcb 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -59,4 +59,5 @@ bool do_vec_index_to_swizzle(exec_list *instructions);
 bool lower_noise(exec_list *instructions);
 bool lower_variable_index_to_cond_assign(exec_list *instructions,
     bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
+bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
 bool optimize_redundant_jumps(exec_list *instructions);
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 2a066c1a27..5b055f64d3 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -374,6 +374,51 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[0]->type->is_vector());
       assert(ir->operands[0]->type == ir->operands[1]->type);
       break;
+
+   case ir_quadop_vector:
+      /* The vector operator collects some number of scalars and generates a
+       * vector from them.
+       *
+       *  - All of the operands must be scalar.
+       *  - Number of operands must matche the size of the resulting vector.
+       *  - Base type of the operands must match the base type of the result.
+       */
+      assert(ir->type->is_vector());
+      switch (ir->type->vector_elements) {
+      case 2:
+	 assert(ir->operands[0]->type->is_scalar());
+	 assert(ir->operands[0]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[1]->type->is_scalar());
+	 assert(ir->operands[1]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[2] == NULL);
+	 assert(ir->operands[3] == NULL);
+	 break;
+      case 3:
+	 assert(ir->operands[0]->type->is_scalar());
+	 assert(ir->operands[0]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[1]->type->is_scalar());
+	 assert(ir->operands[1]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[2]->type->is_scalar());
+	 assert(ir->operands[2]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[3] == NULL);
+	 break;
+      case 4:
+	 assert(ir->operands[0]->type->is_scalar());
+	 assert(ir->operands[0]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[1]->type->is_scalar());
+	 assert(ir->operands[1]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[2]->type->is_scalar());
+	 assert(ir->operands[2]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[3]->type->is_scalar());
+	 assert(ir->operands[3]->type->base_type == ir->type->base_type);
+	 break;
+      default:
+	 /* The is_vector assertion above should prevent execution from ever
+	  * getting here.
+	  */
+	 assert(!"Should not get here.");
+	 break;
+      }
    }
 
    return visit_continue;
diff --git a/src/glsl/lower_vector.cpp b/src/glsl/lower_vector.cpp
new file mode 100644
index 0000000000..ae50120100
--- /dev/null
+++ b/src/glsl/lower_vector.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_vector.cpp
+ * IR lowering pass to remove some types of ir_quadop_vector
+ *
+ * \author Ian Romanick <ian.d.romanick@intel.com>
+ */
+
+#include "ir.h"
+#include "ir_rvalue_visitor.h"
+
+class lower_vector_visitor : public ir_rvalue_visitor {
+public:
+   lower_vector_visitor() : progress(false)
+   {
+      /* empty */
+   }
+
+   void handle_rvalue(ir_rvalue **rvalue);
+
+   /**
+    * Should SWZ-like expressions be lowered?
+    */
+   bool dont_lower_swz;
+
+   bool progress;
+};
+
+/**
+ * Determine if an IR expression tree looks like an extended swizzle
+ *
+ * Extended swizzles consist of access of a single vector source (with possible
+ * per component negation) and the constants -1, 0, or 1.
+ */
+bool
+is_extended_swizzle(ir_expression *ir)
+{
+   /* Track any variables that are accessed by this expression.
+    */
+   ir_variable *var = NULL;
+
+   assert(ir->operation == ir_quadop_vector);
+
+   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
+      ir_rvalue *op = ir->operands[i];
+
+      while (op != NULL) {
+	 switch (op->ir_type) {
+	 case ir_type_constant: {
+	    const ir_constant *const c = op->as_constant();
+
+	    if (!c->is_one() && !c->is_zero() && !c->is_negative_one())
+	       return false;
+
+	    op = NULL;
+	    break;
+	 }
+
+	 case ir_type_dereference_variable: {
+	    ir_dereference_variable *const d = (ir_dereference_variable *) op;
+
+	    if ((var != NULL) && (var != d->var))
+	       return false;
+
+	    var = d->var;
+	    op = NULL;
+	    break;
+	 }
+
+	 case ir_type_expression: {
+	    ir_expression *const ex = (ir_expression *) op;
+
+	    if (ex->operation != ir_unop_neg)
+	       return false;
+
+	    op = ex->operands[0];
+	    break;
+	 }
+
+	 case ir_type_swizzle:
+	    op = ((ir_swizzle *) op)->val;
+	    break;
+
+	 default:
+	    return false;
+	 }
+      }
+   }
+
+   return true;
+}
+
+void
+lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   if (!*rvalue)
+      return;
+
+   ir_expression *expr = (*rvalue)->as_expression();
+   if ((expr == NULL) || (expr->operation != ir_quadop_vector))
+      return;
+
+   if (this->dont_lower_swz && is_extended_swizzle(expr))
+      return;
+
+   /* FINISHME: Is this the right thing to use for the talloc context?
+    */
+   void *const mem_ctx = expr;
+
+   assert(expr->type->vector_elements == expr->get_num_operands());
+
+   /* Generate a temporary with the same type as the ir_quadop_operation.
+    */
+   ir_variable *const temp =
+      new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary);
+
+   this->base_ir->insert_before(temp);
+
+   /* Counter of the number of components collected so far.
+    */
+   unsigned assigned;
+
+   /* Write-mask in the destination that receives counted by 'assigned'.
+    */
+   unsigned write_mask;
+
+
+   /* Generate upto four assignments to that variable.  Try to group component
+    * assignments together:
+    *
+    * - All constant components can be assigned at once.
+    * - All assigments of components from a single variable with the same
+    *   unary operator can be assigned at once.
+    */
+   ir_constant_data d = { { 0 } };
+
+   assigned = 0;
+   write_mask = 0;
+   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
+      const ir_constant *const c = expr->operands[i]->as_constant();
+
+      if (c == NULL)
+	 continue;
+
+      switch (expr->type->base_type) {
+      case GLSL_TYPE_UINT:  d.u[assigned] = c->value.u[0]; break;
+      case GLSL_TYPE_INT:   d.i[assigned] = c->value.i[0]; break;
+      case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break;
+      case GLSL_TYPE_BOOL:  d.b[assigned] = c->value.b[0]; break;
+      defatul:              assert(!"Should not get here."); break;
+      }
+
+      write_mask |= (1U << i);
+      assigned++;
+   }
+
+   assert((write_mask == 0) == (assigned == 0));
+
+   /* If there were constant values, generate an assignment.
+    */
+   if (assigned > 0) {
+      ir_constant *const c =
+	 new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type,
+							  assigned, 0),
+				  &d);
+      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
+      ir_assignment *const assign =
+	 new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask);
+
+      this->base_ir->insert_before(assign);
+   }
+
+   /* FINISHME: This should try to coalesce assignments.
+    */
+   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
+      if (expr->operands[i]->ir_type == ir_type_constant)
+	 continue;
+
+      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
+      ir_assignment *const assign =
+	 new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i));
+
+      this->base_ir->insert_before(assign);
+      assigned++;
+   }
+
+   assert(assigned == expr->type->vector_elements);
+
+   *rvalue = new(mem_ctx) ir_dereference_variable(temp);
+   this->progress = true;
+}
+
+bool
+lower_quadop_vector(exec_list *instructions, bool dont_lower_swz)
+{
+   lower_vector_visitor v;
+
+   v.dont_lower_swz = dont_lower_swz;
+   visit_list_elements(&v, instructions);
+
+   return v.progress;
+}
diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
index 9a8080bff3..3c9af85f31 100644
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -394,7 +394,7 @@ ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
       return;
 
    ir_expression *expr = (*rvalue)->as_expression();
-   if (!expr)
+   if (!expr || expr->operation == ir_quadop_vector)
       return;
 
    *rvalue = handle_expression(expr);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 105327c7fe..e1cb94452d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -130,6 +130,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 						GL_TRUE, /* temp */
 						GL_TRUE /* uniform */
 						) || progress;
+	 progress = lower_quadop_vector(shader->ir, false) || progress;
       } while (progress);
 
       validate_ir_tree(shader->ir);
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 1b5337e92e..1cb8183042 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -289,6 +289,8 @@ public:
    GLboolean try_emit_mad(ir_expression *ir,
 			  int mul_operand);
 
+   void emit_swz(ir_expression *ir);
+
    bool process_move_condition(ir_rvalue *ir);
 
    void *mem_ctx;
@@ -957,6 +959,123 @@ ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
    (*num_reladdr)--;
 }
 
+void
+ir_to_mesa_visitor::emit_swz(ir_expression *ir)
+{
+   /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
+    * This means that each of the operands is either an immediate value of -1,
+    * 0, or 1, or is a component from one source register (possibly with
+    * negation).
+    */
+   uint8_t components[4] = { 0 };
+   bool negate[4] = { false };
+   ir_variable *var = NULL;
+
+   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
+      ir_rvalue *op = ir->operands[i];
+
+      assert(op->type->is_scalar());
+
+      while (op != NULL) {
+	 switch (op->ir_type) {
+	 case ir_type_constant: {
+
+	    assert(op->type->is_scalar());
+
+	    const ir_constant *const c = op->as_constant();
+	    if (c->is_one()) {
+	       components[i] = SWIZZLE_ONE;
+	    } else if (c->is_zero()) {
+	       components[i] = SWIZZLE_ZERO;
+	    } else if (c->is_negative_one()) {
+	       components[i] = SWIZZLE_ONE;
+	       negate[i] = true;
+	    } else {
+	       assert(!"SWZ constant must be 0.0 or 1.0.");
+	    }
+
+	    op = NULL;
+	    break;
+	 }
+
+	 case ir_type_dereference_variable: {
+	    ir_dereference_variable *const deref =
+	       (ir_dereference_variable *) op;
+
+	    assert((var == NULL) || (deref->var == var));
+	    components[i] = SWIZZLE_X;
+	    var = deref->var;
+	    op = NULL;
+	    break;
+	 }
+
+	 case ir_type_expression: {
+	    ir_expression *const expr = (ir_expression *) op;
+
+	    assert(expr->operation == ir_unop_neg);
+	    negate[i] = true;
+
+	    op = expr->operands[0];
+	    break;
+	 }
+
+	 case ir_type_swizzle: {
+	    ir_swizzle *const swiz = (ir_swizzle *) op;
+
+	    components[i] = swiz->mask.x;
+	    op = swiz->val;
+	    break;
+	 }
+
+	 default:
+	    assert(!"Should not get here.");
+	    return;
+	 }
+      }
+   }
+
+   assert(var != NULL);
+
+   ir_dereference_variable *const deref =
+      new(mem_ctx) ir_dereference_variable(var);
+
+   this->result.file = PROGRAM_UNDEFINED;
+   deref->accept(this);
+   if (this->result.file == PROGRAM_UNDEFINED) {
+      ir_print_visitor v;
+      printf("Failed to get tree for expression operand:\n");
+      deref->accept(&v);
+      exit(1);
+   }
+
+   ir_to_mesa_src_reg src;
+
+   src = this->result;
+   src.swizzle = MAKE_SWIZZLE4(components[0],
+			       components[1],
+			       components[2],
+			       components[3]);
+   src.negate = ((unsigned(negate[0]) << 0)
+		 | (unsigned(negate[1]) << 1)
+		 | (unsigned(negate[2]) << 2)
+		 | (unsigned(negate[3]) << 3));
+
+   /* Storage for our result.  Ideally for an assignment we'd be using the
+    * actual storage for the result here, instead.
+    */
+   const ir_to_mesa_src_reg result_src = get_temp(ir->type);
+   ir_to_mesa_dst_reg result_dst = ir_to_mesa_dst_reg_from_src(result_src);
+
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   ir_to_mesa_emit_op1(ir, OPCODE_SWZ, result_dst, src);
+   this->result = result_src;
+}
+
 void
 ir_to_mesa_visitor::visit(ir_expression *ir)
 {
@@ -974,6 +1093,11 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 	 return;
    }
 
+   if (ir->operation == ir_quadop_vector) {
+      this->emit_swz(ir);
+      return;
+   }
+
    for (operand = 0; operand < ir->get_num_operands(); operand++) {
       this->result.file = PROGRAM_UNDEFINED;
       ir->operands[operand]->accept(this);
@@ -1231,6 +1355,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
    case ir_unop_round_even:
       assert(!"GLSL 1.30 features unsupported");
       break;
+
+   case ir_quadop_vector:
+      /* This operation should have already been handled.
+       */
+      assert(!"Should not get here.");
+      break;
    }
 
    this->result = result_src;
@@ -2676,6 +2806,8 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
 	 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
 
+	 progress = lower_quadop_vector(ir, true) || progress;
+
 	 if (options->EmitNoIfs)
 	    progress = do_if_to_cond_assign(ir) || progress;
 
-- 
cgit v1.2.3


From 63684a9ae7a66f68df1f2c68cd9358e5622122a3 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 18 Nov 2010 17:54:07 -0800
Subject: glsl: Combine many instruction lowering passes into one.

This should save on the overhead of tree-walking and provide a
convenient place to add more instruction lowering in the future.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/glsl/Makefile                    |   5 +-
 src/glsl/SConscript                  |   5 +-
 src/glsl/glsl_parser_extras.cpp      |   2 +-
 src/glsl/ir_optimization.h           |  10 +-
 src/glsl/lower_div_to_mul_rcp.cpp    | 114 ---------------
 src/glsl/lower_explog_to_explog2.cpp |  85 ------------
 src/glsl/lower_instructions.cpp      | 262 +++++++++++++++++++++++++++++++++++
 src/glsl/lower_mod_to_fract.cpp      |  90 ------------
 src/glsl/lower_sub_to_add_neg.cpp    |  76 ----------
 src/mesa/drivers/dri/i965/brw_fs.cpp |  10 +-
 src/mesa/program/ir_to_mesa.cpp      |   5 +-
 11 files changed, 281 insertions(+), 383 deletions(-)
 delete mode 100644 src/glsl/lower_div_to_mul_rcp.cpp
 delete mode 100644 src/glsl/lower_explog_to_explog2.cpp
 create mode 100644 src/glsl/lower_instructions.cpp
 delete mode 100644 src/glsl/lower_mod_to_fract.cpp
 delete mode 100644 src/glsl/lower_sub_to_add_neg.cpp

(limited to 'src/mesa/program')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index ea4d6be5a4..f5aadc347b 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -52,14 +52,11 @@ CXX_SOURCES = \
 	loop_analysis.cpp \
 	loop_controls.cpp \
 	loop_unroll.cpp \
-	lower_div_to_mul_rcp.cpp \
-	lower_explog_to_explog2.cpp \
 	lower_if_to_cond_assign.cpp \
+	lower_instructions.cpp \
 	lower_jumps.cpp \
 	lower_mat_op_to_vec.cpp \
-	lower_mod_to_fract.cpp \
 	lower_noise.cpp \
-	lower_sub_to_add_neg.cpp \
 	lower_texture_projection.cpp \
 	lower_variable_index_to_cond_assign.cpp \
 	lower_vec_index_to_cond_assign.cpp \
diff --git a/src/glsl/SConscript b/src/glsl/SConscript
index 4cb2d8bcb3..f708635e53 100644
--- a/src/glsl/SConscript
+++ b/src/glsl/SConscript
@@ -49,14 +49,11 @@ sources = [
     'loop_analysis.cpp',
     'loop_controls.cpp',
     'loop_unroll.cpp',
-    'lower_div_to_mul_rcp.cpp',
-    'lower_explog_to_explog2.cpp',
     'lower_if_to_cond_assign.cpp',
+    'lower_instructions.cpp',
     'lower_jumps.cpp',
     'lower_mat_op_to_vec.cpp',
-    'lower_mod_to_fract.cpp',
     'lower_noise.cpp',
-    'lower_sub_to_add_neg.cpp',
     'lower_variable_index_to_cond_assign.cpp',
     'lower_vec_index_to_cond_assign.cpp',
     'lower_vec_index_to_swizzle.cpp',
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index b2c378dd85..302cfbc566 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -708,7 +708,7 @@ do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iteration
 {
    GLboolean progress = GL_FALSE;
 
-   progress = do_sub_to_add_neg(ir) || progress;
+   progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress;
 
    if (linked) {
       progress = do_function_inlining(ir) || progress;
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 1f8da16bcb..fa497a4555 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -28,6 +28,13 @@
  * Prototypes for optimization passes to be called by the compiler and drivers.
  */
 
+/* Operations for lower_instructions() */
+#define SUB_TO_ADD_NEG 0x01
+#define DIV_TO_MUL_RCP 0x02
+#define EXP_TO_EXP2    0x04
+#define LOG_TO_LOG2    0x08
+#define MOD_TO_FRACT   0x10
+
 bool do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iterations);
 
 bool do_algebraic(exec_list *instructions);
@@ -40,8 +47,6 @@ bool do_dead_code(exec_list *instructions);
 bool do_dead_code_local(exec_list *instructions);
 bool do_dead_code_unlinked(exec_list *instructions);
 bool do_dead_functions(exec_list *instructions);
-bool do_div_to_mul_rcp(exec_list *instructions);
-bool do_explog_to_explog2(exec_list *instructions);
 bool do_function_inlining(exec_list *instructions);
 bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false);
 bool do_lower_texture_projection(exec_list *instructions);
@@ -56,6 +61,7 @@ bool do_swizzle_swizzle(exec_list *instructions);
 bool do_tree_grafting(exec_list *instructions);
 bool do_vec_index_to_cond_assign(exec_list *instructions);
 bool do_vec_index_to_swizzle(exec_list *instructions);
+bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
 bool lower_noise(exec_list *instructions);
 bool lower_variable_index_to_cond_assign(exec_list *instructions,
     bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
diff --git a/src/glsl/lower_div_to_mul_rcp.cpp b/src/glsl/lower_div_to_mul_rcp.cpp
deleted file mode 100644
index db9d6854cd..0000000000
--- a/src/glsl/lower_div_to_mul_rcp.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file lower_div_to_mul_rcp.cpp
- *
- * Breaks an ir_unop_div expression down to op0 * (rcp(op1)).
- *
- * Many GPUs don't have a divide instruction (945 and 965 included),
- * but they do have an RCP instruction to compute an approximate
- * reciprocal.  By breaking the operation down, constant reciprocals
- * can get constant folded.
- */
-
-#include "ir.h"
-#include "glsl_types.h"
-
-class ir_div_to_mul_rcp_visitor : public ir_hierarchical_visitor {
-public:
-   ir_div_to_mul_rcp_visitor()
-   {
-      this->made_progress = false;
-   }
-
-   ir_visitor_status visit_leave(ir_expression *);
-
-   bool made_progress;
-};
-
-bool
-do_div_to_mul_rcp(exec_list *instructions)
-{
-   ir_div_to_mul_rcp_visitor v;
-
-   visit_list_elements(&v, instructions);
-   return v.made_progress;
-}
-
-ir_visitor_status
-ir_div_to_mul_rcp_visitor::visit_leave(ir_expression *ir)
-{
-   if (ir->operation != ir_binop_div)
-      return visit_continue;
-
-   if (!ir->operands[1]->type->is_integer()) {
-      /* New expression for the 1.0 / op1 */
-      ir_rvalue *expr;
-      expr = new(ir) ir_expression(ir_unop_rcp,
-				   ir->operands[1]->type,
-				   ir->operands[1],
-				   NULL);
-
-      /* op0 / op1 -> op0 * (1.0 / op1) */
-      ir->operation = ir_binop_mul;
-      ir->operands[1] = expr;
-   } else {
-      /* Be careful with integer division -- we need to do it as a
-       * float and re-truncate, since rcp(n > 1) of an integer would
-       * just be 0.
-       */
-      ir_rvalue *op0, *op1;
-      const struct glsl_type *vec_type;
-
-      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
-					 ir->operands[1]->type->vector_elements,
-					 ir->operands[1]->type->matrix_columns);
-
-      if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
-	 op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
-      else
-	 op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
-
-      op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
-
-      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
-					 ir->operands[0]->type->vector_elements,
-					 ir->operands[0]->type->matrix_columns);
-
-      if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
-	 op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
-      else
-	 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
-
-      op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
-
-      ir->operation = ir_unop_f2i;
-      ir->operands[0] = op0;
-      ir->operands[1] = NULL;
-   }
-
-   this->made_progress = true;
-
-   return visit_continue;
-}
diff --git a/src/glsl/lower_explog_to_explog2.cpp b/src/glsl/lower_explog_to_explog2.cpp
deleted file mode 100644
index 8ad0262544..0000000000
--- a/src/glsl/lower_explog_to_explog2.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file lower_explog_to_explog2.cpp
- *
- * Many GPUs don't have a base e log or exponent instruction, but they
- * do have base 2 versions, so this pass converts exp and log to exp2
- * and log2 operations.
- */
-
-#include "main/core.h" /* for log2f on MSVC */
-#include "ir.h"
-#include "glsl_types.h"
-
-class ir_explog_to_explog2_visitor : public ir_hierarchical_visitor {
-public:
-   ir_explog_to_explog2_visitor()
-   {
-      this->progress = false;
-   }
-
-   ir_visitor_status visit_leave(ir_expression *);
-
-   bool progress;
-};
-
-bool
-do_explog_to_explog2(exec_list *instructions)
-{
-   ir_explog_to_explog2_visitor v;
-
-   visit_list_elements(&v, instructions);
-   return v.progress;
-}
-
-ir_visitor_status
-ir_explog_to_explog2_visitor::visit_leave(ir_expression *ir)
-{
-   if (ir->operation == ir_unop_exp) {
-      void *mem_ctx = talloc_parent(ir);
-      ir_constant *log2_e = new(mem_ctx) ir_constant(log2f(M_E));
-
-      ir->operation = ir_unop_exp2;
-      ir->operands[0] = new(mem_ctx) ir_expression(ir_binop_mul,
-						   ir->operands[0]->type,
-						   ir->operands[0],
-						   log2_e);
-      this->progress = true;
-   }
-
-   if (ir->operation == ir_unop_log) {
-      void *mem_ctx = talloc_parent(ir);
-
-      ir->operation = ir_binop_mul;
-      ir->operands[0] = new(mem_ctx) ir_expression(ir_unop_log2,
-						   ir->operands[0]->type,
-						   ir->operands[0],
-						   NULL);
-      ir->operands[1] = new(mem_ctx) ir_constant(1.0f / log2f(M_E));
-      this->progress = true;
-   }
-
-   return visit_continue;
-}
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
new file mode 100644
index 0000000000..d460ba1a97
--- /dev/null
+++ b/src/glsl/lower_instructions.cpp
@@ -0,0 +1,262 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_instructions.cpp
+ *
+ * Many GPUs lack native instructions for certain expression operations, and
+ * must replace them with some other expression tree.  This pass lowers some
+ * of the most common cases, allowing the lowering code to be implemented once
+ * rather than in each driver backend.
+ *
+ * Currently supported transformations:
+ * - SUB_TO_ADD_NEG
+ * - DIV_TO_MUL_RCP
+ * - EXP_TO_EXP2
+ * - LOG_TO_LOG2
+ * - MOD_TO_FRACT
+ *
+ * SUB_TO_ADD_NEG:
+ * ---------------
+ * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
+ *
+ * This simplifies expression reassociation, and for many backends
+ * there is no subtract operation separate from adding the negation.
+ * For backends with native subtract operations, they will probably
+ * want to recognize add(op0, neg(op1)) or the other way around to
+ * produce a subtract anyway.
+ *
+ * DIV_TO_MUL_RCP:
+ * ---------------
+ * Breaks an ir_unop_div expression down to op0 * (rcp(op1)).
+ *
+ * Many GPUs don't have a divide instruction (945 and 965 included),
+ * but they do have an RCP instruction to compute an approximate
+ * reciprocal.  By breaking the operation down, constant reciprocals
+ * can get constant folded.
+ *
+ * EXP_TO_EXP2 and LOG_TO_LOG2:
+ * ----------------------------
+ * Many GPUs don't have a base e log or exponent instruction, but they
+ * do have base 2 versions, so this pass converts exp and log to exp2
+ * and log2 operations.
+ *
+ * MOD_TO_FRACT:
+ * -------------
+ * Breaks an ir_unop_mod expression down to (op1 * fract(op0 / op1))
+ *
+ * Many GPUs don't have a MOD instruction (945 and 965 included), and
+ * if we have to break it down like this anyway, it gives an
+ * opportunity to do things like constant fold the (1.0 / op1) easily.
+ */
+
+#include "main/core.h" /* for M_E */
+#include "glsl_types.h"
+#include "ir.h"
+#include "ir_optimization.h"
+
+class lower_instructions_visitor : public ir_hierarchical_visitor {
+public:
+   lower_instructions_visitor(unsigned lower)
+      : progress(false), lower(lower) { }
+
+   ir_visitor_status visit_leave(ir_expression *);
+
+   bool progress;
+
+private:
+   unsigned lower; /** Bitfield of which operations to lower */
+
+   void sub_to_add_neg(ir_expression *);
+   void div_to_mul_rcp(ir_expression *);
+   void mod_to_fract(ir_expression *);
+   void exp_to_exp2(ir_expression *);
+   void log_to_log2(ir_expression *);
+};
+
+/**
+ * Determine if a particular type of lowering should occur
+ */
+#define lowering(x) (this->lower & x)
+
+bool
+lower_instructions(exec_list *instructions, unsigned what_to_lower)
+{
+   lower_instructions_visitor v(what_to_lower);
+
+   visit_list_elements(&v, instructions);
+   return v.progress;
+}
+
+void
+lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
+{
+   ir->operation = ir_binop_add;
+   ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
+					   ir->operands[1], NULL);
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
+{
+   if (!ir->operands[1]->type->is_integer()) {
+      /* New expression for the 1.0 / op1 */
+      ir_rvalue *expr;
+      expr = new(ir) ir_expression(ir_unop_rcp,
+				   ir->operands[1]->type,
+				   ir->operands[1],
+				   NULL);
+
+      /* op0 / op1 -> op0 * (1.0 / op1) */
+      ir->operation = ir_binop_mul;
+      ir->operands[1] = expr;
+   } else {
+      /* Be careful with integer division -- we need to do it as a
+       * float and re-truncate, since rcp(n > 1) of an integer would
+       * just be 0.
+       */
+      ir_rvalue *op0, *op1;
+      const struct glsl_type *vec_type;
+
+      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+					 ir->operands[1]->type->vector_elements,
+					 ir->operands[1]->type->matrix_columns);
+
+      if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
+	 op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
+      else
+	 op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
+
+      op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
+
+      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+					 ir->operands[0]->type->vector_elements,
+					 ir->operands[0]->type->matrix_columns);
+
+      if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
+	 op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
+      else
+	 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
+
+      op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
+
+      ir->operation = ir_unop_f2i;
+      ir->operands[0] = op0;
+      ir->operands[1] = NULL;
+   }
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
+{
+   ir_constant *log2_e = new(ir) ir_constant(log2f(M_E));
+
+   ir->operation = ir_unop_exp2;
+   ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
+					   ir->operands[0], log2_e);
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::log_to_log2(ir_expression *ir)
+{
+   ir->operation = ir_binop_mul;
+   ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
+					   ir->operands[0], NULL);
+   ir->operands[1] = new(ir) ir_constant(1.0f / log2f(M_E));
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::mod_to_fract(ir_expression *ir)
+{
+   ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
+					   ir_var_temporary);
+   this->base_ir->insert_before(temp);
+
+   ir_assignment *const assign =
+      new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
+			    ir->operands[1], NULL);
+
+   this->base_ir->insert_before(assign);
+
+   ir_expression *const div_expr =
+      new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
+			    ir->operands[0],
+			    new(ir) ir_dereference_variable(temp));
+
+   /* Don't generate new IR that would need to be lowered in an additional
+    * pass.
+    */
+   if (lowering(DIV_TO_MUL_RCP))
+      div_to_mul_rcp(div_expr);
+
+   ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
+					   ir->operands[0]->type,
+					   div_expr,
+					   NULL);
+
+   ir->operation = ir_binop_mul;
+   ir->operands[0] = new(ir) ir_dereference_variable(temp);
+   ir->operands[1] = expr;
+   this->progress = true;
+}
+
+ir_visitor_status
+lower_instructions_visitor::visit_leave(ir_expression *ir)
+{
+   switch (ir->operation) {
+   case ir_binop_sub:
+      if (lowering(SUB_TO_ADD_NEG))
+	 sub_to_add_neg(ir);
+      break;
+
+   case ir_binop_div:
+      if (lowering(DIV_TO_MUL_RCP))
+	 div_to_mul_rcp(ir);
+      break;
+
+   case ir_unop_exp:
+      if (lowering(EXP_TO_EXP2))
+	 exp_to_exp2(ir);
+      break;
+
+   case ir_unop_log:
+      if (lowering(LOG_TO_LOG2))
+	 log_to_log2(ir);
+      break;
+
+   case ir_binop_mod:
+      if (lowering(MOD_TO_FRACT))
+	 mod_to_fract(ir);
+      break;
+
+   default:
+      return visit_continue;
+   }
+
+   return visit_continue;
+}
diff --git a/src/glsl/lower_mod_to_fract.cpp b/src/glsl/lower_mod_to_fract.cpp
deleted file mode 100644
index ff907ae97c..0000000000
--- a/src/glsl/lower_mod_to_fract.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file lower_mod_to_fract.cpp
- *
- * Breaks an ir_unop_mod expression down to (op1 * fract(op0 / op1))
- *
- * Many GPUs don't have a MOD instruction (945 and 965 included), and
- * if we have to break it down like this anyway, it gives an
- * opportunity to do things like constant fold the (1.0 / op1) easily.
- */
-
-#include "ir.h"
-
-class ir_mod_to_fract_visitor : public ir_hierarchical_visitor {
-public:
-   ir_mod_to_fract_visitor()
-   {
-      this->made_progress = false;
-   }
-
-   ir_visitor_status visit_leave(ir_expression *);
-
-   bool made_progress;
-};
-
-bool
-do_mod_to_fract(exec_list *instructions)
-{
-   ir_mod_to_fract_visitor v;
-
-   visit_list_elements(&v, instructions);
-   return v.made_progress;
-}
-
-ir_visitor_status
-ir_mod_to_fract_visitor::visit_leave(ir_expression *ir)
-{
-   if (ir->operation != ir_binop_mod)
-      return visit_continue;
-
-   ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
-					   ir_var_temporary);
-   this->base_ir->insert_before(temp);
-
-   ir_assignment *assign;
-   ir_rvalue *expr;
-
-   assign = new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
-				  ir->operands[1], NULL);
-   this->base_ir->insert_before(assign);
-
-   expr = new(ir) ir_expression(ir_binop_div,
-				ir->operands[0]->type,
-				ir->operands[0],
-				new(ir) ir_dereference_variable(temp));
-
-   expr = new(ir) ir_expression(ir_unop_fract,
-				ir->operands[0]->type,
-				expr,
-				NULL);
-
-   ir->operation = ir_binop_mul;
-   ir->operands[0] = new(ir) ir_dereference_variable(temp);
-   ir->operands[1] = expr;
-   this->made_progress = true;
-
-   return visit_continue;
-}
diff --git a/src/glsl/lower_sub_to_add_neg.cpp b/src/glsl/lower_sub_to_add_neg.cpp
deleted file mode 100644
index 9e4019709b..0000000000
--- a/src/glsl/lower_sub_to_add_neg.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file lower_sub_to_add_neg.cpp
- *
- * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
- *
- * This simplifies expression reassociation, and for many backends
- * there is no subtract operation separate from adding the negation.
- * For backends with native subtract operations, they will probably
- * want to recognize add(op0, neg(op1)) or the other way around to
- * produce a subtract anyway.
- */
-
-#include "ir.h"
-
-class ir_sub_to_add_neg_visitor : public ir_hierarchical_visitor {
-public:
-   ir_sub_to_add_neg_visitor()
-   {
-      this->progress = false;
-   }
-
-   ir_visitor_status visit_leave(ir_expression *);
-
-   bool progress;
-};
-
-bool
-do_sub_to_add_neg(exec_list *instructions)
-{
-   ir_sub_to_add_neg_visitor v;
-
-   visit_list_elements(&v, instructions);
-   return v.progress;
-}
-
-ir_visitor_status
-ir_sub_to_add_neg_visitor::visit_leave(ir_expression *ir)
-{
-   if (ir->operation != ir_binop_sub)
-      return visit_continue;
-
-   void *mem_ctx = talloc_parent(ir);
-
-   ir->operation = ir_binop_add;
-   ir->operands[1] = new(mem_ctx) ir_expression(ir_unop_neg,
-						ir->operands[1]->type,
-						ir->operands[1],
-						NULL);
-
-   this->progress = true;
-
-   return visit_continue;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e1cb94452d..610b7e3c3e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -101,10 +101,12 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
 
       do_mat_op_to_vec(shader->ir);
-      do_mod_to_fract(shader->ir);
-      do_div_to_mul_rcp(shader->ir);
-      do_sub_to_add_neg(shader->ir);
-      do_explog_to_explog2(shader->ir);
+      lower_instructions(shader->ir,
+			 MOD_TO_FRACT |
+			 DIV_TO_MUL_RCP |
+			 SUB_TO_ADD_NEG |
+			 EXP_TO_EXP2 |
+			 LOG_TO_LOG2);
       do_lower_texture_projection(shader->ir);
       brw_do_cubemap_normalize(shader->ir);
 
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 1cb8183042..c7a8594f72 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2798,9 +2798,8 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
 	 /* Lowering */
 	 do_mat_op_to_vec(ir);
-	 do_mod_to_fract(ir);
-	 do_div_to_mul_rcp(ir);
-	 do_explog_to_explog2(ir);
+	 lower_instructions(ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+			      | LOG_TO_LOG2);
 
 	 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
 
-- 
cgit v1.2.3


From ac89a90401f08df945248fcc96da59ba0e2bbfa9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 19 Nov 2010 18:50:05 +0800
Subject: ir_to_mesa: Detect and emit MOV_SATs for saturate constructs.

The goal here is to avoid regressing performance on ir_to_mesa drivers
for fixed function fragment shaders requiring saturates.
---
 src/mesa/program/ir_to_mesa.cpp | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index c7a8594f72..98da90d359 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -123,6 +123,7 @@ public:
    /** Pointer to the ir source this tree came from for debugging */
    ir_instruction *ir;
    GLboolean cond_update;
+   bool saturate;
    int sampler; /**< sampler index */
    int tex_target; /**< One of TEXTURE_*_INDEX */
    GLboolean tex_shadow;
@@ -288,6 +289,7 @@ public:
 
    GLboolean try_emit_mad(ir_expression *ir,
 			  int mul_operand);
+   GLboolean try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
 
@@ -939,6 +941,32 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    return true;
 }
 
+GLboolean
+ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
+{
+   /* Saturates were only introduced to vertex programs in
+    * NV_vertex_program3, so don't give them to drivers in the VP.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
+      return false;
+
+   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
+   if (!sat_src)
+      return false;
+
+   sat_src->accept(this);
+   ir_to_mesa_src_reg src = this->result;
+
+   this->result = get_temp(ir->type);
+   ir_to_mesa_instruction *inst;
+   inst = ir_to_mesa_emit_op1(ir, OPCODE_MOV,
+			      ir_to_mesa_dst_reg_from_src(this->result),
+			      src);
+   inst->saturate = true;
+
+   return true;
+}
+
 void
 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
 				    ir_to_mesa_src_reg *reg, int *num_reladdr)
@@ -1092,6 +1120,8 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       if (try_emit_mad(ir, 0))
 	 return;
    }
+   if (try_emit_sat(ir))
+      return;
 
    if (ir->operation == ir_quadop_vector) {
       this->emit_swz(ir);
@@ -2678,6 +2708,8 @@ get_mesa_program(struct gl_context *ctx, struct gl_shader_program *shader_progra
 
       mesa_inst->Opcode = inst->op;
       mesa_inst->CondUpdate = inst->cond_update;
+      if (inst->saturate)
+	 mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
       mesa_inst->DstReg.File = inst->dst_reg.file;
       mesa_inst->DstReg.Index = inst->dst_reg.index;
       mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask;
-- 
cgit v1.2.3


From 50fd99d1723a6c7f3bd2dedffeeadf7d5e33b83b Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 22 Nov 2010 15:06:47 -0700
Subject: glsl: fix off by one in register index assertion

---
 src/mesa/program/ir_to_mesa.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 98da90d359..4f263add7a 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2239,7 +2239,7 @@ mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
    struct prog_src_register mesa_reg;
 
    mesa_reg.File = reg.file;
-   assert(reg.index < (1 << INST_INDEX_BITS) - 1);
+   assert(reg.index < (1 << INST_INDEX_BITS));
    mesa_reg.Index = reg.index;
    mesa_reg.Swizzle = reg.swizzle;
    mesa_reg.RelAddr = reg.reladdr != NULL;
-- 
cgit v1.2.3


From caf974c5259f14b50257e8dd9b325a87378259af Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 22 Nov 2010 16:55:10 -0700
Subject: glsl: use gl_register_file in a few places

---
 src/mesa/program/ir_to_mesa.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 4f263add7a..c5f11e010d 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -65,7 +65,7 @@ static int swizzle_for_size(int size);
 typedef struct ir_to_mesa_src_reg {
    ir_to_mesa_src_reg(int file, int index, const glsl_type *type)
    {
-      this->file = file;
+      this->file = (gl_register_file) file;
       this->index = index;
       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
 	 this->swizzle = swizzle_for_size(type->vector_elements);
@@ -84,7 +84,7 @@ typedef struct ir_to_mesa_src_reg {
       this->reladdr = NULL;
    }
 
-   int file; /**< PROGRAM_* from Mesa */
+   gl_register_file file; /**< PROGRAM_* from Mesa */
    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
    int negate; /**< NEGATE_XYZW mask from mesa */
@@ -133,13 +133,13 @@ public:
 
 class variable_storage : public exec_node {
 public:
-   variable_storage(ir_variable *var, int file, int index)
+   variable_storage(ir_variable *var, gl_register_file file, int index)
       : file(file), index(index), var(var)
    {
       /* empty */
    }
 
-   int file;
+   gl_register_file file;
    int index;
    ir_variable *var; /* variable that maps to this, if any */
 };
-- 
cgit v1.2.3


From b8dacaf174517d3efa32711e0f85f8b861d0846b Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 23 Nov 2010 10:11:46 -0700
Subject: mesa: rename, make _mesa_register_file_name() non-static

Plus remove unused parameter.
---
 src/mesa/program/prog_print.c | 20 +++++++++-----------
 src/mesa/program/prog_print.h |  3 +++
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c
index 79c01020eb..abebf392c0 100644
--- a/src/mesa/program/prog_print.c
+++ b/src/mesa/program/prog_print.c
@@ -42,8 +42,8 @@
 /**
  * Return string name for given program/register file.
  */
-static const char *
-file_string(gl_register_file f, gl_prog_print_mode mode)
+const char *
+_mesa_register_file_name(gl_register_file f)
 {
    switch (f) {
    case PROGRAM_TEMPORARY:
@@ -275,7 +275,8 @@ reg_string(gl_register_file f, GLint index, gl_prog_print_mode mode,
 
    switch (mode) {
    case PROG_PRINT_DEBUG:
-      sprintf(str, "%s[%s%d]", file_string(f, mode), addr, index);
+      sprintf(str, "%s[%s%d]",
+              _mesa_register_file_name(f), addr, index);
       if (hasIndex2) {
          int offset = strlen(str);
          const char *addr2 = relAddr2 ? "ADDR+" : "";
@@ -497,7 +498,7 @@ fprint_dst_reg(FILE * f,
 
 #if 0
    fprintf(f, "%s[%d]%s",
-	   file_string((gl_register_file) dstReg->File, mode),
+	   _mesa_register_file_name((gl_register_file) dstReg->File),
 	   dstReg->Index,
 	   _mesa_writemask_string(dstReg->WriteMask));
 #endif
@@ -522,7 +523,7 @@ fprint_src_reg(FILE *f,
 	   abs);
 #if 0
    fprintf(f, "%s[%d]%s",
-	   file_string((gl_register_file) srcReg->File, mode),
+	   _mesa_register_file_name((gl_register_file) srcReg->File),
 	   srcReg->Index,
 	   _mesa_swizzle_string(srcReg->Swizzle,
 				srcReg->Negate, GL_FALSE));
@@ -615,8 +616,7 @@ _mesa_fprint_instruction_opt(FILE *f,
       if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
          fprintf(f, ", ");
          fprintf(f, "%s[%d]%s",
-		 file_string((gl_register_file) inst->SrcReg[0].File,
-			     mode),
+                 _mesa_register_file_name((gl_register_file) inst->SrcReg[0].File),
 		 inst->SrcReg[0].Index,
 		 _mesa_swizzle_string(inst->SrcReg[0].Swizzle,
 				      inst->SrcReg[0].Negate, GL_FALSE));
@@ -632,8 +632,7 @@ _mesa_fprint_instruction_opt(FILE *f,
       fprintf(f, " ");
       fprint_dst_reg(f, &inst->DstReg, mode, prog);
       fprintf(f, ", %s[%d], %s",
-	      file_string((gl_register_file) inst->SrcReg[0].File,
-			  mode),
+	      _mesa_register_file_name((gl_register_file) inst->SrcReg[0].File),
 	      inst->SrcReg[0].Index,
 	      _mesa_swizzle_string(inst->SrcReg[0].Swizzle,
 				   inst->SrcReg[0].Negate, GL_TRUE));
@@ -964,7 +963,6 @@ static void
 _mesa_fprint_parameter_list(FILE *f,
                             const struct gl_program_parameter_list *list)
 {
-   const gl_prog_print_mode mode = PROG_PRINT_DEBUG;
    GLuint i;
 
    if (!list)
@@ -978,7 +976,7 @@ _mesa_fprint_parameter_list(FILE *f,
       const GLfloat *v = list->ParameterValues[i];
       fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}",
 	      i, param->Size,
-	      file_string(list->Parameters[i].Type, mode),
+	      _mesa_register_file_name(list->Parameters[i].Type),
 	      param->Name, v[0], v[1], v[2], v[3]);
       if (param->Flags & PROG_PARAM_BIT_CENTROID)
          fprintf(f, " Centroid");
diff --git a/src/mesa/program/prog_print.h b/src/mesa/program/prog_print.h
index f080b3fd2e..d962087db3 100644
--- a/src/mesa/program/prog_print.h
+++ b/src/mesa/program/prog_print.h
@@ -47,6 +47,9 @@ typedef enum {
 } gl_prog_print_mode;
 
 
+extern const char *
+_mesa_register_file_name(gl_register_file f);
+
 extern void
 _mesa_print_vp_inputs(GLbitfield inputs);
 
-- 
cgit v1.2.3


From 512f840702d58b48607a9dca06dd939256c7afed Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 23 Nov 2010 10:12:55 -0700
Subject: mesa: _mesa_valid_register_index() to validate register indexes

---
 src/mesa/program/program.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++
 src/mesa/program/program.h |  6 +++
 2 files changed, 103 insertions(+)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 4cacde9aed..1e99e1e4c3 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -917,6 +917,103 @@ _mesa_find_free_register(const GLboolean used[],
 }
 
 
+
+/**
+ * Check if the given register index is valid (doesn't exceed implementation-
+ * dependent limits).
+ * \return GL_TRUE if OK, GL_FALSE if bad index
+ */
+GLboolean
+_mesa_valid_register_index(const struct gl_context *ctx,
+                           GLuint shaderType,
+                           gl_register_file file, GLint index)
+{
+   const struct gl_program_constants *c;
+
+   switch (shaderType) {
+   case MESA_SHADER_VERTEX:
+      c = &ctx->Const.VertexProgram;
+      break;
+   case MESA_SHADER_FRAGMENT:
+      c = &ctx->Const.FragmentProgram;
+      break;
+   case MESA_SHADER_GEOMETRY:
+      c = &ctx->Const.GeometryProgram;
+      break;
+   default:
+      _mesa_problem(ctx,
+                    "unexpected shader type in _mesa_valid_register_index()");
+      return GL_FALSE;
+   }
+
+   switch (file) {
+   case PROGRAM_UNDEFINED:
+      return GL_TRUE;  /* XXX or maybe false? */
+
+   case PROGRAM_TEMPORARY:
+      return index >= 0 && index < c->MaxTemps;
+
+   case PROGRAM_ENV_PARAM:
+      return index >= 0 && index < c->MaxEnvParams;
+
+   case PROGRAM_LOCAL_PARAM:
+      return index >= 0 && index < c->MaxLocalParams;
+
+   case PROGRAM_NAMED_PARAM:
+      return index >= 0 && index < c->MaxParameters;
+
+   case PROGRAM_UNIFORM:
+   case PROGRAM_STATE_VAR:
+      /* aka constant buffer */
+      return index >= 0 && index < c->MaxUniformComponents / 4;
+
+   case PROGRAM_CONSTANT:
+      /* constant buffer w/ possible relative negative addressing */
+      return (index > (int) c->MaxUniformComponents / -4 &&
+              index < c->MaxUniformComponents / 4);
+
+   case PROGRAM_INPUT:
+      if (index < 0)
+         return GL_FALSE;
+
+      switch (shaderType) {
+      case MESA_SHADER_VERTEX:
+         return index < VERT_ATTRIB_GENERIC0 + c->MaxAttribs;
+      case MESA_SHADER_FRAGMENT:
+         return index < FRAG_ATTRIB_VAR0 + ctx->Const.MaxVarying;
+      case MESA_SHADER_GEOMETRY:
+         return index < GEOM_ATTRIB_VAR0 + ctx->Const.MaxVarying;
+      default:
+         return GL_FALSE;
+      }
+
+   case PROGRAM_OUTPUT:
+      if (index < 0)
+         return GL_FALSE;
+
+      switch (shaderType) {
+      case MESA_SHADER_VERTEX:
+         return index < VERT_RESULT_VAR0 + ctx->Const.MaxVarying;
+      case MESA_SHADER_FRAGMENT:
+         return index < FRAG_RESULT_DATA0 + ctx->Const.MaxDrawBuffers;
+      case MESA_SHADER_GEOMETRY:
+         return index < GEOM_RESULT_VAR0 + ctx->Const.MaxVarying;
+      default:
+         return GL_FALSE;
+      }
+
+   case PROGRAM_ADDRESS:
+      return index >= 0 && index < c->MaxAddressRegs;
+
+   default:
+      _mesa_problem(ctx,
+                    "unexpected register file in _mesa_valid_register_index()");
+      return GL_FALSE;
+   }
+}
+
+
+
 /**
  * "Post-process" a GPU program.  This is intended to be used for debugging.
  * Example actions include no-op'ing instructions or changing instruction
diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h
index 70cc2c3aae..1f1d614025 100644
--- a/src/mesa/program/program.h
+++ b/src/mesa/program/program.h
@@ -165,6 +165,12 @@ extern GLint
 _mesa_find_free_register(const GLboolean used[],
                          GLuint maxRegs, GLuint firstReg);
 
+
+extern GLboolean
+_mesa_valid_register_index(const struct gl_context *ctx,
+                           GLuint shaderType,
+                           gl_register_file file, GLint index);
+
 extern void
 _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog);
 
-- 
cgit v1.2.3


From c628fd743ee3c3305e9beac7f0e6efacf6982115 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 23 Nov 2010 10:28:43 -0700
Subject: mesa: replace #defines with new gl_shader_type enum

---
 src/mesa/main/context.c                |  2 +-
 src/mesa/main/mtypes.h                 | 22 ++++++++++------
 src/mesa/main/shaderobj.c              |  9 ++++---
 src/mesa/main/shaderobj.h              |  4 +--
 src/mesa/program/program.c             |  2 +-
 src/mesa/program/program.h             |  2 +-
 src/mesa/state_tracker/st_context.c    |  5 ++++
 src/mesa/state_tracker/st_extensions.c | 47 +++++++++++++++++-----------------
 8 files changed, 53 insertions(+), 40 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 4ed179a834..e2c91c3e40 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1829,7 +1829,7 @@ _mesa_valid_to_render(struct gl_context *ctx, const char *where)
 #ifdef DEBUG
    if (ctx->Shader.Flags & GLSL_LOG) {
       struct gl_shader_program *shProg[MESA_SHADER_TYPES];
-      unsigned i;
+      gl_shader_type i;
 
       shProg[MESA_SHADER_VERTEX] = ctx->Shader.CurrentVertexProgram;
       shProg[MESA_SHADER_GEOMETRY] = ctx->Shader.CurrentGeometryProgram;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 87b96489db..80c20e09d9 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -41,14 +41,6 @@
 #include "math/m_matrix.h"	/* GLmatrix */
 #include "main/simple_list.h"	/* struct simple_node */
 
-/* Shader stages. Note that these will become 5 with tessellation.
- * These MUST have the same values as PIPE_SHADER_*
- */
-#define MESA_SHADER_VERTEX   0
-#define MESA_SHADER_FRAGMENT 1
-#define MESA_SHADER_GEOMETRY 2
-#define MESA_SHADER_TYPES    3
-
 
 /**
  * Color channel data type.
@@ -129,6 +121,20 @@ struct st_context;
 
 
+/**
+ * Shader stages. Note that these will become 5 with tessellation.
+ * These MUST have the same values as gallium's PIPE_SHADER_*
+ */
+typedef enum
+{
+   MESA_SHADER_VERTEX = 0,
+   MESA_SHADER_FRAGMENT = 1,
+   MESA_SHADER_GEOMETRY = 2,
+   MESA_SHADER_TYPES = 3
+} gl_shader_type;
+
+
+
 /**
  * Indexes for vertex program attributes.
  * GL_NV_vertex_program aliases generic attributes over the conventional
diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index b6594cbe6f..216bbce003 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -291,6 +291,7 @@ _mesa_free_shader_program_data(struct gl_context *ctx,
                                struct gl_shader_program *shProg)
 {
    GLuint i;
+   gl_shader_type sh;
 
    assert(shProg->Type == GL_SHADER_PROGRAM_MESA);
 
@@ -326,10 +327,10 @@ _mesa_free_shader_program_data(struct gl_context *ctx,
    shProg->TransformFeedback.NumVarying = 0;
 
 
-   for (i = 0; i < MESA_SHADER_TYPES; i++) {
-      if (shProg->_LinkedShaders[i] != NULL) {
-	 ctx->Driver.DeleteShader(ctx, shProg->_LinkedShaders[i]);
-	 shProg->_LinkedShaders[i] = NULL;
+   for (sh = 0; sh < MESA_SHADER_TYPES; sh++) {
+      if (shProg->_LinkedShaders[sh] != NULL) {
+	 ctx->Driver.DeleteShader(ctx, shProg->_LinkedShaders[sh]);
+	 shProg->_LinkedShaders[sh] = NULL;
       }
    }
 }
diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h
index 346a5b7517..de7c998cf0 100644
--- a/src/mesa/main/shaderobj.h
+++ b/src/mesa/main/shaderobj.h
@@ -98,7 +98,7 @@ extern void
 _mesa_free_shader_state(struct gl_context *ctx);
 
 
-static INLINE GLuint
+static INLINE gl_shader_type
 _mesa_shader_type_to_index(GLenum v)
 {
    switch (v) {
@@ -110,7 +110,7 @@ _mesa_shader_type_to_index(GLenum v)
       return MESA_SHADER_GEOMETRY;
    default:
       ASSERT(0 && "bad value in _mesa_shader_type_to_index()");
-      return ~0;
+      return MESA_SHADER_TYPES;
    }
 }
 
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 1e99e1e4c3..9ffa49bb01 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -925,7 +925,7 @@ _mesa_find_free_register(const GLboolean used[],
  */
 GLboolean
 _mesa_valid_register_index(const struct gl_context *ctx,
-                           GLuint shaderType,
+                           gl_shader_type shaderType,
                            gl_register_file file, GLint index)
 {
    const struct gl_program_constants *c;
diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h
index 1f1d614025..ce37b95bf8 100644
--- a/src/mesa/program/program.h
+++ b/src/mesa/program/program.h
@@ -168,7 +168,7 @@ _mesa_find_free_register(const GLboolean used[],
 
 extern GLboolean
 _mesa_valid_register_index(const struct gl_context *ctx,
-                           GLuint shaderType,
+                           gl_shader_type shaderType,
                            gl_register_file file, GLint index);
 
 extern void
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index d0dcdd4e29..d6628b1bff 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -170,6 +170,11 @@ struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
    struct gl_context *shareCtx = share ? share->ctx : NULL;
    struct dd_function_table funcs;
 
+   /* Sanity checks */
+   assert(MESA_SHADER_VERTEX == PIPE_SHADER_VERTEX);
+   assert(MESA_SHADER_FRAGMENT == PIPE_SHADER_FRAGMENT);
+   assert(MESA_SHADER_GEOMETRY == PIPE_SHADER_GEOMETRY);
+
    memset(&funcs, 0, sizeof(funcs));
    st_init_driver_functions(&funcs);
 
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index e72caa3842..930b60ade2 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -67,7 +67,7 @@ void st_init_limits(struct st_context *st)
 {
    struct pipe_screen *screen = st->pipe->screen;
    struct gl_constants *c = &st->ctx->Const;
-   unsigned i;
+   gl_shader_type sh;
 
    c->MaxTextureLevels
       = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS),
@@ -137,11 +137,12 @@ void st_init_limits(struct st_context *st)
    /* Quads always follow GL provoking rules. */
    c->QuadsFollowProvokingVertexConvention = GL_FALSE;
 
-   for(i = 0; i < MESA_SHADER_TYPES; ++i) {
-      struct gl_shader_compiler_options *options = &st->ctx->ShaderCompilerOptions[i];
+   for (sh = 0; sh < MESA_SHADER_TYPES; ++sh) {
+      struct gl_shader_compiler_options *options =
+         &st->ctx->ShaderCompilerOptions[sh];
       struct gl_program_constants *pc;
-      switch(i)
-      {
+
+      switch (sh) {
       case PIPE_SHADER_FRAGMENT:
          pc = &c->FragmentProgram;
          break;
@@ -156,37 +157,37 @@ void st_init_limits(struct st_context *st)
          continue;
       }
 
-      pc->MaxNativeInstructions    = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_INSTRUCTIONS);
-      pc->MaxNativeAluInstructions = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS);
-      pc->MaxNativeTexInstructions = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS);
-      pc->MaxNativeTexIndirections = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS);
-      pc->MaxNativeAttribs         = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_INPUTS);
-      pc->MaxNativeTemps           = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_TEMPS);
-      pc->MaxNativeAddressRegs     = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_ADDRS);
-      pc->MaxNativeParameters      = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_CONSTS);
+      pc->MaxNativeInstructions    = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INSTRUCTIONS);
+      pc->MaxNativeAluInstructions = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS);
+      pc->MaxNativeTexInstructions = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS);
+      pc->MaxNativeTexIndirections = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS);
+      pc->MaxNativeAttribs         = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INPUTS);
+      pc->MaxNativeTemps           = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEMPS);
+      pc->MaxNativeAddressRegs     = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_ADDRS);
+      pc->MaxNativeParameters      = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_CONSTS);
       pc->MaxUniformComponents     = 4 * MIN2(pc->MaxNativeParameters, MAX_UNIFORMS);
 
       options->EmitNoNoise = TRUE;
 
       /* TODO: make these more fine-grained if anyone needs it */
-      options->EmitNoIfs = !screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH);
-      options->EmitNoLoops = !screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH);
-      options->EmitNoFunctions = !screen->get_shader_param(screen, i, PIPE_SHADER_CAP_SUBROUTINES);
-      options->EmitNoMainReturn = !screen->get_shader_param(screen, i, PIPE_SHADER_CAP_SUBROUTINES);
+      options->EmitNoIfs = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH);
+      options->EmitNoLoops = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH);
+      options->EmitNoFunctions = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUBROUTINES);
+      options->EmitNoMainReturn = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUBROUTINES);
 
-      options->EmitNoCont = !screen->get_shader_param(screen, i, PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED);
+      options->EmitNoCont = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED);
 
-      options->EmitNoIndirectInput = !screen->get_shader_param(screen, i,
+      options->EmitNoIndirectInput = !screen->get_shader_param(screen, sh,
                                         PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR);
-      options->EmitNoIndirectOutput = !screen->get_shader_param(screen, i,
+      options->EmitNoIndirectOutput = !screen->get_shader_param(screen, sh,
                                         PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR);
-      options->EmitNoIndirectTemp = !screen->get_shader_param(screen, i,
+      options->EmitNoIndirectTemp = !screen->get_shader_param(screen, sh,
                                         PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR);
-      options->EmitNoIndirectUniform = !screen->get_shader_param(screen, i,
+      options->EmitNoIndirectUniform = !screen->get_shader_param(screen, sh,
                                         PIPE_SHADER_CAP_INDIRECT_CONST_ADDR);
 
       if(options->EmitNoLoops)
-         options->MaxUnrollIterations = MIN2(screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_INSTRUCTIONS), 65536);
+         options->MaxUnrollIterations = MIN2(screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INSTRUCTIONS), 65536);
    }
 
    /* PIPE_CAP_MAX_FS_INPUTS specifies the number of COLORn + GENERICn inputs
-- 
cgit v1.2.3


From 6162773ea4b0e84c3ab9c9952fb5e838519c2564 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 23 Nov 2010 17:18:44 -0700
Subject: glsl: better handling of linker failures

Upon link error, exit translation loop, free program instructions.
Check for null pointers in calling code.
---
 src/mesa/program/ir_to_mesa.cpp | 52 ++++++++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 19 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index c5f11e010d..2561bf0880 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2609,8 +2609,9 @@ set_uniform_initializers(struct gl_context *ctx,
 /**
  * Convert a shader's GLSL IR into a Mesa gl_program.
  */
-struct gl_program *
-get_mesa_program(struct gl_context *ctx, struct gl_shader_program *shader_program,
+static struct gl_program *
+get_mesa_program(struct gl_context *ctx,
+                 struct gl_shader_program *shader_program,
 		 struct gl_shader *shader)
 {
    ir_to_mesa_visitor v;
@@ -2756,6 +2757,15 @@ get_mesa_program(struct gl_context *ctx, struct gl_shader_program *shader_progra
 
       mesa_inst++;
       i++;
+
+      if (!shader_program->LinkStatus)
+         break;
+   }
+
+   if (!shader_program->LinkStatus) {
+      free(mesa_instructions);
+      _mesa_reference_program(ctx, &shader->Program, NULL);
+      return NULL;
    }
 
    set_branchtargets(&v, mesa_instructions, num_instructions);
@@ -2866,30 +2876,34 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
    for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
       struct gl_program *linked_prog;
-      bool ok = true;
 
       if (prog->_LinkedShaders[i] == NULL)
 	 continue;
 
       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
 
-      switch (prog->_LinkedShaders[i]->Type) {
-      case GL_VERTEX_SHADER:
-	 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
-				  (struct gl_vertex_program *)linked_prog);
-	 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
-					      linked_prog);
-	 break;
-      case GL_FRAGMENT_SHADER:
-	 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
-				  (struct gl_fragment_program *)linked_prog);
-	 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
-					      linked_prog);
-	 break;
-      }
-      if (!ok) {
-	 return GL_FALSE;
+      if (linked_prog) {
+         bool ok = true;
+
+         switch (prog->_LinkedShaders[i]->Type) {
+         case GL_VERTEX_SHADER:
+            _mesa_reference_vertprog(ctx, &prog->VertexProgram,
+                                     (struct gl_vertex_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         case GL_FRAGMENT_SHADER:
+            _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
+                                     (struct gl_fragment_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         }
+         if (!ok) {
+            return GL_FALSE;
+         }
       }
+
       _mesa_reference_program(ctx, &linked_prog, NULL);
    }
 
-- 
cgit v1.2.3


From 903ead0b26e4fc55474b652adf9470247283e7aa Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 23 Nov 2010 17:23:42 -0700
Subject: glsl: start restoring some geometry shader code

---
 src/mesa/program/ir_to_mesa.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 2561bf0880..8f75c82c3e 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2634,6 +2634,10 @@ get_mesa_program(struct gl_context *ctx,
       target = GL_FRAGMENT_PROGRAM_ARB;
       target_string = "fragment";
       break;
+   case GL_GEOMETRY_SHADER:
+      target = GL_GEOMETRY_PROGRAM_NV;
+      target_string = "geometry";
+      break;
    default:
       assert(!"should not be reached");
       return NULL;
@@ -2898,6 +2902,12 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
             ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
                                                  linked_prog);
             break;
+         case GL_GEOMETRY_SHADER:
+            _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
+                                     (struct gl_geometry_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
+                                                 linked_prog);
+            break;
          }
          if (!ok) {
             return GL_FALSE;
@@ -3021,6 +3031,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
    prog->Varying = _mesa_new_parameter_list();
    _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
    _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
+   _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
 
    if (prog->LinkStatus) {
       link_shaders(ctx, prog);
-- 
cgit v1.2.3


From ead2ea89f42b40edc56ddf8c6ce1df4efdcefe2a Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 25 Nov 2010 03:13:36 -0800
Subject: ir_to_mesa: Add support for conditional discards.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

NOTE: This is a candidate for the 7.9 branch.

Signed-off-by: Marek Olšák <maraeo@gmail.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/program/ir_to_mesa.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 8f75c82c3e..5dd602fd83 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2166,9 +2166,14 @@ ir_to_mesa_visitor::visit(ir_discard *ir)
 {
    struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
 
-   assert(ir->condition == NULL); /* FINISHME */
+   if (ir->condition) {
+      ir->condition->accept(this);
+      this->result.negate = ~this->result.negate;
+      ir_to_mesa_emit_op1(ir, OPCODE_KIL, ir_to_mesa_undef_dst, this->result);
+   } else {
+      ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV);
+   }
 
-   ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV);
    fp->UsesKill = GL_TRUE;
 }
 
-- 
cgit v1.2.3


From 940df10100d740ef27fa39026fd51c3199ed3d62 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 25 Nov 2010 01:09:26 -0800
Subject: glsl: Add a lowering pass to move discards out of if-statements.

This should allow lower_if_to_cond_assign to work in the presence of
discards, fixing bug #31690 and likely #31983.

NOTE: This is a candidate for the 7.9 branch.
---
 src/glsl/Makefile               |   1 +
 src/glsl/SConscript             |   1 +
 src/glsl/ir_optimization.h      |   1 +
 src/glsl/lower_discard.cpp      | 198 ++++++++++++++++++++++++++++++++++++++++
 src/mesa/program/ir_to_mesa.cpp |   4 +-
 5 files changed, 204 insertions(+), 1 deletion(-)
 create mode 100644 src/glsl/lower_discard.cpp

(limited to 'src/mesa/program')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 83ecb7f9c0..2674c6ec48 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -52,6 +52,7 @@ CXX_SOURCES = \
 	loop_analysis.cpp \
 	loop_controls.cpp \
 	loop_unroll.cpp \
+	lower_discard.cpp \
 	lower_if_to_cond_assign.cpp \
 	lower_instructions.cpp \
 	lower_jumps.cpp \
diff --git a/src/glsl/SConscript b/src/glsl/SConscript
index 16f02e94b1..b5b1728bee 100644
--- a/src/glsl/SConscript
+++ b/src/glsl/SConscript
@@ -49,6 +49,7 @@ sources = [
     'loop_analysis.cpp',
     'loop_controls.cpp',
     'loop_unroll.cpp',
+    'lower_discard.cpp',
     'lower_if_to_cond_assign.cpp',
     'lower_instructions.cpp',
     'lower_jumps.cpp',
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 05c1becc5e..1048ff982e 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -62,6 +62,7 @@ bool do_swizzle_swizzle(exec_list *instructions);
 bool do_tree_grafting(exec_list *instructions);
 bool do_vec_index_to_cond_assign(exec_list *instructions);
 bool do_vec_index_to_swizzle(exec_list *instructions);
+bool lower_discard(exec_list *instructions);
 bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
 bool lower_noise(exec_list *instructions);
 bool lower_variable_index_to_cond_assign(exec_list *instructions,
diff --git a/src/glsl/lower_discard.cpp b/src/glsl/lower_discard.cpp
new file mode 100644
index 0000000000..b95313df8c
--- /dev/null
+++ b/src/glsl/lower_discard.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_discard.cpp
+ *
+ * This pass moves discards out of if-statements.
+ *
+ * Case 1: The "then" branch contains a conditional discard:
+ * ---------------------------------------------------------
+ *
+ *    if (cond1) {
+ *	 s1;
+ *	 discard cond2;
+ *	 s2;
+ *    } else {
+ *	 s3;
+ *    }
+ *
+ * becomes:
+ *
+ *    temp = false;
+ *    if (cond1) {
+ *	 s1;
+ *	 temp = cond2;
+ *	 s2;
+ *    } else {
+ *	 s3;
+ *    }
+ *    discard temp;
+ *
+ * Case 2: The "else" branch contains a conditional discard:
+ * ---------------------------------------------------------
+ *
+ *    if (cond1) {
+ *	 s1;
+ *    } else {
+ *	 s2;
+ *	 discard cond2;
+ *	 s3;
+ *    }
+ *
+ * becomes:
+ *
+ *    temp = false;
+ *    if (cond1) {
+ *	 s1;
+ *    } else {
+ *	 s2;
+ *	 temp = cond2;
+ *	 s3;
+ *    }
+ *    discard temp;
+ *
+ * Case 3: Both branches contain a conditional discard:
+ * ----------------------------------------------------
+ *
+ *    if (cond1) {
+ *	 s1;
+ *	 discard cond2;
+ *	 s2;
+ *    } else {
+ *	 s3;
+ *	 discard cond3;
+ *	 s4;
+ *    }
+ *
+ * becomes:
+ *
+ *    temp = false;
+ *    if (cond1) {
+ *	 s1;
+ *	 temp = cond2;
+ *	 s2;
+ *    } else {
+ *	 s3;
+ *	 temp = cond3;
+ *	 s4;
+ *    }
+ *    discard temp;
+ *
+ * If there are multiple conditional discards, we need only deal with one of
+ * them.  Repeatedly applying this pass will take care of the others.
+ *
+ * Unconditional discards are treated as having a condition of "true".
+ */
+
+#include "glsl_types.h"
+#include "ir.h"
+
+class lower_discard_visitor : public ir_hierarchical_visitor {
+public:
+   lower_discard_visitor()
+   {
+      this->progress = false;
+   }
+
+   ir_visitor_status visit_leave(ir_if *);
+
+   bool progress;
+};
+
+
+bool
+lower_discard(exec_list *instructions)
+{
+   lower_discard_visitor v;
+
+   visit_list_elements(&v, instructions);
+
+   return v.progress;
+}
+
+
+static ir_discard *
+find_discard(exec_list &instructions)
+{
+   foreach_list(n, &instructions) {
+      ir_discard *ir = ((ir_instruction *) n)->as_discard();
+      if (ir != NULL)
+	 return ir;
+   }
+   return NULL;
+}
+
+
+static void
+replace_discard(void *mem_ctx, ir_variable *var, ir_discard *ir)
+{
+   ir_rvalue *condition = ir->condition;
+
+   /* For unconditional discards, use "true" as the condition. */
+   if (condition == NULL)
+      condition = new(mem_ctx) ir_constant(true);
+
+   ir_assignment *assignment =
+      new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(var),
+				 condition, NULL);
+
+   ir->replace_with(assignment);
+}
+
+
+ir_visitor_status
+lower_discard_visitor::visit_leave(ir_if *ir)
+{
+   ir_discard *then_discard = find_discard(ir->then_instructions);
+   ir_discard *else_discard = find_discard(ir->else_instructions);
+
+   if (then_discard == NULL && else_discard == NULL)
+      return visit_continue;
+
+   void *mem_ctx = talloc_parent(ir);
+
+   ir_variable *temp = new(mem_ctx) ir_variable(glsl_type::bool_type,
+						"discard_cond_temp",
+						ir_var_temporary);
+   ir_assignment *temp_initializer =
+      new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(temp),
+				 new(mem_ctx) ir_constant(false), NULL);
+
+   ir->insert_before(temp);
+   ir->insert_before(temp_initializer);
+
+   if (then_discard != NULL)
+      replace_discard(mem_ctx, temp, then_discard);
+
+   if (else_discard != NULL)
+      replace_discard(mem_ctx, temp, else_discard);
+
+   ir_discard *discard = then_discard != NULL ? then_discard : else_discard;
+   discard->condition = new(mem_ctx) ir_dereference_variable(temp);
+   ir->insert_after(discard);
+
+   this->progress = true;
+
+   return visit_continue;
+}
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 5dd602fd83..d9d86b6c29 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2858,8 +2858,10 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
 	 progress = lower_quadop_vector(ir, true) || progress;
 
-	 if (options->EmitNoIfs)
+	 if (options->EmitNoIfs) {
+	    progress = lower_discard(ir) || progress;
 	    progress = do_if_to_cond_assign(ir) || progress;
+	 }
 
 	 if (options->EmitNoNoise)
 	    progress = lower_noise(ir) || progress;
-- 
cgit v1.2.3


From c4285be9a5bd1adaa89050989374b95a9a601cdc Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 24 Nov 2010 22:21:10 -0800
Subject: glsl: Lower ir_binop_pow to a sequence of EXP2 and LOG2

---
 src/glsl/ir_optimization.h      |  5 +++--
 src/glsl/lower_instructions.cpp | 26 ++++++++++++++++++++++++++
 src/mesa/main/mtypes.h          |  1 +
 src/mesa/program/ir_to_mesa.cpp |  5 +++--
 4 files changed, 33 insertions(+), 4 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 1048ff982e..f264265f4b 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -32,8 +32,9 @@
 #define SUB_TO_ADD_NEG 0x01
 #define DIV_TO_MUL_RCP 0x02
 #define EXP_TO_EXP2    0x04
-#define LOG_TO_LOG2    0x08
-#define MOD_TO_FRACT   0x10
+#define POW_TO_EXP2    0x08
+#define LOG_TO_LOG2    0x10
+#define MOD_TO_FRACT   0x20
 
 bool do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iterations);
 
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 0d9374d73b..a5f61f213d 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -33,6 +33,7 @@
  * - SUB_TO_ADD_NEG
  * - DIV_TO_MUL_RCP
  * - EXP_TO_EXP2
+ * - POW_TO_EXP2
  * - LOG_TO_LOG2
  * - MOD_TO_FRACT
  *
@@ -61,6 +62,11 @@
  * do have base 2 versions, so this pass converts exp and log to exp2
  * and log2 operations.
  *
+ * POW_TO_EXP2:
+ * -----------
+ * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
+ * x**y to 2**(y * log2(x)).
+ *
  * MOD_TO_FRACT:
  * -------------
  * Breaks an ir_unop_mod expression down to (op1 * fract(op0 / op1))
@@ -91,6 +97,7 @@ private:
    void div_to_mul_rcp(ir_expression *);
    void mod_to_fract(ir_expression *);
    void exp_to_exp2(ir_expression *);
+   void pow_to_exp2(ir_expression *);
    void log_to_log2(ir_expression *);
 };
 
@@ -180,6 +187,20 @@ lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
    this->progress = true;
 }
 
+void
+lower_instructions_visitor::pow_to_exp2(ir_expression *ir)
+{
+   ir_expression *const log2_x =
+      new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
+			    ir->operands[0]);
+
+   ir->operation = ir_unop_exp2;
+   ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type,
+					   ir->operands[1], log2_x);
+   ir->operands[1] = NULL;
+   this->progress = true;
+}
+
 void
 lower_instructions_visitor::log_to_log2(ir_expression *ir)
 {
@@ -254,6 +275,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
 	 mod_to_fract(ir);
       break;
 
+   case ir_binop_pow:
+      if (lowering(POW_TO_EXP2))
+	 pow_to_exp2(ir);
+      break;
+
    default:
       return visit_continue;
    }
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 80c20e09d9..82495714f2 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2197,6 +2197,7 @@ struct gl_shader_compiler_options
    GLboolean EmitNoCont;                  /**< Emit CONT opcode? */
    GLboolean EmitNoMainReturn;            /**< Emit CONT/RET opcodes? */
    GLboolean EmitNoNoise;                 /**< Emit NOISE opcodes? */
+   GLboolean EmitNoPow;                   /**< Emit POW opcodes? */
 
    /**
     * \name Forms of indirect addressing the driver cannot do.
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index d9d86b6c29..b274a961b2 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2849,8 +2849,9 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
 	 /* Lowering */
 	 do_mat_op_to_vec(ir);
-	 lower_instructions(ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
-			      | LOG_TO_LOG2);
+	 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+				 | LOG_TO_LOG2
+				 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
 
 	 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
 
-- 
cgit v1.2.3


From d531f9c2f5c78468d913fc509b223760ac1c1124 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 2 Dec 2010 20:39:59 +0100
Subject: mesa, st/mesa: fix gl_FragCoord with FBOs in Gallium
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gl_FragCoord.y needs to be flipped upside down if a FBO is bound.

This fixes:
- piglit/fbo-fragcoord
- https://bugs.freedesktop.org/show_bug.cgi?id=29420

Here I add a new program state STATE_FB_WPOS_Y_TRANSFORM, which is set based
on whether a FBO is bound. The state contains a pair of transformations.
It can be either (XY=identity, ZW=transformY) if a FBO is bound,
or (XY=transformY, ZW=identity) otherwise, where identity = (1, 0),
transformY = (-1, height-1).

A classic driver (or st/mesa) may, based on some other state, choose whether
to use XY or ZW, thus negate the conditional "if (is a FBO bound) ...".
The reason for this is that a Gallium driver is allowed to only support WPOS
relative to either the lower left or the upper left corner, so we must flip
the Y axis accordingly again. (the "invert" parameter in emit_wpos_inversion)

NOTE: This is a candidate for the 7.9 branch.

Signed-off-by: Marek Olšák <maraeo@gmail.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/program/prog_statevars.c        | 22 ++++++++++++++++
 src/mesa/program/prog_statevars.h        |  1 +
 src/mesa/state_tracker/st_mesa_to_tgsi.c | 43 +++++++++++++++++++++-----------
 3 files changed, 51 insertions(+), 15 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index baac29ff0d..83bd3727f3 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -572,6 +572,24 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[],
          value[3] = 0.0F;
          return;
 
+      case STATE_FB_WPOS_Y_TRANSFORM:
+         /* A driver may negate this conditional by using ZW swizzle
+          * instead of XY (based on e.g. some other state). */
+         if (ctx->DrawBuffer->Name != 0) {
+            /* Identity (XY) followed by flipping Y upside down (ZW). */
+            value[0] = 1.0F;
+            value[1] = 0.0F;
+            value[2] = -1.0F;
+            value[3] = (GLfloat) (ctx->DrawBuffer->Height - 1);
+         } else {
+            /* Flipping Y upside down (XY) followed by identity (ZW). */
+            value[0] = -1.0F;
+            value[1] = (GLfloat) (ctx->DrawBuffer->Height - 1);
+            value[2] = 1.0F;
+            value[3] = 0.0F;
+         }
+         return;
+
       case STATE_ROT_MATRIX_0:
          {
             const int unit = (int) state[2];
@@ -695,6 +713,7 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH])
          return _NEW_PIXEL;
 
       case STATE_FB_SIZE:
+      case STATE_FB_WPOS_Y_TRANSFORM:
          return _NEW_BUFFERS;
 
       default:
@@ -900,6 +919,9 @@ append_token(char *dst, gl_state_index k)
    case STATE_FB_SIZE:
       append(dst, "FbSize");
       break;
+   case STATE_FB_WPOS_Y_TRANSFORM:
+      append(dst, "FbWposYTransform");
+      break;
    case STATE_ROT_MATRIX_0:
       append(dst, "rotMatrixRow0");
       break;
diff --git a/src/mesa/program/prog_statevars.h b/src/mesa/program/prog_statevars.h
index 6e5be53630..009ebde001 100644
--- a/src/mesa/program/prog_statevars.h
+++ b/src/mesa/program/prog_statevars.h
@@ -117,6 +117,7 @@ typedef enum gl_state_index_ {
    STATE_PT_BIAS,               /**< Pixel transfer RGBA bias */
    STATE_SHADOW_AMBIENT,        /**< ARB_shadow_ambient fail value; token[2] is texture unit index */
    STATE_FB_SIZE,               /**< (width-1, height-1, 0, 0) */
+   STATE_FB_WPOS_Y_TRANSFORM,   /**< (1, 0, -1, height-1) if a FBO is bound, (-1, height-1, 1, 0) otherwise */
    STATE_ROT_MATRIX_0,          /**< ATI_envmap_bumpmap, rot matrix row 0 */
    STATE_ROT_MATRIX_1,          /**< ATI_envmap_bumpmap, rot matrix row 1 */
    STATE_INTERNAL_DRIVER	/* first available state index for drivers (must be last) */
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index c5c239b2c9..f848462310 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -760,10 +760,13 @@ emit_adjusted_wpos( struct st_translate *t,
 
 /**
  * Emit the TGSI instructions for inverting the WPOS y coordinate.
+ * This code is unavoidable because it also depends on whether
+ * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
  */
 static void
-emit_inverted_wpos( struct st_translate *t,
-                    const struct gl_program *program )
+emit_wpos_inversion( struct st_translate *t,
+                     const struct gl_program *program,
+                     boolean invert)
 {
    struct ureg_program *ureg = t->ureg;
 
@@ -771,17 +774,17 @@ emit_inverted_wpos( struct st_translate *t,
     * Need to replace instances of INPUT[WPOS] with temp T
     * where T = INPUT[WPOS] by y is inverted.
     */
-   static const gl_state_index winSizeState[STATE_LENGTH]
-      = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
+   static const gl_state_index wposTransformState[STATE_LENGTH]
+      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 };
    
    /* XXX: note we are modifying the incoming shader here!  Need to
     * do this before emitting the constant decls below, or this
     * will be missed:
     */
-   unsigned winHeightConst = _mesa_add_state_reference(program->Parameters,
-                                                       winSizeState);
+   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
+                                                       wposTransformState);
 
-   struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst );
+   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
    struct ureg_dst wpos_temp;
    struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
 
@@ -794,12 +797,23 @@ emit_inverted_wpos( struct st_translate *t,
       ureg_MOV( ureg, wpos_temp, wpos_input );
    }
 
-   /* SUB wpos_temp.y, winsize_const, wpos_input
-    */
-   ureg_SUB( ureg,
-             ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
-             winsize,
-             wpos_input);
+   if (invert) {
+      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
+       */
+      ureg_MAD( ureg,
+                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
+                wpos_input,
+                ureg_scalar(wpostrans, 0),
+                ureg_scalar(wpostrans, 1));
+   } else {
+      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
+       */
+      ureg_MAD( ureg,
+                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
+                wpos_input,
+                ureg_scalar(wpostrans, 2),
+                ureg_scalar(wpostrans, 3));
+   }
 
    /* Use wpos_temp as position input from here on:
     */
@@ -861,8 +875,7 @@ emit_wpos(struct st_context *st,
 
    /* we invert after adjustment so that we avoid the MOV to temporary,
     * and reuse the adjustment ADD instead */
-   if (invert)
-      emit_inverted_wpos(t, program);
+   emit_wpos_inversion(t, program, invert);
 }
 
 
-- 
cgit v1.2.3


From 6f851e6642a640deda34790c14d178acac1c552e Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 3 Dec 2010 16:40:36 -0700
Subject: mesa: remove unneeded cast

---
 src/mesa/program/prog_statevars.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index 83bd3727f3..def1fbe157 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -1084,7 +1084,7 @@ _mesa_load_state_parameters(struct gl_context *ctx,
    for (i = 0; i < paramList->NumParameters; i++) {
       if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
          _mesa_fetch_state(ctx,
-			   (gl_state_index *) paramList->Parameters[i].StateIndexes,
+			   paramList->Parameters[i].StateIndexes,
                            paramList->ParameterValues[i]);
       }
    }
-- 
cgit v1.2.3


From 64244dfd39b6e63a62a91ea2fb2743bd88a22476 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 3 Dec 2010 16:45:44 -0700
Subject: mesa: update comments, remove dead code

---
 src/mesa/program/prog_statevars.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index def1fbe157..c310acb01d 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -1068,7 +1068,9 @@ _mesa_program_state_string(const gl_state_index state[STATE_LENGTH])
  * Loop over all the parameters in a parameter list.  If the parameter
  * is a GL state reference, look up the current value of that state
  * variable and put it into the parameter's Value[4] array.
- * This would be called at glBegin time when using a fragment program.
+ * Other parameter types never change or are explicitly set by the user
+ * with glUniform() or glProgramParameter(), etc.
+ * This would be called at glBegin time.
  */
 void
 _mesa_load_state_parameters(struct gl_context *ctx,
@@ -1079,8 +1081,6 @@ _mesa_load_state_parameters(struct gl_context *ctx,
    if (!paramList)
       return;
 
-   /*assert(ctx->Driver.NeedFlush == 0);*/
-
    for (i = 0; i < paramList->NumParameters; i++) {
       if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
          _mesa_fetch_state(ctx,
-- 
cgit v1.2.3


From a9fa0f3a2f318a7c57163491abe931e86b2cd4c3 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 6 Dec 2010 20:01:30 +0000
Subject: mesa: Bump the number of bits in the register index.

More than 1023 temporaries were being used for a Cinebench shader before
doing temporary optimization, causing the index value to wrap around to
-1024.
---
 src/mesa/program/prog_instruction.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h
index ca90de7ce1..a383828e34 100644
--- a/src/mesa/program/prog_instruction.h
+++ b/src/mesa/program/prog_instruction.h
@@ -247,7 +247,7 @@ typedef enum prog_opcode {
  * Number of bits for the src/dst register Index field.
  * This limits the size of temp/uniform register files.
  */
-#define INST_INDEX_BITS 10
+#define INST_INDEX_BITS 11
 
 
 /**
-- 
cgit v1.2.3


From a8f52647b045b5400ebcfc58ba235599a6e9ad87 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 6 Dec 2010 10:42:27 -0800
Subject: symbol_table: Add support for adding a symbol at top-level/global
 scope.

---
 src/mesa/program/symbol_table.c | 85 ++++++++++++++++++++++++++++++++++++++---
 src/mesa/program/symbol_table.h |  4 ++
 2 files changed, 84 insertions(+), 5 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/symbol_table.c b/src/mesa/program/symbol_table.c
index 09e7cb44ef..004f1f8fa7 100644
--- a/src/mesa/program/symbol_table.c
+++ b/src/mesa/program/symbol_table.c
@@ -336,12 +336,12 @@ _mesa_symbol_table_add_symbol(struct _mesa_symbol_table *table,
     check_symbol_table(table);
 
     if (hdr == NULL) {
-        hdr = calloc(1, sizeof(*hdr));
-        hdr->name = strdup(name);
+       hdr = calloc(1, sizeof(*hdr));
+       hdr->name = strdup(name);
 
-        hash_table_insert(table->ht, hdr, hdr->name);
-	hdr->next = table->hdr;
-	table->hdr = hdr;
+       hash_table_insert(table->ht, hdr, hdr->name);
+       hdr->next = table->hdr;
+       table->hdr = hdr;
     }
 
     check_symbol_table(table);
@@ -376,6 +376,81 @@ _mesa_symbol_table_add_symbol(struct _mesa_symbol_table *table,
 }
 
 
+int
+_mesa_symbol_table_add_global_symbol(struct _mesa_symbol_table *table,
+				     int name_space, const char *name,
+				     void *declaration)
+{
+    struct symbol_header *hdr;
+    struct symbol *sym;
+    struct symbol *curr;
+    struct scope_level *top_scope;
+
+    check_symbol_table(table);
+
+    hdr = find_symbol(table, name);
+
+    check_symbol_table(table);
+
+    if (hdr == NULL) {
+        hdr = calloc(1, sizeof(*hdr));
+        hdr->name = strdup(name);
+
+        hash_table_insert(table->ht, hdr, hdr->name);
+        hdr->next = table->hdr;
+        table->hdr = hdr;
+    }
+
+    check_symbol_table(table);
+
+    /* If the symbol already exists in this namespace at this scope, it cannot
+     * be added to the table.
+     */
+    for (sym = hdr->symbols
+	 ; (sym != NULL) && (sym->name_space != name_space)
+	 ; sym = sym->next_with_same_name) {
+       /* empty */
+    }
+
+    if (sym && sym->depth == 0)
+       return -1;
+
+    /* Find the top-level scope */
+    for (top_scope = table->current_scope
+	 ; top_scope->next != NULL
+	 ; top_scope = top_scope->next) {
+       /* empty */
+    }
+
+    sym = calloc(1, sizeof(*sym));
+    sym->next_with_same_scope = top_scope->symbols;
+    sym->hdr = hdr;
+    sym->name_space = name_space;
+    sym->data = declaration;
+
+    assert(sym->hdr == hdr);
+
+    /* Since next_with_same_name is ordered by scope, we need to append the
+     * new symbol to the _end_ of the list.
+     */
+    if (hdr->symbols == NULL) {
+       hdr->symbols = sym;
+    } else {
+       for (curr = hdr->symbols
+	    ; curr->next_with_same_name != NULL
+	    ; curr = curr->next_with_same_name) {
+	  /* empty */
+       }
+       curr->next_with_same_name = sym;
+    }
+    top_scope->symbols = sym;
+
+    check_symbol_table(table);
+    return 0;
+}
+
+
+
 struct _mesa_symbol_table *
 _mesa_symbol_table_ctor(void)
 {
diff --git a/src/mesa/program/symbol_table.h b/src/mesa/program/symbol_table.h
index 1d570fc1a0..f9d91649bb 100644
--- a/src/mesa/program/symbol_table.h
+++ b/src/mesa/program/symbol_table.h
@@ -33,6 +33,10 @@ extern void _mesa_symbol_table_pop_scope(struct _mesa_symbol_table *table);
 extern int _mesa_symbol_table_add_symbol(struct _mesa_symbol_table *symtab,
     int name_space, const char *name, void *declaration);
 
+extern int _mesa_symbol_table_add_global_symbol(
+    struct _mesa_symbol_table *symtab, int name_space, const char *name,
+    void *declaration);
+
 extern int _mesa_symbol_table_symbol_scope(struct _mesa_symbol_table *table,
     int name_space, const char *name);
 
-- 
cgit v1.2.3


From d5810efca69d016eddd6ece4cb399394c46525db Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Thu, 9 Dec 2010 23:52:28 -0800
Subject: mesa: Clean up header file inclusion in arbprogparse.h.

---
 src/mesa/program/arbprogparse.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/arbprogparse.h b/src/mesa/program/arbprogparse.h
index 08e25a1c16..4c0c300720 100644
--- a/src/mesa/program/arbprogparse.h
+++ b/src/mesa/program/arbprogparse.h
@@ -26,7 +26,11 @@
 #ifndef ARBPROGPARSE_H
 #define ARBPROGPARSE_H
 
-#include "main/mtypes.h"
+#include "main/glheader.h"
+
+struct gl_context;
+struct gl_fragment_program;
+struct gl_vertex_program;
 
 extern void
 _mesa_parse_arb_vertex_program(struct gl_context *ctx, GLenum target,
-- 
cgit v1.2.3


From 7d8c06746059c2b3cd7ac08fd0909e38c880ec9c Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 11 Dec 2010 13:30:13 -0800
Subject: mesa: Clean up header file inclusion in ir_to_mesa.h.

---
 src/mesa/program/ir_to_mesa.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/ir_to_mesa.h b/src/mesa/program/ir_to_mesa.h
index 7197615f94..7410e14973 100644
--- a/src/mesa/program/ir_to_mesa.h
+++ b/src/mesa/program/ir_to_mesa.h
@@ -25,8 +25,11 @@
 extern "C" {
 #endif
 
-#include "main/config.h"
-#include "main/mtypes.h"
+#include "main/glheader.h"
+
+struct gl_context;
+struct gl_shader;
+struct gl_shader_program;
 
 void _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *sh);
 void _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
-- 
cgit v1.2.3


From bf8242684a893c2b3abb4a515c31d24a0a6b7dc4 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 11 Dec 2010 14:37:18 -0800
Subject: mesa: Clean up header file inclusion in nvfragparse.h.

---
 src/mesa/program/nvfragparse.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/nvfragparse.h b/src/mesa/program/nvfragparse.h
index 3e85dd2c30..088e7527d5 100644
--- a/src/mesa/program/nvfragparse.h
+++ b/src/mesa/program/nvfragparse.h
@@ -30,7 +30,10 @@
 #ifndef NVFRAGPARSE_H
 #define NVFRAGPARSE_H
 
-#include "main/mtypes.h"
+#include "main/glheader.h"
+
+struct gl_context;
+struct gl_fragment_program;
 
 extern void
 _mesa_parse_nv_fragment_program(struct gl_context *ctx, GLenum target,
-- 
cgit v1.2.3


From 2d577ee730c30caacf711babde6542766aa0b655 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 13 Dec 2010 15:42:46 -0800
Subject: ir_to_mesa: Don't generate swizzles for record derefs of
 non-scalar/vectors

This is the same as what the array dereference handler does.

Fixes piglit test glsl-link-struct-array (bugzilla #31648).

NOTE: This is a candidate for the 7.9 and 7.10 branches.
---
 src/mesa/program/ir_to_mesa.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index b274a961b2..490c4cab7a 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1569,7 +1569,13 @@ ir_to_mesa_visitor::visit(ir_dereference_record *ir)
 	 break;
       offset += type_size(struct_type->fields.structure[i].type);
    }
-   this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      this->result.swizzle = SWIZZLE_NOOP;
+
    this->result.index += offset;
 }
 
-- 
cgit v1.2.3


From 71af07bf40e095e4c4e075d3f52a1f89f22d6639 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Tue, 14 Dec 2010 00:22:27 -0800
Subject: mesa: Clean up header file inclusion in nvvertparse.h.

---
 src/mesa/program/nvvertparse.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/nvvertparse.h b/src/mesa/program/nvvertparse.h
index e98e867320..7318e14941 100644
--- a/src/mesa/program/nvvertparse.h
+++ b/src/mesa/program/nvvertparse.h
@@ -29,7 +29,10 @@
 #ifndef NVVERTPARSE_H
 #define NVVERTPARSE_H
 
-#include "main/mtypes.h"
+#include "main/glheader.h"
+
+struct gl_context;
+struct gl_vertex_program;
 
 extern void
 _mesa_parse_nv_vertex_program(struct gl_context *ctx, GLenum target,
-- 
cgit v1.2.3


From 2c582dd25f0625eae9ebd2c0c2c151f048a9790b Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Tue, 14 Dec 2010 00:30:24 -0800
Subject: mesa: Clean up header file inclusion in prog_cache.h.

---
 src/mesa/program/prog_cache.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/prog_cache.h b/src/mesa/program/prog_cache.h
index 4907ae3030..0167334827 100644
--- a/src/mesa/program/prog_cache.h
+++ b/src/mesa/program/prog_cache.h
@@ -30,8 +30,9 @@
 #define PROG_CACHE_H
 
 
-#include "main/mtypes.h"
+#include "main/glheader.h"
 
+struct gl_context;
 
 /** Opaque type */
 struct gl_program_cache;
-- 
cgit v1.2.3


From 4d1b2df6c0fd837828fdebc0bd20d9eccf1ffedf Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Tue, 14 Dec 2010 00:39:57 -0800
Subject: mesa: Clean up header file inclusion in prog_optimize.h.

---
 src/mesa/program/prog_optimize.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h
index 00f1080449..463f5fc51c 100644
--- a/src/mesa/program/prog_optimize.h
+++ b/src/mesa/program/prog_optimize.h
@@ -27,9 +27,10 @@
 
 
 #include "main/config.h"
-#include "main/mtypes.h"
+#include "main/glheader.h"
 
 
+struct gl_context;
 struct gl_program;
 struct prog_instruction;
 
-- 
cgit v1.2.3


From be2aa81f5f8f44be8c9c8854098d29635352c4f8 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 14 Dec 2010 12:46:01 -0700
Subject: mesa: more program debug code

---
 src/mesa/program/prog_execute.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index 1d97a077f5..dd15e9a1cc 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -1670,6 +1670,18 @@ _mesa_execute_program(struct gl_context * ctx,
 
             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
 
+            if (DEBUG_PROG) {
+               printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]"
+                      "  bias %g\n",
+                      color[0], color[1], color[2], color[3],
+                      inst->TexSrcUnit,
+                      texcoord[0],
+                      texcoord[1],
+                      texcoord[2],
+                      texcoord[3],
+                      lodBias);
+            }
+
             store_vector4(inst, machine, color);
          }
          break;
-- 
cgit v1.2.3


From 488e994ba977dc363a5b0a8b657d93a46ade7da2 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 18 Dec 2010 01:16:53 -0800
Subject: mesa: Clean up header file inclusion in prog_statevars.h.

---
 src/mesa/program/prog_statevars.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/mesa/program')

diff --git a/src/mesa/program/prog_statevars.h b/src/mesa/program/prog_statevars.h
index 009ebde001..f2407af9c8 100644
--- a/src/mesa/program/prog_statevars.h
+++ b/src/mesa/program/prog_statevars.h
@@ -25,8 +25,10 @@
 #ifndef PROG_STATEVARS_H
 #define PROG_STATEVARS_H
 
-#include "main/mtypes.h"
+#include "main/glheader.h"
 
+struct gl_context;
+struct gl_program_parameter_list;
 
 /**
  * Number of STATE_* values we need to address any GL state.
-- 
cgit v1.2.3