From a1bebf73dfdaf2cd23286aa74271b87166589901 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 10 Aug 2010 20:39:06 -0700
Subject: i965: Start building 965 FS backend.

---
 src/mesa/drivers/dri/i965/Makefile | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/mesa/drivers/dri/i965/Makefile')

diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index e381a5c714..bc4cfab5c0 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -104,6 +104,9 @@ C_SOURCES = \
 	$(COMMON_SOURCES) \
 	$(DRIVER_SOURCES)
 
+CXX_SOURCES = \
+	brw_fs.cpp
+
 ASM_SOURCES = 
 
 DRIVER_DEFINES = -I../intel
-- 
cgit v1.2.3


From 3a8ad33dde2f059b82ebf09f5cffa66c86f2e734 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 13 Aug 2010 02:20:40 -0700
Subject: i965: Add a pass for the FS to reduce vector expressions down to
 scalar.

This is a step towards implementing a GLSL IR backend for the 965
fragment shader.  Because it has downsides with the current codegen,
it is hidden under the environment variable INTEL_NEW_FS.

This results in an increase in instruction count at the moment (1444
-> 1752 for glsl-fs-raytrace, 345 -> 359 on my demo), because dot
products are turned into a series of multiplies and adds instead of a
custom expansion of MULs and MACs, and by not splitting the variable
types up we don't get tree grafting and thus there are extra moves of
temporary storage.  However, register count drops for the non-GLSL
path (64 -> 56 on my demo shader) because the register allocator sees
all the sub-operations.
---
 src/mesa/drivers/dri/i965/Makefile                 |   3 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp               |  15 +
 .../dri/i965/brw_fs_channel_expressions.cpp        | 365 +++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_wm.h                 |   2 +
 4 files changed, 384 insertions(+), 1 deletion(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp

(limited to 'src/mesa/drivers/dri/i965/Makefile')

diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index bc4cfab5c0..39acae9e43 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -105,7 +105,8 @@ C_SOURCES = \
 	$(DRIVER_SOURCES)
 
 CXX_SOURCES = \
-	brw_fs.cpp
+	brw_fs.cpp \
+	brw_fs_channel_expressions.cpp
 
 ASM_SOURCES = 
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 9509d93236..d16e75a2ca 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -36,6 +36,7 @@ extern "C" {
 #include "brw_wm.h"
 #include "talloc.h"
 }
+#include "../glsl/ir_optimization.h"
 
 struct gl_shader *
 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
@@ -75,6 +76,20 @@ brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
 GLboolean
 brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
 {
+   static int using_new_fs = -1;
+
+   if (using_new_fs == -1)
+      using_new_fs = getenv("INTEL_NEW_FS") != NULL;
+
+   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
+      struct gl_shader *shader = prog->_LinkedShaders[i];
+
+      if (using_new_fs && shader->Type == GL_FRAGMENT_SHADER) {
+	 do_mat_op_to_vec(shader->ir);
+	 brw_do_channel_expressions(shader->ir);
+      }
+   }
+
    if (!_mesa_ir_link_shader(ctx, prog))
       return GL_FALSE;
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
new file mode 100644
index 0000000000..d8d58a9467
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -0,0 +1,365 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_fs_channel_expressions.cpp
+ *
+ * Breaks vector operations down into operations on each component.
+ *
+ * The 965 fragment shader receives 8 or 16 pixels at a time, so each
+ * channel of a vector is laid out as 1 or 2 8-float registers.  Each
+ * ALU operation operates on one of those channel registers.  As a
+ * result, there is no value to the 965 fragment shader in tracking
+ * "vector" expressions in the sense of GLSL fragment shaders, when
+ * doing a channel at a time may help in constant folding, algebraic
+ * simplification, and reducing the liveness of channel registers.
+ *
+ * The exception to the desire to break everything down to floats is
+ * texturing.  The texture sampler returns a writemasked
+ * 4/8-register sequence containing the texture values.  We don't want
+ * to dispatch to the sampler separately for each channel we need, so
+ * we do retain the vector types in that case.
+ */
+
+extern "C" {
+#include "main/core.h"
+#include "brw_wm.h"
+}
+#include "../glsl/ir.h"
+#include "../glsl/ir_expression_flattening.h"
+#include "../glsl/glsl_types.h"
+
+class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
+public:
+   /* Begin with nothing rewritten and no allocation context picked. */
+   ir_channel_expressions_visitor()
+      : progress(false), mem_ctx(NULL)
+   {
+   }
+
+   ir_visitor_status visit_leave(ir_assignment *);
+
+   ir_rvalue *get_element(ir_variable *var, unsigned int element);
+   void assign(ir_assignment *ir, int elem, ir_rvalue *val);
+
+   bool progress; /* Set once an assignment has been broken down. */
+   void *mem_ctx; /* talloc context for new IR; NULL until first needed. */
+};
+
+/* Predicate handed to do_expression_flattening(): selects every
+ * expression that has at least one vector-typed operand. */
+static bool
+channel_expressions_predicate(ir_instruction *ir)
+{
+   ir_expression *const expression = ir->as_expression();
+   if (expression == NULL)
+      return false;
+
+   unsigned int op = 0;
+   while (op < expression->get_num_operands() &&
+	  !expression->operands[op]->type->is_vector())
+      op++;
+
+   return op < expression->get_num_operands();
+}
+
+extern "C" {
+GLboolean
+brw_do_channel_expressions(exec_list *instructions)
+{
+   /* Pull out every expression selected by
+    * channel_expressions_predicate (those with a vector operand) to a
+    * separate assignment to a temp, so the visitor below only has to
+    * break down whole assignments.
+    */
+   do_expression_flattening(instructions, channel_expressions_predicate);
+
+   ir_channel_expressions_visitor scalarizer;
+   visit_list_elements(&scalarizer, instructions);
+
+   return scalarizer.progress;
+}
+}
+
+ir_rvalue *
+ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
+{
+   /* Scalars come back whole; wider types are narrowed to channel elem. */
+   const bool whole = var->type->is_scalar();
+   assert(whole || elem < var->type->components());
+
+   ir_dereference *deref = new(mem_ctx) ir_dereference_variable(var);
+   if (whole)
+      return deref;
+   return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
+}
+
+void
+ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
+{
+   ir_dereference *channel_lhs = ir->lhs->clone(mem_ctx, NULL);
+   const unsigned int lhs_width = ir->lhs->type->components();
+
+   /* This assign-of-expression should have been generated by the
+    * expression flattening visitor (since we never short circuit to
+    * not flatten, even for plain assignments of variables), so the
+    * writemask is always full.
+    */
+   assert(ir->write_mask == (1 << lhs_width) - 1);
+
+   /* Smear the scalar across all the channels for the masked write. */
+   ir_swizzle *smeared = new(mem_ctx) ir_swizzle(val, 0, 0, 0, 0, lhs_width);
+
+   /* Emit the single-channel write ahead of the original assignment. */
+   ir->insert_before(new(mem_ctx) ir_assignment(channel_lhs, smeared,
+						NULL, (1 << elem)));
+}
+
+ir_visitor_status
+ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
+{
+   ir_expression *expr = ir->rhs->as_expression();
+   bool found_vector = false;
+   unsigned int i, vector_elements = 1;
+   ir_variable *op_var[2];
+   /* Replaces a vector-expression assignment with per-channel assignments. */
+   if (!expr)
+      return visit_continue;
+   /* New IR is allocated from the talloc parent of the first assignment seen. */
+   if (!this->mem_ctx)
+      this->mem_ctx = talloc_parent(ir);
+   /* Take the width from the first vector operand; scalar-only is skipped. */
+   for (i = 0; i < expr->get_num_operands(); i++) {
+      if (expr->operands[i]->type->is_vector()) {
+	 found_vector = true;
+	 vector_elements = expr->operands[i]->type->vector_elements;
+	 break;
+      }
+   }
+   if (!found_vector)
+      return visit_continue;
+
+   /* Store the expression operands in temps so we can use them
+    * multiple times.
+    */
+   for (i = 0; i < expr->get_num_operands(); i++) {
+      ir_assignment *assign;
+      ir_dereference *deref;
+
+      assert(!expr->operands[i]->type->is_matrix());
+
+      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
+					   "channel_expressions",
+					   ir_var_temporary);
+      ir->insert_before(op_var[i]);
+
+      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
+      assign = new(mem_ctx) ir_assignment(deref,
+					  expr->operands[i],
+					  NULL);
+      ir->insert_before(assign);
+   }
+   /* Each per-channel result is a scalar of the LHS's base type. */
+   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
+							   1, 1);
+
+   /* OK, time to break down this vector operation. */
+   switch (expr->operation) {
+   case ir_unop_bit_not: /* component-wise unary operations */
+   case ir_unop_logic_not:
+   case ir_unop_neg:
+   case ir_unop_abs:
+   case ir_unop_sign:
+   case ir_unop_rcp:
+   case ir_unop_rsq:
+   case ir_unop_sqrt:
+   case ir_unop_exp:
+   case ir_unop_log:
+   case ir_unop_exp2:
+   case ir_unop_log2:
+   case ir_unop_f2i:
+   case ir_unop_i2f:
+   case ir_unop_f2b:
+   case ir_unop_b2f:
+   case ir_unop_i2b:
+   case ir_unop_b2i:
+   case ir_unop_u2f:
+   case ir_unop_trunc:
+   case ir_unop_ceil:
+   case ir_unop_floor:
+   case ir_unop_fract:
+   case ir_unop_sin:
+   case ir_unop_cos:
+   case ir_unop_dFdx:
+   case ir_unop_dFdy:
+      for (i = 0; i < vector_elements; i++) {
+	 ir_rvalue *op0 = get_element(op_var[0], i);
+
+	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
+						  element_type,
+						  op0,
+						  NULL));
+      }
+      break;
+
+   case ir_binop_add: /* component-wise binary operations */
+   case ir_binop_sub:
+   case ir_binop_mul:
+   case ir_binop_div:
+   case ir_binop_mod:
+   case ir_binop_min:
+   case ir_binop_max:
+   case ir_binop_pow:
+   case ir_binop_lshift:
+   case ir_binop_rshift:
+   case ir_binop_bit_and:
+   case ir_binop_bit_xor:
+   case ir_binop_bit_or:
+      for (i = 0; i < vector_elements; i++) {
+	 ir_rvalue *op0 = get_element(op_var[0], i);
+	 ir_rvalue *op1 = get_element(op_var[1], i);
+
+	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
+						  element_type,
+						  op0,
+						  op1));
+      }
+      break;
+
+   case ir_unop_any: { /* logical OR of all the channels */
+      ir_expression *temp;
+      temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
+					element_type,
+					get_element(op_var[0], 0),
+					get_element(op_var[0], 1));
+
+      for (i = 2; i < vector_elements; i++) {
+	 temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
+					   element_type,
+					   get_element(op_var[0], i),
+					   temp);
+      }
+      assign(ir, 0, temp);
+      break;
+   }
+
+   case ir_binop_dot: { /* sum of the per-channel products */
+      ir_expression *last = NULL;
+      for (i = 0; i < vector_elements; i++) {
+	 ir_rvalue *op0 = get_element(op_var[0], i);
+	 ir_rvalue *op1 = get_element(op_var[1], i);
+	 ir_expression *temp;
+
+	 temp = new(mem_ctx) ir_expression(ir_binop_mul,
+					   element_type,
+					   op0,
+					   op1);
+	 if (last) {
+	    last = new(mem_ctx) ir_expression(ir_binop_add,
+					      element_type,
+					      temp,
+					      last);
+	 } else {
+	    last = temp;
+	 }
+      }
+      assign(ir, 0, last);
+      break;
+   }
+
+   case ir_binop_cross: { /* out[i] = a[i+1]*b[i+2] - b[i+1]*a[i+2], mod 3 */
+      for (i = 0; i < vector_elements; i++) {
+	 int swiz0 = (i + 1) % 3;
+	 int swiz1 = (i + 2) % 3;
+	 ir_expression *temp1, *temp2;
+
+	 temp1 = new(mem_ctx) ir_expression(ir_binop_mul,
+					    element_type,
+					    get_element(op_var[0], swiz0),
+					    get_element(op_var[1], swiz1));
+
+	 temp2 = new(mem_ctx) ir_expression(ir_binop_mul,
+					    element_type,
+					    get_element(op_var[1], swiz0),
+					    get_element(op_var[0], swiz1));
+
+	 temp2 = new(mem_ctx) ir_expression(ir_unop_neg,
+					    element_type,
+					    temp2,
+					    NULL);
+
+	 assign(ir, i, new(mem_ctx) ir_expression(ir_binop_add,
+						  element_type,
+						  temp1, temp2));
+      }
+      break;
+   }
+
+   case ir_binop_less:
+   case ir_binop_greater:
+   case ir_binop_lequal:
+   case ir_binop_gequal:
+   case ir_binop_logic_and:
+   case ir_binop_logic_xor:
+   case ir_binop_logic_or:
+      ir->print();
+      printf("\n");
+      assert(!"not reached: expression operates on scalars only");
+      break;
+   case ir_binop_equal:
+   case ir_binop_nequal: { /* per-channel compares, ANDed (==) or ORed (!=) */
+      ir_expression *last = NULL;
+      for (i = 0; i < vector_elements; i++) {
+	 ir_rvalue *op0 = get_element(op_var[0], i);
+	 ir_rvalue *op1 = get_element(op_var[1], i);
+	 ir_expression *temp;
+	 ir_expression_operation join;
+
+	 if (expr->operation == ir_binop_equal)
+	    join = ir_binop_logic_and;
+	 else
+	    join = ir_binop_logic_or;
+
+	 temp = new(mem_ctx) ir_expression(expr->operation,
+					   element_type,
+					   op0,
+					   op1);
+	 if (last) {
+	    last = new(mem_ctx) ir_expression(join,
+					      element_type,
+					      temp,
+					      last);
+	 } else {
+	    last = temp;
+	 }
+      }
+      assign(ir, 0, last);
+      break;
+   }
+   }
+   /* The assignments inserted before ir replace it entirely. */
+   ir->remove();
+   this->progress = true;
+
+   return visit_continue;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 25a72f5dda..438da1af62 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -465,4 +465,6 @@ GLboolean brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog);
 struct gl_shader *brw_new_shader(GLcontext *ctx, GLuint name, GLuint type);
 struct gl_shader_program *brw_new_shader_program(GLcontext *ctx, GLuint name);
 
+GLboolean brw_do_channel_expressions(struct exec_list *instructions);
+
 #endif
-- 
cgit v1.2.3


From c1dfdcb93a8991788032d4906c5bf1a5b48cdc48 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 26 Aug 2010 12:02:26 -0700
Subject: i965: Add new pass to split vectors into scalar variables

Combined with the previous pass, this lets other optimization passes
do their work thanks to ir_tree_grafting.  Still have regression in
instruction count with INTEL_NEW_FS, but register count is even
better.
---
 src/mesa/drivers/dri/i965/Makefile                 |   3 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp               |   2 +
 .../drivers/dri/i965/brw_fs_vector_splitting.cpp   | 388 +++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_wm.h                 |   1 +
 4 files changed, 393 insertions(+), 1 deletion(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp

(limited to 'src/mesa/drivers/dri/i965/Makefile')

diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 39acae9e43..bea48e1313 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -106,7 +106,8 @@ C_SOURCES = \
 
 CXX_SOURCES = \
 	brw_fs.cpp \
-	brw_fs_channel_expressions.cpp
+	brw_fs_channel_expressions.cpp \
+	brw_fs_vector_splitting.cpp
 
 ASM_SOURCES = 
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d16e75a2ca..9a6ee7a010 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -37,6 +37,7 @@ extern "C" {
 #include "talloc.h"
 }
 #include "../glsl/ir_optimization.h"
+#include "../glsl/ir_print_visitor.h"
 
 struct gl_shader *
 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
@@ -87,6 +88,7 @@ brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
       if (using_new_fs && shader->Type == GL_FRAGMENT_SHADER) {
 	 do_mat_op_to_vec(shader->ir);
 	 brw_do_channel_expressions(shader->ir);
+	 brw_do_vector_splitting(shader->ir);
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
new file mode 100644
index 0000000000..d4da86b3b0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -0,0 +1,388 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_fs_vector_splitting.cpp
+ *
+ * If a vector is only ever referenced by its components, then
+ * split those components out to individual variables so they can be
+ * handled normally by other optimization passes.
+ *
+ * This skips vectors in uniforms and varyings, which need to be
+ * accessible as vectors for their access by the GL.  Also, vector
+ * results of non-variable-derefs in assignments aren't handled
+ * because to do so we would have to store the vector result to a
+ * temporary in order to unload each channel, and to do so would just
+ * loop us back to where we started.  For the 965, this is exactly the
+ * behavior we want for texture lookup results, but probably not for others.
+ */
+
+extern "C" {
+#include "main/core.h"
+#include "intel_context.h"
+}
+#include "../glsl/ir.h"
+#include "../glsl/ir_visitor.h"
+#include "../glsl/ir_print_visitor.h"
+#include "../glsl/ir_rvalue_visitor.h"
+#include "../glsl/glsl_types.h"
+
+static bool debug = false; /* When true, each candidate vector is printed. */
+
+class variable_entry : public exec_node
+{
+public:
+   /* Starts out undeclared with no whole-vector uses and no talloc
+    * context; components[] is only filled in for entries being split.
+    */
+   variable_entry(ir_variable *var)
+      : var(var), whole_vector_access(0), declaration(false), mem_ctx(NULL)
+   {
+   }
+
+   ir_variable *var; /* The key: the variable's pointer. */
+
+   /** Number of times the variable is used as a whole vector. */
+   unsigned whole_vector_access;
+
+   bool declaration; /* If the variable had a decl in the instruction stream */
+
+   ir_variable *components[4]; /* Per-channel replacements (x, y, z, w). */
+
+   /** talloc_parent(this->var) -- the shader's talloc context. */
+   void *mem_ctx;
+};
+
+class ir_vector_reference_visitor : public ir_hierarchical_visitor {
+public:
+   /* The visitor owns a private talloc context for its entries. */
+   ir_vector_reference_visitor()
+   {
+      variable_list.make_empty();
+      mem_ctx = talloc_new(NULL);
+   }
+
+   /* Frees the private context created by the constructor. */
+   ~ir_vector_reference_visitor()
+   {
+      talloc_free(this->mem_ctx);
+   }
+
+   virtual ir_visitor_status visit(ir_variable *);
+   virtual ir_visitor_status visit(ir_dereference_variable *);
+   virtual ir_visitor_status visit_enter(ir_swizzle *);
+   virtual ir_visitor_status visit_enter(ir_assignment *);
+   virtual ir_visitor_status visit_enter(ir_function_signature *);
+
+   variable_entry *get_variable_entry(ir_variable *var);
+
+   exec_list variable_list; /* List of variable_entry */
+   void *mem_ctx; /* Context the variable_entry objects are allocated from. */
+};
+
+variable_entry *
+ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
+{
+   assert(var);
+
+   /* Only vector-typed variables are tracked at all. */
+   if (!var->type->is_vector())
+      return NULL;
+
+   /* Can't split varyings or uniforms.  Function in/outs won't get split
+    * either, so don't care about the ambiguity.
+    */
+   const bool unsplittable_mode = var->mode == ir_var_uniform ||
+				  var->mode == ir_var_in ||
+				  var->mode == ir_var_out ||
+				  var->mode == ir_var_inout;
+   if (unsplittable_mode)
+      return NULL;
+
+   /* Hand back the existing record when the variable was seen before. */
+   foreach_iter(exec_list_iterator, iter, this->variable_list) {
+      variable_entry *known = (variable_entry *)iter.get();
+      if (known->var == var)
+	 return known;
+   }
+
+   /* First sighting: append a new record, allocated from our context. */
+   variable_entry *fresh = new(mem_ctx) variable_entry(var);
+   this->variable_list.push_tail(fresh);
+
+   return fresh;
+}
+
+
+ir_visitor_status
+ir_vector_reference_visitor::visit(ir_variable *ir)
+{
+   /* Note that the variable has a declaration in the instruction
+    * stream; variables that can't be tracked yield no entry at all.
+    */
+   if (variable_entry *decl_entry = this->get_variable_entry(ir))
+      decl_entry->declaration = true;
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_vector_reference_visitor::visit(ir_dereference_variable *ir)
+{
+   /* Any dereference that reaches this visitor uses the variable as a
+    * whole vector (the visit_enter hooks prune the per-channel cases),
+    * so count it against the variable's entry, if it has one.
+    */
+   if (variable_entry *whole_use = this->get_variable_entry(ir->var))
+      whole_use->whole_vector_access++;
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_vector_reference_visitor::visit_enter(ir_swizzle *ir)
+{
+   /* Scalar swizzle of a plain variable: a per-channel use, so don't
+    * descend into (and count) the ir_dereference_variable below. */
+   if (!ir->val->as_dereference_variable() || !ir->type->is_scalar())
+      return visit_continue;
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+ir_vector_reference_visitor::visit_enter(ir_assignment *ir)
+{
+   /* Conditional writes and non-variable LHSes get the default walk. */
+   if (ir->condition || !ir->lhs->as_dereference_variable())
+      return visit_continue;
+
+   /* We'll split copies of a vector to copies of channels, so don't
+    * descend to the ir_dereference_variables.
+    */
+   if (ir->rhs->as_dereference_variable())
+      return visit_continue_with_parent;
+
+   /* If we're writing just a channel, then channel-splitting the LHS is
+    * OK, so visit only the RHS; wider writes get the default walk. */
+   if (!is_power_of_two(ir->write_mask))
+      return visit_continue;
+
+   ir->rhs->accept(this);
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+ir_vector_reference_visitor::visit_enter(ir_function_signature *ir)
+{
+   /* Walk just the body by hand and stop the default descent, so the
+    * function parameters are never visited (and never split). */
+   exec_list *const body = &ir->body;
+   visit_list_elements(this, body);
+   return visit_continue_with_parent;
+}
+
+/* Rewrites uses of the vectors on the list in terms of their channels. */
+class ir_vector_splitting_visitor : public ir_rvalue_visitor {
+public:
+   ir_vector_splitting_visitor(exec_list *vars)
+      : variable_list(vars), mem_ctx(NULL) {}
+
+   virtual ir_visitor_status visit_leave(ir_assignment *);
+
+   void handle_rvalue(ir_rvalue **rvalue);
+   struct variable_entry *get_splitting_entry(ir_variable *var);
+
+   exec_list *variable_list; /* Entries for the vectors being split. */
+   /* Initialized to NULL so it never holds an indeterminate pointer. */
+   void *mem_ctx;
+};
+
+struct variable_entry *
+ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
+{
+   assert(var);
+
+   /* Non-vectors can never match, so skip the list walk for them. */
+   if (var->type->is_vector()) {
+      foreach_iter(exec_list_iterator, iter, *this->variable_list) {
+	 variable_entry *candidate = (variable_entry *)iter.get();
+	 if (candidate->var == var)
+	    return candidate;
+      }
+   }
+
+   /* Not on the list of variables being split. */
+   return NULL;
+}
+
+void
+ir_vector_splitting_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   /* Rewrites a scalar swizzle of a split vector into a dereference of
+    * the matching per-channel variable; any other rvalue (including a
+    * NULL one) is left untouched.
+    */
+   ir_swizzle *channel = *rvalue ? (*rvalue)->as_swizzle() : NULL;
+   if (channel == NULL || !channel->type->is_scalar())
+      return;
+
+   ir_dereference_variable *source = channel->val->as_dereference_variable();
+   variable_entry *split = source ? get_splitting_entry(source->var) : NULL;
+   if (split == NULL)
+      return;
+
+   /* A scalar swizzle names its one channel in mask.x, which indexes
+    * the per-channel variables directly. */
+   ir_variable *replacement = split->components[channel->mask.x];
+   *rvalue = new(split->mem_ctx) ir_dereference_variable(replacement);
+}
+
+ir_visitor_status
+ir_vector_splitting_visitor::visit_leave(ir_assignment *ir)
+{
+   /* Rewrites one assignment in terms of per-channel variables.  All
+    * allocations use the talloc context stored in the variable_entry
+    * involved; this->mem_ctx is never set to a real context.
+    */
+   ir_dereference_variable *lhs_deref = ir->lhs->as_dereference_variable();
+   ir_dereference_variable *rhs_deref = ir->rhs->as_dereference_variable();
+   variable_entry *lhs = lhs_deref ? get_splitting_entry(lhs_deref->var) : NULL;
+   variable_entry *rhs = rhs_deref ? get_splitting_entry(rhs_deref->var) : NULL;
+
+   if (lhs_deref && rhs_deref && (lhs || rhs) && !ir->condition) {
+      /* Straight assignment of vector variables. */
+      for (unsigned int i = 0; i < ir->rhs->type->vector_elements; i++) {
+	 ir_dereference *new_lhs;
+	 ir_rvalue *new_rhs;
+	 void *mem_ctx = lhs ? lhs->mem_ctx : rhs->mem_ctx;
+	 unsigned int writemask;
+
+	 if (lhs) {
+	    new_lhs = new(mem_ctx) ir_dereference_variable(lhs->components[i]);
+	    writemask = (ir->write_mask >> i) & 1;
+	 } else {
+	    new_lhs = ir->lhs->clone(mem_ctx, NULL);
+	    writemask = ir->write_mask & (1 << i);
+	 }
+
+	 if (rhs) {
+	    new_rhs = new(mem_ctx) ir_dereference_variable(rhs->components[i]);
+	 } else {
+	    new_rhs = new(mem_ctx) ir_swizzle(ir->rhs->clone(mem_ctx, NULL),
+					      i, i, i, i, 1);
+	 }
+
+	 ir->insert_before(new(mem_ctx) ir_assignment(new_lhs,
+						      new_rhs,
+						      NULL, writemask));
+      }
+      ir->remove();
+   } else if (lhs) {
+      /* Channel write to a split vector: retarget it at that channel's
+       * variable, allocating from the entry's talloc context.
+       */
+      void *mem_ctx = lhs->mem_ctx;
+      int elem = -1;
+
+      for (int chan = 0; chan < 4; chan++) {
+	 if (ir->write_mask == (1u << chan))
+	    elem = chan;
+      }
+      if (elem < 0) {
+	 ir->print();
+	 assert(!"not reached: non-channelwise dereference of LHS.");
+	 return visit_continue; /* don't index components[-1] without asserts */
+      }
+
+      ir->lhs = new(mem_ctx) ir_dereference_variable(lhs->components[elem]);
+      ir->write_mask = (1 << 0);
+
+      handle_rvalue(&ir->rhs);
+      ir->rhs = new(mem_ctx) ir_swizzle(ir->rhs,
+					elem, elem, elem, elem, 1);
+   } else {
+      handle_rvalue(&ir->rhs);
+   }
+
+   handle_rvalue(&ir->condition);
+
+   return visit_continue;
+}
+
+extern "C" { /* C linkage: the function is declared in brw_wm.h. */
+GLboolean
+brw_do_vector_splitting(exec_list *instructions)
+{
+   /* Returns GL_TRUE when at least one vector variable was split. */
+   ir_vector_reference_visitor refs;
+   visit_list_elements(&refs, instructions);
+
+   /* Trim out variables we can't split. */
+   foreach_iter(exec_list_iterator, iter, refs.variable_list) {
+      variable_entry *entry = (variable_entry *)iter.get();
+
+      if (debug) {
+	 printf("vector %s@%p: decl %d, whole_access %u\n",
+		entry->var->name, (void *) entry->var, entry->declaration,
+		entry->whole_vector_access);
+      }
+
+      if (!entry->declaration || entry->whole_vector_access) {
+	 entry->remove();
+      }
+   }
+
+   if (refs.variable_list.is_empty())
+      return GL_FALSE;
+   /* Scratch context for names; assumes ir_variable copies them -- confirm. */
+   void *mem_ctx = talloc_new(NULL);
+
+   /* Replace the decls of the vectors to be split with their split
+    * components.
+    */
+   foreach_iter(exec_list_iterator, iter, refs.variable_list) {
+      variable_entry *entry = (variable_entry *)iter.get();
+      const struct glsl_type *type;
+      type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
+
+      entry->mem_ctx = talloc_parent(entry->var);
+
+      for (unsigned int i = 0; i < entry->var->type->vector_elements; i++) {
+	 const char *name = talloc_asprintf(mem_ctx, "%s_%c",
+					    entry->var->name,
+					    "xyzw"[i]);
+
+	 entry->components[i] = new(entry->mem_ctx) ir_variable(type, name,
+								ir_var_temporary);
+	 entry->var->insert_before(entry->components[i]);
+      }
+
+      entry->var->remove();
+   }
+   /* Rewrite every use of the split vectors in terms of the components. */
+   ir_vector_splitting_visitor split(&refs.variable_list);
+   visit_list_elements(&split, instructions);
+
+   talloc_free(mem_ctx);
+
+   return GL_TRUE;
+}
+}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 438da1af62..6a761e723b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -466,5 +466,6 @@ struct gl_shader *brw_new_shader(GLcontext *ctx, GLuint name, GLuint type);
 struct gl_shader_program *brw_new_shader_program(GLcontext *ctx, GLuint name);
 
 GLboolean brw_do_channel_expressions(struct exec_list *instructions);
+GLboolean brw_do_vector_splitting(struct exec_list *instructions);
 
 #endif
-- 
cgit v1.2.3