From 29285882676388aacff123e8bdf025904abf8ea9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 24 Jun 2010 15:32:15 -0700
Subject: glsl2: Move the compiler to the subdirectory it will live in in Mesa.

---
 src/glsl/ir_optimization.h | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 src/glsl/ir_optimization.h

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
new file mode 100644
index 0000000000..432a33458c
--- /dev/null
+++ b/src/glsl/ir_optimization.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * \file ir_optimization.h
+ *
+ * Prototypes for optimization passes to be called by the compiler and drivers.
+ */
+
+bool do_constant_folding(exec_list *instructions);
+bool do_constant_variable(exec_list *instructions);
+bool do_constant_variable_unlinked(exec_list *instructions);
+bool do_copy_propagation(exec_list *instructions);
+bool do_dead_code(exec_list *instructions);
+bool do_dead_code_local(exec_list *instructions);
+bool do_dead_code_unlinked(exec_list *instructions);
+bool do_function_inlining(exec_list *instructions);
+bool do_if_simplification(exec_list *instructions);
+bool do_swizzle_swizzle(exec_list *instructions);
+bool do_vec_index_to_swizzle(exec_list *instructions);
-- 
cgit v1.2.3


From bda27424cf04c0d2ec2b49c56f562d5b2d2f0bff Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 25 Jun 2010 13:38:38 -0700
Subject: glsl2: Use the parser state as the talloc context for dead code
 elimination.

This cuts runtime by around 20% from talloc_parent() lookups.
---
 src/glsl/ir_dead_code.cpp      | 15 +++++++++------
 src/glsl/ir_optimization.h     |  6 ++++--
 src/glsl/main.cpp              |  2 +-
 src/mesa/shader/ir_to_mesa.cpp |  2 +-
 4 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/ir_dead_code.cpp b/src/glsl/ir_dead_code.cpp
index 8821304682..51fa96df0c 100644
--- a/src/glsl/ir_dead_code.cpp
+++ b/src/glsl/ir_dead_code.cpp
@@ -71,6 +71,8 @@ public:
 
    /* List of variable_entry */
    exec_list variable_list;
+
+   void *mem_ctx;
 };
 
 
@@ -84,9 +86,7 @@ ir_dead_code_visitor::get_variable_entry(ir_variable *var)
 	 return entry;
    }
 
-   void *ctx = talloc_parent(var);
-
-   variable_entry *entry = new(ctx) variable_entry(var);
+   variable_entry *entry = new(mem_ctx) variable_entry(var);
    this->variable_list.push_tail(entry);
    return entry;
 }
@@ -147,11 +147,13 @@ ir_dead_code_visitor::visit_leave(ir_assignment *ir)
  * for usage on an unlinked instruction stream.
  */
 bool
-do_dead_code(exec_list *instructions)
+do_dead_code(struct _mesa_glsl_parse_state *state,
+	     exec_list *instructions)
 {
    ir_dead_code_visitor v;
    bool progress = false;
 
+   v.mem_ctx = state;
    v.run(instructions);
 
    foreach_iter(exec_list_iterator, iter, v.variable_list) {
@@ -198,7 +200,8 @@ do_dead_code(exec_list *instructions)
  * with global scope.
  */
 bool
-do_dead_code_unlinked(exec_list *instructions)
+do_dead_code_unlinked(struct _mesa_glsl_parse_state *state,
+		      exec_list *instructions)
 {
    bool progress = false;
 
@@ -209,7 +212,7 @@ do_dead_code_unlinked(exec_list *instructions)
 	 foreach_iter(exec_list_iterator, sigiter, *f) {
 	    ir_function_signature *sig =
 	       (ir_function_signature *) sigiter.get();
-	    if (do_dead_code(&sig->body))
+	    if (do_dead_code(state, &sig->body))
 	       progress = true;
 	 }
       }
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 432a33458c..147f92176b 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -32,9 +32,11 @@ bool do_constant_folding(exec_list *instructions);
 bool do_constant_variable(exec_list *instructions);
 bool do_constant_variable_unlinked(exec_list *instructions);
 bool do_copy_propagation(exec_list *instructions);
-bool do_dead_code(exec_list *instructions);
+bool do_dead_code(struct _mesa_glsl_parse_state *state,
+		  exec_list *instructions);
 bool do_dead_code_local(exec_list *instructions);
-bool do_dead_code_unlinked(exec_list *instructions);
+bool do_dead_code_unlinked(struct _mesa_glsl_parse_state *state,
+			   exec_list *instructions);
 bool do_function_inlining(exec_list *instructions);
 bool do_if_simplification(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index dcd9bd69c0..b32e2ad3db 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -157,7 +157,7 @@ compile_shader(struct glsl_shader *shader)
 	 progress = do_if_simplification(&shader->ir) || progress;
 	 progress = do_copy_propagation(&shader->ir) || progress;
 	 progress = do_dead_code_local(&shader->ir) || progress;
-	 progress = do_dead_code_unlinked(&shader->ir) || progress;
+	 progress = do_dead_code_unlinked(state, &shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(&shader->ir) || progress;
 	 progress = do_constant_folding(&shader->ir) || progress;
 	 progress = do_vec_index_to_swizzle(&shader->ir) || progress;
diff --git a/src/mesa/shader/ir_to_mesa.cpp b/src/mesa/shader/ir_to_mesa.cpp
index f58af0f65f..0425e7d91e 100644
--- a/src/mesa/shader/ir_to_mesa.cpp
+++ b/src/mesa/shader/ir_to_mesa.cpp
@@ -1332,7 +1332,7 @@ _mesa_get_glsl_shader(GLcontext *ctx, void *mem_ctx, struct gl_shader *sh)
 	 progress = do_if_simplification(&shader->ir) || progress;
 	 progress = do_copy_propagation(&shader->ir) || progress;
 	 progress = do_dead_code_local(&shader->ir) || progress;
-	 progress = do_dead_code_unlinked(&shader->ir) || progress;
+	 progress = do_dead_code_unlinked(state, &shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(&shader->ir) || progress;
 	 progress = do_constant_folding(&shader->ir) || progress;
 	 progress = do_vec_index_to_swizzle(&shader->ir) || progress;
-- 
cgit v1.2.3


From 8a1f186cc55979bb9df0a88b48da8d81460c3e7c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 1 Jul 2010 10:09:58 -0700
Subject: glsl2: Add a pass to convert mod(a, b) to b * fract(a/b).

This is used by the Mesa IR backend to implement mod, fixing glsl-fs-mod.
---
 src/glsl/Makefile                    |  1 +
 src/glsl/ir.h                        |  9 ++++
 src/glsl/ir_hierarchical_visitor.cpp |  7 +--
 src/glsl/ir_hierarchical_visitor.h   | 12 +++++
 src/glsl/ir_hv_accept.cpp            |  5 +-
 src/glsl/ir_mod_to_fract.cpp         | 89 ++++++++++++++++++++++++++++++++++++
 src/glsl/ir_optimization.h           |  1 +
 src/mesa/shader/ir_to_mesa.cpp       |  3 ++
 8 files changed, 120 insertions(+), 7 deletions(-)
 create mode 100644 src/glsl/ir_mod_to_fract.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index f4e32b9185..a709bf7121 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -40,6 +40,7 @@ CXX_SOURCES = \
 	ir_hierarchical_visitor.cpp \
 	ir_hv_accept.cpp \
 	ir_if_simplification.cpp \
+	ir_mod_to_fract.cpp \
 	ir_print_visitor.cpp \
 	ir_reader.cpp \
 	ir_swizzle_swizzle.cpp \
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index f47813786b..c19bd417c3 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -551,6 +551,15 @@ enum ir_expression_operation {
    ir_binop_sub,
    ir_binop_mul,
    ir_binop_div,
+
+   /**
+    * Takes one of two combinations of arguments:
+    *
+    * - mod(vecN, vecN)
+    * - mod(vecN, float)
+    *
+    * Does not take integer types.
+    */
    ir_binop_mod,
 
    /**
diff --git a/src/glsl/ir_hierarchical_visitor.cpp b/src/glsl/ir_hierarchical_visitor.cpp
index 9afb12a4a2..d475df62fc 100644
--- a/src/glsl/ir_hierarchical_visitor.cpp
+++ b/src/glsl/ir_hierarchical_visitor.cpp
@@ -277,12 +277,7 @@ ir_hierarchical_visitor::visit_leave(ir_if *ir)
 void
 ir_hierarchical_visitor::run(exec_list *instructions)
 {
-   foreach_list(n, instructions) {
-      ir_instruction *ir = (ir_instruction *) n;
-
-      if (ir->accept(this) != visit_continue)
-	 break;
-   }
+   visit_list_elements(this, instructions);
 }
 
 
diff --git a/src/glsl/ir_hierarchical_visitor.h b/src/glsl/ir_hierarchical_visitor.h
index 2c4590d4b1..afa780dc91 100644
--- a/src/glsl/ir_hierarchical_visitor.h
+++ b/src/glsl/ir_hierarchical_visitor.h
@@ -141,6 +141,16 @@ public:
     */
    void run(struct exec_list *instructions);
 
+   /* Some visitors may need to insert new variable declarations and
+    * assignments for portions of a subtree, which means they need a
+    * pointer to the current instruction in the stream, not just their
+    * node in the tree rooted at that instruction.
+    *
+    * This is implemented by visit_list_elements -- if the visitor is
+    * not called by it, nothing good will happen.
+    */
+   class ir_instruction *base_ir;
+
    /**
     * Callback function that is invoked on entry to each node visited.
     *
@@ -161,4 +171,6 @@ void visit_tree(ir_instruction *ir,
 		void (*callback)(class ir_instruction *ir, void *data),
 		void *data);
 
+ir_visitor_status visit_list_elements(ir_hierarchical_visitor *v, exec_list *l);
+
 #endif /* IR_HIERARCHICAL_VISITOR_H */
diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp
index 7b5cc5234c..e772018a45 100644
--- a/src/glsl/ir_hv_accept.cpp
+++ b/src/glsl/ir_hv_accept.cpp
@@ -37,20 +37,23 @@
  * from list.  However, if nodes are added to the list after the node being
  * processed, some of the added noded may not be processed.
  */
-static ir_visitor_status
+ir_visitor_status
 visit_list_elements(ir_hierarchical_visitor *v, exec_list *l)
 {
    exec_node *next;
+   ir_instruction *prev_base_ir = v->base_ir;
 
    for (exec_node *n = l->head; n->next != NULL; n = next) {
       next = n->next;
 
       ir_instruction *const ir = (ir_instruction *) n;
+      v->base_ir = ir;
       ir_visitor_status s = ir->accept(v);
 
       if (s != visit_continue)
 	 return s;
    }
+   v->base_ir = prev_base_ir; /* NOTE(review): the early "return s" above skips this restore -- confirm intended. */
 
    return visit_continue;
 }
diff --git a/src/glsl/ir_mod_to_fract.cpp b/src/glsl/ir_mod_to_fract.cpp
new file mode 100644
index 0000000000..ec1e65092d
--- /dev/null
+++ b/src/glsl/ir_mod_to_fract.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_mod_to_fract.cpp
+ *
+ * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
+ *
+ * Many GPUs don't have a MOD instruction (945 and 965 included), and
+ * if we have to break it down like this anyway, it gives an
+ * opportunity to do things like constant fold the (1.0 / op1) easily.
+ */
+
+#include "ir.h"
+
+class ir_mod_to_fract_visitor : public ir_hierarchical_visitor {
+public:
+   ir_mod_to_fract_visitor()
+   {
+      this->made_progress = false;
+   }
+   /* Rewrites ir_binop_mod expressions in place; other expressions pass through. */
+   ir_visitor_status visit_leave(ir_expression *);
+   /* Set to true by visit_leave() whenever an expression was rewritten. */
+   bool made_progress;
+};
+
+bool
+do_mod_to_fract(exec_list *instructions)
+{
+   ir_mod_to_fract_visitor v;
+   /* Run the visitor over the list; the result reports whether any mod was lowered. */
+   visit_list_elements(&v, instructions);
+   return v.made_progress;
+}
+
+ir_visitor_status
+ir_mod_to_fract_visitor::visit_leave(ir_expression *ir)
+{
+   if (ir->operation != ir_binop_mod)
+      return visit_continue;
+   /* Copy op1 into a temporary ("mod_b") because it is referenced twice below. */
+   ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b");
+   this->base_ir->insert_before(temp);
+
+   ir_assignment *assign;
+   ir_rvalue *expr;
+   /* mod_b = op1, inserted ahead of the instruction currently being visited. */
+   assign = new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
+				  ir->operands[1], NULL);
+   this->base_ir->insert_before(assign);
+   /* op0 / mod_b */
+   expr = new(ir) ir_expression(ir_binop_div,
+				ir->operands[0]->type,
+				ir->operands[0],
+				new(ir) ir_dereference_variable(temp));
+   /* fract(op0 / mod_b) */
+   expr = new(ir) ir_expression(ir_unop_fract,
+				ir->operands[0]->type,
+				expr,
+				NULL);
+   /* Turn this node itself into mod_b * fract(op0 / mod_b). */
+   ir->operation = ir_binop_mul;
+   ir->operands[0] = new(ir) ir_dereference_variable(temp);
+   ir->operands[1] = expr;
+   this->made_progress = true;
+
+   return visit_continue;
+}
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 147f92176b..1a8b740566 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -39,5 +39,6 @@ bool do_dead_code_unlinked(struct _mesa_glsl_parse_state *state,
 			   exec_list *instructions);
 bool do_function_inlining(exec_list *instructions);
 bool do_if_simplification(exec_list *instructions);
+bool do_mod_to_fract(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
 bool do_vec_index_to_swizzle(exec_list *instructions);
diff --git a/src/mesa/shader/ir_to_mesa.cpp b/src/mesa/shader/ir_to_mesa.cpp
index 2f2096ef97..25267d79b5 100644
--- a/src/mesa/shader/ir_to_mesa.cpp
+++ b/src/mesa/shader/ir_to_mesa.cpp
@@ -1724,6 +1724,9 @@ _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
    if (!state->error && !state->translation_unit.is_empty())
       _mesa_ast_to_hir(shader->ir, state);
 
+   /* Lowering */
+   do_mod_to_fract(shader->ir);
+
    /* Optimization passes */
    if (!state->error && !shader->ir->is_empty()) {
       bool progress;
-- 
cgit v1.2.3


From 9a0e421983edc31371440c08687fa2bb2207924d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 2 Jul 2010 11:27:06 -0700
Subject: glsl2: Add a pass to break ir_binop_div to _mul and _rcp.

This results in constant folding of a constant divisor.
---
 src/glsl/Makefile              |  1 +
 src/glsl/ir_div_to_mul_rcp.cpp | 77 ++++++++++++++++++++++++++++++++++++++++++
 src/glsl/ir_optimization.h     |  1 +
 src/mesa/shader/ir_to_mesa.cpp |  5 ++-
 4 files changed, 81 insertions(+), 3 deletions(-)
 create mode 100644 src/glsl/ir_div_to_mul_rcp.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index a709bf7121..30ba475d92 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -33,6 +33,7 @@ CXX_SOURCES = \
 	ir.cpp \
 	ir_dead_code.cpp \
 	ir_dead_code_local.cpp \
+	ir_div_to_mul_rcp.cpp \
 	ir_expression_flattening.cpp \
 	ir_function_can_inline.cpp \
 	ir_function.cpp \
diff --git a/src/glsl/ir_div_to_mul_rcp.cpp b/src/glsl/ir_div_to_mul_rcp.cpp
new file mode 100644
index 0000000000..ce84add221
--- /dev/null
+++ b/src/glsl/ir_div_to_mul_rcp.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_div_to_mul_rcp.cpp
+ *
+ * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
+ *
+ * Many GPUs don't have a divide instruction (945 and 965 included),
+ * but they do have an RCP instruction to compute an approximate
+ * reciprocal.  By breaking the operation down, constant reciprocals
+ * can get constant folded.
+ */
+
+#include "ir.h"
+
+class ir_div_to_mul_rcp_visitor : public ir_hierarchical_visitor {
+public:
+   ir_div_to_mul_rcp_visitor()
+   {
+      this->made_progress = false;
+   }
+   /* Rewrites ir_binop_div expressions in place; other expressions pass through. */
+   ir_visitor_status visit_leave(ir_expression *);
+   /* Set to true by visit_leave() whenever an expression was rewritten. */
+   bool made_progress;
+};
+
+bool
+do_div_to_mul_rcp(exec_list *instructions)
+{
+   ir_div_to_mul_rcp_visitor v;
+   /* Run the visitor over the list; the result reports whether any divide was lowered. */
+   visit_list_elements(&v, instructions);
+   return v.made_progress;
+}
+
+ir_visitor_status
+ir_div_to_mul_rcp_visitor::visit_leave(ir_expression *ir)
+{
+   if (ir->operation != ir_binop_div)
+      return visit_continue;
+   /* NOTE(review): operand types are not checked, so integer divides are rewritten too -- confirm. */
+   /* New expression for the 1.0 / op1 */
+   ir_rvalue *expr;
+   expr = new(ir) ir_expression(ir_unop_rcp,
+				ir->operands[1]->type,
+				ir->operands[1],
+				NULL);
+
+   /* op0 / op1 -> op0 * (1.0 / op1) */
+   ir->operation = ir_binop_mul;
+   ir->operands[1] = expr;
+   this->made_progress = true;
+
+   return visit_continue;
+}
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 1a8b740566..6d02e591c3 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -37,6 +37,7 @@ bool do_dead_code(struct _mesa_glsl_parse_state *state,
 bool do_dead_code_local(exec_list *instructions);
 bool do_dead_code_unlinked(struct _mesa_glsl_parse_state *state,
 			   exec_list *instructions);
+bool do_div_to_mul_rcp(exec_list *instructions);
 bool do_function_inlining(exec_list *instructions);
 bool do_if_simplification(exec_list *instructions);
 bool do_mod_to_fract(exec_list *instructions);
diff --git a/src/mesa/shader/ir_to_mesa.cpp b/src/mesa/shader/ir_to_mesa.cpp
index 7c7e368d0d..d5664e7b91 100644
--- a/src/mesa/shader/ir_to_mesa.cpp
+++ b/src/mesa/shader/ir_to_mesa.cpp
@@ -691,9 +691,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       }
       break;
    case ir_binop_div:
-      ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[1]);
-      ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], result_src);
-      break;
+      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
    case ir_binop_mod:
       assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
       break;
@@ -1729,6 +1727,7 @@ _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
 
    /* Lowering */
    do_mod_to_fract(shader->ir);
+   do_div_to_mul_rcp(shader->ir);
 
    /* Optimization passes */
    if (!state->error && !shader->ir->is_empty()) {
-- 
cgit v1.2.3


From a36334be02cb0a2b834667116bfeb680bf365857 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 6 Jul 2010 17:53:32 -0700
Subject: glsl2: Add pass for supporting variable vector indexing in rvalues.

The Mesa IR needs this to support vector indexing correctly, and
hardware backends such as 915 would want this behavior as well.

Fixes glsl-vs-vec4-indexing-2.
---
 src/glsl/Makefile                        |   1 +
 src/glsl/ir_optimization.h               |   1 +
 src/glsl/ir_vec_index_to_cond_assign.cpp | 197 +++++++++++++++++++++++++++++++
 src/glsl/main.cpp                        |   1 +
 src/mesa/shader/ir_to_mesa.cpp           |   6 +
 5 files changed, 206 insertions(+)
 create mode 100644 src/glsl/ir_vec_index_to_cond_assign.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 30ba475d92..d2a687aa33 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -47,6 +47,7 @@ CXX_SOURCES = \
 	ir_swizzle_swizzle.cpp \
 	ir_validate.cpp \
 	ir_variable.cpp \
+	ir_vec_index_to_cond_assign.cpp \
 	ir_vec_index_to_swizzle.cpp \
 	linker.cpp \
 	s_expression.cpp
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 6d02e591c3..93010dadbe 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -42,4 +42,5 @@ bool do_function_inlining(exec_list *instructions);
 bool do_if_simplification(exec_list *instructions);
 bool do_mod_to_fract(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
+bool do_vec_index_to_cond_assign(exec_list *instructions);
 bool do_vec_index_to_swizzle(exec_list *instructions);
diff --git a/src/glsl/ir_vec_index_to_cond_assign.cpp b/src/glsl/ir_vec_index_to_cond_assign.cpp
new file mode 100644
index 0000000000..6264a430e3
--- /dev/null
+++ b/src/glsl/ir_vec_index_to_cond_assign.cpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_vec_index_to_cond_assign.cpp
+ *
+ * Turns indexing into vector types to a series of conditional moves
+ * of each channel's swizzle into a temporary.
+ *
+ * Most GPUs don't have a native way to do this operation, and this
+ * works around that.  For drivers using both this pass and
+ * ir_vec_index_to_swizzle, there's a risk that this pass will happen
+ * before sufficient constant folding to find that the array index is
+ * constant.  However, we hope that other optimization passes,
+ * particularly constant folding of assignment conditions and copy
+ * propagation, will result in the same code in the end.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_optimization.h"
+#include "glsl_types.h"
+
+/**
+ * Visitor that lowers array-style indexing of vectors to conditional moves.
+ */
+
+class ir_vec_index_to_cond_assign_visitor : public ir_hierarchical_visitor {
+public:
+   ir_vec_index_to_cond_assign_visitor()
+   {
+      progress = false;
+   }
+   /* Returns a replacement for \c val, or \c val itself when no lowering applies. */
+   ir_rvalue *convert_vec_index_to_cond_assign(ir_rvalue *val);
+   /* Each hook below runs the conversion on rvalues held directly by the visited node. */
+   virtual ir_visitor_status visit_enter(ir_expression *);
+   virtual ir_visitor_status visit_enter(ir_swizzle *);
+   virtual ir_visitor_status visit_enter(ir_assignment *);
+   virtual ir_visitor_status visit_enter(ir_return *);
+   virtual ir_visitor_status visit_enter(ir_call *);
+   virtual ir_visitor_status visit_enter(ir_if *);
+   /* True once any dereference has been lowered. */
+   bool progress;
+};
+
+ir_rvalue *
+ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(ir_rvalue *ir)
+{
+   ir_dereference_array *orig_deref = ir->as_dereference_array();
+   ir_assignment *assign;
+   ir_variable *index, *var;
+   ir_dereference *deref;
+   ir_expression *condition;
+   ir_swizzle *swizzle;
+   int i;
+   /* Only array dereferences are candidates; anything else is returned unchanged. */
+   if (!orig_deref)
+      return ir;
+   /* Indexing into matrices and arrays is left alone; only vectors are lowered. */
+   if (orig_deref->array->type->is_matrix() ||
+       orig_deref->array->type->is_array())
+      return ir;
+
+   assert(orig_deref->array_index->type->base_type == GLSL_TYPE_INT);
+
+   /* Store the index to a temporary to avoid reusing its tree. */
+   index = new(base_ir) ir_variable(glsl_type::int_type,
+				    "vec_index_tmp_i");
+   base_ir->insert_before(index);
+   deref = new(base_ir) ir_dereference_variable(index);
+   assign = new(base_ir) ir_assignment(deref, orig_deref->array_index, NULL);
+   base_ir->insert_before(assign);
+
+   /* Temporary where we store whichever value we swizzle out. */
+   var = new(base_ir) ir_variable(ir->type, "vec_index_tmp_v");
+   base_ir->insert_before(var);
+
+   /* Generate a conditional move of each vector element to the temp. */
+   for (i = 0; i < orig_deref->array->type->vector_elements; i++) {
+      deref = new(base_ir) ir_dereference_variable(index);
+      condition = new(base_ir) ir_expression(ir_binop_equal,
+					     glsl_type::bool_type,
+					     deref,
+					     new(base_ir) ir_constant(i));
+
+      /* Just clone the rest of the deref chain when trying to get at the
+       * underlying variable.
+       */
+      deref = (ir_dereference *)orig_deref->array->clone(NULL);
+      swizzle = new(base_ir) ir_swizzle(deref, i, 0, 0, 0, 1);
+      /* Emit "if (index == i) var = <channel i>" ahead of the current instruction. */
+      deref = new(base_ir) ir_dereference_variable(var);
+      assign = new(base_ir) ir_assignment(deref, swizzle, condition);
+      base_ir->insert_before(assign);
+   }
+   /* Hand back a dereference of the temporary to stand in for the original rvalue. */
+   this->progress = true;
+   return new(base_ir) ir_dereference_variable(var);
+}
+
+ir_visitor_status
+ir_vec_index_to_cond_assign_visitor::visit_enter(ir_expression *ir)
+{
+   unsigned int i;
+   /* Run the conversion on every operand; unaffected operands come back unchanged. */
+   for (i = 0; i < ir->get_num_operands(); i++) {
+      ir->operands[i] = convert_vec_index_to_cond_assign(ir->operands[i]);
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_vec_index_to_cond_assign_visitor::visit_enter(ir_swizzle *ir)
+{
+   /* Can't be hit from normal GLSL, since you can't swizzle a scalar (which
+    * the result of indexing a vector is).  But maybe at some point we'll end
+    * up using swizzling of scalars for vector construction.
+    */
+   ir->val = convert_vec_index_to_cond_assign(ir->val);
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_vec_index_to_cond_assign_visitor::visit_enter(ir_assignment *ir)
+{
+   /* FINISHME: Handle it on the LHS. */
+   ir->rhs = convert_vec_index_to_cond_assign(ir->rhs);
+   /* NOTE(review): only the RHS is converted here; an assignment condition is not -- confirm. */
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_vec_index_to_cond_assign_visitor::visit_enter(ir_call *ir)
+{
+   foreach_iter(exec_list_iterator, iter, *ir) {
+      ir_rvalue *param = (ir_rvalue *)iter.get();
+      ir_rvalue *new_param = convert_vec_index_to_cond_assign(param);
+      /* If the parameter was lowered, splice the replacement in where the old node sat. */
+      if (new_param != param) {
+	 param->insert_before(new_param);
+	 param->remove();
+      }
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_vec_index_to_cond_assign_visitor::visit_enter(ir_return *ir)
+{
+   if (ir->value) {
+      ir->value = convert_vec_index_to_cond_assign(ir->value);
+   }
+   /* A return with no value has nothing to convert. */
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_vec_index_to_cond_assign_visitor::visit_enter(ir_if *ir)
+{
+   ir->condition = convert_vec_index_to_cond_assign(ir->condition);
+   /* Only the condition is rewritten by this hook. */
+   return visit_continue;
+}
+
+bool
+do_vec_index_to_cond_assign(exec_list *instructions)
+{
+   ir_vec_index_to_cond_assign_visitor v;
+   /* Run the visitor over the list; it records whether anything was lowered. */
+   visit_list_elements(&v, instructions);
+
+   return v.progress;
+}
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index 9bed2c6bcc..782934a8d7 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -175,6 +175,7 @@ compile_shader(struct gl_shader *shader)
 	 progress = do_constant_variable_unlinked(shader->ir) || progress;
 	 progress = do_constant_folding(shader->ir) || progress;
 	 progress = do_vec_index_to_swizzle(shader->ir) || progress;
+	 progress = do_vec_index_to_cond_assign(shader->ir) || progress;
 	 progress = do_swizzle_swizzle(shader->ir) || progress;
       } while (progress);
    }
diff --git a/src/mesa/shader/ir_to_mesa.cpp b/src/mesa/shader/ir_to_mesa.cpp
index 021e270f18..daf09e9e65 100644
--- a/src/mesa/shader/ir_to_mesa.cpp
+++ b/src/mesa/shader/ir_to_mesa.cpp
@@ -1821,7 +1821,13 @@ _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
 	 progress = do_dead_code_unlinked(state, shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(shader->ir) || progress;
 	 progress = do_constant_folding(shader->ir) || progress;
+
 	 progress = do_vec_index_to_swizzle(shader->ir) || progress;
+	 /* Do this one after the previous to let the easier pass handle
+	  * constant vector indexing.
+	  */
+	 progress = do_vec_index_to_cond_assign(shader->ir) || progress;
+
 	 progress = do_swizzle_swizzle(shader->ir) || progress;
       } while (progress);
    }
-- 
cgit v1.2.3


From d674ebcee0d2731e50d6530502cefcebc39dcdb6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 6 Jul 2010 18:09:39 -0700
Subject: glsl2: Add a pass to simplify if statements returning from both
 sides.

This allows function inlining making the following tests work even
without function calls implemented:
glsl-fs-functions-2
glsl-fs-functions-3
glsl-vs-functions
glsl-vs-functions-2
glsl-vs-functions-3
glsl-vs-vec4-indexing-5

(Note that those tests were designed to trigger actual function calls,
and this defeats them.  However, those testcases ended up catching the
bug in the previous commit.)
---
 src/glsl/Makefile              |   1 +
 src/glsl/ir_if_return.cpp      | 123 +++++++++++++++++++++++++++++++++++++++++
 src/glsl/ir_optimization.h     |   1 +
 src/mesa/shader/ir_to_mesa.cpp |   1 +
 4 files changed, 126 insertions(+)
 create mode 100644 src/glsl/ir_if_return.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index d2a687aa33..ddc9d82d61 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -40,6 +40,7 @@ CXX_SOURCES = \
 	ir_function_inlining.cpp \
 	ir_hierarchical_visitor.cpp \
 	ir_hv_accept.cpp \
+	ir_if_return.cpp \
 	ir_if_simplification.cpp \
 	ir_mod_to_fract.cpp \
 	ir_print_visitor.cpp \
diff --git a/src/glsl/ir_if_return.cpp b/src/glsl/ir_if_return.cpp
new file mode 100644
index 0000000000..f68dcfb501
--- /dev/null
+++ b/src/glsl/ir_if_return.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_if_return.cpp
+ *
+ * If a function includes an if statement that returns from both
+ * branches, then make the branches write the return val to a temp and
+ * return the temp after the if statement.
+ *
+ * This allows inlining in the common case of short functions that
+ * return one of two values based on a condition.  This helps on
+ * hardware with no branching support, and may even be a useful
+ * transform on hardware supporting control flow by replacing masked
+ * returns with normal returns.
+ */
+
+#include "ir.h"
+
+class ir_if_return_visitor : public ir_hierarchical_visitor {
+public:
+   ir_if_return_visitor()
+   {
+      this->progress = false;
+   }
+
+   ir_visitor_status visit_enter(ir_if *);
+
+   bool progress;
+};
+
+bool
+do_if_return(exec_list *instructions)
+{
+   ir_if_return_visitor v;
+
+   visit_list_elements(&v, instructions);
+
+   return v.progress;
+}
+
+
+ir_visitor_status
+ir_if_return_visitor::visit_enter(ir_if *ir)
+{
+   ir_return *then_return = NULL;
+   ir_return *else_return = NULL;
+
+   /* Try to find a return statement on both sides. */
+   foreach_iter(exec_list_iterator, then_iter, ir->then_instructions) {
+      ir_instruction *then_ir = (ir_instruction *)then_iter.get();
+      then_return = then_ir->as_return();
+      if (then_return)
+	 break;
+   }
+   if (!then_return)
+      return visit_continue;
+
+   foreach_iter(exec_list_iterator, else_iter, ir->else_instructions) {
+      ir_instruction *else_ir = (ir_instruction *)else_iter.get();
+      else_return = else_ir->as_return();
+      if (else_return)
+	 break;
+   }
+   if (!else_return)
+      return visit_continue;
+
+   /* Trim off any trailing instructions after the return statements
+    * on both sides.
+    */
+   while (then_return->get_next()->get_next())
+      ((ir_instruction *)then_return->get_next())->remove();
+   while (else_return->get_next()->get_next())
+      ((ir_instruction *)else_return->get_next())->remove();
+
+   this->progress = true;
+
+   if (!then_return->value) {
+      then_return->remove();
+      else_return->remove();
+      ir->insert_after(new(ir) ir_return(NULL));
+   } else {
+      ir_assignment *assign;
+      ir_variable *new_var = new(ir) ir_variable(then_return->value->type,
+					     "if_return_tmp");
+      ir->insert_before(new_var);
+
+      assign = new(ir) ir_assignment(new(ir) ir_dereference_variable(new_var),
+				     then_return->value, NULL);
+      then_return->insert_before(assign);
+      then_return->remove();
+
+      assign = new(ir) ir_assignment(new(ir) ir_dereference_variable(new_var),
+				     else_return->value, NULL);
+      else_return->insert_before(assign);
+      else_return->remove();
+
+      ir_dereference_variable *deref = new(ir) ir_dereference_variable(new_var);
+      ir->insert_after(new(ir) ir_return(deref));
+   }
+
+   return visit_continue;
+}
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 93010dadbe..b03c0644cf 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -39,6 +39,7 @@ bool do_dead_code_unlinked(struct _mesa_glsl_parse_state *state,
 			   exec_list *instructions);
 bool do_div_to_mul_rcp(exec_list *instructions);
 bool do_function_inlining(exec_list *instructions);
+bool do_if_return(exec_list *instructions);
 bool do_if_simplification(exec_list *instructions);
 bool do_mod_to_fract(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
diff --git a/src/mesa/shader/ir_to_mesa.cpp b/src/mesa/shader/ir_to_mesa.cpp
index daf09e9e65..2ffff60065 100644
--- a/src/mesa/shader/ir_to_mesa.cpp
+++ b/src/mesa/shader/ir_to_mesa.cpp
@@ -1821,6 +1821,7 @@ _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
 	 progress = do_dead_code_unlinked(state, shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(shader->ir) || progress;
 	 progress = do_constant_folding(shader->ir) || progress;
+	 progress = do_if_return(shader->ir) || progress;
 
 	 progress = do_vec_index_to_swizzle(shader->ir) || progress;
 	 /* Do this one after the previous to let the easier pass handle
-- 
cgit v1.2.3


From 6d8a0a0aadaafbab02dffcf7f89eb0210dd37b2e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 12 Jul 2010 11:04:07 -0700
Subject: glsl2: Add a new pass at the IR level to break down matrix ops to
 vector ops.

This will be used by the Mesa IR and likely most HW backends, as it
allows other optimizations to occur that might not otherwise.

Fixes glsl-vs-mat-sub-1, glsl-vs-mat-div-1.
---
 src/glsl/Makefile              |   1 +
 src/glsl/ir.h                  |   6 ++
 src/glsl/ir_mat_op_to_vec.cpp  | 188 +++++++++++++++++++++++++++++++++++++++++
 src/glsl/ir_optimization.h     |   1 +
 src/mesa/shader/ir_to_mesa.cpp |   1 +
 5 files changed, 197 insertions(+)
 create mode 100644 src/glsl/ir_mat_op_to_vec.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index ddc9d82d61..a36ff28a4b 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -42,6 +42,7 @@ CXX_SOURCES = \
 	ir_hv_accept.cpp \
 	ir_if_return.cpp \
 	ir_if_simplification.cpp \
+	ir_mat_op_to_vec.cpp \
 	ir_mod_to_fract.cpp \
 	ir_print_visitor.cpp \
 	ir_reader.cpp \
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 500a8c7a00..0d5bbc20aa 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -74,6 +74,7 @@ public:
    virtual class ir_dereference *       as_dereference()      { return NULL; }
    virtual class ir_dereference_array *	as_dereference_array() { return NULL; }
    virtual class ir_dereference_variable *as_dereference_variable() { return NULL; }
+   virtual class ir_expression *        as_expression()       { return NULL; }
    virtual class ir_rvalue *            as_rvalue()           { return NULL; }
    virtual class ir_loop *              as_loop()             { return NULL; }
    virtual class ir_assignment *        as_assignment()       { return NULL; }
@@ -603,6 +604,11 @@ public:
    ir_expression(int op, const struct glsl_type *type,
 		 ir_rvalue *, ir_rvalue *);
 
+   virtual ir_expression *as_expression()
+   {
+      return this;
+   }
+
    virtual ir_expression *clone(struct hash_table *ht) const;
 
    static unsigned int get_num_operands(ir_expression_operation);
diff --git a/src/glsl/ir_mat_op_to_vec.cpp b/src/glsl/ir_mat_op_to_vec.cpp
new file mode 100644
index 0000000000..828c63c17a
--- /dev/null
+++ b/src/glsl/ir_mat_op_to_vec.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_mat_op_to_vec.cpp
+ *
+ * Breaks matrix operation expressions down to a series of vector operations.
+ *
+ * Generally this is how we have to codegen matrix operations for a
+ * GPU, so this gives us the chance to constant fold operations on a
+ * column or row.
+ */
+
+#include "ir.h"
+#include "ir_expression_flattening.h"
+#include "glsl_types.h"
+
+class ir_mat_op_to_vec_visitor : public ir_hierarchical_visitor {
+public:
+   ir_mat_op_to_vec_visitor()
+   {
+      this->made_progress = false;
+   }
+
+   ir_visitor_status visit_leave(ir_assignment *);
+
+   ir_rvalue *get_column(ir_variable *var, int i);
+
+   bool made_progress;
+};
+
+static bool
+mat_op_to_vec_predicate(ir_instruction *ir)
+{
+   ir_expression *expr = ir->as_expression();
+   unsigned int i;
+
+   if (!expr)
+      return false;
+
+   for (i = 0; i < expr->get_num_operands(); i++) {
+      if (expr->operands[i]->type->is_matrix())
+	 return true;
+   }
+
+   return false;
+}
+
+bool
+do_mat_op_to_vec(exec_list *instructions)
+{
+   ir_mat_op_to_vec_visitor v;
+
+   /* Pull out any matrix expression to a separate assignment to a
+    * temp.  This will make our handling of the breakdown to
+    * operations on the matrix's vector components much easier.
+    */
+   do_expression_flattening(instructions, mat_op_to_vec_predicate);
+
+   visit_list_elements(&v, instructions);
+
+   return v.made_progress;
+}
+
+ir_rvalue *
+ir_mat_op_to_vec_visitor::get_column(ir_variable *var, int i)
+{
+   ir_dereference *deref;
+
+   if (!var->type->is_matrix()) {
+      deref = new(base_ir) ir_dereference_variable(var);
+   } else {
+      deref = new(base_ir) ir_dereference_variable(var);
+      deref = new(base_ir) ir_dereference_array(deref,
+						new(base_ir) ir_constant(i));
+   }
+
+   return deref;
+}
+
+ir_visitor_status
+ir_mat_op_to_vec_visitor::visit_leave(ir_assignment *assign)
+{
+   ir_expression *expr = assign->rhs->as_expression();
+   bool found_matrix = false;
+   unsigned int i, matrix_columns = 1;
+   ir_variable *op_var[2];
+
+   if (!expr)
+      return visit_continue;
+
+   for (i = 0; i < expr->get_num_operands(); i++) {
+      if (expr->operands[i]->type->is_matrix()) {
+	 found_matrix = true;
+	 matrix_columns = expr->operands[i]->type->matrix_columns;
+	 break;
+      }
+   }
+   if (!found_matrix)
+      return visit_continue;
+
+   /* FINISHME: see below */
+   if (expr->operation == ir_binop_mul)
+      return visit_continue;
+
+   ir_dereference_variable *lhs_deref = assign->lhs->as_dereference_variable();
+   assert(lhs_deref);
+
+   ir_variable *result_var = lhs_deref->var;
+
+   /* Store the expression operands in temps so we can use them
+    * multiple times.
+    */
+   for (i = 0; i < expr->get_num_operands(); i++) {
+      ir_assignment *assign;
+
+      op_var[i] = new(base_ir) ir_variable(expr->operands[i]->type,
+					   "mat_op_to_vec");
+      base_ir->insert_before(op_var[i]);
+
+      lhs_deref = new(base_ir) ir_dereference_variable(op_var[i]);
+      assign = new(base_ir) ir_assignment(lhs_deref,
+					  expr->operands[i],
+					  NULL);
+      base_ir->insert_before(assign);
+   }
+
+   /* OK, time to break down this matrix operation. */
+   switch (expr->operation) {
+   case ir_binop_add:
+   case ir_binop_sub:
+   case ir_binop_div:
+   case ir_binop_mod:
+      /* For most operations, the matrix version is just going
+       * column-wise through and applying the operation to each column
+       * if available.
+       */
+      for (i = 0; i < matrix_columns; i++) {
+	 ir_rvalue *op0 = get_column(op_var[0], i);
+	 ir_rvalue *op1 = get_column(op_var[1], i);
+	 ir_rvalue *result = get_column(result_var, i);
+	 ir_expression *column_expr;
+	 ir_assignment *column_assign;
+
+	 column_expr = new(base_ir) ir_expression(expr->operation,
+						  result->type,
+						  op0,
+						  op1);
+
+	 column_assign = new(base_ir) ir_assignment(result,
+						    column_expr,
+						    NULL);
+	 base_ir->insert_before(column_assign);
+      }
+      break;
+   case ir_binop_mul:
+      /* FINISHME */
+      return visit_continue;
+      break;
+   default:
+      printf("FINISHME: Handle matrix operation for %s\n", expr->operator_string());
+      abort();
+   }
+   assign->remove();
+   this->made_progress = true;
+
+   return visit_continue;
+}
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index b03c0644cf..fae583df75 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -41,6 +41,7 @@ bool do_div_to_mul_rcp(exec_list *instructions);
 bool do_function_inlining(exec_list *instructions);
 bool do_if_return(exec_list *instructions);
 bool do_if_simplification(exec_list *instructions);
+bool do_mat_op_to_vec(exec_list *instructions);
 bool do_mod_to_fract(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
 bool do_vec_index_to_cond_assign(exec_list *instructions);
diff --git a/src/mesa/shader/ir_to_mesa.cpp b/src/mesa/shader/ir_to_mesa.cpp
index 708c6fece1..81b91918cb 100644
--- a/src/mesa/shader/ir_to_mesa.cpp
+++ b/src/mesa/shader/ir_to_mesa.cpp
@@ -1960,6 +1960,7 @@ _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
       _mesa_ast_to_hir(shader->ir, state);
 
    /* Lowering */
+   do_mat_op_to_vec(shader->ir);
    do_mod_to_fract(shader->ir);
    do_div_to_mul_rcp(shader->ir);
 
-- 
cgit v1.2.3


From 29ce44ad2b8d37ea54923f1d1856b44ef26903e5 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 19 Jul 2010 09:36:43 -0700
Subject: glsl2: Add a pass for converting if statements to conditional
 assignment.

This will be used on 915 and similar hardware of that generation.
---
 src/glsl/Makefile                 |   1 +
 src/glsl/ir_if_to_cond_assign.cpp | 167 ++++++++++++++++++++++++++++++++++++++
 src/glsl/ir_optimization.h        |   1 +
 3 files changed, 169 insertions(+)
 create mode 100644 src/glsl/ir_if_to_cond_assign.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index a36ff28a4b..c09735dff6 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -42,6 +42,7 @@ CXX_SOURCES = \
 	ir_hv_accept.cpp \
 	ir_if_return.cpp \
 	ir_if_simplification.cpp \
+	ir_if_to_cond_assign.cpp \
 	ir_mat_op_to_vec.cpp \
 	ir_mod_to_fract.cpp \
 	ir_print_visitor.cpp \
diff --git a/src/glsl/ir_if_to_cond_assign.cpp b/src/glsl/ir_if_to_cond_assign.cpp
new file mode 100644
index 0000000000..274874bbb7
--- /dev/null
+++ b/src/glsl/ir_if_to_cond_assign.cpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_if_to_cond_assign.cpp
+ *
+ * This attempts to flatten all if statements to conditional
+ * assignments for GPUs that don't do control flow.
+ *
+ * It can't handle other control flow being inside of its block, such
+ * as calls or loops.  Hopefully loop unrolling and inlining will take
+ * care of those.
+ */
+
+#include "glsl_types.h"
+#include "ir.h"
+
+class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
+public:
+   ir_if_to_cond_assign_visitor()
+   {
+      this->progress = false;
+   }
+
+   ir_visitor_status visit_leave(ir_if *);
+
+   bool progress;
+};
+
+bool
+do_if_to_cond_assign(exec_list *instructions)
+{
+   ir_if_to_cond_assign_visitor v;
+
+   visit_list_elements(&v, instructions);
+
+   return v.progress;
+}
+
+void
+check_control_flow(ir_instruction *ir, void *data)
+{
+   bool *found_control_flow = (bool *)data;
+   switch (ir->ir_type) {
+   case ir_type_call:
+   case ir_type_discard:
+   case ir_type_loop:
+   case ir_type_loop_jump:
+   case ir_type_return:
+      *found_control_flow = true;
+      break;
+   default:
+      break;
+   }
+}
+
+void
+move_block_to_cond_assign(void *mem_ctx,
+			  ir_if *if_ir, ir_variable *cond_var, bool then)
+{
+   exec_list *instructions;
+
+   if (then) {
+      instructions = &if_ir->then_instructions;
+   } else {
+      instructions = &if_ir->else_instructions;
+   }
+
+   foreach_iter(exec_list_iterator, iter, *instructions) {
+      ir_instruction *ir = (ir_instruction *)iter.get();
+
+      if (ir->ir_type == ir_type_assignment) {
+	 ir_assignment *assign = (ir_assignment *)ir;
+	 ir_rvalue *cond_expr;
+	 ir_dereference *deref = new(mem_ctx) ir_dereference_variable(cond_var);
+
+	 if (then) {
+	    cond_expr = deref;
+	 } else {
+	    cond_expr = new(mem_ctx) ir_expression(ir_unop_logic_not,
+						   glsl_type::bool_type,
+						   deref,
+						   NULL);
+	 }
+
+	 if (!assign->condition) {
+	    assign->condition = cond_expr;
+	 } else {
+	    assign->condition = new(mem_ctx) ir_expression(ir_binop_logic_and,
+							   glsl_type::bool_type,
+							   cond_expr,
+							   assign->condition);
+	 }
+      }
+
+      /* Now, move from the if block to the block surrounding it. */
+      ir->remove();
+      if_ir->insert_before(ir);
+   }
+}
+
+ir_visitor_status
+ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
+{
+   bool found_control_flow = false;
+   ir_variable *cond_var;
+   ir_assignment *assign;
+   ir_dereference_variable *deref;
+
+   /* Check that both blocks don't contain anything we can't support. */
+   foreach_iter(exec_list_iterator, then_iter, ir->then_instructions) {
+      ir_instruction *then_ir = (ir_instruction *)then_iter.get();
+      visit_tree(then_ir, check_control_flow, &found_control_flow);
+   }
+   foreach_iter(exec_list_iterator, else_iter, ir->else_instructions) {
+      ir_instruction *else_ir = (ir_instruction *)else_iter.get();
+      visit_tree(else_ir, check_control_flow, &found_control_flow);
+   }
+   if (found_control_flow)
+      return visit_continue;
+
+   void *mem_ctx = talloc_parent(ir);
+
+   /* Store the condition to a variable so the assignment conditions are
+    * simpler.
+    */
+   cond_var = new(mem_ctx) ir_variable(glsl_type::bool_type,
+				       "if_to_cond_assign_condition");
+   ir->insert_before(cond_var);
+
+   deref = new(mem_ctx) ir_dereference_variable(cond_var);
+   assign = new(mem_ctx) ir_assignment(deref,
+				       ir->condition, NULL);
+   ir->insert_before(assign);
+
+   /* Now, move all of the instructions out of the if blocks, putting
+    * conditions on assignments.
+    */
+   move_block_to_cond_assign(mem_ctx, ir, cond_var, true);
+   move_block_to_cond_assign(mem_ctx, ir, cond_var, false);
+
+   ir->remove();
+
+   this->progress = true;
+
+   return visit_continue;
+}
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index fae583df75..06cb4d22ca 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -41,6 +41,7 @@ bool do_div_to_mul_rcp(exec_list *instructions);
 bool do_function_inlining(exec_list *instructions);
 bool do_if_return(exec_list *instructions);
 bool do_if_simplification(exec_list *instructions);
+bool do_if_to_cond_assign(exec_list *instructions);
 bool do_mat_op_to_vec(exec_list *instructions);
 bool do_mod_to_fract(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
-- 
cgit v1.2.3


From 832aad989e3d319a8aaac046aa49df25da134d82 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 26 Jul 2010 22:50:29 -0700
Subject: glsl2: Add optimization pass for algebraic simplifications.

This cleans up the assembly output of almost all the non-logic tests
glsl-algebraic-*.  glsl-algebraic-pow-two needs love (basically,
flattening to a temporary and squaring it).
---
 src/glsl/Makefile               |   1 +
 src/glsl/ir.h                   |   8 +
 src/glsl/ir_algebraic.cpp       | 366 ++++++++++++++++++++++++++++++++++++++++
 src/glsl/ir_optimization.h      |   3 +-
 src/glsl/main.cpp               |   1 +
 src/mesa/program/ir_to_mesa.cpp |   1 +
 6 files changed, 379 insertions(+), 1 deletion(-)
 create mode 100644 src/glsl/ir_algebraic.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 462d49e884..4c85af8906 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -30,6 +30,7 @@ CXX_SOURCES = \
 	glsl_parser_extras.cpp \
 	glsl_types.cpp \
 	hir_field_selection.cpp \
+	ir_algebraic.cpp \
 	ir_basic_block.cpp \
 	ir_clone.cpp \
 	ir_constant_expression.cpp \
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index e0f3683a7a..7e8363106d 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -613,7 +613,15 @@ enum ir_expression_operation {
    ir_binop_greater,
    ir_binop_lequal,
    ir_binop_gequal,
+   /**
+    * Returns single boolean for whether all components of operands[0]
+    * equal the components of operands[1].
+    */
    ir_binop_equal,
+   /**
+    * Returns single boolean for whether any component of operands[0]
+    * is not equal to the corresponding component of operands[1].
+    */
    ir_binop_nequal,
    /*@}*/
 
diff --git a/src/glsl/ir_algebraic.cpp b/src/glsl/ir_algebraic.cpp
new file mode 100644
index 0000000000..5b065b086e
--- /dev/null
+++ b/src/glsl/ir_algebraic.cpp
@@ -0,0 +1,366 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_algebraic.cpp
+ *
+ * Takes advantage of associativity, commutativity, and other algebraic
+ * properties to simplify expressions.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_optimization.h"
+#include "glsl_types.h"
+
+/**
+ * Visitor class for replacing expressions with simpler algebraic equivalents.
+ */
+
+class ir_algebraic_visitor : public ir_hierarchical_visitor {
+public:
+   ir_algebraic_visitor()
+   {
+      this->progress = false;
+   }
+
+   virtual ~ir_algebraic_visitor()
+   {
+   }
+
+   virtual ir_visitor_status visit_leave(ir_assignment *);
+   virtual ir_visitor_status visit_leave(ir_call *);
+   virtual ir_visitor_status visit_leave(ir_dereference_array *);
+   virtual ir_visitor_status visit_leave(ir_expression *);
+   virtual ir_visitor_status visit_leave(ir_if *);
+   virtual ir_visitor_status visit_leave(ir_return *);
+   virtual ir_visitor_status visit_leave(ir_swizzle *);
+   virtual ir_visitor_status visit_leave(ir_texture *);
+
+   ir_rvalue *handle_expression(ir_rvalue *in_ir);
+
+   bool progress;
+};
+
+static bool
+is_vec_zero(ir_constant *ir)
+{
+   int c;
+
+   if (!ir)
+      return false;
+   if (!ir->type->is_scalar() &&
+       !ir->type->is_vector())
+      return false;
+
+   for (c = 0; c < ir->type->vector_elements; c++) {
+      switch (ir->type->base_type) {
+      case GLSL_TYPE_FLOAT:
+	 if (ir->value.f[c] != 0.0)
+	    return false;
+	 break;
+      case GLSL_TYPE_INT:
+	 if (ir->value.i[c] != 0)
+	    return false;
+	 break;
+      case GLSL_TYPE_UINT:
+	 if (ir->value.u[c] != 0)
+	    return false;
+	 break;
+      case GLSL_TYPE_BOOL:
+	 if (ir->value.b[c] != false)
+	    return false;
+	 break;
+      default:
+	 assert(!"bad base type");
+	 return false;
+      }
+   }
+
+   return true;
+}
+
+static bool
+is_vec_one(ir_constant *ir)
+{
+   int c;
+
+   if (!ir)
+      return false;
+   if (!ir->type->is_scalar() &&
+       !ir->type->is_vector())
+      return false;
+
+   for (c = 0; c < ir->type->vector_elements; c++) {
+      switch (ir->type->base_type) {
+      case GLSL_TYPE_FLOAT:
+	 if (ir->value.f[c] != 1.0)
+	    return false;
+	 break;
+      case GLSL_TYPE_INT:
+	 if (ir->value.i[c] != 1)
+	    return false;
+	 break;
+      case GLSL_TYPE_UINT:
+	 if (ir->value.u[c] != 1)
+	    return false;
+	 break;
+      case GLSL_TYPE_BOOL:
+	 if (ir->value.b[c] != true)
+	    return false;
+	 break;
+      default:
+	 assert(!"bad base type");
+	 return false;
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Returns a simplified replacement for \c in_ir, or \c in_ir unchanged when
+ * it is not an expression, has a matrix operand, or no rule applies (NULL
+ * stays NULL).  Sets \c progress whenever a replacement is returned.
+ * A bare operand (or a unary op on it) only replaces the expression when the
+ * operand's type equals the expression's type: "vecN(0) + scalar" has vector
+ * type and must not collapse to the scalar.
+ */
+ir_rvalue *
+ir_algebraic_visitor::handle_expression(ir_rvalue *in_ir)
+{
+   ir_expression *ir = (ir_expression *)in_ir;
+   ir_constant *op_const[2] = {NULL, NULL};
+   ir_expression *op_expr[2] = {NULL, NULL};
+   unsigned int i;
+
+   if (!in_ir)
+      return NULL;
+
+   if (in_ir->ir_type != ir_type_expression)
+      return in_ir;
+
+   for (i = 0; i < ir->get_num_operands(); i++) {
+      if (ir->operands[i]->type->is_matrix())
+	 return in_ir;
+
+      op_const[i] = ir->operands[i]->constant_expression_value();
+      op_expr[i] = ir->operands[i]->as_expression();
+   }
+
+   switch (ir->operation) {
+   case ir_unop_logic_not:
+      if (op_expr[0] && op_expr[0]->operation == ir_binop_equal) {
+	 this->progress = true;
+	 return new(ir) ir_expression(ir_binop_nequal, ir->type,
+				      op_expr[0]->operands[0],
+				      op_expr[0]->operands[1]);
+      }
+      if (op_expr[0] && op_expr[0]->operation == ir_binop_nequal) {
+	 this->progress = true;
+	 return new(ir) ir_expression(ir_binop_equal, ir->type,
+				      op_expr[0]->operands[0],
+				      op_expr[0]->operands[1]);
+      }
+      break;
+
+   case ir_binop_add:
+      if (is_vec_zero(op_const[0]) && ir->operands[1]->type == ir->type) {
+	 this->progress = true;
+	 return ir->operands[1];
+      }
+      if (is_vec_zero(op_const[1]) && ir->operands[0]->type == ir->type) {
+	 this->progress = true;
+	 return ir->operands[0];
+      }
+      break;
+
+   case ir_binop_sub:
+      if (is_vec_zero(op_const[0]) && ir->operands[1]->type == ir->type) {
+	 this->progress = true;
+	 return new(ir) ir_expression(ir_unop_neg, ir->type,
+				      ir->operands[1], NULL);
+      }
+      if (is_vec_zero(op_const[1]) && ir->operands[0]->type == ir->type) {
+	 this->progress = true;
+	 return ir->operands[0];
+      }
+      break;
+
+   case ir_binop_mul:
+      if (is_vec_one(op_const[0]) && ir->operands[1]->type == ir->type) {
+	 this->progress = true;
+	 return ir->operands[1];
+      }
+      if (is_vec_one(op_const[1]) && ir->operands[0]->type == ir->type) {
+	 this->progress = true;
+	 return ir->operands[0];
+      }
+
+      if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
+	 ir_constant_data zero_data;
+	 memset(&zero_data, 0, sizeof(zero_data));
+
+	 this->progress = true;
+	 return new(ir) ir_constant(ir->type, &zero_data);
+      }
+      break;
+
+   case ir_binop_div:
+      if (is_vec_one(op_const[0]) && ir->type->base_type == GLSL_TYPE_FLOAT &&
+	  ir->operands[1]->type == ir->type) {
+	 this->progress = true;
+	 return new(ir) ir_expression(ir_unop_rcp, ir->type,
+				      ir->operands[1], NULL);
+      }
+      if (is_vec_one(op_const[1]) && ir->operands[0]->type == ir->type) {
+	 this->progress = true;
+	 return ir->operands[0];
+      }
+      break;
+
+   case ir_unop_rcp:
+      if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp) {
+	 this->progress = true;
+	 return op_expr[0]->operands[0];
+      }
+
+      /* FINISHME: We should do rcp(rsq(x)) -> sqrt(x) for some
+       * backends, except that some backends will have done sqrt ->
+       * rcp(rsq(x)) and we don't want to undo it for them.
+       */
+
+      /* As far as we know, all backends are OK with rsq. */
+      if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) {
+	 this->progress = true;
+	 return new(ir) ir_expression(ir_unop_rsq, ir->type,
+				      op_expr[0]->operands[0], NULL);
+      }
+
+      break;
+
+   default:
+      break;
+   }
+
+   return in_ir;
+}
+
+ir_visitor_status
+ir_algebraic_visitor::visit_leave(ir_expression *ir)
+{
+   unsigned int operand;
+
+   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      ir->operands[operand] = handle_expression(ir->operands[operand]);
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_algebraic_visitor::visit_leave(ir_texture *ir)
+{
+   ir->coordinate = handle_expression(ir->coordinate);
+   ir->projector = handle_expression(ir->projector);
+   ir->shadow_comparitor = handle_expression(ir->shadow_comparitor);
+
+   switch (ir->op) {
+   case ir_tex:
+      break;
+   case ir_txb:
+      ir->lod_info.bias = handle_expression(ir->lod_info.bias);
+      break;
+   case ir_txf:
+   case ir_txl:
+      ir->lod_info.lod = handle_expression(ir->lod_info.lod);
+      break;
+   case ir_txd:
+      ir->lod_info.grad.dPdx = handle_expression(ir->lod_info.grad.dPdx);
+      ir->lod_info.grad.dPdy = handle_expression(ir->lod_info.grad.dPdy);
+      break;
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_algebraic_visitor::visit_leave(ir_swizzle *ir)
+{
+   ir->val = handle_expression(ir->val);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_algebraic_visitor::visit_leave(ir_dereference_array *ir)
+{
+   ir->array_index = handle_expression(ir->array_index);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_algebraic_visitor::visit_leave(ir_assignment *ir)
+{
+   ir->rhs = handle_expression(ir->rhs);
+   ir->condition = handle_expression(ir->condition);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_algebraic_visitor::visit_leave(ir_call *ir)
+{
+   foreach_iter(exec_list_iterator, iter, *ir) {
+      ir_rvalue *param = (ir_rvalue *)iter.get();
+      ir_rvalue *new_param = handle_expression(param);
+
+      if (new_param != param) {
+	 param->replace_with(new_param);
+      }
+   }
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_algebraic_visitor::visit_leave(ir_return *ir)
+{
+   ir->value = handle_expression(ir->value); /* NULL stays NULL */
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_algebraic_visitor::visit_leave(ir_if *ir)
+{
+   ir->condition = handle_expression(ir->condition);
+   return visit_continue;
+}
+
+
+bool
+do_algebraic(exec_list *instructions)
+{
+   ir_algebraic_visitor v;
+
+   visit_list_elements(&v, instructions);
+
+   return v.progress;
+}
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 06cb4d22ca..4f39565e5f 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -23,11 +23,12 @@
 
 
 /**
- * \file ir_dead_code.h
+ * \file ir_optimization.h
  *
  * Prototypes for optimization passes to be called by the compiler and drivers.
  */
 
+bool do_algebraic(exec_list *instructions);
 bool do_constant_folding(exec_list *instructions);
 bool do_constant_variable(exec_list *instructions);
 bool do_constant_variable_unlinked(exec_list *instructions);
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index 5c0f6475e0..b62902278c 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -165,6 +165,7 @@ compile_shader(struct gl_shader *shader)
 	 progress = do_dead_code_unlinked(state, shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(shader->ir) || progress;
 	 progress = do_constant_folding(shader->ir) || progress;
+	 progress = do_algebraic(shader->ir) || progress;
 	 progress = do_vec_index_to_swizzle(shader->ir) || progress;
 	 progress = do_vec_index_to_cond_assign(shader->ir) || progress;
 	 progress = do_swizzle_swizzle(shader->ir) || progress;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index d06b83261c..2fd0507c2f 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2240,6 +2240,7 @@ _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
 	 progress = do_dead_code_unlinked(state, shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(shader->ir) || progress;
 	 progress = do_constant_folding(shader->ir) || progress;
+	 progress = do_algebraic(shader->ir) || progress;
 	 progress = do_if_return(shader->ir) || progress;
 	 if (ctx->Shader.EmitNoIfs)
 	    progress = do_if_to_cond_assign(shader->ir) || progress;
-- 
cgit v1.2.3


From 66d4c65ee2c311ea0c71c39a28456d0c11798d6b Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 27 Jul 2010 11:28:26 -0700
Subject: glsl2: Make the dead code handler make its own talloc context.

This way, we don't need to pass in a parse state, and the context
doesn't grow with the number of passes through optimization.
---
 src/glsl/ir_dead_code.cpp       | 12 ++++++------
 src/glsl/ir_optimization.h      |  6 ++----
 src/glsl/main.cpp               |  2 +-
 src/mesa/program/ir_to_mesa.cpp |  2 +-
 4 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/ir_dead_code.cpp b/src/glsl/ir_dead_code.cpp
index ea78107f49..4804407bdc 100644
--- a/src/glsl/ir_dead_code.cpp
+++ b/src/glsl/ir_dead_code.cpp
@@ -146,13 +146,12 @@ ir_dead_code_visitor::visit_leave(ir_assignment *ir)
  * for usage on an unlinked instruction stream.
  */
 bool
-do_dead_code(struct _mesa_glsl_parse_state *state,
-	     exec_list *instructions)
+do_dead_code(exec_list *instructions)
 {
    ir_dead_code_visitor v;
    bool progress = false;
 
-   v.mem_ctx = state;
+   v.mem_ctx = talloc_new(NULL);
    v.run(instructions);
 
    foreach_iter(exec_list_iterator, iter, v.variable_list) {
@@ -188,6 +187,8 @@ do_dead_code(struct _mesa_glsl_parse_state *state,
 	 progress = true;
       }
    }
+   talloc_free(v.mem_ctx);
+
    return progress;
 }
 
@@ -199,8 +200,7 @@ do_dead_code(struct _mesa_glsl_parse_state *state,
  * with global scope.
  */
 bool
-do_dead_code_unlinked(struct _mesa_glsl_parse_state *state,
-		      exec_list *instructions)
+do_dead_code_unlinked(exec_list *instructions)
 {
    bool progress = false;
 
@@ -211,7 +211,7 @@ do_dead_code_unlinked(struct _mesa_glsl_parse_state *state,
 	 foreach_iter(exec_list_iterator, sigiter, *f) {
 	    ir_function_signature *sig =
 	       (ir_function_signature *) sigiter.get();
-	    if (do_dead_code(state, &sig->body))
+	    if (do_dead_code(&sig->body))
 	       progress = true;
 	 }
       }
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 4f39565e5f..5dbb025d35 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -33,11 +33,9 @@ bool do_constant_folding(exec_list *instructions);
 bool do_constant_variable(exec_list *instructions);
 bool do_constant_variable_unlinked(exec_list *instructions);
 bool do_copy_propagation(exec_list *instructions);
-bool do_dead_code(struct _mesa_glsl_parse_state *state,
-		  exec_list *instructions);
+bool do_dead_code(exec_list *instructions);
 bool do_dead_code_local(exec_list *instructions);
-bool do_dead_code_unlinked(struct _mesa_glsl_parse_state *state,
-			   exec_list *instructions);
+bool do_dead_code_unlinked(exec_list *instructions);
 bool do_div_to_mul_rcp(exec_list *instructions);
 bool do_function_inlining(exec_list *instructions);
 bool do_if_return(exec_list *instructions);
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index b62902278c..08b133f124 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -162,7 +162,7 @@ compile_shader(struct gl_shader *shader)
 	 progress = do_if_simplification(shader->ir) || progress;
 	 progress = do_copy_propagation(shader->ir) || progress;
 	 progress = do_dead_code_local(shader->ir) || progress;
-	 progress = do_dead_code_unlinked(state, shader->ir) || progress;
+	 progress = do_dead_code_unlinked(shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(shader->ir) || progress;
 	 progress = do_constant_folding(shader->ir) || progress;
 	 progress = do_algebraic(shader->ir) || progress;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 5cc999c2e3..409b6d7288 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2237,7 +2237,7 @@ _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
 	 progress = do_if_simplification(shader->ir) || progress;
 	 progress = do_copy_propagation(shader->ir) || progress;
 	 progress = do_dead_code_local(shader->ir) || progress;
-	 progress = do_dead_code_unlinked(state, shader->ir) || progress;
+	 progress = do_dead_code_unlinked(shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(shader->ir) || progress;
 	 progress = do_constant_folding(shader->ir) || progress;
 	 progress = do_algebraic(shader->ir) || progress;
-- 
cgit v1.2.3


From 784695442c415cf0be882434a25671ecfb635d34 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 30 Jul 2010 17:04:49 -0700
Subject: glsl2: Add new tree grafting optimization pass.

---
 src/glsl/Makefile               |   1 +
 src/glsl/ir_optimization.h      |   1 +
 src/glsl/ir_tree_grafting.cpp   | 356 ++++++++++++++++++++++++++++++++++++++++
 src/glsl/linker.cpp             |   1 +
 src/glsl/main.cpp               |   1 +
 src/mesa/program/ir_to_mesa.cpp |   1 +
 6 files changed, 361 insertions(+)
 create mode 100644 src/glsl/ir_tree_grafting.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index aa1922f3be..0254fec756 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -56,6 +56,7 @@ CXX_SOURCES = \
 	ir_print_visitor.cpp \
 	ir_reader.cpp \
 	ir_swizzle_swizzle.cpp \
+	ir_tree_grafting.cpp \
 	ir_validate.cpp \
 	ir_variable.cpp \
 	ir_variable_refcount.cpp \
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 5dbb025d35..55ec327193 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -44,5 +44,6 @@ bool do_if_to_cond_assign(exec_list *instructions);
 bool do_mat_op_to_vec(exec_list *instructions);
 bool do_mod_to_fract(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
+bool do_tree_grafting(exec_list *instructions);
 bool do_vec_index_to_cond_assign(exec_list *instructions);
 bool do_vec_index_to_swizzle(exec_list *instructions);
diff --git a/src/glsl/ir_tree_grafting.cpp b/src/glsl/ir_tree_grafting.cpp
new file mode 100644
index 0000000000..6f62de758b
--- /dev/null
+++ b/src/glsl/ir_tree_grafting.cpp
@@ -0,0 +1,356 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_tree_grafting.cpp
+ *
+ * Takes assignments to variables that are dereferenced only once and
+ * pastes the RHS expression into where the variable is dereferenced.
+ *
+ * In the process of various operations like function inlining and
+ * tertiary op handling, we'll end up with our expression trees having
+ * been chopped up into a series of assignments of short expressions
+ * to temps.  Other passes like ir_algebraic.cpp would prefer to see
+ * the deepest expression trees they can to try to optimize them.
+ *
+ * This is a lot like copy propagation.  In comparison, copy
+ * propagation only acts on plain copies, not arbitrary expressions on
+ * the RHS.  Generally, we wouldn't want to go pasting some
+ * complicated expression everywhere it got used, though, so we don't
+ * handle expressions in that pass.
+ *
+ * The hard part is making sure we don't move an expression across
+ * some other assignments that would change the value of the
+ * expression.  So we split this into two passes: First, find the
+ * variables in our scope which are written to once and read once, and
+ * then go through basic blocks seeing if we find an opportunity to
+ * move those expressions safely.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_variable_refcount.h"
+#include "ir_basic_block.h"
+#include "ir_optimization.h"
+#include "glsl_types.h"
+
+static bool debug = false; /* When true, grafting attempts and results are printf()ed. */
+
+class ir_tree_grafting_visitor : public ir_hierarchical_visitor {
+public:
+   ir_tree_grafting_visitor(ir_assignment *graft_assign,
+			    ir_variable *graft_var)
+   {
+      this->progress = false;
+      this->graft_assign = graft_assign;
+      this->graft_var = graft_var;
+   }
+   /* Visitor hooks: most try do_graft() on the rvalue slots of the visited node. */
+   virtual ir_visitor_status visit_leave(class ir_assignment *);
+   virtual ir_visitor_status visit_enter(class ir_call *);
+   virtual ir_visitor_status visit_enter(class ir_expression *);
+   virtual ir_visitor_status visit_enter(class ir_function *);
+   virtual ir_visitor_status visit_enter(class ir_function_signature *);
+   virtual ir_visitor_status visit_enter(class ir_if *);
+   virtual ir_visitor_status visit_enter(class ir_loop *);
+   virtual ir_visitor_status visit_enter(class ir_swizzle *);
+   virtual ir_visitor_status visit_enter(class ir_texture *);
+
+   bool do_graft(ir_rvalue **rvalue);
+
+   bool progress; /* Set to true by do_graft() once a graft has been made. */
+   ir_variable *graft_var; /* Variable whose dereference is the graft target. */
+   ir_assignment *graft_assign; /* Assignment whose RHS is pasted over that dereference. */
+};
+
+struct find_deref_info { /* State handed to dereferences_variable_callback(). */
+   ir_variable *var; /* variable being searched for */
+   bool found; /* set to true when the callback is invoked on var */
+};
+
+void
+dereferences_variable_callback(ir_instruction *ir, void *data)
+{ /* NOTE(review): not static, unlike its only visible caller -- confirm linkage is intended. */
+   struct find_deref_info *info = (struct find_deref_info *)data;
+
+   if (ir == info->var) /* pointer identity: the visited node is the variable itself */
+      info->found = true;
+}
+
+static bool
+dereferences_variable(ir_instruction *ir, ir_variable *var)
+{ /* Returns true if visit_tree() over ir invokes the callback on var itself. */
+   struct find_deref_info info;
+
+   info.var = var;
+   info.found = false;
+
+   visit_tree(ir, dereferences_variable_callback, &info);
+
+   return info.found;
+}
+
+bool
+ir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue)
+{ /* Grafts into *rvalue if it is a plain dereference of graft_var; returns true on success. */
+   if (!*rvalue) /* empty slot (callers pass optional operands): nothing to graft */
+      return false;
+
+   ir_dereference_variable *deref = (*rvalue)->as_dereference_variable();
+
+   if (!deref || deref->var != this->graft_var)
+      return false;
+
+   if (debug) {
+      printf("GRAFTING:\n");
+      this->graft_assign->rhs->print();
+      printf("\n");
+      printf("TO:\n");
+      (*rvalue)->print();
+      printf("\n");
+   }
+
+   this->graft_assign->remove(); /* the assignment goes away ... */
+   *rvalue = this->graft_assign->rhs; /* ... and its RHS takes the dereference's place */
+
+   this->progress = true;
+   return true;
+}
+
+ir_visitor_status
+ir_tree_grafting_visitor::visit_enter(ir_loop *ir)
+{
+   (void)ir;
+   /* Do not traverse into the body of the loop since that is a
+    * different basic block.  try_tree_grafting() gives up on visit_stop.
+    */
+   return visit_stop;
+}
+
+ir_visitor_status
+ir_tree_grafting_visitor::visit_leave(ir_assignment *ir)
+{ /* Try the RHS, then the condition; otherwise check whether ir kills the graft. */
+   if (do_graft(&ir->rhs) ||
+       do_graft(&ir->condition))
+      return visit_stop;
+
+   /* If this assignment updates a variable used in the assignment
+    * we're trying to graft, then we're done.
+    */
+   if (dereferences_variable(this->graft_assign->rhs,
+			     ir->lhs->variable_referenced())) {
+      if (debug) {
+	 printf("graft killed by: ");
+	 ir->print();
+	 printf("\n");
+      }
+      return visit_stop; /* progress is still false, so the caller reports no graft */
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_tree_grafting_visitor::visit_enter(ir_function *ir)
+{ /* No grafting into function definitions; presumably skips the children -- TODO confirm. */
+   (void) ir;
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+ir_tree_grafting_visitor::visit_enter(ir_function_signature *ir)
+{ /* No grafting into signature bodies; presumably skips the children -- TODO confirm. */
+   (void)ir;
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+ir_tree_grafting_visitor::visit_enter(ir_call *ir)
+{ /* Tries each call parameter in list order and stops at the first successful graft. */
+   /* Reminder: iterating ir_call iterates its parameters. */
+   foreach_iter(exec_list_iterator, iter, *ir) {
+      ir_rvalue *ir = (ir_rvalue *)iter.get(); /* NB: shadows the ir_call parameter */
+      ir_rvalue *new_ir = ir;
+
+      if (do_graft(&new_ir)) { /* do_graft() writes the replacement into new_ir */
+	 ir->replace_with(new_ir);
+	 return visit_stop;
+      }
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_tree_grafting_visitor::visit_enter(ir_expression *ir)
+{ /* Tries each operand in index order and stops at the first successful graft. */
+   for (unsigned int i = 0; i < ir->get_num_operands(); i++) {
+      if (do_graft(&ir->operands[i]))
+	 return visit_stop;
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_tree_grafting_visitor::visit_enter(ir_if *ir)
+{ /* Only the condition of the if-statement is a graft candidate. */
+   if (do_graft(&ir->condition))
+      return visit_stop;
+
+   /* Do not traverse into the body of the if-statement since that is a
+    * different basic block.
+    */
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+ir_tree_grafting_visitor::visit_enter(ir_swizzle *ir)
+{ /* The swizzled value is the only graft candidate here. */
+   if (do_graft(&ir->val))
+      return visit_stop;
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
+{ /* Tries the common operands first, then the lod_info member selected by ir->op. */
+   if (do_graft(&ir->coordinate) ||
+       do_graft(&ir->projector) ||
+       do_graft(&ir->shadow_comparitor))
+	 return visit_stop;
+
+   switch (ir->op) {
+   case ir_tex: /* no lod_info operand is tried for this op */
+      break;
+   case ir_txb:
+      if (do_graft(&ir->lod_info.bias))
+	 return visit_stop;
+      break;
+   case ir_txf:
+   case ir_txl:
+      if (do_graft(&ir->lod_info.lod))
+	 return visit_stop;
+      break;
+   case ir_txd:
+      if (do_graft(&ir->lod_info.grad.dPdx) ||
+	  do_graft(&ir->lod_info.grad.dPdy))
+	 return visit_stop;
+      break;
+   }
+
+   return visit_continue;
+}
+
+struct tree_grafting_info { /* Per-pass state handed to tree_grafting_basic_block(). */
+   ir_variable_refcount_visitor *refs; /* source of the per-variable count entries */
+   bool progress; /* accumulates the results of try_tree_grafting() */
+};
+
+static bool
+try_tree_grafting(ir_assignment *start,
+		  ir_variable *lhs_var,
+		  ir_instruction *bb_last)
+{ /* Attempts to graft start's RHS into one of the instructions after it, through bb_last. */
+   ir_tree_grafting_visitor v(start, lhs_var);
+
+   if (debug) {
+      printf("trying to graft: ");
+      lhs_var->print();
+      printf("\n");
+   }
+
+   for (ir_instruction *ir = (ir_instruction *)start->next;
+	ir != bb_last->next;
+	ir = (ir_instruction *)ir->next) {
+
+      if (debug) {
+	 printf("- ");
+	 ir->print();
+	 printf("\n");
+      }
+
+      ir_visitor_status s = ir->accept(&v);
+      if (s == visit_stop) /* either a graft happened or the attempt was abandoned */
+	 return v.progress;
+   }
+
+   return false; /* reached the end of the block without any visit stopping */
+}
+
+static void
+tree_grafting_basic_block(ir_instruction *bb_first,
+			  ir_instruction *bb_last,
+			  void *data)
+{ /* Basic-block callback: data is a tree_grafting_info; tries each candidate assignment. */
+   struct tree_grafting_info *info = (struct tree_grafting_info *)data;
+   ir_instruction *ir, *next;
+   /* next is fetched up front: a successful graft removes the current assignment. */
+   for (ir = bb_first, next = (ir_instruction *)ir->next;
+	ir != bb_last->next;
+	ir = next, next = (ir_instruction *)ir->next) {
+      ir_assignment *assign = ir->as_assignment();
+
+      if (!assign)
+	 continue;
+
+      ir_variable *lhs_var = assign->lhs->whole_variable_referenced();
+      if (!lhs_var)
+	 continue;
+
+      struct variable_entry *entry = info->refs->get_variable_entry(lhs_var);
+      /* presumably 2 references == this write plus a single read -- TODO confirm */
+      if (!entry->declaration ||
+	  entry->assigned_count != 1 ||
+	  entry->referenced_count != 2)
+	 continue;
+
+      assert(assign == entry->assign);
+
+      /* Found a possibly graftable assignment.  Now, walk through the
+       * rest of the BB seeing if the deref is here, and if nothing interfered with
+       * pasting its expression's values in between.
+       */
+      info->progress |= try_tree_grafting(assign, lhs_var, bb_last);
+   }
+}
+
+/**
+ * Does a tree grafting pass on the code present in the instruction stream.
+ */
+bool
+do_tree_grafting(exec_list *instructions)
+{ /* Returns true if any graft was made. */
+   ir_variable_refcount_visitor refs;
+   struct tree_grafting_info info;
+
+   info.progress = false;
+   info.refs = &refs;
+
+   visit_list_elements(info.refs, instructions); /* first: run the refcount visitor */
+
+   call_for_basic_blocks(instructions, tree_grafting_basic_block, &info);
+
+   return info.progress;
+}
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index e9daad28ec..9b47e4788f 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1286,6 +1286,7 @@ link_shaders(struct gl_shader_program *prog)
 	 progress = do_copy_propagation(ir) || progress;
 	 progress = do_dead_code_local(ir) || progress;
 	 progress = do_dead_code(ir) || progress;
+	 progress = do_tree_grafting(ir) || progress;
 	 progress = do_constant_variable_unlinked(ir) || progress;
 	 progress = do_constant_folding(ir) || progress;
 	 progress = do_if_return(ir) || progress;
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index 08b133f124..d557dcc493 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -163,6 +163,7 @@ compile_shader(struct gl_shader *shader)
 	 progress = do_copy_propagation(shader->ir) || progress;
 	 progress = do_dead_code_local(shader->ir) || progress;
 	 progress = do_dead_code_unlinked(shader->ir) || progress;
+	 progress = do_tree_grafting(shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(shader->ir) || progress;
 	 progress = do_constant_folding(shader->ir) || progress;
 	 progress = do_algebraic(shader->ir) || progress;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index e62395a3b9..9274723eb7 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2485,6 +2485,7 @@ _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
 	 progress = do_copy_propagation(shader->ir) || progress;
 	 progress = do_dead_code_local(shader->ir) || progress;
 	 progress = do_dead_code_unlinked(shader->ir) || progress;
+	 progress = do_tree_grafting(shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(shader->ir) || progress;
 	 progress = do_constant_folding(shader->ir) || progress;
 	 progress = do_algebraic(shader->ir) || progress;
-- 
cgit v1.2.3


From 2e853ca23c8670246dd4efcee0706f68097652f7 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 5 Aug 2010 10:09:12 -0700
Subject: glsl2: Add a pass for removing unused functions.

For a shader involving many small functions, this avoids running
optimization across all of them after they've been inlined
post-linking.

Reduces the runtime of linking and running a fragment shader from Yo
Frankie from 1.6 seconds to 0.9 seconds (-44.9%, +/- 3.3%).
---
 src/glsl/Makefile              |   1 +
 src/glsl/glsl_symbol_table.h   |   6 ++
 src/glsl/ir.h                  |   1 -
 src/glsl/ir_dead_functions.cpp | 151 +++++++++++++++++++++++++++++++++++++++++
 src/glsl/ir_optimization.h     |   1 +
 src/glsl/linker.cpp            |   1 +
 6 files changed, 160 insertions(+), 1 deletion(-)
 create mode 100644 src/glsl/ir_dead_functions.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 3102947494..844385792a 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -39,6 +39,7 @@ CXX_SOURCES = \
 	ir.cpp \
 	ir_dead_code.cpp \
 	ir_dead_code_local.cpp \
+	ir_dead_functions.cpp \
 	ir_div_to_mul_rcp.cpp \
 	ir_expression_flattening.cpp \
 	ir_function_can_inline.cpp \
diff --git a/src/glsl/glsl_symbol_table.h b/src/glsl/glsl_symbol_table.h
index 27e825597c..02e4542cf3 100644
--- a/src/glsl/glsl_symbol_table.h
+++ b/src/glsl/glsl_symbol_table.h
@@ -133,6 +133,12 @@ public:
       return _mesa_symbol_table_add_symbol(table, glsl_function_name_space,
 					   name, f) == 0;
    }
+
+   bool remove_function(const char *name, ir_function *f)
+   { /* NOTE(review): despite the name, this calls _mesa_symbol_table_add_symbol() -- confirm. */
+      return _mesa_symbol_table_add_symbol(table, glsl_function_name_space,
+					   name, f) == 0;
+   }
    /*@}*/
 
    /**
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index e61485813d..f58602515e 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -410,7 +410,6 @@ public:
     */
    const char *name;
 
-private:
    /**
     * List of ir_function_signature for each overloaded function with this name.
     */
diff --git a/src/glsl/ir_dead_functions.cpp b/src/glsl/ir_dead_functions.cpp
new file mode 100644
index 0000000000..26554441d3
--- /dev/null
+++ b/src/glsl/ir_dead_functions.cpp
@@ -0,0 +1,151 @@
+ /*
+  * Copyright © 2010 Intel Corporation
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice (including the next
+  * paragraph) shall be included in all copies or substantial portions of the
+  * Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+  * DEALINGS IN THE SOFTWARE.
+  */
+
+ /**
+  * \file ir_dead_functions.cpp
+  *
+  * Eliminates unused functions from the linked program.
+  */
+
+ #include "ir.h"
+ #include "ir_visitor.h"
+ #include "ir_expression_flattening.h"
+ #include "glsl_types.h"
+
+ class signature_entry : public exec_node
+ { /* List node pairing a function signature with a "used" flag. */
+ public:
+    signature_entry(ir_function_signature *sig)
+    {
+       this->signature = sig;
+       this->used = false; /* unused until a visitor hook marks it */
+    }
+
+    ir_function_signature *signature; /* the key: signature being tracked */
+    bool used; /* set to true for signatures of "main" and for call targets */
+ };
+
+ class ir_dead_functions_visitor : public ir_hierarchical_visitor {
+ public:
+    ir_dead_functions_visitor()
+    {
+       this->mem_ctx = talloc_new(NULL); /* private context for the entries */
+    }
+
+    ~ir_dead_functions_visitor()
+    {
+       talloc_free(this->mem_ctx);
+    }
+
+    virtual ir_visitor_status visit_enter(ir_function_signature *);
+    virtual ir_visitor_status visit_enter(ir_call *);
+
+    signature_entry *get_signature_entry(ir_function_signature *var);
+    /* NOTE(review): predicate is not set by the constructor -- confirm it is needed. */
+    bool (*predicate)(ir_instruction *ir);
+
+    /* List of signature_entry */
+    exec_list signature_list;
+    void *mem_ctx; /* talloc context created in the constructor, freed in the destructor */
+ };
+
+
+ signature_entry *
+ ir_dead_functions_visitor::get_signature_entry(ir_function_signature *sig)
+ { /* Find-or-create: linear search of signature_list, appending a new entry on a miss. */
+    foreach_iter(exec_list_iterator, iter, this->signature_list) {
+       signature_entry *entry = (signature_entry *)iter.get();
+       if (entry->signature == sig)
+	  return entry;
+    }
+
+    signature_entry *entry = new(mem_ctx) signature_entry(sig);
+    this->signature_list.push_tail(entry);
+    return entry;
+ }
+
+
+ ir_visitor_status
+ ir_dead_functions_visitor::visit_enter(ir_function_signature *ir)
+ { /* Registers every visited signature so that unused ones can be found later. */
+    signature_entry *entry = this->get_signature_entry(ir);
+
+    if (strcmp(ir->function_name(), "main") == 0) { /* "main" always counts as used */
+       entry->used = true;
+    }
+
+    return visit_continue;
+ }
+
+
+ ir_visitor_status
+ ir_dead_functions_visitor::visit_enter(ir_call *ir)
+ { /* Marks the callee's signature as used. */
+    signature_entry *entry = this->get_signature_entry(ir->get_callee());
+
+    entry->used = true;
+
+   return visit_continue;
+}
+
+bool
+do_dead_functions(exec_list *instructions)
+{ /* Returns true if any signature or function was removed. */
+   ir_dead_functions_visitor v;
+   bool progress = false;
+
+   visit_list_elements(&v, instructions); /* fills v.signature_list with used flags */
+
+   /* Now that we've figured out which function signatures are used, remove
+    * the unused ones, and remove function definitions that have no more
+    * signatures.
+    */
+    foreach_iter(exec_list_iterator, iter, v.signature_list) {
+      signature_entry *entry = (signature_entry *)iter.get();
+
+      if (!entry->used) {
+	 entry->signature->remove();
+	 progress = true;
+      }
+      delete(entry); /* the entry is not consulted again after this loop */
+   }
+
+   /* We don't just do this above when we nuked a signature because of
+    * const pointers.
+    */
+   foreach_iter(exec_list_iterator, iter, *instructions) {
+      ir_instruction *ir = (ir_instruction *)iter.get();
+      ir_function *func = ir->as_function();
+
+      if (func && func->signatures.is_empty()) {
+	 /* At this point (post-linking), the symbol table is no
+	  * longer in use, so not removing the function from the
+	  * symbol table should be OK.
+	  */
+	 func->remove();
+	 progress = true;
+      }
+   }
+
+   return progress;
+}
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 55ec327193..e0c0715cf5 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -36,6 +36,7 @@ bool do_copy_propagation(exec_list *instructions);
 bool do_dead_code(exec_list *instructions);
 bool do_dead_code_local(exec_list *instructions);
 bool do_dead_code_unlinked(exec_list *instructions);
+bool do_dead_functions(exec_list *instructions);
 bool do_div_to_mul_rcp(exec_list *instructions);
 bool do_function_inlining(exec_list *instructions);
 bool do_if_return(exec_list *instructions);
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 94db57d6a5..f9e24ca0f1 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1286,6 +1286,7 @@ link_shaders(struct gl_shader_program *prog)
 	 progress = false;
 
 	 progress = do_function_inlining(ir) || progress;
+	 progress = do_dead_functions(ir) || progress;
 	 progress = do_if_simplification(ir) || progress;
 	 progress = do_copy_propagation(ir) || progress;
 	 progress = do_dead_code_local(ir) || progress;
-- 
cgit v1.2.3


From 7f7eaf0285d011f7cc7e1a63133184a50b24ecaa Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 5 Aug 2010 11:01:09 -0700
Subject: ir_structure_splitting: New pass to chop structures into their
 components.

This doesn't do anything if your structure goes through an uninlined
function call or if whole-structure assignment occurs.  As such, the
impact is limited, at least until we do some global copy propagation
to reduce whole-structure assignment.
---
 src/glsl/Makefile                   |   1 +
 src/glsl/ir_optimization.h          |   1 +
 src/glsl/ir_structure_splitting.cpp | 378 ++++++++++++++++++++++++++++++++++++
 src/glsl/linker.cpp                 |   1 +
 4 files changed, 381 insertions(+)
 create mode 100644 src/glsl/ir_structure_splitting.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 844385792a..53567508a0 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -55,6 +55,7 @@ CXX_SOURCES = \
 	ir_mod_to_fract.cpp \
 	ir_print_visitor.cpp \
 	ir_reader.cpp \
+	ir_structure_splitting.cpp \
 	ir_swizzle_swizzle.cpp \
 	ir_tree_grafting.cpp \
 	ir_validate.cpp \
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index e0c0715cf5..eac28dc64c 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -44,6 +44,7 @@ bool do_if_simplification(exec_list *instructions);
 bool do_if_to_cond_assign(exec_list *instructions);
 bool do_mat_op_to_vec(exec_list *instructions);
 bool do_mod_to_fract(exec_list *instructions);
+bool do_structure_splitting(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
 bool do_tree_grafting(exec_list *instructions);
 bool do_vec_index_to_cond_assign(exec_list *instructions);
diff --git a/src/glsl/ir_structure_splitting.cpp b/src/glsl/ir_structure_splitting.cpp
new file mode 100644
index 0000000000..f57ae44aae
--- /dev/null
+++ b/src/glsl/ir_structure_splitting.cpp
@@ -0,0 +1,378 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_structure_splitting.cpp
+ *
+ * If a structure is only ever referenced by its components, then
+ * split those components out to individual variables so they can be
+ * handled normally by other optimization passes.
+ *
+ * This skips structures like uniforms, which need to be accessible as
+ * structures for their access by the GL.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "glsl_types.h"
+
+class variable_entry : public exec_node
+{
+public:
+   variable_entry(ir_variable *var)
+      : var(var),
+        whole_structure_access(0),
+        declaration(false),
+        components(NULL),
+        mem_ctx(NULL)
+   {
+   }
+
+   ir_variable *var; /* Lookup key: the record variable being tracked. */
+
+   /** Bumped for each ir_dereference_variable of var the visitor visits. */
+   unsigned whole_structure_access;
+
+   bool declaration; /* Set once the ir_variable itself has been visited. */
+
+   ir_variable **components; /* Per-field replacements, set when splitting. */
+
+   /** talloc_parent(this->var); new IR for this variable is allocated here. */
+   void *mem_ctx;
+};
+
+class ir_structure_reference_visitor : public ir_hierarchical_visitor {
+public:
+   ir_structure_reference_visitor(void)
+      : mem_ctx(talloc_new(NULL))
+   {
+      this->variable_list.make_empty();
+   }
+
+   ~ir_structure_reference_visitor(void)
+   {
+      talloc_free(mem_ctx); /* the variable_entry nodes are allocated here */
+   }
+
+   virtual ir_visitor_status visit(ir_variable *);
+   virtual ir_visitor_status visit(ir_dereference_variable *);
+   virtual ir_visitor_status visit(ir_dereference_record *);
+
+   virtual ir_visitor_status visit_enter(ir_function_signature *);
+
+   variable_entry *get_variable_entry(ir_variable *var);
+
+   /* One variable_entry per record-typed variable seen so far. */
+   exec_list variable_list;
+
+   void *mem_ctx;
+};
+
+variable_entry *
+ir_structure_reference_visitor::get_variable_entry(ir_variable *var)
+{
+   assert(var);
+   /* Only record-typed variables are tracked; anything else yields NULL. */
+   if (var->type->is_record()) {
+      foreach_iter(exec_list_iterator, it, this->variable_list) {
+         variable_entry *const known = (variable_entry *)it.get();
+         if (known->var == var)
+            return known;
+      }
+      /* First sighting: create an entry and append it to the list. */
+      variable_entry *const fresh = new(mem_ctx) variable_entry(var);
+      this->variable_list.push_tail(fresh);
+      return fresh;
+   }
+   return NULL;
+}
+
+
+ir_visitor_status
+ir_structure_reference_visitor::visit(ir_variable *ir)
+{
+   /* A declaration was found in the stream; NULL means it is not a record. */
+   variable_entry *const decl_entry = this->get_variable_entry(ir);
+   if (decl_entry != NULL) {
+      decl_entry->declaration = true;
+   }
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_structure_reference_visitor::visit(ir_dereference_variable *ir)
+{
+   /* Any count recorded here later disqualifies the variable from splitting. */
+   variable_entry *const use_entry =
+      this->get_variable_entry(ir->variable_referenced());
+   if (use_entry != NULL)
+      use_entry->whole_structure_access += 1;
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_structure_reference_visitor::visit(ir_dereference_record *ir)
+{
+   /* Intent: don't descend into the child deref.  TODO confirm it works. */
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_structure_reference_visitor::visit_enter(ir_function_signature *ir)
+{
+   /* Visit only the body.  Descending into the signature itself would
+    * also reach the function parameters, which we don't want to track. */
+   exec_list *const body = &ir->body;
+   visit_list_elements(this, body);
+   return visit_continue_with_parent;
+}
+
+class ir_structure_splitting_visitor : public ir_hierarchical_visitor {
+public:
+   ir_structure_splitting_visitor(exec_list *vars)
+      : variable_list(vars), mem_ctx(NULL)
+   {
+   }
+
+   virtual ~ir_structure_splitting_visitor()
+   {
+   }
+   /* Rewrites "var.field" operands of these node types to split components. */
+   virtual ir_visitor_status visit_leave(ir_assignment *);
+   virtual ir_visitor_status visit_leave(ir_call *);
+   virtual ir_visitor_status visit_leave(ir_dereference_array *);
+   virtual ir_visitor_status visit_leave(ir_expression *);
+   virtual ir_visitor_status visit_leave(ir_if *);
+   virtual ir_visitor_status visit_leave(ir_return *);
+   virtual ir_visitor_status visit_leave(ir_swizzle *);
+   virtual ir_visitor_status visit_leave(ir_texture *);
+
+   void split_deref(ir_dereference **deref);
+   void split_rvalue(ir_rvalue **rvalue);
+   variable_entry *get_splitting_entry(ir_variable *var);
+
+   exec_list *variable_list; /* entries chosen for splitting; not owned */
+   void *mem_ctx;            /* not used by this visitor; kept NULL */
+};
+
+variable_entry *
+ir_structure_splitting_visitor::get_splitting_entry(ir_variable *var)
+{
+   assert(var);
+
+   /* Look var up among the entries chosen for splitting.  A miss returns
+    * NULL; this lookup never creates an entry.
+    */
+   variable_entry *match = NULL;
+   if (var->type->is_record()) {
+      foreach_iter(exec_list_iterator, it, *this->variable_list) {
+         variable_entry *const candidate = (variable_entry *)it.get();
+         if (candidate->var == var) { match = candidate; break; }
+      }
+   }
+   return match;
+}
+
+void
+ir_structure_splitting_visitor::split_deref(ir_dereference **deref)
+{
+   /* Rewrite *deref from "var.field" to the matching split component;
+    * anything other than a field access of a split variable is untouched. */
+   if ((*deref)->ir_type != ir_type_dereference_record)
+      return;
+   ir_dereference_record *deref_record = (ir_dereference_record *)*deref;
+   ir_dereference_variable *deref_var =
+      deref_record->record->as_dereference_variable();
+   if (!deref_var)
+      return;
+   /* A NULL entry means the variable was not selected for splitting. */
+   variable_entry *entry = get_splitting_entry(deref_var->var);
+   if (!entry)
+      return;
+   unsigned int i;
+   for (i = 0; i < entry->var->type->length; i++) {
+      if (strcmp(deref_record->field,
+                 entry->var->type->fields.structure[i].name) == 0)
+         break;
+   }
+   assert(i != entry->var->type->length);
+   *deref = new(entry->mem_ctx) ir_dereference_variable(entry->components[i]);
+}
+
+void
+ir_structure_splitting_visitor::split_rvalue(ir_rvalue **rvalue)
+{
+   /* Optional operands (e.g. an assignment's condition) may be NULL. */
+   ir_dereference *deref = *rvalue ? (*rvalue)->as_dereference() : NULL;
+   if (!deref)
+      return;
+
+   split_deref(&deref);
+   *rvalue = deref;
+}
+
+ir_visitor_status
+ir_structure_splitting_visitor::visit_leave(ir_expression *ir)
+{
+   /* Every operand slot may hold a field access that needs replacing. */
+   unsigned int idx = 0;
+   while (idx < ir->get_num_operands()) {
+      split_rvalue(&ir->operands[idx]);
+      idx++;
+   }
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_structure_splitting_visitor::visit_leave(ir_texture *ir)
+{
+   /* These three operand slots are processed for every opcode. */
+   split_rvalue(&ir->coordinate);
+   split_rvalue(&ir->projector);
+   split_rvalue(&ir->shadow_comparitor);
+
+   /* The remaining operands live in lod_info and depend on the opcode;
+    * ir_tex has none of them.
+    */
+   const bool takes_lod =
+      (ir->op == ir_txf || ir->op == ir_txl);
+
+   if (ir->op == ir_txb) {
+      split_rvalue(&ir->lod_info.bias);
+   } else if (takes_lod) {
+      split_rvalue(&ir->lod_info.lod);
+   } else if (ir->op == ir_txd) {
+      split_rvalue(&ir->lod_info.grad.dPdx);
+      split_rvalue(&ir->lod_info.grad.dPdy);
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_structure_splitting_visitor::visit_leave(ir_swizzle *ir)
+{
+   split_rvalue(&ir->val); /* the value being swizzled */
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_structure_splitting_visitor::visit_leave(ir_dereference_array *ir)
+{
+   split_rvalue(&ir->array_index); /* only the index operand is rewritten */
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_structure_splitting_visitor::visit_leave(ir_assignment *ir)
+{
+   split_rvalue(&ir->rhs);
+   split_rvalue(&ir->condition);
+   split_deref(&ir->lhs); /* lhs is already an ir_dereference slot */
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_structure_splitting_visitor::visit_leave(ir_call *ir)
+{
+   /* Parameters are list nodes, so a changed one is swapped in place. */
+   foreach_iter(exec_list_iterator, it, *ir) {
+      ir_rvalue *const actual = (ir_rvalue *)it.get();
+      ir_rvalue *replacement = actual;
+
+      split_rvalue(&replacement);
+      if (replacement != actual)
+         actual->replace_with(replacement);
+   }
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_structure_splitting_visitor::visit_leave(ir_return *ir)
+{
+   split_rvalue(&ir->value); /* the returned expression */
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_structure_splitting_visitor::visit_leave(ir_if *ir)
+{
+   split_rvalue(&ir->condition); /* only the condition slot is rewritten */
+   return visit_continue;
+}
+
+
+bool
+do_structure_splitting(exec_list *instructions)
+{
+   /* Returns true if any record variable was replaced by per-field
+    * temporaries; returns false, leaving the IR untouched, otherwise. */
+   ir_structure_reference_visitor refs;
+   visit_list_elements(&refs, instructions); /* collect the candidates */
+
+   /* Trim out variables we can't split. */
+   foreach_iter(exec_list_iterator, iter, refs.variable_list) {
+      variable_entry *entry = (variable_entry *)iter.get();
+      if (!entry->declaration || entry->whole_structure_access) {
+         entry->remove();
+      }
+   }
+
+   if (refs.variable_list.is_empty())
+      return false;
+
+   /* Scratch context, created only now so the early return cannot leak it. */
+   void *mem_ctx = talloc_new(NULL);
+
+   /* Replace the decls of the structures to be split with their split
+    * components.
+    */
+   foreach_iter(exec_list_iterator, iter, refs.variable_list) {
+      variable_entry *entry = (variable_entry *)iter.get();
+      const struct glsl_type *type = entry->var->type;
+
+      entry->mem_ctx = talloc_parent(entry->var);
+      entry->components = talloc_array(mem_ctx, ir_variable *, type->length);
+
+      for (unsigned int i = 0; i < type->length; i++) {
+         const char *name = talloc_asprintf(mem_ctx, "%s_%s", type->name,
+                                            type->fields.structure[i].name);
+         entry->components[i] =
+            new(entry->mem_ctx) ir_variable(type->fields.structure[i].type,
+                                            name, ir_var_temporary);
+         entry->var->insert_before(entry->components[i]);
+      }
+
+      entry->var->remove();
+   }
+
+   /* Rewrite every field access to use the new component variables. */
+   ir_structure_splitting_visitor split(&refs.variable_list);
+   visit_list_elements(&split, instructions);
+
+   talloc_free(mem_ctx);
+
+   return true;
+}
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index f9e24ca0f1..050116954a 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1287,6 +1287,7 @@ link_shaders(struct gl_shader_program *prog)
 
 	 progress = do_function_inlining(ir) || progress;
 	 progress = do_dead_functions(ir) || progress;
+	 progress = do_structure_splitting(ir) || progress;
 	 progress = do_if_simplification(ir) || progress;
 	 progress = do_copy_propagation(ir) || progress;
 	 progress = do_dead_code_local(ir) || progress;
-- 
cgit v1.2.3


From bc4034b243975089c06c4415d4e26edaaaec7a46 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 5 Aug 2010 15:22:05 -0700
Subject: glsl2: Add a pass to convert exp and log to exp2 and log2.

Fixes ir_to_mesa handling of unop_log, which used the weird ARB_vp LOG
opcode that doesn't do what we want.  This also lets the multiplication
coefficients in there get constant-folded, possibly.

Fixes:
glsl-fs-log
---
 src/glsl/Makefile                 |  1 +
 src/glsl/ir.h                     |  4 +-
 src/glsl/ir_explog_to_explog2.cpp | 85 +++++++++++++++++++++++++++++++++++++++
 src/glsl/ir_optimization.h        |  1 +
 src/glsl/ir_validate.cpp          |  4 ++
 src/glsl/linker.cpp               |  1 +
 src/mesa/program/ir_to_mesa.cpp   |  7 +---
 7 files changed, 96 insertions(+), 7 deletions(-)
 create mode 100644 src/glsl/ir_explog_to_explog2.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 53567508a0..752e60a79f 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -41,6 +41,7 @@ CXX_SOURCES = \
 	ir_dead_code_local.cpp \
 	ir_dead_functions.cpp \
 	ir_div_to_mul_rcp.cpp \
+	ir_explog_to_explog2.cpp \
 	ir_expression_flattening.cpp \
 	ir_function_can_inline.cpp \
 	ir_function.cpp \
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index ef8339ce19..5dc3c6b918 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -593,8 +593,8 @@ enum ir_expression_operation {
    ir_unop_rcp,
    ir_unop_rsq,
    ir_unop_sqrt,
-   ir_unop_exp,
-   ir_unop_log,
+   ir_unop_exp,      /**< Exponent base e on gentype */
+   ir_unop_log,	     /**< Natural log on gentype */
    ir_unop_exp2,
    ir_unop_log2,
    ir_unop_f2i,      /**< Float-to-integer conversion. */
diff --git a/src/glsl/ir_explog_to_explog2.cpp b/src/glsl/ir_explog_to_explog2.cpp
new file mode 100644
index 0000000000..4fe1daaee9
--- /dev/null
+++ b/src/glsl/ir_explog_to_explog2.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_explog_to_explog2.cpp
+ *
+ * Many GPUs don't have a base e log or exponent instruction, but they
+ * do have base 2 versions, so this pass converts exp and log to exp2
+ * and log2 operations.
+ */
+
+#include <math.h>
+#include "ir.h"
+#include "glsl_types.h"
+
+class ir_explog_to_explog2_visitor : public ir_hierarchical_visitor {
+public:
+   ir_explog_to_explog2_visitor()
+      : progress(false)
+   {
+   }
+
+   ir_visitor_status visit_leave(ir_expression *);
+
+   bool progress; /* set once any exp/log expression has been rewritten */
+};
+
+bool
+do_explog_to_explog2(exec_list *instructions)
+{
+   /* Returns whether at least one exp/log expression was rewritten. */
+   ir_explog_to_explog2_visitor lowering;
+   visit_list_elements(&lowering, instructions);
+   return lowering.progress;
+}
+
+ir_visitor_status
+ir_explog_to_explog2_visitor::visit_leave(ir_expression *ir)
+{
+   switch (ir->operation) {
+   case ir_unop_exp: {
+      /* exp(x) == exp2(x * log2(e)) */
+      void *const ctx = talloc_parent(ir);
+      ir_rvalue *const x = ir->operands[0];
+      ir_constant *const scale = new(ctx) ir_constant(log2f(M_E));
+      ir->operation = ir_unop_exp2;
+      ir->operands[0] = new(ctx) ir_expression(ir_binop_mul, x->type, x, scale);
+      this->progress = true;
+      break;
+   }
+   case ir_unop_log: {
+      /* log(x) == log2(x) * (1 / log2(e)) */
+      void *const ctx = talloc_parent(ir);
+      ir_rvalue *const x = ir->operands[0];
+      ir->operation = ir_binop_mul;
+      ir->operands[0] = new(ctx) ir_expression(ir_unop_log2, x->type, x, NULL);
+      ir->operands[1] = new(ctx) ir_constant(1.0f / log2f(M_E));
+      this->progress = true;
+      break;
+   }
+   default:
+      break;
+   }
+   return visit_continue;
+}
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index eac28dc64c..c6e7beb447 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -38,6 +38,7 @@ bool do_dead_code_local(exec_list *instructions);
 bool do_dead_code_unlinked(exec_list *instructions);
 bool do_dead_functions(exec_list *instructions);
 bool do_div_to_mul_rcp(exec_list *instructions);
+bool do_explog_to_explog2(exec_list *instructions);
 bool do_function_inlining(exec_list *instructions);
 bool do_if_return(exec_list *instructions);
 bool do_if_simplification(exec_list *instructions);
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 701bf21ea6..545fe2799f 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -183,10 +183,14 @@ ir_validate::visit_leave(ir_expression *ir)
    case ir_unop_rcp:
    case ir_unop_rsq:
    case ir_unop_sqrt:
+      assert(ir->type == ir->operands[0]->type);
+      break;
+
    case ir_unop_exp:
    case ir_unop_log:
    case ir_unop_exp2:
    case ir_unop_log2:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
       assert(ir->type == ir->operands[0]->type);
       break;
 
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 050116954a..9d6de242f5 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1281,6 +1281,7 @@ link_shaders(struct gl_shader_program *prog)
       do_mat_op_to_vec(ir);
       do_mod_to_fract(ir);
       do_div_to_mul_rcp(ir);
+      do_explog_to_explog2(ir);
 
       do {
 	 progress = false;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 299b11d274..26fbc4349a 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -752,15 +752,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[0]);
       break;
 
-   case ir_unop_exp:
-      ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst,
-				 src_reg_for_float(M_E), op[0]);
-      break;
    case ir_unop_exp2:
       ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
       break;
+   case ir_unop_exp:
    case ir_unop_log:
-      ir_to_mesa_emit_scalar_op1(ir, OPCODE_LOG, result_dst, op[0]);
+      assert(!"not reached: should be handled by ir_explog_to_explog2");
       break;
    case ir_unop_log2:
       ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
-- 
cgit v1.2.3


From 8bebbeb7c5b26ec9166a4644a2c051238d18509b Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 9 Aug 2010 17:03:46 -0700
Subject: glsl2: Add constant propagation.

Whereas constant folding evaluates constant expressions at rvalue
nodes, constant propagation tracks constant components of vectors
across execution to replace (possibly swizzled) variable dereferences
with constant values, triggering possible constant folding or reduced
variable liveness.
---
 src/glsl/Makefile                    |   1 +
 src/glsl/ir_constant_propagation.cpp | 481 +++++++++++++++++++++++++++++++++++
 src/glsl/ir_optimization.h           |   1 +
 src/glsl/linker.cpp                  |   1 +
 src/glsl/main.cpp                    |   1 +
 src/mesa/program/ir_to_mesa.cpp      |   1 +
 6 files changed, 486 insertions(+)
 create mode 100644 src/glsl/ir_constant_propagation.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 0f8b290b65..841e2b9ce9 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -34,6 +34,7 @@ CXX_SOURCES = \
 	ir_clone.cpp \
 	ir_constant_expression.cpp \
 	ir_constant_folding.cpp \
+	ir_constant_propagation.cpp \
 	ir_constant_variable.cpp \
 	ir_copy_propagation.cpp \
 	ir.cpp \
diff --git a/src/glsl/ir_constant_propagation.cpp b/src/glsl/ir_constant_propagation.cpp
new file mode 100644
index 0000000000..adae0aa117
--- /dev/null
+++ b/src/glsl/ir_constant_propagation.cpp
@@ -0,0 +1,481 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_constant_propagation.cpp
+ *
+ * Tracks assignments of constants to channels of variables, and replaces
+ * usage of those constant channels with direct usage of the constants.
+ *
+ * This can lead to constant folding and algebraic optimizations in
+ * those later expressions, while causing no increase in instruction
+ * count (due to constants being generally free to load from a
+ * constant push buffer or as instruction immediate values) and
+ * possibly reducing register pressure.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_basic_block.h"
+#include "ir_optimization.h"
+#include "glsl_types.h"
+
+class acp_entry : public exec_node
+{
+public:
+   /* One available constant: "the write_mask channels of var equal
+    * constant".  kill() clears mask bits as channels are overwritten. */
+   acp_entry(ir_variable *var, unsigned write_mask, ir_constant *constant)
+      : var(var), constant(constant), write_mask(write_mask)
+   {
+      assert(var);
+      assert(constant);
+   }
+
+   ir_variable *var;
+   ir_constant *constant;
+   unsigned write_mask;
+};
+
+
+class kill_entry : public exec_node
+{
+public:
+   /* Records that the write_mask channels of var were overwritten. */
+   kill_entry(ir_variable *var, unsigned write_mask)
+      : var(var), write_mask(write_mask)
+   {
+      assert(var);
+   }
+
+   ir_variable *var;
+   unsigned write_mask;
+};
+
+class ir_constant_propagation_visitor : public ir_hierarchical_visitor {
+public:
+   ir_constant_propagation_visitor()
+   {
+      progress = false;
+      killed_all = false;
+      mem_ctx = talloc_new(0);
+      this->acp = new(mem_ctx) exec_list;
+      this->kills = new(mem_ctx) exec_list;
+   }
+   ~ir_constant_propagation_visitor()
+   {
+      talloc_free(mem_ctx);
+   }
+
+   virtual ir_visitor_status visit_enter(class ir_loop *);
+   virtual ir_visitor_status visit_enter(class ir_function_signature *);
+   virtual ir_visitor_status visit_enter(class ir_function *);
+   virtual ir_visitor_status visit_enter(class ir_assignment *);
+   virtual ir_visitor_status visit_leave(class ir_assignment *);
+   virtual ir_visitor_status visit_enter(class ir_expression *);
+   virtual ir_visitor_status visit_enter(class ir_call *);
+   virtual ir_visitor_status visit_enter(class ir_if *);
+   virtual ir_visitor_status visit_enter(class ir_dereference_array *);
+   virtual ir_visitor_status visit_enter(class ir_texture *);
+
+   void add_constant(ir_assignment *ir);
+   void kill(ir_variable *ir, unsigned write_mask);
+   void handle_if_block(exec_list *instructions);
+   void handle_rvalue(ir_rvalue **rvalue);
+
+   /** List of acp_entry: The available constants to propagate */
+   exec_list *acp;
+
+   /**
+    * List of kill_entry: The masks of variables whose values were
+    * killed in this block.
+    */
+   exec_list *kills;
+
+   bool progress;   /**< Set when any rvalue was replaced by a constant. */
+   bool killed_all; /**< Set when a call emptied the ACP in this block. */
+
+   void *mem_ctx;   /**< Lists and entries are allocated from this context. */
+};
+
+
+void
+ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   /* Replace *rvalue with an ir_constant when it is a (possibly swizzled)
+    * dereference of a variable whose every channel read here has a known
+    * constant in the ACP; otherwise leave *rvalue untouched.
+    */
+   ir_rvalue *const node = *rvalue;
+   if (node == NULL)
+      return;
+   const glsl_type *const type = node->type;
+   if (!(type->is_scalar() || type->is_vector()))
+      return;
+
+   /* Accept either a plain variable dereference or a swizzle of one. */
+   ir_dereference_variable *deref = node->as_dereference_variable();
+   ir_swizzle *const swiz = (deref != NULL) ? NULL : node->as_swizzle();
+   if (deref == NULL) {
+      if (swiz == NULL)
+         return;
+      deref = swiz->val->as_dereference_variable();
+      if (deref == NULL)
+         return;
+   }
+
+   ir_constant_data data;
+   memset(&data, 0, sizeof(data));
+
+   for (unsigned int i = 0; i < type->components(); i++) {
+      /* Which channel of the variable feeds result component i. */
+      int channel;
+      if (swiz == NULL) {
+         channel = i;
+      } else if (i == 0) {
+         channel = swiz->mask.x;
+      } else if (i == 1) {
+         channel = swiz->mask.y;
+      } else if (i == 2) {
+         channel = swiz->mask.z;
+      } else if (i == 3) {
+         channel = swiz->mask.w;
+      } else {
+         assert(!"shouldn't be reached");
+         channel = 0;
+      }
+      /* First ACP entry that still knows this channel of the variable. */
+      acp_entry *found = NULL;
+      foreach_iter(exec_list_iterator, it, *this->acp) {
+         acp_entry *const candidate = (acp_entry *)it.get();
+         if (candidate->var != deref->var)
+            continue;
+         if (candidate->write_mask & (1 << channel)) {
+            found = candidate;
+            break;
+         }
+      }
+      if (found == NULL)
+         return;
+
+      /* NOTE(review): the constant is indexed by destination channel;
+       * confirm it is laid out that way for partial write masks. */
+      const ir_constant *const src = found->constant;
+      switch (type->base_type) {
+      case GLSL_TYPE_FLOAT: data.f[i] = src->value.f[channel]; break;
+      case GLSL_TYPE_INT:   data.i[i] = src->value.i[channel]; break;
+      case GLSL_TYPE_UINT:  data.u[i] = src->value.u[channel]; break;
+      case GLSL_TYPE_BOOL:  data.b[i] = src->value.b[channel]; break;
+      default: assert(!"not reached"); break;
+      }
+   }
+
+   *rvalue = new(talloc_parent(deref)) ir_constant(type, &data);
+   this->progress = true;
+}
+
+ir_visitor_status
+ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir)
+{
+   /* A function body starts from a clean slate: nothing known outside the
+    * signature carries in, and nothing learned inside carries out.
+    * Instructions at global scope will be shuffled into main() at link
+    * time, so they're irrelevant to us.
+    */
+   exec_list *const saved_kills = this->kills;
+   exec_list *const saved_acp = this->acp;
+   const bool saved_killed_all = this->killed_all;
+
+   this->killed_all = false;
+   this->acp = new(mem_ctx) exec_list;
+   this->kills = new(mem_ctx) exec_list;
+
+   visit_list_elements(this, &ir->body);
+
+   this->acp = saved_acp;
+   this->kills = saved_kills;
+   this->killed_all = saved_killed_all;
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+ir_constant_propagation_visitor::visit_enter(ir_assignment *ir)
+{
+   handle_rvalue(&ir->condition); /* NULL is tolerated by handle_rvalue() */
+   handle_rvalue(&ir->rhs);
+   /* The lhs is deliberately not passed to handle_rvalue(). */
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_constant_propagation_visitor::visit_leave(ir_assignment *ir)
+{
+   /* Invalidate the written channels, then record the new constant if any. */
+   ir_variable *const written = ir->lhs->variable_referenced();
+   kill(written, ir->write_mask);
+   add_constant(ir);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_constant_propagation_visitor::visit_enter(ir_expression *ir)
+{
+   /* Try to replace each operand in place. */
+   unsigned int operand = 0;
+   while (operand < ir->get_num_operands())
+      handle_rvalue(&ir->operands[operand++]);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_constant_propagation_visitor::visit_enter(ir_function *ir)
+{
+   (void) ir; /* nothing to do at this level; just keep descending */
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_constant_propagation_visitor::visit_enter(ir_call *ir)
+{
+   /* Do constant propagation on call parameters, walking the callee's
+    * formal parameters in step so that out and inout ones are skipped. */
+   exec_list_iterator formal_it = ir->get_callee()->parameters.iterator();
+   foreach_iter(exec_list_iterator, actual_it, ir->actual_parameters) {
+      ir_variable *const formal = (ir_variable *)formal_it.get();
+      ir_rvalue *const actual = (ir_rvalue *)actual_it.get();
+      const bool is_output =
+         formal->mode == ir_var_out || formal->mode == ir_var_inout;
+      if (!is_output) {
+         ir_rvalue *replacement = actual;
+         handle_rvalue(&replacement);
+         if (replacement == actual)
+            actual->accept(this);
+         else
+            actual->replace_with(replacement);
+      }
+      formal_it.next();
+   }
+   /* Since we're unlinked, we don't (necessarily) know the side effects of
+    * this call.  So kill all constants. */
+   acp->make_empty();
+   this->killed_all = true;
+   return visit_continue_with_parent;
+}
+
+void
+ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
+{
+   /* Visit one branch of an if with a private ACP and kill list, then
+    * fold the branch's kills back into the enclosing block's state. */
+   exec_list *const outer_acp = this->acp;
+   exec_list *const outer_kills = this->kills;
+   const bool outer_killed_all = this->killed_all;
+
+   this->acp = new(mem_ctx) exec_list;
+   this->kills = new(mem_ctx) exec_list;
+   this->killed_all = false;
+
+   /* Populate the initial acp with a copy of the original */
+   foreach_iter(exec_list_iterator, it, *outer_acp) {
+      acp_entry *const src = (acp_entry *)it.get();
+      this->acp->push_tail(
+         new(this->mem_ctx) acp_entry(src->var, src->write_mask, src->constant));
+   }
+   visit_list_elements(this, instructions);
+
+   /* A call inside the branch wipes the enclosing block's constants too. */
+   if (this->killed_all)
+      outer_acp->make_empty();
+
+   exec_list *const branch_kills = this->kills;
+   this->kills = outer_kills;
+   this->acp = outer_acp;
+   this->killed_all = this->killed_all || outer_killed_all;
+   foreach_iter(exec_list_iterator, it, *branch_kills) {
+      kill_entry *const dead = (kill_entry *)it.get();
+      kill(dead->var, dead->write_mask);
+   }
+}
+
+ir_visitor_status
+ir_constant_propagation_visitor::visit_enter(ir_if *ir)
+{
+   ir->condition->accept(this);   /* propagate inside the condition tree */
+   handle_rvalue(&ir->condition); /* then try the condition slot itself */
+   /* Each branch starts from a copy of the constants known at this point. */
+   handle_if_block(&ir->then_instructions);
+   handle_if_block(&ir->else_instructions);
+
+   /* handle_if_block() already descended into the children. */
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+ir_constant_propagation_visitor::visit_enter(ir_dereference_array *ir)
+{
+   handle_rvalue(&ir->array_index); /* only the index operand is a candidate */
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_constant_propagation_visitor::visit_enter(ir_texture *ir)
+{
+   /* These three operand slots are processed for every opcode. */
+   handle_rvalue(&ir->coordinate);
+   handle_rvalue(&ir->projector);
+   handle_rvalue(&ir->shadow_comparitor);
+
+   /* The remaining operands live in lod_info and depend on the opcode;
+    * ir_tex has none of them.
+    */
+   const bool takes_lod =
+      (ir->op == ir_txf || ir->op == ir_txl);
+
+   if (ir->op == ir_txb) {
+      handle_rvalue(&ir->lod_info.bias);
+   } else if (takes_lod) {
+      handle_rvalue(&ir->lod_info.lod);
+   } else if (ir->op == ir_txd) {
+      handle_rvalue(&ir->lod_info.grad.dPdx);
+      handle_rvalue(&ir->lod_info.grad.dPdy);
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
+{
+   exec_list *const outer_acp = this->acp;
+   exec_list *const outer_kills = this->kills;
+   const bool outer_killed_all = this->killed_all;
+
+   /* FINISHME: For now, the initial acp for loops is totally empty.
+    * We could go through once, then go through again with the acp
+    * cloned minus the killed entries after the first run through.
+    */
+   this->acp = new(mem_ctx) exec_list;
+   this->kills = new(mem_ctx) exec_list;
+   this->killed_all = false;
+
+   visit_list_elements(this, &ir->body_instructions);
+
+   /* A call in the body wipes the enclosing block's constants as well. */
+   if (this->killed_all)
+      outer_acp->make_empty();
+
+   exec_list *const body_kills = this->kills;
+   this->kills = outer_kills;
+   this->acp = outer_acp;
+   this->killed_all = this->killed_all || outer_killed_all;
+
+   /* Whatever the body overwrote is unknown once the loop is left. */
+   foreach_iter(exec_list_iterator, it, *body_kills) {
+      kill_entry *const dead = (kill_entry *)it.get();
+      kill(dead->var, dead->write_mask);
+   }
+
+   return visit_continue_with_parent; /* body was visited above */
+}
+
+void
+ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask)
+{
+   assert(var != NULL);
+
+   /* Only scalars and vectors are ever tracked. */
+   const bool tracked = var->type->is_vector() || var->type->is_scalar();
+   if (!tracked)
+      return;
+
+   /* Forget the killed channels in the ACP, dropping entries that no
+    * longer describe any channel. */
+   foreach_iter(exec_list_iterator, it, *this->acp) {
+      acp_entry *const known = (acp_entry *)it.get();
+      if (known->var != var)
+         continue;
+
+      known->write_mask &= ~write_mask;
+      if (known->write_mask == 0)
+         known->remove();
+   }
+
+   /* Record the kill for the enclosing block, merging with an existing
+    * record for the same variable when there is one.
+    */
+   foreach_iter(exec_list_iterator, it, *this->kills) {
+      kill_entry *const prior = (kill_entry *)it.get();
+      if (prior->var == var) {
+         prior->write_mask |= write_mask;
+         return;
+      }
+   }
+   this->kills->push_tail(new(this->mem_ctx) kill_entry(var, write_mask));
+}
+
+/**
+ * Adds an entry to the available constant list if \c ir is an assignment
+ * of a constant to a scalar or vector variable that is known to execute.
+ */
+void
+ir_constant_propagation_visitor::add_constant(ir_assignment *ir)
+{
+   /* A conditional assignment only counts when its condition is itself a
+    * constant whose first component is true. */
+   if (ir->condition) {
+      ir_constant *const cond = ir->condition->as_constant();
+      const bool always_taken = cond && cond->value.b[0];
+      if (!always_taken)
+         return;
+   }
+   if (ir->write_mask == 0)
+      return;
+
+   ir_dereference_variable *const dest = ir->lhs->as_dereference_variable();
+   ir_constant *const value = ir->rhs->as_constant();
+   if (dest == NULL || value == NULL)
+      return;
+
+   /* Only do constant propagation on vectors.  Constant matrices,
+    * arrays, or structures would require more work elsewhere.
+    */
+   const glsl_type *const dest_type = dest->var->type;
+   if (!dest_type->is_vector() && !dest_type->is_scalar())
+      return;
+
+   this->acp->push_tail(new(this->mem_ctx) acp_entry(dest->var, ir->write_mask,
+                                                     value));
+}
+
+/**
+ * Runs one constant propagation pass over \c instructions.
+ * Returns true if any rvalue was replaced by a constant.
+ */
+bool
+do_constant_propagation(exec_list *instructions)
+{
+   ir_constant_propagation_visitor propagator;
+
+   visit_list_elements(&propagator, instructions);
+   return propagator.progress;
+}
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index c6e7beb447..97a0c25216 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -33,6 +33,7 @@ bool do_constant_folding(exec_list *instructions);
 bool do_constant_variable(exec_list *instructions);
 bool do_constant_variable_unlinked(exec_list *instructions);
 bool do_copy_propagation(exec_list *instructions);
+bool do_constant_propagation(exec_list *instructions);
 bool do_dead_code(exec_list *instructions);
 bool do_dead_code_local(exec_list *instructions);
 bool do_dead_code_unlinked(exec_list *instructions);
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index e93c2f5554..52c9322788 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1296,6 +1296,7 @@ link_shaders(struct gl_shader_program *prog)
 	 progress = do_dead_code_local(ir) || progress;
 	 progress = do_dead_code(ir) || progress;
 	 progress = do_tree_grafting(ir) || progress;
+	 progress = do_constant_propagation(ir) || progress;
 	 progress = do_constant_variable(ir) || progress;
 	 progress = do_constant_folding(ir) || progress;
 	 progress = do_algebraic(ir) || progress;
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index bc7292d155..24d6076d07 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -167,6 +167,7 @@ compile_shader(struct gl_shader *shader)
 	 progress = do_dead_code_local(shader->ir) || progress;
 	 progress = do_dead_code_unlinked(shader->ir) || progress;
 	 progress = do_tree_grafting(shader->ir) || progress;
+	 progress = do_constant_propagation(shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(shader->ir) || progress;
 	 progress = do_constant_folding(shader->ir) || progress;
 	 progress = do_algebraic(shader->ir) || progress;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index c6856eb5a4..5a272ab88a 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2581,6 +2581,7 @@ _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
 	 progress = do_dead_code_local(shader->ir) || progress;
 	 progress = do_dead_code_unlinked(shader->ir) || progress;
 	 progress = do_tree_grafting(shader->ir) || progress;
+	 progress = do_constant_propagation(shader->ir) || progress;
 	 progress = do_constant_variable_unlinked(shader->ir) || progress;
 	 progress = do_constant_folding(shader->ir) || progress;
 	 progress = do_algebraic(shader->ir) || progress;
-- 
cgit v1.2.3


From 5854d4583c6e8885185e12a0636f77489a62e24c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 9 Aug 2010 21:22:17 -0700
Subject: glsl2: Add a pass to transform ir_binop_sub to add(op0, neg(op1))

All the current HW backends transform subtract to adding the negation,
so I haven't bothered peepholing it back out in Mesa IR.  This allows
some subtract of subtract to get removed in ir_algebraic.
---
 src/glsl/Makefile               |  1 +
 src/glsl/ir_optimization.h      |  1 +
 src/glsl/ir_sub_to_add_neg.cpp  | 76 +++++++++++++++++++++++++++++++++++++++++
 src/glsl/linker.cpp             |  1 +
 src/mesa/program/ir_to_mesa.cpp |  1 +
 5 files changed, 80 insertions(+)
 create mode 100644 src/glsl/ir_sub_to_add_neg.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 841e2b9ce9..85298d06a0 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -59,6 +59,7 @@ CXX_SOURCES = \
 	ir_reader.cpp \
 	ir_set_program_inouts.cpp \
 	ir_structure_splitting.cpp \
+	ir_sub_to_add_neg.cpp \
 	ir_swizzle_swizzle.cpp \
 	ir_tree_grafting.cpp \
 	ir_validate.cpp \
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 97a0c25216..5997a30eab 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -47,6 +47,7 @@ bool do_if_to_cond_assign(exec_list *instructions);
 bool do_mat_op_to_vec(exec_list *instructions);
 bool do_mod_to_fract(exec_list *instructions);
 bool do_structure_splitting(exec_list *instructions);
+bool do_sub_to_add_neg(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
 bool do_tree_grafting(exec_list *instructions);
 bool do_vec_index_to_cond_assign(exec_list *instructions);
diff --git a/src/glsl/ir_sub_to_add_neg.cpp b/src/glsl/ir_sub_to_add_neg.cpp
new file mode 100644
index 0000000000..7ed8c1495e
--- /dev/null
+++ b/src/glsl/ir_sub_to_add_neg.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_sub_to_add_neg.cpp
+ *
+ * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
+ *
+ * This simplifies expression reassociation, and for many backends
+ * there is no subtract operation separate from adding the negation.
+ * For backends with native subtract operations, they will probably
+ * want to recognize add(op0, neg(op1)) or the other way around to
+ * produce a subtract anyway.
+ */
+
+#include "ir.h"
+
+class ir_sub_to_add_neg_visitor : public ir_hierarchical_visitor {
+public:
+   ir_sub_to_add_neg_visitor()
+   {
+      this->progress = false;
+   }
+
+   ir_visitor_status visit_leave(ir_expression *);
+   /* Set to true by visit_leave() when it rewrites an ir_binop_sub. */
+   bool progress;
+};
+
+bool
+do_sub_to_add_neg(exec_list *instructions)
+{
+   ir_sub_to_add_neg_visitor v;
+   /* Walk the list; the return value is the visitor's progress flag. */
+   visit_list_elements(&v, instructions);
+   return v.progress;
+}
+
+ir_visitor_status
+ir_sub_to_add_neg_visitor::visit_leave(ir_expression *ir)
+{
+   if (ir->operation != ir_binop_sub)
+      return visit_continue;
+   /* Allocate the negation under the same talloc parent as the expression. */
+   void *mem_ctx = talloc_parent(ir);
+   /* Rewrite in place: sub(op0, op1) becomes add(op0, neg(op1)). */
+   ir->operation = ir_binop_add;
+   ir->operands[1] = new(mem_ctx) ir_expression(ir_unop_neg,
+						ir->operands[1]->type,
+						ir->operands[1],
+						NULL);
+
+   this->progress = true;
+
+   return visit_continue;
+}
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 52c9322788..c462d31ef3 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1284,6 +1284,7 @@ link_shaders(struct gl_shader_program *prog)
       do_mod_to_fract(ir);
       do_div_to_mul_rcp(ir);
       do_explog_to_explog2(ir);
+      do_sub_to_add_neg(ir);
 
       do {
 	 progress = false;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 5a272ab88a..a9a6f977c0 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2570,6 +2570,7 @@ _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
       do_mat_op_to_vec(shader->ir);
       do_mod_to_fract(shader->ir);
       do_div_to_mul_rcp(shader->ir);
+      do_sub_to_add_neg(shader->ir);
 
       /* Optimization passes */
       bool progress;
-- 
cgit v1.2.3


From 2f4fe151681a6f6afe1d452eece6cf4144f44e49 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 10 Aug 2010 13:06:49 -0700
Subject: glsl2: Move the common optimization passes to a helper function.

These are passes that we expect all codegen to be happy with.  The
other lowering passes for Mesa IR are moved to the Mesa IR generator.
---
 src/glsl/glsl_parser_extras.cpp | 35 +++++++++++++++++++++++
 src/glsl/ir_optimization.h      |  2 ++
 src/glsl/linker.cpp             | 47 ++++---------------------------
 src/mesa/program/ir_to_mesa.cpp | 61 ++++++++++++++++++++---------------------
 4 files changed, 72 insertions(+), 73 deletions(-)

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index dbf6f53156..2ed3905abc 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -33,6 +33,7 @@ extern "C" {
 #include "ast.h"
 #include "glsl_parser_extras.h"
 #include "glsl_parser.h"
+#include "ir_optimization.h"
 
 _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct __GLcontextRec *ctx,
 					       GLenum target, void *mem_ctx)
@@ -705,3 +706,37 @@ ast_struct_specifier::ast_struct_specifier(char *identifier,
    name = identifier;
    this->declarations.push_degenerate_list_at_head(&declarator_list->link);
 }
+
+bool
+do_common_optimization(exec_list *ir, bool linked)
+{
+   GLboolean progress = GL_FALSE;
+   /* Runs each pass once; the result is true if any pass reported progress. */
+   progress = do_sub_to_add_neg(ir) || progress;
+   /* Inlining and dead-function removal run only when linked is true. */
+   if (linked) {
+      progress = do_function_inlining(ir) || progress;
+      progress = do_dead_functions(ir) || progress;
+   }
+   progress = do_structure_splitting(ir) || progress;
+   progress = do_if_simplification(ir) || progress;
+   progress = do_copy_propagation(ir) || progress;
+   if (linked)
+      progress = do_dead_code(ir) || progress;
+   else
+      progress = do_dead_code_unlinked(ir) || progress;
+   progress = do_dead_code_local(ir) || progress;
+   progress = do_tree_grafting(ir) || progress;
+   progress = do_constant_propagation(ir) || progress;
+   if (linked)
+      progress = do_constant_variable(ir) || progress;
+   else
+      progress = do_constant_variable_unlinked(ir) || progress;
+   progress = do_constant_folding(ir) || progress;
+   progress = do_algebraic(ir) || progress;
+   progress = do_if_return(ir) || progress;
+   progress = do_vec_index_to_swizzle(ir) || progress;
+   progress = do_swizzle_swizzle(ir) || progress;
+
+   return progress;
+}
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 5997a30eab..0c4e548e44 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -28,6 +28,8 @@
  * Prototypes for optimization passes to be called by the compiler and drivers.
  */
 
+bool do_common_optimization(exec_list *ir, bool linked);
+
 bool do_algebraic(exec_list *instructions);
 bool do_constant_folding(exec_list *instructions);
 bool do_constant_variable(exec_list *instructions);
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 7bff859d55..9931251f40 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1308,48 +1308,13 @@ link_shaders(struct gl_shader_program *prog)
       prog->LinkStatus = true;
    }
 
-   /* FINISHME: Perform whole-program optimization here. */
+   /* Do common optimization before assigning storage for attributes,
+    * uniforms, and varyings.  Later optimization could possibly make
+    * some of that unused.
+    */
    for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
-      /* Optimization passes */
-      bool progress;
-      exec_list *ir = prog->_LinkedShaders[i]->ir;
-
-      /* Lowering */
-      do_mat_op_to_vec(ir);
-      do_mod_to_fract(ir);
-      do_div_to_mul_rcp(ir);
-      do_explog_to_explog2(ir);
-      do_sub_to_add_neg(ir);
-
-      do {
-	 progress = false;
-
-	 progress = do_function_inlining(ir) || progress;
-	 progress = do_dead_functions(ir) || progress;
-	 progress = do_structure_splitting(ir) || progress;
-	 progress = do_if_simplification(ir) || progress;
-	 progress = do_copy_propagation(ir) || progress;
-	 progress = do_dead_code_local(ir) || progress;
-	 progress = do_dead_code(ir) || progress;
-	 progress = do_tree_grafting(ir) || progress;
-	 progress = do_constant_propagation(ir) || progress;
-	 progress = do_constant_variable(ir) || progress;
-	 progress = do_constant_folding(ir) || progress;
-	 progress = do_algebraic(ir) || progress;
-	 progress = do_if_return(ir) || progress;
-#if 0
-	 if (ctx->Shader.EmitNoIfs)
-	    progress = do_if_to_cond_assign(ir) || progress;
-#endif
-
-	 progress = do_vec_index_to_swizzle(ir) || progress;
-	 /* Do this one after the previous to let the easier pass handle
-	  * constant vector indexing.
-	  */
-	 progress = do_vec_index_to_cond_assign(ir) || progress;
-
-	 progress = do_swizzle_swizzle(ir) || progress;
-      } while (progress);
+      while (do_common_optimization(prog->_LinkedShaders[i]->ir, true))
+	 ;
    }
 
    assign_uniform_locations(prog);
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index ecb13069cb..c8c655b296 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2567,38 +2567,11 @@ _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
    if (!state->error && !shader->ir->is_empty()) {
       validate_ir_tree(shader->ir);
 
-      /* Lowering */
-      do_mat_op_to_vec(shader->ir);
-      do_mod_to_fract(shader->ir);
-      do_div_to_mul_rcp(shader->ir);
-      do_sub_to_add_neg(shader->ir);
-
-      /* Optimization passes */
-      bool progress;
-      do {
-	 progress = false;
-
-	 progress = do_if_simplification(shader->ir) || progress;
-	 progress = do_copy_propagation(shader->ir) || progress;
-	 progress = do_dead_code_local(shader->ir) || progress;
-	 progress = do_dead_code_unlinked(shader->ir) || progress;
-	 progress = do_tree_grafting(shader->ir) || progress;
-	 progress = do_constant_propagation(shader->ir) || progress;
-	 progress = do_constant_variable_unlinked(shader->ir) || progress;
-	 progress = do_constant_folding(shader->ir) || progress;
-	 progress = do_algebraic(shader->ir) || progress;
-	 progress = do_if_return(shader->ir) || progress;
-	 if (ctx->Shader.EmitNoIfs)
-	    progress = do_if_to_cond_assign(shader->ir) || progress;
-
-	 progress = do_vec_index_to_swizzle(shader->ir) || progress;
-	 /* Do this one after the previous to let the easier pass handle
-	  * constant vector indexing.
-	  */
-	 progress = do_vec_index_to_cond_assign(shader->ir) || progress;
-
-	 progress = do_swizzle_swizzle(shader->ir) || progress;
-      } while (progress);
+      /* Do some optimization at compile time to reduce shader IR size
+       * and reduce later work if the same shader is linked multiple times
+       */
+      while (do_common_optimization(shader->ir, false))
+	 ;
 
       validate_ir_tree(shader->ir);
    }
@@ -2665,6 +2638,30 @@ _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
       prog->Uniforms = _mesa_new_uniform_list();
    }
 
+   if (prog->LinkStatus) {
+      for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
+	 bool progress;
+	 exec_list *ir = prog->_LinkedShaders[i]->ir;
+
+	 do {
+	    progress = false;
+
+	    /* Lowering */
+	    do_mat_op_to_vec(ir);
+	    do_mod_to_fract(ir);
+	    do_div_to_mul_rcp(ir);
+	    do_explog_to_explog2(ir);
+
+	    progress = do_common_optimization(ir, true) || progress;
+
+	    if (ctx->Shader.EmitNoIfs)
+	       progress = do_if_to_cond_assign(ir) || progress;
+
+	    progress = do_vec_index_to_cond_assign(ir) || progress;
+	 } while (progress);
+      }
+   }
+
    if (prog->LinkStatus) {
       for (i = 0; i < prog->_NumLinkedShaders; i++) {
 	 struct gl_program *linked_prog;
-- 
cgit v1.2.3


From 8f8cdbfba43550d0b8985fb087961864e4cd92b6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 13 Aug 2010 07:16:38 -0700
Subject: glsl2: Add a pass to strip out noop swizzles.

With the glsl2-965 branch, the optimization of glsl-algebraic-rcp-rcp
regressed due to noop swizzles hiding information from ir_algebraic.
This cleans up those noop swizzles for us.
---
 src/glsl/Makefile               |  1 +
 src/glsl/glsl_parser_extras.cpp |  1 +
 src/glsl/ir_noop_swizzle.cpp    | 80 +++++++++++++++++++++++++++++++++++++++++
 src/glsl/ir_optimization.h      |  1 +
 4 files changed, 83 insertions(+)
 create mode 100644 src/glsl/ir_noop_swizzle.cpp

(limited to 'src/glsl/ir_optimization.h')

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 48b7c8f66b..110228e72a 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -55,6 +55,7 @@ CXX_SOURCES = \
 	ir_import_prototypes.cpp \
 	ir_mat_op_to_vec.cpp \
 	ir_mod_to_fract.cpp \
+	ir_noop_swizzle.cpp \
 	ir_print_visitor.cpp \
 	ir_reader.cpp \
 	ir_rvalue_visitor.cpp \
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 2ed3905abc..d1bb1ae5ec 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -737,6 +737,7 @@ do_common_optimization(exec_list *ir, bool linked)
    progress = do_if_return(ir) || progress;
    progress = do_vec_index_to_swizzle(ir) || progress;
    progress = do_swizzle_swizzle(ir) || progress;
+   progress = do_noop_swizzle(ir) || progress;
 
    return progress;
 }
diff --git a/src/glsl/ir_noop_swizzle.cpp b/src/glsl/ir_noop_swizzle.cpp
new file mode 100644
index 0000000000..b78c87b47f
--- /dev/null
+++ b/src/glsl/ir_noop_swizzle.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_noop_swizzle.cpp
+ *
+ * If a swizzle doesn't change the order or count of components, then
+ * remove the swizzle so that other optimization passes see the value
+ * behind it.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_print_visitor.h"
+#include "glsl_types.h"
+
+class ir_noop_swizzle_visitor : public ir_rvalue_visitor {
+public:
+   ir_noop_swizzle_visitor()
+   {
+      this->progress = false;
+   }
+   /* May replace *rvalue with a swizzle's operand; sets progress if it does. */
+   void handle_rvalue(ir_rvalue **rvalue);
+   bool progress;
+};
+
+void
+ir_noop_swizzle_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   if (!*rvalue)
+      return;
+   /* Only a swizzle whose type equals its operand's type can be a no-op. */
+   ir_swizzle *swiz = (*rvalue)->as_swizzle();
+   if (!swiz || swiz->type != swiz->val->type)
+      return;
+   /* Identity check: channel i must select component i (x=0 ... w=3). */
+   int elems = swiz->val->type->vector_elements;
+   if (swiz->mask.x != 0)
+      return;
+   if (elems >= 2 && swiz->mask.y != 1)
+      return;
+   if (elems >= 3 && swiz->mask.z != 2)
+      return;
+   if (elems >= 4 && swiz->mask.w != 3)
+      return;
+   /* Drop the swizzle so later passes see the value behind it. */
+   this->progress = true;
+   *rvalue = swiz->val;
+}
+
+bool
+do_noop_swizzle(exec_list *instructions)
+{
+   ir_noop_swizzle_visitor v;
+   visit_list_elements(&v, instructions);
+   /* The visitor sets progress when it replaces a swizzle with its operand. */
+   return v.progress;
+}
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 0c4e548e44..33f4bc78f7 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -48,6 +48,7 @@ bool do_if_simplification(exec_list *instructions);
 bool do_if_to_cond_assign(exec_list *instructions);
 bool do_mat_op_to_vec(exec_list *instructions);
 bool do_mod_to_fract(exec_list *instructions);
+bool do_noop_swizzle(exec_list *instructions);
 bool do_structure_splitting(exec_list *instructions);
 bool do_sub_to_add_neg(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
-- 
cgit v1.2.3