summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/glsl/Makefile1
-rw-r--r--src/glsl/ir.cpp25
-rw-r--r--src/glsl/ir.h14
-rw-r--r--src/glsl/ir_clone.cpp3
-rw-r--r--src/glsl/ir_constant_expression.cpp18
-rw-r--r--src/glsl/ir_optimization.h1
-rw-r--r--src/glsl/ir_validate.cpp45
-rw-r--r--src/glsl/lower_vector.cpp224
-rw-r--r--src/glsl/opt_algebraic.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp1
-rw-r--r--src/mesa/program/ir_to_mesa.cpp132
11 files changed, 460 insertions, 6 deletions
diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 62984f81c0..ea4d6be5a4 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -64,6 +64,7 @@ CXX_SOURCES = \
lower_variable_index_to_cond_assign.cpp \
lower_vec_index_to_cond_assign.cpp \
lower_vec_index_to_swizzle.cpp \
+ lower_vector.cpp \
opt_algebraic.cpp \
opt_constant_folding.cpp \
opt_constant_propagation.cpp \
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 1f5e2ebdcb..741e3cb177 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -200,18 +200,35 @@ ir_expression::ir_expression(int op, const struct glsl_type *type,
this->operation = ir_expression_operation(op);
this->operands[0] = op0;
this->operands[1] = NULL;
+ this->operands[2] = NULL;
+ this->operands[3] = NULL;
}
ir_expression::ir_expression(int op, const struct glsl_type *type,
ir_rvalue *op0, ir_rvalue *op1)
{
- assert((op1 == NULL) && (get_num_operands(ir_expression_operation(op)) == 1)
+ assert(((op1 == NULL) && (get_num_operands(ir_expression_operation(op)) == 1))
|| (get_num_operands(ir_expression_operation(op)) == 2));
this->ir_type = ir_type_expression;
this->type = type;
this->operation = ir_expression_operation(op);
this->operands[0] = op0;
this->operands[1] = op1;
+ this->operands[2] = NULL;
+ this->operands[3] = NULL;
+}
+
+/**
+ * Build an expression from an operation, a result type, and four operand
+ * slots.  Every slot is stored exactly as passed.
+ */
+ir_expression::ir_expression(int op, const struct glsl_type *type,
+                             ir_rvalue *op0, ir_rvalue *op1,
+                             ir_rvalue *op2, ir_rvalue *op3)
+{
+   ir_rvalue *const slots[4] = { op0, op1, op2, op3 };
+
+   this->ir_type = ir_type_expression;
+   this->operation = ir_expression_operation(op);
+   this->type = type;
+
+   for (unsigned i = 0; i < 4; i++)
+      this->operands[i] = slots[i];
}
unsigned int
@@ -225,6 +242,9 @@ ir_expression::get_num_operands(ir_expression_operation op)
if (op <= ir_last_binop)
return 2;
+ if (op == ir_quadop_vector)
+ return 4;
+
assert(false);
return 0;
}
@@ -287,12 +307,13 @@ static const char *const operator_strs[] = {
"min",
"max",
"pow",
+ "vector",
};
const char *ir_expression::operator_string(ir_expression_operation op)
{
assert((unsigned int) op < Elements(operator_strs));
- assert(Elements(operator_strs) == (ir_binop_pow + 1));
+ assert(Elements(operator_strs) == (ir_quadop_vector + 1));
return operator_strs[op];
}
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 99fdaa3b09..be0da07b3b 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -33,6 +33,7 @@ extern "C" {
#include <talloc.h>
}
+#include "glsl_types.h"
#include "list.h"
#include "ir_visitor.h"
#include "ir_hierarchical_visitor.h"
@@ -824,6 +825,8 @@ enum ir_expression_operation {
*/
ir_last_binop = ir_binop_pow,
+ ir_quadop_vector,
+
/**
* A sentinel marking the last of all operations.
*/
@@ -843,6 +846,12 @@ public:
ir_expression(int op, const struct glsl_type *type,
ir_rvalue *, ir_rvalue *);
+ /**
+ * Constructor for quad operator expressions
+ */
+ ir_expression(int op, const struct glsl_type *type,
+ ir_rvalue *, ir_rvalue *, ir_rvalue *, ir_rvalue *);
+
virtual ir_expression *as_expression()
{
return this;
@@ -868,7 +877,8 @@ public:
*/
unsigned int get_num_operands() const
{
- return get_num_operands(operation);
+ return (this->operation == ir_quadop_vector)
+ ? this->type->vector_elements : get_num_operands(operation);
}
/**
@@ -895,7 +905,7 @@ public:
virtual ir_visitor_status accept(ir_hierarchical_visitor *);
ir_expression_operation operation;
- ir_rvalue *operands[2];
+ ir_rvalue *operands[4];
};
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index 4032647c3a..325f606615 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -168,7 +168,8 @@ ir_expression::clone(void *mem_ctx, struct hash_table *ht) const
op[i] = this->operands[i]->clone(mem_ctx, ht);
}
- return new(mem_ctx) ir_expression(this->operation, this->type, op[0], op[1]);
+ return new(mem_ctx) ir_expression(this->operation, this->type,
+ op[0], op[1], op[2], op[3]);
}
ir_dereference_variable *
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index 1fe1505047..4fd6d09a3a 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -788,6 +788,24 @@ ir_expression::constant_expression_value()
}
break;
+ case ir_quadop_vector:
+ for (unsigned c = 0; c < this->type->vector_elements; c++) {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_INT:
+ data.i[c] = op[c]->value.i[0];
+ break;
+ case GLSL_TYPE_UINT:
+ data.u[c] = op[c]->value.u[0];
+ break;
+ case GLSL_TYPE_FLOAT:
+ data.f[c] = op[c]->value.f[0];
+ break;
+ default:
+ assert(0);
+ }
+ }
+ break;
+
default:
/* FINISHME: Should handle all expression types. */
return NULL;
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index ffdc66b9f7..1f8da16bcb 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -59,4 +59,5 @@ bool do_vec_index_to_swizzle(exec_list *instructions);
bool lower_noise(exec_list *instructions);
bool lower_variable_index_to_cond_assign(exec_list *instructions,
bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
+bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
bool optimize_redundant_jumps(exec_list *instructions);
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 2a066c1a27..5b055f64d3 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -374,6 +374,51 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[0]->type->is_vector());
assert(ir->operands[0]->type == ir->operands[1]->type);
break;
+
+ case ir_quadop_vector:
+ /* The vector operator collects some number of scalars and generates a
+ * vector from them.
+ *
+ * - All of the operands must be scalar.
+ * - Number of operands must match the size of the resulting vector.
+ * - Base type of the operands must match the base type of the result.
+ */
+ assert(ir->type->is_vector());
+ switch (ir->type->vector_elements) {
+ case 2:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2] == NULL);
+ assert(ir->operands[3] == NULL);
+ break;
+ case 3:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2]->type->is_scalar());
+ assert(ir->operands[2]->type->base_type == ir->type->base_type);
+ assert(ir->operands[3] == NULL);
+ break;
+ case 4:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2]->type->is_scalar());
+ assert(ir->operands[2]->type->base_type == ir->type->base_type);
+ assert(ir->operands[3]->type->is_scalar());
+ assert(ir->operands[3]->type->base_type == ir->type->base_type);
+ break;
+ default:
+ /* The is_vector assertion above should prevent execution from ever
+ * getting here.
+ */
+ assert(!"Should not get here.");
+ break;
+ }
}
return visit_continue;
diff --git a/src/glsl/lower_vector.cpp b/src/glsl/lower_vector.cpp
new file mode 100644
index 0000000000..ae50120100
--- /dev/null
+++ b/src/glsl/lower_vector.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_vector.cpp
+ * IR lowering pass to remove some types of ir_quadop_vector
+ *
+ * \author Ian Romanick <ian.d.romanick@intel.com>
+ */
+
+#include "ir.h"
+#include "ir_rvalue_visitor.h"
+
+/**
+ * Rvalue visitor that replaces ir_quadop_vector expressions with a
+ * dereference of a temporary filled in by ordinary assignments.
+ */
+class lower_vector_visitor : public ir_rvalue_visitor {
+public:
+   lower_vector_visitor() : dont_lower_swz(false), progress(false)
+   {
+      /* empty */
+   }
+
+   void handle_rvalue(ir_rvalue **rvalue);
+
+   /**
+    * Should SWZ-like expressions be left alone?
+    *
+    * When true, vector expressions accepted by is_extended_swizzle() are
+    * not lowered.  Defaults to false, so that a visitor whose flag is never
+    * set explicitly does not read an indeterminate value.
+    */
+   bool dont_lower_swz;
+
+   /**
+    * Set to true once at least one expression has been lowered.
+    */
+   bool progress;
+};
+
+/**
+ * Check whether a vector expression has the shape of an extended swizzle
+ *
+ * Every operand must reduce, through any number of swizzles and negations,
+ * to either one of the constants -1, 0, or 1, or to a dereference of a
+ * variable.  All variable dereferences must name the same variable.
+ *
+ * \param ir  Expression to inspect; must be an ir_quadop_vector.
+ * \return true if every operand fits the pattern, false otherwise.
+ */
+bool
+is_extended_swizzle(ir_expression *ir)
+{
+   assert(ir->operation == ir_quadop_vector);
+
+   /* The single variable that the non-constant operands are allowed to read.
+    */
+   ir_variable *source = NULL;
+
+   const unsigned num_components = ir->type->vector_elements;
+   for (unsigned comp = 0; comp < num_components; comp++) {
+      /* Peel wrappers off the operand until a leaf is reached. */
+      ir_rvalue *node = ir->operands[comp];
+
+      while (node != NULL) {
+         if (node->ir_type == ir_type_constant) {
+            const ir_constant *const value = node->as_constant();
+            const bool allowed = value->is_one()
+               || value->is_zero()
+               || value->is_negative_one();
+
+            if (!allowed)
+               return false;
+
+            node = NULL;
+         } else if (node->ir_type == ir_type_dereference_variable) {
+            ir_dereference_variable *const deref =
+               (ir_dereference_variable *) node;
+
+            /* A second, different variable disqualifies the expression. */
+            if ((source != NULL) && (source != deref->var))
+               return false;
+
+            source = deref->var;
+            node = NULL;
+         } else if (node->ir_type == ir_type_expression) {
+            ir_expression *const inner = (ir_expression *) node;
+
+            /* Negation is the only operator allowed on the way down. */
+            if (inner->operation != ir_unop_neg)
+               return false;
+
+            node = inner->operands[0];
+         } else if (node->ir_type == ir_type_swizzle) {
+            node = ((ir_swizzle *) node)->val;
+         } else {
+            return false;
+         }
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Lower a single ir_quadop_vector expression
+ *
+ * A temporary of the expression's type is declared and filled in by
+ * assignments, all inserted before base_ir.  Constant operands are gathered
+ * into one write-masked assignment; every other operand gets its own
+ * single-component assignment.  The rvalue is then replaced by a dereference
+ * of the temporary.
+ *
+ * Rvalues that are NULL, that are not ir_quadop_vector expressions, or that
+ * look like extended swizzles while dont_lower_swz is set are left untouched.
+ *
+ * \param rvalue  Location of the rvalue to examine and possibly replace.
+ */
+void
+lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   if (!*rvalue)
+      return;
+
+   ir_expression *expr = (*rvalue)->as_expression();
+   if ((expr == NULL) || (expr->operation != ir_quadop_vector))
+      return;
+
+   if (this->dont_lower_swz && is_extended_swizzle(expr))
+      return;
+
+   /* FINISHME: Is this the right thing to use for the talloc context?
+    */
+   void *const mem_ctx = expr;
+
+   assert(expr->type->vector_elements == expr->get_num_operands());
+
+   /* Generate a temporary with the same type as the ir_quadop_vector
+    * expression.
+    */
+   ir_variable *const temp =
+      new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary);
+
+   this->base_ir->insert_before(temp);
+
+   /* Counter of the number of components collected so far.
+    */
+   unsigned assigned;
+
+   /* Write-mask of the destination components that receive the values
+    * counted by 'assigned'.
+    */
+   unsigned write_mask;
+
+
+   /* Generate up to four assignments to that variable.  Try to group
+    * component assignments together:
+    *
+    * - All constant components can be assigned at once.
+    * - All assignments of components from a single variable with the same
+    *   unary operator can be assigned at once.
+    */
+   ir_constant_data d = { { 0 } };
+
+   assigned = 0;
+   write_mask = 0;
+   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
+      const ir_constant *const c = expr->operands[i]->as_constant();
+
+      if (c == NULL)
+         continue;
+
+      /* Constants are packed contiguously into 'd'; the write-mask records
+       * which destination component each packed value belongs to.
+       */
+      switch (expr->type->base_type) {
+      case GLSL_TYPE_UINT:  d.u[assigned] = c->value.u[0]; break;
+      case GLSL_TYPE_INT:   d.i[assigned] = c->value.i[0]; break;
+      case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break;
+      case GLSL_TYPE_BOOL:  d.b[assigned] = c->value.b[0]; break;
+      default:              assert(!"Should not get here."); break;
+      }
+
+      write_mask |= (1U << i);
+      assigned++;
+   }
+
+   assert((write_mask == 0) == (assigned == 0));
+
+   /* If there were constant values, generate an assignment.
+    */
+   if (assigned > 0) {
+      /* NOTE(review): the constant's type is requested with one column on
+       * the understanding that glsl_type::get_instance treats vectors as
+       * N-by-1 and rejects a column count of 0 -- confirm against
+       * glsl_types.h.
+       */
+      ir_constant *const c =
+         new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type,
+                                                          assigned, 1),
+                                  &d);
+      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
+      ir_assignment *const assign =
+         new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask);
+
+      this->base_ir->insert_before(assign);
+   }
+
+   /* FINISHME: This should try to coalesce assignments.
+    */
+   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
+      if (expr->operands[i]->ir_type == ir_type_constant)
+         continue;
+
+      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
+      ir_assignment *const assign =
+         new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i));
+
+      this->base_ir->insert_before(assign);
+      assigned++;
+   }
+
+   /* Constant and non-constant operands together must cover every component.
+    */
+   assert(assigned == expr->type->vector_elements);
+
+   *rvalue = new(mem_ctx) ir_dereference_variable(temp);
+   this->progress = true;
+}
+
+/**
+ * Run the vector lowering visitor over an instruction list
+ *
+ * \param instructions    IR to process.
+ * \param dont_lower_swz  Stored in the visitor; when true, expressions that
+ *                        look like extended swizzles are not lowered.
+ * \return the visitor's progress flag after the walk.
+ */
+bool
+lower_quadop_vector(exec_list *instructions, bool dont_lower_swz)
+{
+   lower_vector_visitor lowering;
+   lowering.dont_lower_swz = dont_lower_swz;
+
+   visit_list_elements(&lowering, instructions);
+
+   const bool changed = lowering.progress;
+   return changed;
+}
diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
index 9a8080bff3..3c9af85f31 100644
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -394,7 +394,7 @@ ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
return;
ir_expression *expr = (*rvalue)->as_expression();
- if (!expr)
+ if (!expr || expr->operation == ir_quadop_vector)
return;
*rvalue = handle_expression(expr);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 105327c7fe..e1cb94452d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -130,6 +130,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
GL_TRUE, /* temp */
GL_TRUE /* uniform */
) || progress;
+ progress = lower_quadop_vector(shader->ir, false) || progress;
} while (progress);
validate_ir_tree(shader->ir);
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 1b5337e92e..1cb8183042 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -289,6 +289,8 @@ public:
GLboolean try_emit_mad(ir_expression *ir,
int mul_operand);
+ void emit_swz(ir_expression *ir);
+
bool process_move_condition(ir_rvalue *ir);
void *mem_ctx;
@@ -958,6 +960,123 @@ ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
}
+/**
+ * Emit one OPCODE_SWZ instruction for an ir_quadop_vector expression
+ *
+ * Each operand is walked down to either a constant or a variable
+ * dereference, recording the selected source component and whether it is
+ * negated.  The resulting swizzle and negate mask are applied to the one
+ * variable the operands reference, and the value is written to a fresh
+ * temporary that becomes this->result.
+ *
+ * \param ir  Vector expression.  It is assumed (and asserted) to already be
+ *            in a form that OPCODE_SWZ can express.
+ */
void
+ir_to_mesa_visitor::emit_swz(ir_expression *ir)
+{
+   /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
+    * This means that each of the operands is either an immediate value of -1,
+    * 0, or 1, or is a component from one source register (possibly with
+    * negation).
+    */
+   uint8_t components[4] = { 0 };
+   bool negate[4] = { false };
+   ir_variable *var = NULL;
+
+   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
+      ir_rvalue *op = ir->operands[i];
+
+      assert(op->type->is_scalar());
+
+      /* A bare variable dereference reads the first component.  Seeding the
+       * selection here, rather than in the dereference case below, keeps a
+       * component chosen by an enclosing swizzle from being overwritten when
+       * the walk reaches the variable.
+       */
+      components[i] = SWIZZLE_X;
+
+      while (op != NULL) {
+         switch (op->ir_type) {
+         case ir_type_constant: {
+
+            assert(op->type->is_scalar());
+
+            const ir_constant *const c = op->as_constant();
+            if (c->is_one()) {
+               components[i] = SWIZZLE_ONE;
+            } else if (c->is_zero()) {
+               components[i] = SWIZZLE_ZERO;
+            } else if (c->is_negative_one()) {
+               /* -1 is a negated 1.  Toggle rather than set so that an
+                * enclosing negation cancels out.
+                */
+               components[i] = SWIZZLE_ONE;
+               negate[i] = !negate[i];
+            } else {
+               assert(!"SWZ constant must be -1.0, 0.0, or 1.0.");
+            }
+
+            op = NULL;
+            break;
+         }
+
+         case ir_type_dereference_variable: {
+            ir_dereference_variable *const deref =
+               (ir_dereference_variable *) op;
+
+            /* Every operand has to read the same variable. */
+            assert((var == NULL) || (deref->var == var));
+            var = deref->var;
+            op = NULL;
+            break;
+         }
+
+         case ir_type_expression: {
+            ir_expression *const expr = (ir_expression *) op;
+
+            /* Each negation flips the sign, so two of them cancel. */
+            assert(expr->operation == ir_unop_neg);
+            negate[i] = !negate[i];
+
+            op = expr->operands[0];
+            break;
+         }
+
+         case ir_type_swizzle: {
+            ir_swizzle *const swiz = (ir_swizzle *) op;
+
+            /* NOTE(review): nested swizzles are not composed; the innermost
+             * one's first component wins.  Presumably earlier passes
+             * collapse swizzle-of-swizzle -- confirm.
+             */
+            components[i] = swiz->mask.x;
+            op = swiz->val;
+            break;
+         }
+
+         default:
+            assert(!"Should not get here.");
+            return;
+         }
+      }
+   }
+
+   assert(var != NULL);
+
+   /* Visit a dereference of the source variable to obtain its register. */
+   ir_dereference_variable *const deref =
+      new(mem_ctx) ir_dereference_variable(var);
+
+   this->result.file = PROGRAM_UNDEFINED;
+   deref->accept(this);
+   if (this->result.file == PROGRAM_UNDEFINED) {
+      ir_print_visitor v;
+      printf("Failed to get tree for expression operand:\n");
+      deref->accept(&v);
+      exit(1);
+   }
+
+   ir_to_mesa_src_reg src;
+
+   src = this->result;
+   src.swizzle = MAKE_SWIZZLE4(components[0],
+                               components[1],
+                               components[2],
+                               components[3]);
+   src.negate = ((unsigned(negate[0]) << 0)
+                 | (unsigned(negate[1]) << 1)
+                 | (unsigned(negate[2]) << 2)
+                 | (unsigned(negate[3]) << 3));
+
+   /* Storage for our result.  Ideally for an assignment we'd be using the
+    * actual storage for the result here, instead.
+    */
+   const ir_to_mesa_src_reg result_src = get_temp(ir->type);
+   ir_to_mesa_dst_reg result_dst = ir_to_mesa_dst_reg_from_src(result_src);
+
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   ir_to_mesa_emit_op1(ir, OPCODE_SWZ, result_dst, src);
+   this->result = result_src;
+}
+
+void
ir_to_mesa_visitor::visit(ir_expression *ir)
{
unsigned int operand;
@@ -974,6 +1093,11 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
return;
}
+ if (ir->operation == ir_quadop_vector) {
+ this->emit_swz(ir);
+ return;
+ }
+
for (operand = 0; operand < ir->get_num_operands(); operand++) {
this->result.file = PROGRAM_UNDEFINED;
ir->operands[operand]->accept(this);
@@ -1231,6 +1355,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
case ir_unop_round_even:
assert(!"GLSL 1.30 features unsupported");
break;
+
+ case ir_quadop_vector:
+ /* This operation should have already been handled.
+ */
+ assert(!"Should not get here.");
+ break;
}
this->result = result_src;
@@ -2676,6 +2806,8 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
+ progress = lower_quadop_vector(ir, true) || progress;
+
if (options->EmitNoIfs)
progress = do_if_to_cond_assign(ir) || progress;