summaryrefslogtreecommitdiff
path: root/src/mesa/program/ir_to_mesa.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/program/ir_to_mesa.cpp')
-rw-r--r--src/mesa/program/ir_to_mesa.cpp476
1 files changed, 411 insertions, 65 deletions
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index bdd3fd92ff..98da90d359 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -123,6 +123,7 @@ public:
/** Pointer to the ir source this tree came from for debugging */
ir_instruction *ir;
GLboolean cond_update;
+ bool saturate;
int sampler; /**< sampler index */
int tex_target; /**< One of TEXTURE_*_INDEX */
GLboolean tex_shadow;
@@ -260,6 +261,17 @@ public:
ir_to_mesa_src_reg src1,
ir_to_mesa_src_reg src2);
+ /**
+ * Emit the correct dot-product instruction for the type of arguments
+ *
+ * \sa ir_to_mesa_emit_op2
+ */
+ void ir_to_mesa_emit_dp(ir_instruction *ir,
+ ir_to_mesa_dst_reg dst,
+ ir_to_mesa_src_reg src0,
+ ir_to_mesa_src_reg src1,
+ unsigned elements);
+
void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
enum prog_opcode op,
ir_to_mesa_dst_reg dst,
@@ -271,8 +283,17 @@ public:
ir_to_mesa_src_reg src0,
ir_to_mesa_src_reg src1);
+ void emit_scs(ir_instruction *ir, enum prog_opcode op,
+ ir_to_mesa_dst_reg dst,
+ const ir_to_mesa_src_reg &src);
+
GLboolean try_emit_mad(ir_expression *ir,
int mul_operand);
+ GLboolean try_emit_sat(ir_expression *ir);
+
+ void emit_swz(ir_expression *ir);
+
+ bool process_move_condition(ir_rvalue *ir);
void *mem_ctx;
};
@@ -393,6 +414,21 @@ ir_to_mesa_visitor::ir_to_mesa_emit_op0(ir_instruction *ir,
ir_to_mesa_undef);
}
+void
+ir_to_mesa_visitor::ir_to_mesa_emit_dp(ir_instruction *ir,
+ ir_to_mesa_dst_reg dst,
+ ir_to_mesa_src_reg src0,
+ ir_to_mesa_src_reg src1,
+ unsigned elements)
+{
+ static const gl_inst_opcode dot_opcodes[] = {
+ OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
+ };
+
+ ir_to_mesa_emit_op3(ir, dot_opcodes[elements - 2],
+ dst, src0, src1, ir_to_mesa_undef);
+}
+
inline ir_to_mesa_dst_reg
ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
{
@@ -447,6 +483,10 @@ ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
for (j = i + 1; j < 4; j++) {
+ /* If there is another enabled component in the destination that is
+ * derived from the same inputs, generate its value on this pass as
+ * well.
+ */
if (!(done_mask & (1 << j)) &&
GET_SWZ(src0.swizzle, j) == src0_swiz &&
GET_SWZ(src1.swizzle, j) == src1_swiz) {
@@ -480,6 +520,102 @@ ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
}
+/**
+ * Emit an OPCODE_SCS instruction
+ *
+ * The \c SCS opcode functions a bit differently than the other Mesa (or
+ * ARB_fragment_program) opcodes. Instead of splatting its result across all
+ * four components of the destination, it writes one value to the \c x
+ * component and another value to the \c y component.
+ *
+ * \param ir IR instruction being processed
+ * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which
+ * value is desired.
+ * \param dst Destination register
+ * \param src Source register
+ */
+void
+ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
+ ir_to_mesa_dst_reg dst,
+ const ir_to_mesa_src_reg &src)
+{
+ /* Vertex programs cannot use the SCS opcode.
+ */
+ if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
+ ir_to_mesa_emit_scalar_op1(ir, op, dst, src);
+ return;
+ }
+
+ const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
+ const unsigned scs_mask = (1U << component);
+ int done_mask = ~dst.writemask;
+ ir_to_mesa_src_reg tmp;
+
+ assert(op == OPCODE_SIN || op == OPCODE_COS);
+
+ /* If there are compnents in the destination that differ from the component
+ * that will be written by the SCS instrution, we'll need a temporary.
+ */
+ if (scs_mask != unsigned(dst.writemask)) {
+ tmp = get_temp(glsl_type::vec4_type);
+ }
+
+ for (unsigned i = 0; i < 4; i++) {
+ unsigned this_mask = (1U << i);
+ ir_to_mesa_src_reg src0 = src;
+
+ if ((done_mask & this_mask) != 0)
+ continue;
+
+ /* The source swizzle specified which component of the source generates
+ * sine / cosine for the current component in the destination. The SCS
+ * instruction requires that this value be swizzle to the X component.
+ * Replace the current swizzle with a swizzle that puts the source in
+ * the X component.
+ */
+ unsigned src0_swiz = GET_SWZ(src.swizzle, i);
+
+ src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+ src0_swiz, src0_swiz);
+ for (unsigned j = i + 1; j < 4; j++) {
+ /* If there is another enabled component in the destination that is
+ * derived from the same inputs, generate its value on this pass as
+ * well.
+ */
+ if (!(done_mask & (1 << j)) &&
+ GET_SWZ(src0.swizzle, j) == src0_swiz) {
+ this_mask |= (1 << j);
+ }
+ }
+
+ if (this_mask != scs_mask) {
+ ir_to_mesa_instruction *inst;
+ ir_to_mesa_dst_reg tmp_dst = ir_to_mesa_dst_reg_from_src(tmp);
+
+ /* Emit the SCS instruction.
+ */
+ inst = ir_to_mesa_emit_op1(ir, OPCODE_SCS, tmp_dst, src0);
+ inst->dst_reg.writemask = scs_mask;
+
+ /* Move the result of the SCS instruction to the desired location in
+ * the destination.
+ */
+ tmp.swizzle = MAKE_SWIZZLE4(component, component,
+ component, component);
+ inst = ir_to_mesa_emit_op1(ir, OPCODE_SCS, dst, tmp);
+ inst->dst_reg.writemask = this_mask;
+ } else {
+ /* Emit the SCS instruction to write directly to the destination.
+ */
+ ir_to_mesa_instruction *inst =
+ ir_to_mesa_emit_op1(ir, OPCODE_SCS, dst, src0);
+ inst->dst_reg.writemask = scs_mask;
+ }
+
+ done_mask |= this_mask;
+ }
+}
+
struct ir_to_mesa_src_reg
ir_to_mesa_visitor::src_reg_for_float(float val)
{
@@ -805,6 +941,32 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
return true;
}
+GLboolean
+ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
+{
+ /* Saturates were only introduced to vertex programs in
+ * NV_vertex_program3, so don't give them to drivers in the VP.
+ */
+ if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
+ return false;
+
+ ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
+ if (!sat_src)
+ return false;
+
+ sat_src->accept(this);
+ ir_to_mesa_src_reg src = this->result;
+
+ this->result = get_temp(ir->type);
+ ir_to_mesa_instruction *inst;
+ inst = ir_to_mesa_emit_op1(ir, OPCODE_MOV,
+ ir_to_mesa_dst_reg_from_src(this->result),
+ src);
+ inst->saturate = true;
+
+ return true;
+}
+
void
ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
ir_to_mesa_src_reg *reg, int *num_reladdr)
@@ -826,15 +988,129 @@ ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
}
void
+ir_to_mesa_visitor::emit_swz(ir_expression *ir)
+{
+ /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
+ * This means that each of the operands is either an immediate value of -1,
+ * 0, or 1, or is a component from one source register (possibly with
+ * negation).
+ */
+ uint8_t components[4] = { 0 };
+ bool negate[4] = { false };
+ ir_variable *var = NULL;
+
+ for (unsigned i = 0; i < ir->type->vector_elements; i++) {
+ ir_rvalue *op = ir->operands[i];
+
+ assert(op->type->is_scalar());
+
+ while (op != NULL) {
+ switch (op->ir_type) {
+ case ir_type_constant: {
+
+ assert(op->type->is_scalar());
+
+ const ir_constant *const c = op->as_constant();
+ if (c->is_one()) {
+ components[i] = SWIZZLE_ONE;
+ } else if (c->is_zero()) {
+ components[i] = SWIZZLE_ZERO;
+ } else if (c->is_negative_one()) {
+ components[i] = SWIZZLE_ONE;
+ negate[i] = true;
+ } else {
+ assert(!"SWZ constant must be 0.0 or 1.0.");
+ }
+
+ op = NULL;
+ break;
+ }
+
+ case ir_type_dereference_variable: {
+ ir_dereference_variable *const deref =
+ (ir_dereference_variable *) op;
+
+ assert((var == NULL) || (deref->var == var));
+ components[i] = SWIZZLE_X;
+ var = deref->var;
+ op = NULL;
+ break;
+ }
+
+ case ir_type_expression: {
+ ir_expression *const expr = (ir_expression *) op;
+
+ assert(expr->operation == ir_unop_neg);
+ negate[i] = true;
+
+ op = expr->operands[0];
+ break;
+ }
+
+ case ir_type_swizzle: {
+ ir_swizzle *const swiz = (ir_swizzle *) op;
+
+ components[i] = swiz->mask.x;
+ op = swiz->val;
+ break;
+ }
+
+ default:
+ assert(!"Should not get here.");
+ return;
+ }
+ }
+ }
+
+ assert(var != NULL);
+
+ ir_dereference_variable *const deref =
+ new(mem_ctx) ir_dereference_variable(var);
+
+ this->result.file = PROGRAM_UNDEFINED;
+ deref->accept(this);
+ if (this->result.file == PROGRAM_UNDEFINED) {
+ ir_print_visitor v;
+ printf("Failed to get tree for expression operand:\n");
+ deref->accept(&v);
+ exit(1);
+ }
+
+ ir_to_mesa_src_reg src;
+
+ src = this->result;
+ src.swizzle = MAKE_SWIZZLE4(components[0],
+ components[1],
+ components[2],
+ components[3]);
+ src.negate = ((unsigned(negate[0]) << 0)
+ | (unsigned(negate[1]) << 1)
+ | (unsigned(negate[2]) << 2)
+ | (unsigned(negate[3]) << 3));
+
+ /* Storage for our result. Ideally for an assignment we'd be using the
+ * actual storage for the result here, instead.
+ */
+ const ir_to_mesa_src_reg result_src = get_temp(ir->type);
+ ir_to_mesa_dst_reg result_dst = ir_to_mesa_dst_reg_from_src(result_src);
+
+ /* Limit writes to the channels that will be used by result_src later.
+ * This does limit this temp's use as a temporary for multi-instruction
+ * sequences.
+ */
+ result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+ ir_to_mesa_emit_op1(ir, OPCODE_SWZ, result_dst, src);
+ this->result = result_src;
+}
+
+void
ir_to_mesa_visitor::visit(ir_expression *ir)
{
unsigned int operand;
- struct ir_to_mesa_src_reg op[2];
+ struct ir_to_mesa_src_reg op[Elements(ir->operands)];
struct ir_to_mesa_src_reg result_src;
struct ir_to_mesa_dst_reg result_dst;
- const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
- const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
- const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
/* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
*/
@@ -844,6 +1120,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
if (try_emit_mad(ir, 0))
return;
}
+ if (try_emit_sat(ir))
+ return;
+
+ if (ir->operation == ir_quadop_vector) {
+ this->emit_swz(ir);
+ return;
+ }
for (operand = 0; operand < ir->get_num_operands(); operand++) {
this->result.file = PROGRAM_UNDEFINED;
@@ -917,6 +1200,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
case ir_unop_cos:
ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
break;
+ case ir_unop_sin_reduced:
+ emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
+ break;
+ case ir_unop_cos_reduced:
+ emit_scs(ir, OPCODE_COS, result_dst, op[0]);
+ break;
case ir_unop_dFdx:
ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
@@ -976,12 +1265,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
ir_to_mesa_src_reg temp = get_temp(glsl_type::vec4_type);
ir_to_mesa_emit_op2(ir, OPCODE_SNE,
ir_to_mesa_dst_reg_from_src(temp), op[0], op[1]);
- if (vector_elements == 4)
- ir_to_mesa_emit_op2(ir, OPCODE_DP4, result_dst, temp, temp);
- else if (vector_elements == 3)
- ir_to_mesa_emit_op2(ir, OPCODE_DP3, result_dst, temp, temp);
- else
- ir_to_mesa_emit_op2(ir, OPCODE_DP2, result_dst, temp, temp);
+ ir_to_mesa_emit_dp(ir, result_dst, temp, temp, vector_elements);
ir_to_mesa_emit_op2(ir, OPCODE_SEQ,
result_dst, result_src, src_reg_for_float(0.0));
} else {
@@ -995,12 +1279,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
ir_to_mesa_src_reg temp = get_temp(glsl_type::vec4_type);
ir_to_mesa_emit_op2(ir, OPCODE_SNE,
ir_to_mesa_dst_reg_from_src(temp), op[0], op[1]);
- if (vector_elements == 4)
- ir_to_mesa_emit_op2(ir, OPCODE_DP4, result_dst, temp, temp);
- else if (vector_elements == 3)
- ir_to_mesa_emit_op2(ir, OPCODE_DP3, result_dst, temp, temp);
- else
- ir_to_mesa_emit_op2(ir, OPCODE_DP2, result_dst, temp, temp);
+ ir_to_mesa_emit_dp(ir, result_dst, temp, temp, vector_elements);
ir_to_mesa_emit_op2(ir, OPCODE_SNE,
result_dst, result_src, src_reg_for_float(0.0));
} else {
@@ -1009,20 +1288,9 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
break;
case ir_unop_any:
- switch (ir->operands[0]->type->vector_elements) {
- case 4:
- ir_to_mesa_emit_op2(ir, OPCODE_DP4, result_dst, op[0], op[0]);
- break;
- case 3:
- ir_to_mesa_emit_op2(ir, OPCODE_DP3, result_dst, op[0], op[0]);
- break;
- case 2:
- ir_to_mesa_emit_op2(ir, OPCODE_DP2, result_dst, op[0], op[0]);
- break;
- default:
- assert(!"unreached: ir_unop_any of non-bvec");
- break;
- }
+ assert(ir->operands[0]->type->is_vector());
+ ir_to_mesa_emit_dp(ir, result_dst, op[0], op[0],
+ ir->operands[0]->type->vector_elements);
ir_to_mesa_emit_op2(ir, OPCODE_SNE,
result_dst, result_src, src_reg_for_float(0.0));
break;
@@ -1050,26 +1318,10 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
break;
case ir_binop_dot:
- if (ir->operands[0]->type == vec4_type) {
- assert(ir->operands[1]->type == vec4_type);
- ir_to_mesa_emit_op2(ir, OPCODE_DP4,
- result_dst,
- op[0], op[1]);
- } else if (ir->operands[0]->type == vec3_type) {
- assert(ir->operands[1]->type == vec3_type);
- ir_to_mesa_emit_op2(ir, OPCODE_DP3,
- result_dst,
- op[0], op[1]);
- } else if (ir->operands[0]->type == vec2_type) {
- assert(ir->operands[1]->type == vec2_type);
- ir_to_mesa_emit_op2(ir, OPCODE_DP2,
- result_dst,
- op[0], op[1]);
- }
- break;
-
- case ir_binop_cross:
- ir_to_mesa_emit_op2(ir, OPCODE_XPD, result_dst, op[0], op[1]);
+ assert(ir->operands[0]->type->is_vector());
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ ir_to_mesa_emit_dp(ir, result_dst, op[0], op[1],
+ ir->operands[0]->type->vector_elements);
break;
case ir_unop_sqrt:
@@ -1133,6 +1385,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
case ir_unop_round_even:
assert(!"GLSL 1.30 features unsupported");
break;
+
+ case ir_quadop_vector:
+ /* This operation should have already been handled.
+ */
+ assert(!"Should not get here.");
+ break;
}
this->result = result_src;
@@ -1340,6 +1598,93 @@ get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
return ir_to_mesa_dst_reg_from_src(v->result);
}
+/**
+ * Process the condition of a conditional assignment
+ *
+ * Examines the condition of a conditional assignment to generate the optimal
+ * first operand of a \c CMP instruction. If the condition is a relational
+ * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
+ * used as the source for the \c CMP instruction. Otherwise the comparison
+ * is processed to a boolean result, and the boolean result is used as the
+ * operand to the CMP instruction.
+ */
+bool
+ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
+{
+ ir_rvalue *src_ir = ir;
+ bool negate = true;
+ bool switch_order = false;
+
+ ir_expression *const expr = ir->as_expression();
+ if ((expr != NULL) && (expr->get_num_operands() == 2)) {
+ bool zero_on_left = false;
+
+ if (expr->operands[0]->is_zero()) {
+ src_ir = expr->operands[1];
+ zero_on_left = true;
+ } else if (expr->operands[1]->is_zero()) {
+ src_ir = expr->operands[0];
+ zero_on_left = false;
+ }
+
+ /* a is - 0 + - 0 +
+ * (a < 0) T F F ( a < 0) T F F
+ * (0 < a) F F T (-a < 0) F F T
+ * (a <= 0) T T F (-a < 0) F F T (swap order of other operands)
+ * (0 <= a) F T T ( a < 0) T F F (swap order of other operands)
+ * (a > 0) F F T (-a < 0) F F T
+ * (0 > a) T F F ( a < 0) T F F
+ * (a >= 0) F T T ( a < 0) T F F (swap order of other operands)
+ * (0 >= a) T T F (-a < 0) F F T (swap order of other operands)
+ *
+ * Note that exchanging the order of 0 and 'a' in the comparison simply
+ * means that the value of 'a' should be negated.
+ */
+ if (src_ir != ir) {
+ switch (expr->operation) {
+ case ir_binop_less:
+ switch_order = false;
+ negate = zero_on_left;
+ break;
+
+ case ir_binop_greater:
+ switch_order = false;
+ negate = !zero_on_left;
+ break;
+
+ case ir_binop_lequal:
+ switch_order = true;
+ negate = !zero_on_left;
+ break;
+
+ case ir_binop_gequal:
+ switch_order = true;
+ negate = zero_on_left;
+ break;
+
+ default:
+ /* This isn't the right kind of comparison afterall, so make sure
+ * the whole condition is visited.
+ */
+ src_ir = ir;
+ break;
+ }
+ }
+ }
+
+ src_ir->accept(this);
+
+ /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
+ * condition we produced is 0.0 or 1.0. By flipping the sign, we can
+ * choose which value OPCODE_CMP produces without an extra instruction
+ * computing the condition.
+ */
+ if (negate)
+ this->result.negate = ~this->result.negate;
+
+ return switch_order;
+}
+
void
ir_to_mesa_visitor::visit(ir_assignment *ir)
{
@@ -1399,20 +1744,18 @@ ir_to_mesa_visitor::visit(ir_assignment *ir)
assert(r.file != PROGRAM_UNDEFINED);
if (ir->condition) {
- ir_to_mesa_src_reg condition;
-
- ir->condition->accept(this);
- condition = this->result;
+ const bool switch_order = this->process_move_condition(ir->condition);
+ ir_to_mesa_src_reg condition = this->result;
- /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves,
- * and the condition we produced is 0.0 or 1.0. By flipping the
- * sign, we can choose which value OPCODE_CMP produces without
- * an extra computing the condition.
- */
- condition.negate = ~condition.negate;
for (i = 0; i < type_size(ir->lhs->type); i++) {
- ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
- condition, r, ir_to_mesa_src_reg_from_dst(l));
+ if (switch_order) {
+ ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
+ condition, ir_to_mesa_src_reg_from_dst(l), r);
+ } else {
+ ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
+ condition, r, ir_to_mesa_src_reg_from_dst(l));
+ }
+
l.index++;
r.index++;
}
@@ -2365,6 +2708,8 @@ get_mesa_program(struct gl_context *ctx, struct gl_shader_program *shader_progra
mesa_inst->Opcode = inst->op;
mesa_inst->CondUpdate = inst->cond_update;
+ if (inst->saturate)
+ mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
mesa_inst->DstReg.File = inst->dst_reg.file;
mesa_inst->DstReg.Index = inst->dst_reg.index;
mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask;
@@ -2485,14 +2830,15 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
/* Lowering */
do_mat_op_to_vec(ir);
- do_mod_to_fract(ir);
- do_div_to_mul_rcp(ir);
- do_explog_to_explog2(ir);
+ lower_instructions(ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+ | LOG_TO_LOG2);
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
+ progress = lower_quadop_vector(ir, true) || progress;
+
if (options->EmitNoIfs)
progress = do_if_to_cond_assign(ir) || progress;