summaryrefslogtreecommitdiff
path: root/src/mesa/program/ir_to_mesa.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/program/ir_to_mesa.cpp')
-rw-r--r--src/mesa/program/ir_to_mesa.cpp384
1 files changed, 334 insertions, 50 deletions
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index b274a961b2..9578f42f14 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -105,13 +105,13 @@ extern ir_to_mesa_src_reg ir_to_mesa_undef;
class ir_to_mesa_instruction : public exec_node {
public:
- /* Callers of this talloc-based new need not call delete. It's
- * easier to just talloc_free 'ctx' (or any of its ancestors). */
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'ctx' (or any of its ancestors). */
static void* operator new(size_t size, void *ctx)
{
void *node;
- node = talloc_zero_size(ctx, size);
+ node = rzalloc_size(ctx, size);
assert(node != NULL);
return node;
@@ -295,6 +295,8 @@ public:
bool process_move_condition(ir_rvalue *ir);
+ void copy_propagate(void);
+
void *mem_ctx;
};
@@ -316,7 +318,7 @@ fail_link(struct gl_shader_program *prog, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
- prog->InfoLog = talloc_vasprintf_append(prog->InfoLog, fmt, args);
+ ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
va_end(args);
prog->LinkStatus = GL_FALSE;
@@ -649,6 +651,7 @@ type_size(const struct glsl_type *type)
return 1;
}
case GLSL_TYPE_ARRAY:
+ assert(type->length > 0);
return type_size(type->fields.array) * type->length;
case GLSL_TYPE_STRUCT:
size = 0;
@@ -724,6 +727,29 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
fp->OriginUpperLeft = ir->origin_upper_left;
fp->PixelCenterInteger = ir->pixel_center_integer;
+
+ } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
+ struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+ switch (ir->depth_layout) {
+ case ir_depth_layout_none:
+ fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
+ break;
+ case ir_depth_layout_any:
+ fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
+ break;
+ case ir_depth_layout_greater:
+ fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
+ break;
+ case ir_depth_layout_less:
+ fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
+ break;
+ case ir_depth_layout_unchanged:
+ fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
+ break;
+ default:
+ assert(0);
+ break;
+ }
}
if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
@@ -1449,16 +1475,16 @@ void
ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
{
variable_storage *entry = find_variable_storage(ir->var);
+ ir_variable *var = ir->var;
if (!entry) {
- switch (ir->var->mode) {
+ switch (var->mode) {
case ir_var_uniform:
- entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_UNIFORM,
- ir->var->location);
+ entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
+ var->location);
this->variables.push_tail(entry);
break;
case ir_var_in:
- case ir_var_out:
case ir_var_inout:
/* The linker assigns locations for varyings and attributes,
* including deprecated builtins (like gl_Color), user-assign
@@ -1467,45 +1493,47 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
*
* FINISHME: We would hit this path for function arguments. Fix!
*/
- assert(ir->var->location != -1);
- if (ir->var->mode == ir_var_in ||
- ir->var->mode == ir_var_inout) {
- entry = new(mem_ctx) variable_storage(ir->var,
- PROGRAM_INPUT,
- ir->var->location);
-
- if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
- ir->var->location >= VERT_ATTRIB_GENERIC0) {
- _mesa_add_attribute(prog->Attributes,
- ir->var->name,
- _mesa_sizeof_glsl_type(ir->var->type->gl_type),
- ir->var->type->gl_type,
- ir->var->location - VERT_ATTRIB_GENERIC0);
- }
- } else {
- entry = new(mem_ctx) variable_storage(ir->var,
- PROGRAM_OUTPUT,
- ir->var->location);
- }
-
+ assert(var->location != -1);
+ entry = new(mem_ctx) variable_storage(var,
+ PROGRAM_INPUT,
+ var->location);
+ if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
+ var->location >= VERT_ATTRIB_GENERIC0) {
+ _mesa_add_attribute(this->prog->Attributes,
+ var->name,
+ _mesa_sizeof_glsl_type(var->type->gl_type),
+ var->type->gl_type,
+ var->location - VERT_ATTRIB_GENERIC0);
+ }
+ break;
+ case ir_var_out:
+ assert(var->location != -1);
+ entry = new(mem_ctx) variable_storage(var,
+ PROGRAM_OUTPUT,
+ var->location);
break;
+ case ir_var_system_value:
+ entry = new(mem_ctx) variable_storage(var,
+ PROGRAM_SYSTEM_VALUE,
+ var->location);
+ break;
case ir_var_auto:
case ir_var_temporary:
- entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_TEMPORARY,
+ entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
this->next_temp);
this->variables.push_tail(entry);
- next_temp += type_size(ir->var->type);
+ next_temp += type_size(var->type);
break;
}
if (!entry) {
- printf("Failed to make storage for %s\n", ir->var->name);
+ printf("Failed to make storage for %s\n", var->name);
exit(1);
}
}
- this->result = ir_to_mesa_src_reg(entry->file, entry->index, ir->var->type);
+ this->result = ir_to_mesa_src_reg(entry->file, entry->index, var->type);
}
void
@@ -1542,7 +1570,7 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir)
this->result, src_reg_for_float(element_size));
}
- src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
+ src_reg.reladdr = ralloc(mem_ctx, ir_to_mesa_src_reg);
memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
}
@@ -1569,7 +1597,13 @@ ir_to_mesa_visitor::visit(ir_dereference_record *ir)
break;
offset += type_size(struct_type->fields.structure[i].type);
}
- this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+
+ /* If the type is smaller than a vec4, replicate the last channel out. */
+ if (ir->type->is_scalar() || ir->type->is_vector())
+ this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+ else
+ this->result.swizzle = SWIZZLE_NOOP;
+
this->result.index += offset;
}
@@ -1893,7 +1927,7 @@ ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig)
return entry;
}
- entry = talloc(mem_ctx, function_entry);
+ entry = ralloc(mem_ctx, function_entry);
entry->sig = sig;
entry->sig_id = this->next_signature_id++;
entry->bgn_inst = NULL;
@@ -2230,12 +2264,12 @@ ir_to_mesa_visitor::ir_to_mesa_visitor()
next_temp = 1;
next_signature_id = 1;
current_function = NULL;
- mem_ctx = talloc_new(NULL);
+ mem_ctx = ralloc_context(NULL);
}
ir_to_mesa_visitor::~ir_to_mesa_visitor()
{
- talloc_free(mem_ctx);
+ ralloc_free(mem_ctx);
}
static struct prog_src_register
@@ -2284,8 +2318,8 @@ set_branchtargets(ir_to_mesa_visitor *v,
}
}
- if_stack = talloc_zero_array(v->mem_ctx, int, if_count);
- loop_stack = talloc_zero_array(v->mem_ctx, int, loop_count);
+ if_stack = rzalloc_array(v->mem_ctx, int, if_count);
+ loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
for (i = 0; i < num_instructions; i++) {
switch (mesa_instructions[i].Opcode) {
@@ -2372,6 +2406,11 @@ print_program(struct prog_instruction *mesa_instructions,
}
}
+
+/**
+ * Count resources used by the given gpu program (number of texture
+ * samplers, etc).
+ */
static void
count_resources(struct gl_program *prog)
{
@@ -2395,6 +2434,55 @@ count_resources(struct gl_program *prog)
_mesa_update_shader_textures_used(prog);
}
+
+/**
+ * Check if the given vertex/fragment/shader program is within the
+ * resource limits of the context (number of texture units, etc).
+ * If any of those checks fail, record a linker error.
+ *
+ * XXX more checks are needed...
+ */
+static void
+check_resources(const struct gl_context *ctx,
+ struct gl_shader_program *shader_program,
+ struct gl_program *prog)
+{
+ switch (prog->Target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ if (_mesa_bitcount(prog->SamplersUsed) >
+ ctx->Const.MaxVertexTextureImageUnits) {
+ fail_link(shader_program, "Too many vertex shader texture samplers");
+ }
+ if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
+ fail_link(shader_program, "Too many vertex shader constants");
+ }
+ break;
+ case MESA_GEOMETRY_PROGRAM:
+ if (_mesa_bitcount(prog->SamplersUsed) >
+ ctx->Const.MaxGeometryTextureImageUnits) {
+ fail_link(shader_program, "Too many geometry shader texture samplers");
+ }
+ if (prog->Parameters->NumParameters >
+ MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
+ fail_link(shader_program, "Too many geometry shader constants");
+ }
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ if (_mesa_bitcount(prog->SamplersUsed) >
+ ctx->Const.MaxTextureImageUnits) {
+ fail_link(shader_program, "Too many fragment shader texture samplers");
+ }
+ if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
+ fail_link(shader_program, "Too many fragment shader constants");
+ }
+ break;
+ default:
+ _mesa_problem(ctx, "unexpected program type in check_resources()");
+ }
+}
+
+
+
struct uniform_sort {
struct gl_uniform *u;
int pos;
@@ -2428,7 +2516,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
unsigned int next_sampler = 0, num_uniforms = 0;
struct uniform_sort *sorted_uniforms;
- sorted_uniforms = talloc_array(NULL, struct uniform_sort,
+ sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
shader_program->Uniforms->NumUniforms);
for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
@@ -2507,7 +2595,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
}
}
- talloc_free(sorted_uniforms);
+ ralloc_free(sorted_uniforms);
}
static void
@@ -2523,7 +2611,7 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
for (unsigned int i = 0; i < type->length; i++) {
const glsl_type *field_type = type->fields.structure[i].type;
- const char *field_name = talloc_asprintf(mem_ctx, "%s.%s", name,
+ const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
type->fields.structure[i].name);
set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
field_type, field_constant);
@@ -2554,7 +2642,7 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
void *values;
if (element_type->base_type == GLSL_TYPE_BOOL) {
- int *conv = talloc_array(mem_ctx, int, element_type->components());
+ int *conv = ralloc_array(mem_ctx, int, element_type->components());
for (unsigned int j = 0; j < element_type->components(); j++) {
conv[j] = element->value.b[j];
}
@@ -2600,14 +2688,206 @@ set_uniform_initializers(struct gl_context *ctx,
continue;
if (!mem_ctx)
- mem_ctx = talloc_new(NULL);
+ mem_ctx = ralloc_context(NULL);
set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
var->type, var->constant_value);
}
}
- talloc_free(mem_ctx);
+ ralloc_free(mem_ctx);
+}
+
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY register
+ * channels for copy propagation and updates following instructions to
+ * use the original versions.
+ *
+ * The ir_to_mesa_visitor lazily produces code assuming that this pass
+ * will occur. As an example, a TXP production before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
+ *
+ * and after:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * which allows for dead code elimination on TEMP[1]'s writes.
+ */
+void
+ir_to_mesa_visitor::copy_propagate(void)
+{
+ ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
+ ir_to_mesa_instruction *,
+ this->next_temp * 4);
+ int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+ int level = 0;
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
+
+ assert(inst->dst_reg.file != PROGRAM_TEMPORARY
+ || inst->dst_reg.index < this->next_temp);
+
+ /* First, do any copy propagation possible into the src regs. */
+ for (int r = 0; r < 3; r++) {
+ ir_to_mesa_instruction *first = NULL;
+ bool good = true;
+ int acp_base = inst->src_reg[r].index * 4;
+
+ if (inst->src_reg[r].file != PROGRAM_TEMPORARY ||
+ inst->src_reg[r].reladdr)
+ continue;
+
+ /* See if we can find entries in the ACP consisting of MOVs
+ * from the same src register for all the swizzled channels
+ * of this src register reference.
+ */
+ for (int i = 0; i < 4; i++) {
+ int src_chan = GET_SWZ(inst->src_reg[r].swizzle, i);
+ ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];
+
+ if (!copy_chan) {
+ good = false;
+ break;
+ }
+
+ assert(acp_level[acp_base + src_chan] <= level);
+
+ if (!first) {
+ first = copy_chan;
+ } else {
+ if (first->src_reg[0].file != copy_chan->src_reg[0].file ||
+ first->src_reg[0].index != copy_chan->src_reg[0].index) {
+ good = false;
+ break;
+ }
+ }
+ }
+
+ if (good) {
+ /* We've now validated that we can copy-propagate to
+ * replace this src register reference. Do it.
+ */
+ inst->src_reg[r].file = first->src_reg[0].file;
+ inst->src_reg[r].index = first->src_reg[0].index;
+
+ int swizzle = 0;
+ for (int i = 0; i < 4; i++) {
+ int src_chan = GET_SWZ(inst->src_reg[r].swizzle, i);
+ ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
+ swizzle |= (GET_SWZ(copy_inst->src_reg[0].swizzle, src_chan) <<
+ (3 * i));
+ }
+ inst->src_reg[r].swizzle = swizzle;
+ }
+ }
+
+ switch (inst->op) {
+ case OPCODE_BGNLOOP:
+ case OPCODE_ENDLOOP:
+ /* End of a basic block, clear the ACP entirely. */
+ memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+ break;
+
+ case OPCODE_IF:
+ ++level;
+ break;
+
+ case OPCODE_ENDIF:
+ case OPCODE_ELSE:
+ /* Clear all channels written inside the block from the ACP, but
+ * leaving those that were not touched.
+ */
+ for (int r = 0; r < this->next_temp; r++) {
+ for (int c = 0; c < 4; c++) {
+ if (!acp[4 * r + c])
+ continue;
+
+ if (acp_level[4 * r + c] >= level)
+ acp[4 * r + c] = NULL;
+ }
+ }
+ if (inst->op == OPCODE_ENDIF)
+ --level;
+ break;
+
+ default:
+ /* Continuing the block, clear any written channels from
+ * the ACP.
+ */
+ if (inst->dst_reg.file == PROGRAM_TEMPORARY && inst->dst_reg.reladdr) {
+ /* Any temporary might be written, so no copy propagation
+ * across this instruction.
+ */
+ memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+ } else if (inst->dst_reg.file == PROGRAM_OUTPUT &&
+ inst->dst_reg.reladdr) {
+ /* Any output might be written, so no copy propagation
+ * from outputs across this instruction.
+ */
+ for (int r = 0; r < this->next_temp; r++) {
+ for (int c = 0; c < 4; c++) {
+ if (!acp[4 * r + c])
+ continue;
+
+ if (acp[4 * r + c]->src_reg[0].file == PROGRAM_OUTPUT)
+ acp[4 * r + c] = NULL;
+ }
+ }
+ } else if (inst->dst_reg.file == PROGRAM_TEMPORARY ||
+ inst->dst_reg.file == PROGRAM_OUTPUT) {
+ /* Clear where it's used as dst. */
+ if (inst->dst_reg.file == PROGRAM_TEMPORARY) {
+ for (int c = 0; c < 4; c++) {
+ if (inst->dst_reg.writemask & (1 << c)) {
+ acp[4 * inst->dst_reg.index + c] = NULL;
+ }
+ }
+ }
+
+ /* Clear where it's used as src. */
+ for (int r = 0; r < this->next_temp; r++) {
+ for (int c = 0; c < 4; c++) {
+ if (!acp[4 * r + c])
+ continue;
+
+ int src_chan = GET_SWZ(acp[4 * r + c]->src_reg[0].swizzle, c);
+
+ if (acp[4 * r + c]->src_reg[0].file == inst->dst_reg.file &&
+ acp[4 * r + c]->src_reg[0].index == inst->dst_reg.index &&
+ inst->dst_reg.writemask & (1 << src_chan))
+ {
+ acp[4 * r + c] = NULL;
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ /* If this is a copy, add it to the ACP. */
+ if (inst->op == OPCODE_MOV &&
+ inst->dst_reg.file == PROGRAM_TEMPORARY &&
+ !inst->dst_reg.reladdr &&
+ !inst->saturate &&
+ !inst->src_reg[0].reladdr &&
+ !inst->src_reg[0].negate) {
+ for (int i = 0; i < 4; i++) {
+ if (inst->dst_reg.writemask & (1 << i)) {
+ acp[4 * inst->dst_reg.index + i] = inst;
+ acp_level[4 * inst->dst_reg.index + i] = level;
+ }
+ }
+ }
+ }
+
+ ralloc_free(acp_level);
+ ralloc_free(acp);
}
@@ -2706,9 +2986,11 @@ get_mesa_program(struct gl_context *ctx,
mesa_instructions =
(struct prog_instruction *)calloc(num_instructions,
sizeof(*mesa_instructions));
- mesa_instruction_annotation = talloc_array(v.mem_ctx, ir_instruction *,
+ mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
num_instructions);
+ v.copy_propagate();
+
/* Convert ir_mesa_instructions into prog_instructions.
*/
mesa_inst = mesa_instructions;
@@ -2798,6 +3080,8 @@ get_mesa_program(struct gl_context *ctx,
do_set_program_inouts(shader->ir, prog);
count_resources(prog);
+ check_resources(ctx, shader_program, prog);
+
_mesa_reference_program(ctx, &shader->Program, prog);
if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
@@ -2861,7 +3145,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
if (options->EmitNoIfs) {
progress = lower_discard(ir) || progress;
- progress = do_if_to_cond_assign(ir) || progress;
+ progress = lower_if_to_cond_assign(ir) || progress;
}
if (options->EmitNoNoise)
@@ -2961,7 +3245,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
_mesa_glsl_lexer_dtor(state);
}
- talloc_free(shader->ir);
+ ralloc_free(shader->ir);
shader->ir = new(shader) exec_list;
if (!state->error && !state->translation_unit.is_empty())
_mesa_ast_to_hir(shader->ir, state);
@@ -3008,7 +3292,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
/* Retain any live IR, but trash the rest. */
reparent_ir(shader->ir, shader->ir);
- talloc_free(state);
+ ralloc_free(state);
if (shader->CompileStatus) {
if (!ctx->Driver.CompileShader(ctx, shader))