summaryrefslogtreecommitdiff
path: root/src/mesa/shader/slang/slang_codegen.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/shader/slang/slang_codegen.c')
-rw-r--r--src/mesa/shader/slang/slang_codegen.c388
1 files changed, 343 insertions, 45 deletions
diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c
index 712f5975dc..5e2ce0ce3a 100644
--- a/src/mesa/shader/slang/slang_codegen.c
+++ b/src/mesa/shader/slang/slang_codegen.c
@@ -57,6 +57,21 @@
#include "slang_print.h"
+/** Max iterations to unroll */
+const GLuint MAX_FOR_LOOP_UNROLL_ITERATIONS = 32;
+
+/** Max for-loop body size (in slang operations) to unroll */
+const GLuint MAX_FOR_LOOP_UNROLL_BODY_SIZE = 50;
+
+/** Max for-loop body complexity to unroll.
+ * We'll compute complexity as the product of the number of iterations
+ * and the size of the body. So long-ish loops with very simple bodies
+ * can be unrolled, as well as short loops with larger bodies.
+ */
+const GLuint MAX_FOR_LOOP_UNROLL_COMPLEXITY = 256;
+
+
+
static slang_ir_node *
_slang_gen_operation(slang_assemble_ctx * A, slang_operation *oper);
@@ -398,7 +413,7 @@ _slang_input_index(const char *name, GLenum target, GLuint *swizzleOut)
const struct input_info *inputs
= (target == GL_VERTEX_PROGRAM_ARB) ? vertInputs : fragInputs;
- ASSERT(MAX_TEXTURE_UNITS == 8); /* if this fails, fix vertInputs above */
+ ASSERT(MAX_TEXTURE_COORD_UNITS == 8); /* if this fails, fix vertInputs above */
for (i = 0; inputs[i].Name; i++) {
if (strcmp(inputs[i].Name, name) == 0) {
@@ -476,6 +491,9 @@ static slang_asm_info AsmInfo[] = {
{ "vec4_multiply", IR_MUL, 1, 2 },
{ "vec4_dot", IR_DOT4, 1, 2 },
{ "vec3_dot", IR_DOT3, 1, 2 },
+ { "vec2_dot", IR_DOT2, 1, 2 },
+ { "vec3_nrm", IR_NRM3, 1, 1 },
+ { "vec4_nrm", IR_NRM4, 1, 1 },
{ "vec3_cross", IR_CROSS, 1, 2 },
{ "vec4_lrp", IR_LRP, 1, 3 },
{ "vec4_min", IR_MIN, 1, 2 },
@@ -498,18 +516,28 @@ static slang_asm_info AsmInfo[] = {
/* float binary op */
{ "float_power", IR_POW, 1, 2 },
/* texture / sampler */
- { "vec4_tex1d", IR_TEX, 1, 2 },
- { "vec4_texb1d", IR_TEXB, 1, 2 }, /* 1d w/ bias */
- { "vec4_texp1d", IR_TEXP, 1, 2 }, /* 1d w/ projection */
- { "vec4_tex2d", IR_TEX, 1, 2 },
- { "vec4_texb2d", IR_TEXB, 1, 2 }, /* 2d w/ bias */
- { "vec4_texp2d", IR_TEXP, 1, 2 }, /* 2d w/ projection */
- { "vec4_tex3d", IR_TEX, 1, 2 },
- { "vec4_texb3d", IR_TEXB, 1, 2 }, /* 3d w/ bias */
- { "vec4_texp3d", IR_TEXP, 1, 2 }, /* 3d w/ projection */
- { "vec4_texcube", IR_TEX, 1, 2 }, /* cubemap */
- { "vec4_tex_rect", IR_TEX, 1, 2 }, /* rectangle */
- { "vec4_texp_rect", IR_TEX, 1, 2 },/* rectangle w/ projection */
+ { "vec4_tex_1d", IR_TEX, 1, 2 },
+ { "vec4_tex_1d_bias", IR_TEXB, 1, 2 }, /* 1d w/ bias */
+ { "vec4_tex_1d_proj", IR_TEXP, 1, 2 }, /* 1d w/ projection */
+ { "vec4_tex_2d", IR_TEX, 1, 2 },
+ { "vec4_tex_2d_bias", IR_TEXB, 1, 2 }, /* 2d w/ bias */
+ { "vec4_tex_2d_proj", IR_TEXP, 1, 2 }, /* 2d w/ projection */
+ { "vec4_tex_3d", IR_TEX, 1, 2 },
+ { "vec4_tex_3d_bias", IR_TEXB, 1, 2 }, /* 3d w/ bias */
+ { "vec4_tex_3d_proj", IR_TEXP, 1, 2 }, /* 3d w/ projection */
+ { "vec4_tex_cube", IR_TEX, 1, 2 }, /* cubemap */
+ { "vec4_tex_rect", IR_TEX, 1, 2 }, /* rectangle */
+ { "vec4_tex_rect_bias", IR_TEX, 1, 2 }, /* rectangle w/ projection */
+
+ /* texture / sampler but with shadow comparison */
+ { "vec4_tex_1d_shadow", IR_TEX_SH, 1, 2 },
+ { "vec4_tex_1d_bias_shadow", IR_TEXB_SH, 1, 2 },
+ { "vec4_tex_1d_proj_shadow", IR_TEXP_SH, 1, 2 },
+ { "vec4_tex_2d_shadow", IR_TEX_SH, 1, 2 },
+ { "vec4_tex_2d_bias_shadow", IR_TEXB_SH, 1, 2 },
+ { "vec4_tex_2d_proj_shadow", IR_TEXP_SH, 1, 2 },
+ { "vec4_tex_rect_shadow", IR_TEX_SH, 1, 2 },
+ { "vec4_tex_rect_proj_shadow", IR_TEXP_SH, 1, 2 },
/* unary op */
{ "ivec4_to_vec4", IR_I_TO_F, 1, 1 }, /* int[4] to float[4] */
@@ -1574,6 +1602,7 @@ swizzle_to_writemask(slang_assemble_ctx *A, GLuint swizzle,
}
+#if 0 /* not used, but don't remove just yet */
/**
* Recursively traverse 'oper' to produce a swizzle mask in the event
* of any vector subscripts and swizzle suffixes.
@@ -1627,8 +1656,10 @@ resolve_swizzle(const slang_operation *oper)
return SWIZZLE_XYZW;
}
}
+#endif
+#if 0
/**
* Recursively descend through swizzle nodes to find the node's storage info.
*/
@@ -1640,7 +1671,7 @@ get_store(const slang_ir_node *n)
}
return n->Store;
}
-
+#endif
/**
@@ -1713,6 +1744,7 @@ _slang_gen_asm(slang_assemble_ctx *A, slang_operation *oper,
}
+#if 0
static void
print_funcs(struct slang_function_scope_ *scope, const char *name)
{
@@ -1726,6 +1758,7 @@ print_funcs(struct slang_function_scope_ *scope, const char *name)
if (scope->outer_scope)
print_funcs(scope->outer_scope, name);
}
+#endif
/**
@@ -2443,40 +2476,259 @@ _slang_gen_do(slang_assemble_ctx * A, const slang_operation *oper)
/**
- * Generate for-loop using high-level IR_LOOP instruction.
+ * Recursively count the number of operations rooted at 'oper'.
+ * This gives some kind of indication of the size/complexity of an operation.
+ */
+static GLuint
+sizeof_operation(const slang_operation *oper)
+{
+ if (oper) {
+ GLuint count = 1; /* me */
+ GLuint i;
+ for (i = 0; i < oper->num_children; i++) {
+ count += sizeof_operation(&oper->children[i]);
+ }
+ return count;
+ }
+ else {
+ return 0;
+ }
+}
+
+
+/**
+ * Determine if a for-loop can be unrolled.
+ * At this time, only a rather narrow class of for loops can be unrolled.
+ * See code for details.
+ * When a loop can't be unrolled because it's too large we'll emit a
+ * message to the log.
+ */
+static GLboolean
+_slang_can_unroll_for_loop(slang_assemble_ctx * A, const slang_operation *oper)
+{
+ GLuint bodySize;
+ GLint start, end;
+ const char *varName;
+ slang_atom varId;
+
+ assert(oper->type == SLANG_OPER_FOR);
+ assert(oper->num_children == 4);
+
+ /* children[0] must be either "int i=constant" or "i=constant" */
+ if (oper->children[0].type == SLANG_OPER_BLOCK_NO_NEW_SCOPE) {
+ slang_variable *var;
+
+ if (oper->children[0].children[0].type != SLANG_OPER_VARIABLE_DECL)
+ return GL_FALSE;
+
+ varId = oper->children[0].children[0].a_id;
+
+ var = _slang_variable_locate(oper->children[0].children[0].locals,
+ varId, GL_TRUE);
+ if (!var)
+ return GL_FALSE;
+ if (!var->initializer)
+ return GL_FALSE;
+ if (var->initializer->type != SLANG_OPER_LITERAL_INT)
+ return GL_FALSE;
+ start = (GLint) var->initializer->literal[0];
+ }
+ else if (oper->children[0].type == SLANG_OPER_EXPRESSION) {
+ if (oper->children[0].children[0].type != SLANG_OPER_ASSIGN)
+ return GL_FALSE;
+ if (oper->children[0].children[0].children[0].type != SLANG_OPER_IDENTIFIER)
+ return GL_FALSE;
+ if (oper->children[0].children[0].children[1].type != SLANG_OPER_LITERAL_INT)
+ return GL_FALSE;
+
+ varId = oper->children[0].children[0].children[0].a_id;
+
+ start = (GLint) oper->children[0].children[0].children[1].literal[0];
+ }
+ else {
+ return GL_FALSE;
+ }
+
+ /* children[1] must be "i<constant" */
+ if (oper->children[1].type != SLANG_OPER_EXPRESSION)
+ return GL_FALSE;
+ if (oper->children[1].children[0].type != SLANG_OPER_LESS)
+ return GL_FALSE;
+ if (oper->children[1].children[0].children[0].type != SLANG_OPER_IDENTIFIER)
+ return GL_FALSE;
+ if (oper->children[1].children[0].children[1].type != SLANG_OPER_LITERAL_INT)
+ return GL_FALSE;
+
+ end = (GLint) oper->children[1].children[0].children[1].literal[0];
+
+ /* children[2] must be "i++" or "++i" */
+ if (oper->children[2].type != SLANG_OPER_POSTINCREMENT &&
+ oper->children[2].type != SLANG_OPER_PREINCREMENT)
+ return GL_FALSE;
+ if (oper->children[2].children[0].type != SLANG_OPER_IDENTIFIER)
+ return GL_FALSE;
+
+ /* make sure the same variable name is used in all places */
+ if ((oper->children[1].children[0].children[0].a_id != varId) ||
+ (oper->children[2].children[0].a_id != varId))
+ return GL_FALSE;
+
+ varName = (const char *) varId;
+
+ /* children[3], the loop body, can't be too large */
+ bodySize = sizeof_operation(&oper->children[3]);
+ if (bodySize > MAX_FOR_LOOP_UNROLL_BODY_SIZE) {
+ slang_info_log_print(A->log,
+ "Note: 'for (%s ... )' body is too large/complex"
+ " to unroll",
+ varName);
+ return GL_FALSE;
+ }
+
+ if (start >= end)
+ return GL_FALSE; /* degenerate case */
+
+ if (end - start > MAX_FOR_LOOP_UNROLL_ITERATIONS) {
+ slang_info_log_print(A->log,
+ "Note: 'for (%s=%d; %s<%d; ++%s)' is too"
+ " many iterations to unroll",
+ varName, start, varName, end, varName);
+ return GL_FALSE;
+ }
+
+ if ((end - start) * bodySize > MAX_FOR_LOOP_UNROLL_COMPLEXITY) {
+ slang_info_log_print(A->log,
+ "Note: 'for (%s=%d; %s<%d; ++%s)' will generate"
+ " too much code to unroll",
+ varName, start, varName, end, varName);
+ return GL_FALSE;
+ }
+
+ return GL_TRUE; /* we can unroll the loop */
+}
+
+
+/**
+ * Unroll a for-loop.
+ * First we determine the number of iterations to unroll.
+ * Then for each iteration:
+ * make a copy of the loop body
+ * replace instances of the loop variable with the current iteration value
+ * generate IR code for the body
+ * \return pointer to generated IR code or NULL if error, out of memory, etc.
+ */
+static slang_ir_node *
+_slang_unroll_for_loop(slang_assemble_ctx * A, const slang_operation *oper)
+{
+ GLint start, end, iter;
+ slang_ir_node *n, *root = NULL;
+ slang_atom varId;
+
+ if (oper->children[0].type == SLANG_OPER_BLOCK_NO_NEW_SCOPE) {
+ /* for (int i=0; ... */
+ slang_variable *var;
+
+ varId = oper->children[0].children[0].a_id;
+ var = _slang_variable_locate(oper->children[0].children[0].locals,
+ varId, GL_TRUE);
+ start = (GLint) var->initializer->literal[0];
+ }
+ else {
+ /* for (i=0; ... */
+ varId = oper->children[0].children[0].children[0].a_id;
+ start = (GLint) oper->children[0].children[0].children[1].literal[0];
+ }
+
+ end = (GLint) oper->children[1].children[0].children[1].literal[0];
+
+ for (iter = start; iter < end; iter++) {
+ slang_operation *body;
+
+ /* make a copy of the loop body */
+ body = slang_operation_new(1);
+ if (!body)
+ return NULL;
+
+ if (!slang_operation_copy(body, &oper->children[3]))
+ return NULL;
+
+ /* in body, replace instances of 'varId' with literal 'iter' */
+ {
+ slang_variable *oldVar;
+ slang_operation *newOper;
+
+ oldVar = _slang_variable_locate(oper->locals, varId, GL_TRUE);
+ if (!oldVar) {
+ /* undeclared loop variable */
+ slang_operation_delete(body);
+ return NULL;
+ }
+
+ newOper = slang_operation_new(1);
+ newOper->type = SLANG_OPER_LITERAL_INT;
+ newOper->literal_size = 1;
+ newOper->literal[0] = iter;
+
+ /* replace instances of the loop variable with newOper */
+ slang_substitute(A, body, 1, &oldVar, &newOper, GL_FALSE);
+ }
+
+ /* do IR codegen for body */
+ n = _slang_gen_operation(A, body);
+ root = new_seq(root, n);
+
+ slang_operation_delete(body);
+ }
+
+ return root;
+}
+
+
+/**
+ * Generate IR for a for-loop. Unrolling will be done when possible.
*/
static slang_ir_node *
_slang_gen_for(slang_assemble_ctx * A, const slang_operation *oper)
{
- /*
- * init code (child[0])
- * LOOP:
- * BREAK if !expr (child[1])
- * body code (child[3])
- * tail code:
- * incr code (child[2]) // XXX continue here
- */
- slang_ir_node *prevLoop, *loop, *cond, *breakIf, *body, *init, *incr;
+ GLboolean unroll = _slang_can_unroll_for_loop(A, oper);
- init = _slang_gen_operation(A, &oper->children[0]);
- loop = new_loop(NULL);
+ if (unroll) {
+ slang_ir_node *code = _slang_unroll_for_loop(A, oper);
+ if (code)
+ return code;
+ }
- /* save old, push new loop */
- prevLoop = A->CurLoop;
- A->CurLoop = loop;
+ /* conventional for-loop code generation */
+ {
+ /*
+ * init code (child[0])
+ * LOOP:
+ * BREAK if !expr (child[1])
+ * body code (child[3])
+ * tail code:
+ * incr code (child[2]) // XXX continue here
+ */
+ slang_ir_node *prevLoop, *loop, *cond, *breakIf, *body, *init, *incr;
+ init = _slang_gen_operation(A, &oper->children[0]);
+ loop = new_loop(NULL);
- cond = new_cond(new_not(_slang_gen_operation(A, &oper->children[1])));
- breakIf = new_break_if_true(A->CurLoop, cond);
- body = _slang_gen_operation(A, &oper->children[3]);
- incr = _slang_gen_operation(A, &oper->children[2]);
+ /* save old, push new loop */
+ prevLoop = A->CurLoop;
+ A->CurLoop = loop;
- loop->Children[0] = new_seq(breakIf, body);
- loop->Children[1] = incr; /* tail code */
+ cond = new_cond(new_not(_slang_gen_operation(A, &oper->children[1])));
+ breakIf = new_break_if_true(A->CurLoop, cond);
+ body = _slang_gen_operation(A, &oper->children[3]);
+ incr = _slang_gen_operation(A, &oper->children[2]);
- /* pop loop, restore prev */
- A->CurLoop = prevLoop;
+ loop->Children[0] = new_seq(breakIf, body);
+ loop->Children[1] = incr; /* tail code */
+
+ /* pop loop, restore prev */
+ A->CurLoop = prevLoop;
- return new_seq(init, loop);
+ return new_seq(init, loop);
+ }
}
@@ -3093,6 +3345,7 @@ _slang_gen_return(slang_assemble_ctx * A, slang_operation *oper)
}
+#if 0
/**
* Determine if the given operation/expression is const-valued.
*/
@@ -3116,6 +3369,7 @@ _slang_is_constant_expr(const slang_operation *oper)
return GL_TRUE;
}
}
+#endif
/**
@@ -3418,7 +3672,7 @@ _slang_gen_assignment(slang_assemble_ctx * A, slang_operation *oper)
if (lhs && rhs) {
/* convert lhs swizzle into writemask */
const GLuint swizzle = root_swizzle(lhs->Store);
- GLuint writemask, newSwizzle;
+ GLuint writemask, newSwizzle = 0x0;
if (!swizzle_to_writemask(A, swizzle, &writemask, &newSwizzle)) {
/* Non-simple writemask, need to swizzle right hand side in
* order to put components into the right place.
@@ -3560,8 +3814,16 @@ _slang_gen_array_element(slang_assemble_ctx * A, slang_operation *oper)
index = (GLint) oper->children[1].literal[0];
if (oper->children[1].type != SLANG_OPER_LITERAL_INT ||
index >= (GLint) max) {
+#if 0
slang_info_log_error(A->log, "Invalid array index for vector type");
+ printf("type = %d\n", oper->children[1].type);
+ printf("index = %d, max = %d\n", index, max);
+ printf("array = %s\n", (char*)oper->children[0].a_id);
+ printf("index = %s\n", (char*)oper->children[1].a_id);
return NULL;
+#else
+ index = 0;
+#endif
}
n = _slang_gen_operation(A, &oper->children[0]);
@@ -3987,6 +4249,21 @@ _slang_gen_operation(slang_assemble_ctx * A, slang_operation *oper)
/**
+ * Check if the given type specifier is a rectangular texture sampler.
+ */
+static GLboolean
+is_rect_sampler_spec(const slang_type_specifier *spec)
+{
+ while (spec->_array) {
+ spec = spec->_array;
+ }
+ return spec->type == SLANG_SPEC_SAMPLER2DRECT ||
+ spec->type == SLANG_SPEC_SAMPLER2DRECTSHADOW;
+}
+
+
+
+/**
* Called by compiler when a global variable has been parsed/compiled.
* Here we examine the variable's type to determine what kind of register
* storage will be used.
@@ -4009,10 +4286,14 @@ _slang_codegen_global_variable(slang_assemble_ctx *A, slang_variable *var,
slang_ir_storage *store = NULL;
int dbg = 0;
const GLenum datatype = _slang_gltype_from_specifier(&var->type.specifier);
- const GLint texIndex = sampler_to_texture_index(var->type.specifier.type);
const GLint size = _slang_sizeof_type_specifier(&var->type.specifier);
const GLint arrayLen = _slang_array_length(var);
const GLint totalSize = _slang_array_size(size, arrayLen);
+ GLint texIndex = sampler_to_texture_index(var->type.specifier.type);
+
+ /* check for sampler2D arrays */
+ if (texIndex == -1 && var->type.specifier._array)
+ texIndex = sampler_to_texture_index(var->type.specifier._array->type);
if (texIndex != -1) {
/* This is a texture sampler variable...
@@ -4026,15 +4307,32 @@ _slang_codegen_global_variable(slang_assemble_ctx *A, slang_variable *var,
}
#if FEATURE_es2_glsl /* XXX should use FEATURE_texture_rect */
/* disallow rect samplers */
- if (var->type.specifier.type == SLANG_SPEC_SAMPLER2DRECT ||
- var->type.specifier.type == SLANG_SPEC_SAMPLER2DRECTSHADOW) {
+ if (is_rect_sampler_spec(&var->type.specifier)) {
slang_info_log_error(A->log, "invalid sampler type for '%s'", varName);
return GL_FALSE;
}
+#else
+ (void) is_rect_sampler_spec; /* silence warning */
#endif
{
GLint sampNum = _mesa_add_sampler(prog->Parameters, varName, datatype);
- store = _slang_new_ir_storage(PROGRAM_SAMPLER, sampNum, texIndex);
+ store = _slang_new_ir_storage_sampler(sampNum, texIndex, totalSize);
+
+ /* If we have a sampler array, then we need to allocate the
+ * additional samplers to ensure we don't allocate them elsewhere.
+ * We can't directly use _mesa_add_sampler() as that checks the
+ * varName and gets a match, so we call _mesa_add_parameter()
+ * directly and use the last sampler number from the call above.
+ */
+ if (arrayLen > 0) {
+ GLint a = arrayLen - 1;
+ GLint i;
+ for (i = 0; i < a; i++) {
+ GLfloat value = (GLfloat)(i + sampNum + 1);
+ (void) _mesa_add_parameter(prog->Parameters, PROGRAM_SAMPLER,
+ varName, 1, datatype, &value, NULL, 0x0);
+ }
+ }
}
if (dbg) printf("SAMPLER ");
}
@@ -4218,7 +4516,7 @@ _slang_codegen_global_variable(slang_assemble_ctx *A, slang_variable *var,
n = _slang_gen_var_decl(A, var, var->initializer);
/* emit GPU instructions */
- success = _slang_emit_code(n, A->vartable, A->program, GL_FALSE, A->log);
+ success = _slang_emit_code(n, A->vartable, A->program, A->pragmas, GL_FALSE, A->log);
_slang_free_ir_tree(n);
}
@@ -4328,7 +4626,7 @@ _slang_codegen_function(slang_assemble_ctx * A, slang_function * fun)
#endif
/* Emit program instructions */
- success = _slang_emit_code(n, A->vartable, A->program, GL_TRUE, A->log);
+ success = _slang_emit_code(n, A->vartable, A->program, A->pragmas, GL_TRUE, A->log);
_slang_free_ir_tree(n);
/* free codegen context */