diff options
Diffstat (limited to 'src/mesa/shader/slang/slang_codegen.c')
-rw-r--r-- | src/mesa/shader/slang/slang_codegen.c | 388 |
1 files changed, 343 insertions, 45 deletions
diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index 712f5975dc..5e2ce0ce3a 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -57,6 +57,21 @@ #include "slang_print.h" +/** Max iterations to unroll */ +const GLuint MAX_FOR_LOOP_UNROLL_ITERATIONS = 32; + +/** Max for-loop body size (in slang operations) to unroll */ +const GLuint MAX_FOR_LOOP_UNROLL_BODY_SIZE = 50; + +/** Max for-loop body complexity to unroll. + * We'll compute complexity as the product of the number of iterations + * and the size of the body. So long-ish loops with very simple bodies + * can be unrolled, as well as short loops with larger bodies. + */ +const GLuint MAX_FOR_LOOP_UNROLL_COMPLEXITY = 256; + + + static slang_ir_node * _slang_gen_operation(slang_assemble_ctx * A, slang_operation *oper); @@ -398,7 +413,7 @@ _slang_input_index(const char *name, GLenum target, GLuint *swizzleOut) const struct input_info *inputs = (target == GL_VERTEX_PROGRAM_ARB) ? 
vertInputs : fragInputs; - ASSERT(MAX_TEXTURE_UNITS == 8); /* if this fails, fix vertInputs above */ + ASSERT(MAX_TEXTURE_COORD_UNITS == 8); /* if this fails, fix vertInputs above */ for (i = 0; inputs[i].Name; i++) { if (strcmp(inputs[i].Name, name) == 0) { @@ -476,6 +491,9 @@ static slang_asm_info AsmInfo[] = { { "vec4_multiply", IR_MUL, 1, 2 }, { "vec4_dot", IR_DOT4, 1, 2 }, { "vec3_dot", IR_DOT3, 1, 2 }, + { "vec2_dot", IR_DOT2, 1, 2 }, + { "vec3_nrm", IR_NRM3, 1, 1 }, + { "vec4_nrm", IR_NRM4, 1, 1 }, { "vec3_cross", IR_CROSS, 1, 2 }, { "vec4_lrp", IR_LRP, 1, 3 }, { "vec4_min", IR_MIN, 1, 2 }, @@ -498,18 +516,28 @@ static slang_asm_info AsmInfo[] = { /* float binary op */ { "float_power", IR_POW, 1, 2 }, /* texture / sampler */ - { "vec4_tex1d", IR_TEX, 1, 2 }, - { "vec4_texb1d", IR_TEXB, 1, 2 }, /* 1d w/ bias */ - { "vec4_texp1d", IR_TEXP, 1, 2 }, /* 1d w/ projection */ - { "vec4_tex2d", IR_TEX, 1, 2 }, - { "vec4_texb2d", IR_TEXB, 1, 2 }, /* 2d w/ bias */ - { "vec4_texp2d", IR_TEXP, 1, 2 }, /* 2d w/ projection */ - { "vec4_tex3d", IR_TEX, 1, 2 }, - { "vec4_texb3d", IR_TEXB, 1, 2 }, /* 3d w/ bias */ - { "vec4_texp3d", IR_TEXP, 1, 2 }, /* 3d w/ projection */ - { "vec4_texcube", IR_TEX, 1, 2 }, /* cubemap */ - { "vec4_tex_rect", IR_TEX, 1, 2 }, /* rectangle */ - { "vec4_texp_rect", IR_TEX, 1, 2 },/* rectangle w/ projection */ + { "vec4_tex_1d", IR_TEX, 1, 2 }, + { "vec4_tex_1d_bias", IR_TEXB, 1, 2 }, /* 1d w/ bias */ + { "vec4_tex_1d_proj", IR_TEXP, 1, 2 }, /* 1d w/ projection */ + { "vec4_tex_2d", IR_TEX, 1, 2 }, + { "vec4_tex_2d_bias", IR_TEXB, 1, 2 }, /* 2d w/ bias */ + { "vec4_tex_2d_proj", IR_TEXP, 1, 2 }, /* 2d w/ projection */ + { "vec4_tex_3d", IR_TEX, 1, 2 }, + { "vec4_tex_3d_bias", IR_TEXB, 1, 2 }, /* 3d w/ bias */ + { "vec4_tex_3d_proj", IR_TEXP, 1, 2 }, /* 3d w/ projection */ + { "vec4_tex_cube", IR_TEX, 1, 2 }, /* cubemap */ + { "vec4_tex_rect", IR_TEX, 1, 2 }, /* rectangle */ + { "vec4_tex_rect_bias", IR_TEX, 1, 2 }, /* rectangle w/ projection */ + 
+ /* texture / sampler but with shadow comparison */ + { "vec4_tex_1d_shadow", IR_TEX_SH, 1, 2 }, + { "vec4_tex_1d_bias_shadow", IR_TEXB_SH, 1, 2 }, + { "vec4_tex_1d_proj_shadow", IR_TEXP_SH, 1, 2 }, + { "vec4_tex_2d_shadow", IR_TEX_SH, 1, 2 }, + { "vec4_tex_2d_bias_shadow", IR_TEXB_SH, 1, 2 }, + { "vec4_tex_2d_proj_shadow", IR_TEXP_SH, 1, 2 }, + { "vec4_tex_rect_shadow", IR_TEX_SH, 1, 2 }, + { "vec4_tex_rect_proj_shadow", IR_TEXP_SH, 1, 2 }, /* unary op */ { "ivec4_to_vec4", IR_I_TO_F, 1, 1 }, /* int[4] to float[4] */ @@ -1574,6 +1602,7 @@ swizzle_to_writemask(slang_assemble_ctx *A, GLuint swizzle, } +#if 0 /* not used, but don't remove just yet */ /** * Recursively traverse 'oper' to produce a swizzle mask in the event * of any vector subscripts and swizzle suffixes. @@ -1627,8 +1656,10 @@ resolve_swizzle(const slang_operation *oper) return SWIZZLE_XYZW; } } +#endif +#if 0 /** * Recursively descend through swizzle nodes to find the node's storage info. */ @@ -1640,7 +1671,7 @@ get_store(const slang_ir_node *n) } return n->Store; } - +#endif /** @@ -1713,6 +1744,7 @@ _slang_gen_asm(slang_assemble_ctx *A, slang_operation *oper, } +#if 0 static void print_funcs(struct slang_function_scope_ *scope, const char *name) { @@ -1726,6 +1758,7 @@ print_funcs(struct slang_function_scope_ *scope, const char *name) if (scope->outer_scope) print_funcs(scope->outer_scope, name); } +#endif /** @@ -2443,40 +2476,259 @@ _slang_gen_do(slang_assemble_ctx * A, const slang_operation *oper) /** - * Generate for-loop using high-level IR_LOOP instruction. + * Recursively count the number of operations rooted at 'oper'. + * This gives some kind of indication of the size/complexity of an operation. 
+ */ +static GLuint +sizeof_operation(const slang_operation *oper) +{ + if (oper) { + GLuint count = 1; /* count this node itself */ + GLuint i; + for (i = 0; i < oper->num_children; i++) { + count += sizeof_operation(&oper->children[i]); + } + return count; + } + else { + return 0; /* a NULL operation contributes nothing */ + } +} + + +/** + * Determine if a for-loop can be unrolled. + * At this time, only a rather narrow class of for loops can be unrolled: + * - the init part is "int i=constant" or "i=constant" + * - the condition is "i<constant" + * - the increment is "i++" or "++i" + * - the same variable is used in all three places + * - both constants are integer literals and start < end + * - the body size, the iteration count and their product all stay within + * the MAX_FOR_LOOP_UNROLL_* limits + * When a loop can't be unrolled because it's too large we'll emit a + * message to the log. + * \param A the assemble context; notes are printed to A->log + * \param oper the for-loop operation (SLANG_OPER_FOR with 4 children) + * \return GL_TRUE if the loop can be unrolled, GL_FALSE otherwise + */ +static GLboolean +_slang_can_unroll_for_loop(slang_assemble_ctx * A, const slang_operation *oper) +{ + GLuint bodySize; + GLint start, end; + const char *varName; + slang_atom varId; + + assert(oper->type == SLANG_OPER_FOR); + assert(oper->num_children == 4); + + /* children[0] must be either "int i=constant" or "i=constant" */ + if (oper->children[0].type == SLANG_OPER_BLOCK_NO_NEW_SCOPE) { + slang_variable *var; + + if (oper->children[0].children[0].type != SLANG_OPER_VARIABLE_DECL) + return GL_FALSE; + + varId = oper->children[0].children[0].a_id; + + var = _slang_variable_locate(oper->children[0].children[0].locals, + varId, GL_TRUE); + if (!var) + return GL_FALSE; + if (!var->initializer) + return GL_FALSE; + if (var->initializer->type != SLANG_OPER_LITERAL_INT) + return GL_FALSE; + start = (GLint) var->initializer->literal[0]; + } + else if (oper->children[0].type == SLANG_OPER_EXPRESSION) { + if (oper->children[0].children[0].type != SLANG_OPER_ASSIGN) + return GL_FALSE; + if (oper->children[0].children[0].children[0].type != SLANG_OPER_IDENTIFIER) + return GL_FALSE; + if (oper->children[0].children[0].children[1].type != SLANG_OPER_LITERAL_INT) + return GL_FALSE; + + varId = oper->children[0].children[0].children[0].a_id; + + start = (GLint) oper->children[0].children[0].children[1].literal[0]; + } + else { + return GL_FALSE; + } + + /* children[1] must be "i<constant" */ + if (oper->children[1].type != SLANG_OPER_EXPRESSION) + return 
GL_FALSE; + if (oper->children[1].children[0].type != SLANG_OPER_LESS) + return GL_FALSE; + if (oper->children[1].children[0].children[0].type != SLANG_OPER_IDENTIFIER) + return GL_FALSE; + if (oper->children[1].children[0].children[1].type != SLANG_OPER_LITERAL_INT) + return GL_FALSE; + + end = (GLint) oper->children[1].children[0].children[1].literal[0]; + + /* children[2] must be "i++" or "++i" */ + if (oper->children[2].type != SLANG_OPER_POSTINCREMENT && + oper->children[2].type != SLANG_OPER_PREINCREMENT) + return GL_FALSE; + if (oper->children[2].children[0].type != SLANG_OPER_IDENTIFIER) + return GL_FALSE; + + /* make sure the same variable name is used in all places */ + if ((oper->children[1].children[0].children[0].a_id != varId) || + (oper->children[2].children[0].a_id != varId)) + return GL_FALSE; + + varName = (const char *) varId; /* only used in the log messages below */ + + /* children[3], the loop body, can't be too large */ + bodySize = sizeof_operation(&oper->children[3]); + if (bodySize > MAX_FOR_LOOP_UNROLL_BODY_SIZE) { + slang_info_log_print(A->log, + "Note: 'for (%s ... )' body is too large/complex" + " to unroll", + varName); + return GL_FALSE; + } + + if (start >= end) + return GL_FALSE; /* degenerate case: the body would never run */ + + if (end - start > MAX_FOR_LOOP_UNROLL_ITERATIONS) { + slang_info_log_print(A->log, + "Note: 'for (%s=%d; %s<%d; ++%s)' is too" + " many iterations to unroll", + varName, start, varName, end, varName); + return GL_FALSE; + } + + if ((end - start) * bodySize > MAX_FOR_LOOP_UNROLL_COMPLEXITY) { /* complexity = iterations * body size */ + slang_info_log_print(A->log, + "Note: 'for (%s=%d; %s<%d; ++%s)' will generate" + " too much code to unroll", + varName, start, varName, end, varName); + return GL_FALSE; + } + + return GL_TRUE; /* we can unroll the loop */ +} + + +/** + * Unroll a for-loop. + * First we determine the number of iterations to unroll. 
+ * Then for each iteration:
+ *    make a copy of the loop body
+ *    replace instances of the loop variable with the current iteration value
+ *    generate IR code for the body
+ * The shape of the loop (literal bounds, "i<constant" condition) is not
+ * re-validated here; the caller checks it with _slang_can_unroll_for_loop()
+ * before calling this function.
+ * \param A  the assemble context
+ * \param oper  the for-loop operation to unroll
+ * \return pointer to generated IR code or NULL if error, out of memory, etc.
+ */
+static slang_ir_node *
+_slang_unroll_for_loop(slang_assemble_ctx * A, const slang_operation *oper)
+{
+   GLint start, end, iter;
+   slang_ir_node *n, *root = NULL;
+   slang_variable *oldVar;
+   slang_atom varId;
+
+   if (oper->children[0].type == SLANG_OPER_BLOCK_NO_NEW_SCOPE) {
+      /* for (int i=0; ... */
+      slang_variable *var;
+
+      varId = oper->children[0].children[0].a_id;
+      var = _slang_variable_locate(oper->children[0].children[0].locals,
+                                   varId, GL_TRUE);
+      /* don't dereference a failed lookup or a missing initializer */
+      if (!var || !var->initializer)
+         return NULL;
+      start = (GLint) var->initializer->literal[0];
+   }
+   else {
+      /* for (i=0; ... */
+      varId = oper->children[0].children[0].children[0].a_id;
+      start = (GLint) oper->children[0].children[0].children[1].literal[0];
+   }
+
+   end = (GLint) oper->children[1].children[0].children[1].literal[0];
+
+   /* The loop variable is the same for every iteration: look it up once,
+    * before anything has been allocated.
+    */
+   oldVar = _slang_variable_locate(oper->locals, varId, GL_TRUE);
+   if (!oldVar) {
+      /* undeclared loop variable */
+      return NULL;
+   }
+
+   /* NOTE(review): on the failure paths inside the loop, IR that was already
+    * generated for earlier iterations ('root') is not released, same as
+    * before this change.  Confirm whether it should be freed here.
+    */
+   for (iter = start; iter < end; iter++) {
+      slang_operation *body, *newOper;
+
+      /* make a copy of the loop body */
+      body = slang_operation_new(1);
+      if (!body)
+         return NULL;
+
+      if (!slang_operation_copy(body, &oper->children[3])) {
+         /* don't leak the (empty) copy when copying fails */
+         slang_operation_delete(body);
+         return NULL;
+      }
+
+      /* in body, replace instances of 'varId' with literal 'iter' */
+      newOper = slang_operation_new(1);
+      if (!newOper) {
+         slang_operation_delete(body);
+         return NULL;
+      }
+      newOper->type = SLANG_OPER_LITERAL_INT;
+      newOper->literal_size = 1;
+      newOper->literal[0] = iter;
+
+      /* replace instances of the loop variable with newOper */
+      slang_substitute(A, body, 1, &oldVar, &newOper, GL_FALSE);
+
+      /* do IR codegen for body */
+      n = _slang_gen_operation(A, body);
+      root = new_seq(root, n);
+
+      /* Both temporaries are only needed for this iteration.
+       * Assumes slang_substitute() copies newOper into the body rather
+       * than keeping the pointer -- TODO confirm.
+       */
+      slang_operation_delete(newOper);
+      slang_operation_delete(body);
+   }
+
+   return root;
+}
+
+
+/**
+ * Generate IR for a for-loop.
Unrolling will be done when possible. + * If the loop can't be unrolled, or unrolling returns NULL, conventional + * loop code is generated instead. + * \param A the assemble context + * \param oper the for-loop operation; children[0], [1], [2] and [3] are the + * init, condition, increment and body parts + * \return IR for the unrolled loop, or the init code followed by the loop */ static slang_ir_node * _slang_gen_for(slang_assemble_ctx * A, const slang_operation *oper) { - /* - * init code (child[0]) - * LOOP: - * BREAK if !expr (child[1]) - * body code (child[3]) - * tail code: - * incr code (child[2]) // XXX continue here - */ - slang_ir_node *prevLoop, *loop, *cond, *breakIf, *body, *init, *incr; + GLboolean unroll = _slang_can_unroll_for_loop(A, oper); - init = _slang_gen_operation(A, &oper->children[0]); - loop = new_loop(NULL); + if (unroll) { /* try unrolling first; a NULL result falls through to the code below */ + slang_ir_node *code = _slang_unroll_for_loop(A, oper); + if (code) + return code; + } - /* save old, push new loop */ - prevLoop = A->CurLoop; - A->CurLoop = loop; + /* conventional for-loop code generation */ + { + /* + * init code (child[0]) + * LOOP: + * BREAK if !expr (child[1]) + * body code (child[3]) + * tail code: + * incr code (child[2]) // XXX continue here + */ + slang_ir_node *prevLoop, *loop, *cond, *breakIf, *body, *init, *incr; + init = _slang_gen_operation(A, &oper->children[0]); + loop = new_loop(NULL); - cond = new_cond(new_not(_slang_gen_operation(A, &oper->children[1]))); - breakIf = new_break_if_true(A->CurLoop, cond); - body = _slang_gen_operation(A, &oper->children[3]); - incr = _slang_gen_operation(A, &oper->children[2]); + /* save old, push new loop */ + prevLoop = A->CurLoop; + A->CurLoop = loop; - loop->Children[0] = new_seq(breakIf, body); - loop->Children[1] = incr; /* tail code */ + cond = new_cond(new_not(_slang_gen_operation(A, &oper->children[1]))); + breakIf = new_break_if_true(A->CurLoop, cond); + body = _slang_gen_operation(A, &oper->children[3]); + incr = _slang_gen_operation(A, &oper->children[2]); - /* pop loop, restore prev */ - A->CurLoop = prevLoop; + loop->Children[0] = new_seq(breakIf, body); + loop->Children[1] = incr; /* tail code */ + + /* pop loop, restore prev */ + A->CurLoop = prevLoop; - return new_seq(init, loop); + return new_seq(init, loop); + } } @@ -3093,6 +3345,7 @@ 
_slang_gen_return(slang_assemble_ctx * A, slang_operation *oper) } +#if 0 /** * Determine if the given operation/expression is const-valued. */ @@ -3116,6 +3369,7 @@ _slang_is_constant_expr(const slang_operation *oper) return GL_TRUE; } } +#endif /** @@ -3418,7 +3672,7 @@ _slang_gen_assignment(slang_assemble_ctx * A, slang_operation *oper) if (lhs && rhs) { /* convert lhs swizzle into writemask */ const GLuint swizzle = root_swizzle(lhs->Store); - GLuint writemask, newSwizzle; + GLuint writemask, newSwizzle = 0x0; if (!swizzle_to_writemask(A, swizzle, &writemask, &newSwizzle)) { /* Non-simple writemask, need to swizzle right hand side in * order to put components into the right place. @@ -3560,8 +3814,16 @@ _slang_gen_array_element(slang_assemble_ctx * A, slang_operation *oper) index = (GLint) oper->children[1].literal[0]; if (oper->children[1].type != SLANG_OPER_LITERAL_INT || index >= (GLint) max) { +#if 0 slang_info_log_error(A->log, "Invalid array index for vector type"); + printf("type = %d\n", oper->children[1].type); + printf("index = %d, max = %d\n", index, max); + printf("array = %s\n", (char*)oper->children[0].a_id); + printf("index = %s\n", (char*)oper->children[1].a_id); return NULL; +#else + index = 0; /* NOTE(review): a non-literal or out-of-range index silently becomes 0 here instead of being reported */ +#endif } n = _slang_gen_operation(A, &oper->children[0]); @@ -3987,6 +4249,21 @@ _slang_gen_operation(slang_assemble_ctx * A, slang_operation *oper) /** + * Check if the given type specifier is a rectangular texture sampler. + * For array specifiers the _array chain is followed to its last specifier + * and that specifier's type is the one tested. + * \param spec the type specifier to examine + * \return non-zero if the type is SLANG_SPEC_SAMPLER2DRECT or + * SLANG_SPEC_SAMPLER2DRECTSHADOW, zero otherwise + */ +static GLboolean +is_rect_sampler_spec(const slang_type_specifier *spec) +{ + while (spec->_array) { + spec = spec->_array; + } + return spec->type == SLANG_SPEC_SAMPLER2DRECT || + spec->type == SLANG_SPEC_SAMPLER2DRECTSHADOW; +} + + + +/** * Called by compiler when a global variable has been parsed/compiled. * Here we examine the variable's type to determine what kind of register * storage will be used. 
@@ -4009,10 +4286,14 @@ _slang_codegen_global_variable(slang_assemble_ctx *A, slang_variable *var, slang_ir_storage *store = NULL; int dbg = 0; const GLenum datatype = _slang_gltype_from_specifier(&var->type.specifier); - const GLint texIndex = sampler_to_texture_index(var->type.specifier.type); const GLint size = _slang_sizeof_type_specifier(&var->type.specifier); const GLint arrayLen = _slang_array_length(var); const GLint totalSize = _slang_array_size(size, arrayLen); + GLint texIndex = sampler_to_texture_index(var->type.specifier.type); + + /* check for sampler2D arrays */ + if (texIndex == -1 && var->type.specifier._array) + texIndex = sampler_to_texture_index(var->type.specifier._array->type); if (texIndex != -1) { /* This is a texture sampler variable... @@ -4026,15 +4307,32 @@ _slang_codegen_global_variable(slang_assemble_ctx *A, slang_variable *var, } #if FEATURE_es2_glsl /* XXX should use FEATURE_texture_rect */ /* disallow rect samplers */ - if (var->type.specifier.type == SLANG_SPEC_SAMPLER2DRECT || - var->type.specifier.type == SLANG_SPEC_SAMPLER2DRECTSHADOW) { + if (is_rect_sampler_spec(&var->type.specifier)) { slang_info_log_error(A->log, "invalid sampler type for '%s'", varName); return GL_FALSE; } +#else + (void) is_rect_sampler_spec; /* silence warning */ #endif { GLint sampNum = _mesa_add_sampler(prog->Parameters, varName, datatype); - store = _slang_new_ir_storage(PROGRAM_SAMPLER, sampNum, texIndex); + store = _slang_new_ir_storage_sampler(sampNum, texIndex, totalSize); + + /* If we have a sampler array, then we need to allocate the + * additional samplers to ensure we don't allocate them elsewhere. + * We can't directly use _mesa_add_sampler() as that checks the + * varName and gets a match, so we call _mesa_add_parameter() + * directly and use the last sampler number from the call above. 
+ */ + if (arrayLen > 0) { + GLint a = arrayLen - 1; + GLint i; + for (i = 0; i < a; i++) { + GLfloat value = (GLfloat)(i + sampNum + 1); + (void) _mesa_add_parameter(prog->Parameters, PROGRAM_SAMPLER, + varName, 1, datatype, &value, NULL, 0x0); + } + } } if (dbg) printf("SAMPLER "); } @@ -4218,7 +4516,7 @@ _slang_codegen_global_variable(slang_assemble_ctx *A, slang_variable *var, n = _slang_gen_var_decl(A, var, var->initializer); /* emit GPU instructions */ - success = _slang_emit_code(n, A->vartable, A->program, GL_FALSE, A->log); + success = _slang_emit_code(n, A->vartable, A->program, A->pragmas, GL_FALSE, A->log); _slang_free_ir_tree(n); } @@ -4328,7 +4626,7 @@ _slang_codegen_function(slang_assemble_ctx * A, slang_function * fun) #endif /* Emit program instructions */ - success = _slang_emit_code(n, A->vartable, A->program, GL_TRUE, A->log); + success = _slang_emit_code(n, A->vartable, A->program, A->pragmas, GL_TRUE, A->log); _slang_free_ir_tree(n); /* free codegen context */ |