summaryrefslogtreecommitdiff
path: root/src/mesa/shader/slang/slang_codegen.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/shader/slang/slang_codegen.c')
-rw-r--r--src/mesa/shader/slang/slang_codegen.c449
1 files changed, 291 insertions, 158 deletions
diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c
index d19d5a0abb..14fa38c3cd 100644
--- a/src/mesa/shader/slang/slang_codegen.c
+++ b/src/mesa/shader/slang/slang_codegen.c
@@ -707,6 +707,64 @@ _slang_find_node_type(slang_operation *oper, slang_operation_type type)
}
+/**
+ * Count the number of operations of the given time rooted at 'oper'.
+ */
+static GLuint
+_slang_count_node_type(slang_operation *oper, slang_operation_type type)
+{
+ GLuint i, count = 0;
+ if (oper->type == type) {
+ return 1;
+ }
+ for (i = 0; i < oper->num_children; i++) {
+ count += _slang_count_node_type(&oper->children[i], type);
+ }
+ return count;
+}
+
+
+/**
+ * Check if the 'return' statement found under 'oper' is a "tail return"
+ * that can be no-op'd. For example:
+ *
+ * void func(void)
+ * {
+ * .. do something ..
+ * return; // this is a no-op
+ * }
+ *
+ * This is used when determining if a function can be inlined. If the
+ * 'return' is not the last statement, we can't inline the function since
+ * we still need the semantic behaviour of the 'return' but we don't want
+ * to accidentally return from the _calling_ function. We'd need to use an
+ * unconditional branch, but we don't have such a GPU instruction (not
+ * always, at least).
+ */
+static GLboolean
+_slang_is_tail_return(const slang_operation *oper)
+{
+ GLuint k = oper->num_children;
+
+ while (k > 0) {
+ const slang_operation *last = &oper->children[k - 1];
+ if (last->type == SLANG_OPER_RETURN)
+ return GL_TRUE;
+ else if (last->type == SLANG_OPER_IDENTIFIER ||
+ last->type == SLANG_OPER_LABEL)
+ k--; /* try prev child */
+ else if (last->type == SLANG_OPER_BLOCK_NO_NEW_SCOPE ||
+ last->type == SLANG_OPER_BLOCK_NEW_SCOPE)
+ /* try sub-children */
+ return _slang_is_tail_return(last);
+ else
+ break;
+ }
+
+ return GL_FALSE;
+}
+
+
static void
slang_resolve_variable(slang_operation *oper)
{
@@ -1207,38 +1265,62 @@ _slang_gen_function_call(slang_assemble_ctx *A, slang_function *fun,
}
else {
/* non-assembly function */
+ /* We always generate an "inline-able" block of code here.
+ * We may either:
+ * 1. insert the inline code
+ * 2. Generate a call to the "inline" code as a subroutine
+ */
+
+
+ slang_operation *ret = NULL;
+
inlined = slang_inline_function_call(A, fun, oper, dest);
- if (inlined && _slang_find_node_type(inlined, SLANG_OPER_RETURN)) {
- slang_operation *callOper;
- /* The function we're calling has one or more 'return' statements.
- * So, we can't truly inline this function because we need to
- * implement 'return' with RET (and CAL).
- * Nevertheless, we performed "inlining" to make a new instance
- * of the function body to deal with static register allocation.
- *
- * XXX check if there's one 'return' and if it's the very last
- * statement in the function - we can optimize that case.
- */
- assert(inlined->type == SLANG_OPER_BLOCK_NEW_SCOPE ||
- inlined->type == SLANG_OPER_SEQUENCE);
- if (_slang_function_has_return_value(fun) && !dest) {
- assert(inlined->children[0].type == SLANG_OPER_VARIABLE_DECL);
- assert(inlined->children[2].type == SLANG_OPER_IDENTIFIER);
- callOper = &inlined->children[1];
+ if (!inlined)
+ return NULL;
+
+ ret = _slang_find_node_type(inlined, SLANG_OPER_RETURN);
+ if (ret) {
+ /* check if this is a "tail" return */
+ if (_slang_count_node_type(inlined, SLANG_OPER_RETURN) == 1 &&
+ _slang_is_tail_return(inlined)) {
+ /* The only RETURN is the last stmt in the function, no-op it
+ * and inline the function body.
+ */
+ ret->type = SLANG_OPER_NONE;
}
else {
- callOper = inlined;
+ slang_operation *callOper;
+ /* The function we're calling has one or more 'return' statements.
+ * So, we can't truly inline this function because we need to
+ * implement 'return' with RET (and CAL).
+ * Nevertheless, we performed "inlining" to make a new instance
+ * of the function body to deal with static register allocation.
+ *
+ * XXX check if there's one 'return' and if it's the very last
+ * statement in the function - we can optimize that case.
+ */
+ assert(inlined->type == SLANG_OPER_BLOCK_NEW_SCOPE ||
+ inlined->type == SLANG_OPER_SEQUENCE);
+
+ if (_slang_function_has_return_value(fun) && !dest) {
+ assert(inlined->children[0].type == SLANG_OPER_VARIABLE_DECL);
+ assert(inlined->children[2].type == SLANG_OPER_IDENTIFIER);
+ callOper = &inlined->children[1];
+ }
+ else {
+ callOper = inlined;
+ }
+ callOper->type = SLANG_OPER_NON_INLINED_CALL;
+ callOper->fun = fun;
+ callOper->label = _slang_label_new_unique((char*) fun->header.a_name);
}
- callOper->type = SLANG_OPER_NON_INLINED_CALL;
- callOper->fun = fun;
- callOper->label = _slang_label_new_unique((char*) fun->header.a_name);
}
}
if (!inlined)
return NULL;
- /* Replace the function call with the inlined block */
+ /* Replace the function call with the inlined block (or new CALL stmt) */
slang_operation_destruct(oper);
*oper = *inlined;
_slang_free(inlined);
@@ -1273,46 +1355,188 @@ slang_find_asm_info(const char *name)
}
+/**
+ * Some write-masked assignments are simple, but others are hard.
+ * Simple example:
+ * vec3 v;
+ * v.xy = vec2(a, b);
+ * Hard example:
+ * vec3 v;
+ * v.zy = vec2(a, b);
+ * this gets transformed/swizzled into:
+ * v.zy = vec2(a, b).*yx* (* = don't care)
+ * This function helps to determine simple vs. non-simple.
+ */
+static GLboolean
+_slang_simple_writemask(GLuint writemask, GLuint swizzle)
+{
+ switch (writemask) {
+ case WRITEMASK_X:
+ return GET_SWZ(swizzle, 0) == SWIZZLE_X;
+ case WRITEMASK_Y:
+ return GET_SWZ(swizzle, 1) == SWIZZLE_Y;
+ case WRITEMASK_Z:
+ return GET_SWZ(swizzle, 2) == SWIZZLE_Z;
+ case WRITEMASK_W:
+ return GET_SWZ(swizzle, 3) == SWIZZLE_W;
+ case WRITEMASK_XY:
+ return (GET_SWZ(swizzle, 0) == SWIZZLE_X)
+ && (GET_SWZ(swizzle, 1) == SWIZZLE_Y);
+ case WRITEMASK_XYZ:
+ return (GET_SWZ(swizzle, 0) == SWIZZLE_X)
+ && (GET_SWZ(swizzle, 1) == SWIZZLE_Y)
+ && (GET_SWZ(swizzle, 2) == SWIZZLE_Z);
+ case WRITEMASK_XYZW:
+ return swizzle == SWIZZLE_NOOP;
+ default:
+ return GL_FALSE;
+ }
+}
+
+
+/**
+ * Convert the given swizzle into a writemask. In some cases this
+ * is trivial, in other cases, we'll need to also swizzle the right
+ * hand side to put components in the right places.
+ * \param swizzle the incoming swizzle
+ * \param writemaskOut returns the writemask
+ * \param swizzleOut swizzle to apply to the right-hand-side
+ * \return GL_FALSE for simple writemasks, GL_TRUE for non-simple
+ */
+static GLboolean
+swizzle_to_writemask(GLuint swizzle,
+ GLuint *writemaskOut, GLuint *swizzleOut)
+{
+ GLuint mask = 0x0, newSwizzle[4];
+ GLint i, size;
+
+ /* make new dst writemask, compute size */
+ for (i = 0; i < 4; i++) {
+ const GLuint swz = GET_SWZ(swizzle, i);
+ if (swz == SWIZZLE_NIL) {
+ /* end */
+ break;
+ }
+ assert(swz >= 0 && swz <= 3);
+ mask |= (1 << swz);
+ }
+ assert(mask <= 0xf);
+ size = i; /* number of components in mask/swizzle */
+
+ *writemaskOut = mask;
+
+ /* make new src swizzle, by inversion */
+ for (i = 0; i < 4; i++) {
+ newSwizzle[i] = i; /*identity*/
+ }
+ for (i = 0; i < size; i++) {
+ const GLuint swz = GET_SWZ(swizzle, i);
+ newSwizzle[swz] = i;
+ }
+ *swizzleOut = MAKE_SWIZZLE4(newSwizzle[0],
+ newSwizzle[1],
+ newSwizzle[2],
+ newSwizzle[3]);
+
+ if (_slang_simple_writemask(mask, *swizzleOut)) {
+ if (size >= 1)
+ assert(GET_SWZ(*swizzleOut, 0) == SWIZZLE_X);
+ if (size >= 2)
+ assert(GET_SWZ(*swizzleOut, 1) == SWIZZLE_Y);
+ if (size >= 3)
+ assert(GET_SWZ(*swizzleOut, 2) == SWIZZLE_Z);
+ if (size >= 4)
+ assert(GET_SWZ(*swizzleOut, 3) == SWIZZLE_W);
+ return GL_TRUE;
+ }
+ else
+ return GL_FALSE;
+}
+
+
+/**
+ * Recursively traverse 'oper' to produce a swizzle mask in the event
+ * of any vector subscripts and swizzle suffixes.
+ * Ex: for "vec4 v", "v[2].x" resolves to v.z
+ */
static GLuint
-make_writemask(const char *field)
-{
- GLuint mask = 0x0;
- while (*field) {
- switch (*field) {
- case 'x':
- case 's':
- case 'r':
- mask |= WRITEMASK_X;
+resolve_swizzle(const slang_operation *oper)
+{
+ if (oper->type == SLANG_OPER_FIELD) {
+ /* writemask from .xyzw suffix */
+ slang_swizzle swz;
+ if (_slang_is_swizzle((char*) oper->a_id, 4, &swz)) {
+ GLuint swizzle = MAKE_SWIZZLE4(swz.swizzle[0],
+ swz.swizzle[1],
+ swz.swizzle[2],
+ swz.swizzle[3]);
+ GLuint child_swizzle = resolve_swizzle(&oper->children[0]);
+ GLuint s = _slang_swizzle_swizzle(child_swizzle, swizzle);
+ return s;
+ }
+ else
+ return SWIZZLE_XYZW;
+ }
+ else if (oper->type == SLANG_OPER_SUBSCRIPT &&
+ oper->children[1].type == SLANG_OPER_LITERAL_INT) {
+ /* writemask from [index] */
+ GLuint child_swizzle = resolve_swizzle(&oper->children[0]);
+ GLuint i = (GLuint) oper->children[1].literal[0];
+ GLuint swizzle;
+ GLuint s;
+ switch (i) {
+ case 0:
+ swizzle = SWIZZLE_XXXX;
break;
- case 'y':
- case 't':
- case 'g':
- mask |= WRITEMASK_Y;
+ case 1:
+ swizzle = SWIZZLE_YYYY;
break;
- case 'z':
- case 'p':
- case 'b':
- mask |= WRITEMASK_Z;
+ case 2:
+ swizzle = SWIZZLE_ZZZZ;
break;
- case 'w':
- case 'q':
- case 'a':
- mask |= WRITEMASK_W;
+ case 3:
+ swizzle = SWIZZLE_WWWW;
break;
default:
- _mesa_problem(NULL, "invalid writemask in make_writemask()");
- return 0;
+ swizzle = SWIZZLE_XYZW;
}
- field++;
+ s = _slang_swizzle_swizzle(child_swizzle, swizzle);
+ return s;
}
- if (mask == 0x0)
- return WRITEMASK_XYZW;
- else
- return mask;
+ else {
+ return SWIZZLE_XYZW;
+ }
+}
+
+
+/**
+ * As above, but produce a writemask.
+ */
+static GLuint
+resolve_writemask(const slang_operation *oper)
+{
+ GLuint swizzle = resolve_swizzle(oper);
+ GLuint writemask, swizzleOut;
+ swizzle_to_writemask(swizzle, &writemask, &swizzleOut);
+ return writemask;
}
/**
+ * Recursively descend through swizzle nodes to find the node's storage info.
+ */
+static slang_ir_storage *
+get_store(const slang_ir_node *n)
+{
+ if (n->Opcode == IR_SWIZZLE) {
+ return get_store(n->Children[0]);
+ }
+ return n->Store;
+}
+
+
+
+/**
* Generate IR tree for an asm instruction/operation such as:
* __asm vec4_dot __retVal.x, v1, v2;
*/
@@ -1366,19 +1590,19 @@ _slang_gen_asm(slang_assemble_ctx *A, slang_operation *oper,
slang_ir_node *n0;
dest_oper = &oper->children[0];
- while (dest_oper->type == SLANG_OPER_FIELD) {
- /* writemask */
- writemask &= make_writemask((char*) dest_oper->a_id);
- dest_oper = &dest_oper->children[0];
- }
+
+ writemask = resolve_writemask(dest_oper);
n0 = _slang_gen_operation(A, dest_oper);
- assert(n0->Var);
- assert(n0->Store);
+ if (!n0)
+ return NULL;
+
assert(!n->Store);
- n->Store = n0->Store;
+ n->Store = get_store(n0);
n->Writemask = writemask;
+ assert(n->Store->File != PROGRAM_UNDEFINED);
+
_slang_free(n0);
}
@@ -1847,6 +2071,14 @@ _slang_gen_var_decl(slang_assemble_ctx *A, slang_variable *var)
n->Store->File = PROGRAM_TEMPORARY;
n->Store->Size = _slang_sizeof_type_specifier(&n->Var->type.specifier);
+ if (var->array_len > 0) {
+ /* this is an array */
+ /* round up element size to mult of 4 */
+ GLint sz = (n->Store->Size + 3) & ~3;
+ /* mult by array size */
+ sz *= var->array_len;
+ n->Store->Size = sz;
+ }
A->program->NumTemporaries++;
assert(n->Store->Size > 0);
}
@@ -2126,105 +2358,6 @@ _slang_gen_variable(slang_assemble_ctx * A, slang_operation *oper)
}
-/**
- * Some write-masked assignments are simple, but others are hard.
- * Simple example:
- * vec3 v;
- * v.xy = vec2(a, b);
- * Hard example:
- * vec3 v;
- * v.zy = vec2(a, b);
- * this gets transformed/swizzled into:
- * v.zy = vec2(a, b).*yx* (* = don't care)
- * This function helps to determine simple vs. non-simple.
- */
-static GLboolean
-_slang_simple_writemask(GLuint writemask, GLuint swizzle)
-{
- switch (writemask) {
- case WRITEMASK_X:
- return GET_SWZ(swizzle, 0) == SWIZZLE_X;
- case WRITEMASK_Y:
- return GET_SWZ(swizzle, 1) == SWIZZLE_Y;
- case WRITEMASK_Z:
- return GET_SWZ(swizzle, 2) == SWIZZLE_Z;
- case WRITEMASK_W:
- return GET_SWZ(swizzle, 3) == SWIZZLE_W;
- case WRITEMASK_XY:
- return (GET_SWZ(swizzle, 0) == SWIZZLE_X)
- && (GET_SWZ(swizzle, 1) == SWIZZLE_Y);
- case WRITEMASK_XYZ:
- return (GET_SWZ(swizzle, 0) == SWIZZLE_X)
- && (GET_SWZ(swizzle, 1) == SWIZZLE_Y)
- && (GET_SWZ(swizzle, 2) == SWIZZLE_Z);
- case WRITEMASK_XYZW:
- return swizzle == SWIZZLE_NOOP;
- default:
- return GL_FALSE;
- }
-}
-
-
-/**
- * Convert the given swizzle into a writemask. In some cases this
- * is trivial, in other cases, we'll need to also swizzle the right
- * hand side to put components in the right places.
- * \param swizzle the incoming swizzle
- * \param writemaskOut returns the writemask
- * \param swizzleOut swizzle to apply to the right-hand-side
- * \return GL_FALSE for simple writemasks, GL_TRUE for non-simple
- */
-static GLboolean
-swizzle_to_writemask(GLuint swizzle,
- GLuint *writemaskOut, GLuint *swizzleOut)
-{
- GLuint mask = 0x0, newSwizzle[4];
- GLint i, size;
-
- /* make new dst writemask, compute size */
- for (i = 0; i < 4; i++) {
- const GLuint swz = GET_SWZ(swizzle, i);
- if (swz == SWIZZLE_NIL) {
- /* end */
- break;
- }
- assert(swz >= 0 && swz <= 3);
- mask |= (1 << swz);
- }
- assert(mask <= 0xf);
- size = i; /* number of components in mask/swizzle */
-
- *writemaskOut = mask;
-
- /* make new src swizzle, by inversion */
- for (i = 0; i < 4; i++) {
- newSwizzle[i] = i; /*identity*/
- }
- for (i = 0; i < size; i++) {
- const GLuint swz = GET_SWZ(swizzle, i);
- newSwizzle[swz] = i;
- }
- *swizzleOut = MAKE_SWIZZLE4(newSwizzle[0],
- newSwizzle[1],
- newSwizzle[2],
- newSwizzle[3]);
-
- if (_slang_simple_writemask(mask, *swizzleOut)) {
- if (size >= 1)
- assert(GET_SWZ(*swizzleOut, 0) == SWIZZLE_X);
- if (size >= 2)
- assert(GET_SWZ(*swizzleOut, 1) == SWIZZLE_Y);
- if (size >= 3)
- assert(GET_SWZ(*swizzleOut, 2) == SWIZZLE_Z);
- if (size >= 4)
- assert(GET_SWZ(*swizzleOut, 3) == SWIZZLE_W);
- return GL_TRUE;
- }
- else
- return GL_FALSE;
-}
-
-
static slang_ir_node *
_slang_gen_swizzle(slang_ir_node *child, GLuint swizzle)
{
@@ -3024,7 +3157,7 @@ _slang_codegen_function(slang_assemble_ctx * A, slang_function * fun)
if (_mesa_strcmp((char *) fun->header.a_name, "main") != 0) {
/* we only really generate code for main, all other functions get
- * inlined.
+ * inlined or codegen'd upon an actual call.
*/
#if 0
/* do some basic error checking though */