summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Brian <brian@nostromo.localnet.net> 2007-01-27 20:06:41 -0700
committer Brian <brian@nostromo.localnet.net> 2007-01-27 20:06:41 -0700
commit d6772f157a56bb53b754bb16c98e102b54c82a0e (patch)
tree d0d0c1f8b7e134d55453567072c0a7acb44c5f63
parent ea8b68e0f7e7a4025ce662d36380157273ce10a3 (diff)
Improved register allocation: allow four 'float' vars or temporaries to share a single register. Clean-up needed.
-rw-r--r-- src/mesa/shader/slang/slang_emit.c 50
-rw-r--r-- src/mesa/shader/slang/slang_vartable.c 140
-rw-r--r-- src/mesa/shader/slang/slang_vartable.h 11
3 files changed, 137 insertions, 64 deletions
diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c
index 82e8c0b158..c3167e36fb 100644
--- a/src/mesa/shader/slang/slang_emit.c
+++ b/src/mesa/shader/slang/slang_emit.c
@@ -40,7 +40,7 @@
#define PEEPHOLE_OPTIMIZATIONS 1
-#define ANNOTATE 0
+#define ANNOTATE 1
/**
@@ -336,11 +336,14 @@ static void
alloc_temp_storage(slang_var_table *vt, slang_ir_node *n, GLint size)
{
GLint indx;
+ GLuint swizzle;
assert(!n->Var);
assert(!n->Store);
assert(size > 0);
- indx = _slang_alloc_temp(vt, size);
+ indx = _slang_alloc_temp(vt, size, &swizzle);
n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, indx, size);
+ if (n->Store)
+ n->Store->Swizzle = swizzle;
}
@@ -352,8 +355,8 @@ static void
free_temp_storage(slang_var_table *vt, slang_ir_node *n)
{
if (n->Store->File == PROGRAM_TEMPORARY && n->Store->Index >= 0) {
- if (_slang_is_temp(vt, n->Store->Index)) {
- _slang_free_temp(vt, n->Store->Index, n->Store->Size);
+ if (_slang_is_temp(vt, n->Store->Index, n->Store->Swizzle)) {
+ _slang_free_temp(vt, n->Store->Index, n->Store->Size, n->Store->Swizzle);
/* XXX free(store)? */
n->Store->Index = -1;
n->Store->Size = -1;
@@ -381,7 +384,15 @@ storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st,
assert(st->File != PROGRAM_UNDEFINED);
assert(st->Size >= 1);
assert(st->Size <= 4);
- dst->WriteMask = defaultWritemask[st->Size - 1] & writemask;
+ if (st->Size == 1) {
+ GLuint comp = GET_SWZ(st->Swizzle, 0);
+ assert(comp < 4);
+ assert(writemask & WRITEMASK_X);
+ dst->WriteMask = WRITEMASK_X << comp;
+ }
+ else {
+ dst->WriteMask = defaultWritemask[st->Size - 1] & writemask;
+ }
}
@@ -803,13 +814,15 @@ emit_move(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog)
emit(vt, n->Children[0], prog);
#if PEEPHOLE_OPTIMIZATIONS
- if (inst && _slang_is_temp(vt, n->Children[1]->Store->Index)) {
+ if (inst && _slang_is_temp(vt, n->Children[1]->Store->Index,
+ n->Children[1]->Store->Swizzle)) {
/* Peephole optimization:
* Just modify the RHS to put its result into the dest of this
* MOVE operation. Then, this MOVE is a no-op.
*/
_slang_free_temp(vt, n->Children[1]->Store->Index,
- n->Children[1]->Store->Size);
+ n->Children[1]->Store->Size,
+ n->Children[1]->Store->Swizzle);
*n->Children[1]->Store = *n->Children[0]->Store;
/* fixup the prev (RHS) instruction */
assert(n->Children[0]->Store->Index >= 0);
@@ -852,12 +865,7 @@ emit_move(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog)
inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
srcAnnot, NULL);
}
- /* XXX is this test correct? */
- if (_slang_is_temp(vt, n->Children[1]->Store->Index)) {
- _slang_free_temp(vt, n->Children[1]->Store->Index,
- n->Children[1]->Store->Size);
- }
- /*inst->Comment = _mesa_strdup("IR_MOVE");*/
+ free_temp_storage(vt, n->Children[1]);
assert(!n->Store);
n->Store = n->Children[0]->Store; /*XXX new */
return inst;
@@ -883,13 +891,17 @@ emit_cond(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog)
/* This'll happen for things like "if (i) ..." where no code
* is normally generated for the expression "i".
* Generate a move instruction just to set condition codes.
+ * Note: must use full 4-component vector since all four
+ * condition codes must be set identically.
*/
- alloc_temp_storage(vt, n, 1);
+ alloc_temp_storage(vt, n, 4);
inst = new_instruction(prog, OPCODE_MOV);
inst->CondUpdate = GL_TRUE;
storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store);
- _slang_free_temp(vt, n->Store->Index, n->Store->Size);
+ _slang_free_temp(vt, n->Store->Index, n->Store->Size,
+ n->Store->Swizzle);
+ inst->Comment = _mesa_strdup("COND expr");
return inst; /* XXX or null? */
}
}
@@ -928,12 +940,16 @@ emit(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog)
assert(n->Store->Index < 0);
if (!n->Var || n->Var->isTemp) {
/* a nameless/temporary variable, will be freed after first use */
- n->Store->Index = _slang_alloc_temp(vt, n->Store->Size);
+ GLuint swizzle;
+ n->Store->Index = _slang_alloc_temp(vt, n->Store->Size, &swizzle);
+ n->Store->Swizzle = swizzle;
}
else {
/* a regular variable */
+ GLuint swizzle;
_slang_add_variable(vt, n->Var);
- n->Store->Index = _slang_alloc_var(vt, n->Store->Size);
+ n->Store->Index = _slang_alloc_var(vt, n->Store->Size, &swizzle);
+ n->Store->Swizzle = swizzle;
/*
printf("IR_VAR_DECL %s %d store %p\n",
(char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
diff --git a/src/mesa/shader/slang/slang_vartable.c b/src/mesa/shader/slang/slang_vartable.c
index 6d2b4369d9..cadefda1bc 100644
--- a/src/mesa/shader/slang/slang_vartable.c
+++ b/src/mesa/shader/slang/slang_vartable.c
@@ -4,6 +4,7 @@
#include "slang_compile_variable.h"
#include "slang_vartable.h"
#include "slang_ir.h"
+#include "prog_instruction.h"
static int dbg = 0;
@@ -23,7 +24,8 @@ struct slang_var_table_
int num_entries;
slang_variable **vars; /* array [num_entries] */
- TempState temps[MAX_PROGRAM_TEMPS];
+ TempState temps[MAX_PROGRAM_TEMPS * 4];
+ int size[MAX_PROGRAM_TEMPS];
struct slang_var_table_ *parent;
};
@@ -46,6 +48,7 @@ _slang_push_var_table(slang_var_table *parent)
if (parent) {
/* copy the info indicating which temp regs are in use */
memcpy(t->temps, parent->temps, sizeof(t->temps));
+ memcpy(t->size, parent->size, sizeof(t->size));
}
if (dbg) printf("Pushing level %d\n", t->level);
}
@@ -67,12 +70,22 @@ _slang_pop_var_table(slang_var_table *t)
/* free the storage allocated for each variable */
for (i = 0; i < t->num_entries; i++) {
slang_ir_storage *store = (slang_ir_storage *) t->vars[i]->aux;
- GLint j, sz4 = (store->Size + 3) / 4;
- if (dbg) printf(" Free var %s, size %d\n",
- (char*) t->vars[i]->a_name, store->Size);
- for (j = 0; j < sz4; j++) {
- assert(t->temps[store->Index + j] == VAR);
- t->temps[store->Index + j] = FREE;
+ GLint j;
+ const GLuint sz = store->Size;
+ GLuint comp;
+ if (dbg) printf(" Free var %s, size %d at %d\n",
+ (char*) t->vars[i]->a_name, store->Size,
+ store->Index);
+
+ if (sz == 1)
+ comp = GET_SWZ(store->Swizzle, 0);
+ else
+ comp = 0;
+
+ assert(store->Index >= 0);
+ for (j = 0; j < sz; j++) {
+ assert(t->temps[store->Index * 4 + j + comp] == VAR);
+ t->temps[store->Index * 4 + j + comp] = FREE;
}
store->Index = -1;
}
@@ -80,9 +93,9 @@ _slang_pop_var_table(slang_var_table *t)
/* just verify that any remaining allocations in this scope
* were for temps
*/
- for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ for (i = 0; i < MAX_PROGRAM_TEMPS * 4; i++) {
if (t->temps[i] && !t->parent->temps[i]) {
- if (dbg) printf(" Free reg %d\n", i);
+ if (dbg) printf(" Free reg %d\n", i/4);
assert(t->temps[i] == TEMP);
}
}
@@ -131,16 +144,22 @@ _slang_find_variable(const slang_var_table *t, slang_atom name)
}
+/**
+ * Allocation helper.
+ * \param size var size in floats
+ * \return position for var, measured in floats
+ */
static GLint
alloc_reg(slang_var_table *t, GLint size, GLboolean isTemp)
{
- const GLuint sz4 = (size + 3) / 4;
+ /* if size == 1, allocate anywhere, else, pos must be multiple of 4 */
+ const GLuint step = (size == 1) ? 1 : 4;
GLuint i, j;
assert(size > 0); /* number of floats */
- for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ for (i = 0; i < MAX_PROGRAM_TEMPS - size; i += step) {
GLuint found = 0;
- for (j = 0; j < sz4; j++) {
+ for (j = 0; j < size; j++) {
if (i + j < MAX_PROGRAM_TEMPS && !t->temps[i + j]) {
found++;
}
@@ -148,10 +167,14 @@ alloc_reg(slang_var_table *t, GLint size, GLboolean isTemp)
break;
}
}
- if (found == sz4) {
- /* found block of size/4 free regs */
- for (j = 0; j < sz4; j++)
+ if (found == size) {
+ /* found block of size free regs */
+ if (size > 1)
+ assert(i % 4 == 0);
+ for (j = 0; j < size; j++)
t->temps[i + j] = isTemp ? TEMP : VAR;
+ printf("t->size[%d] = %d\n", i, size);
+ t->size[i] = size;
return i;
}
}
@@ -161,61 +184,98 @@ alloc_reg(slang_var_table *t, GLint size, GLboolean isTemp)
/**
* Allocate temp register(s) for storing a variable.
+ * \param size size needed, in floats
+ * \param swizzle returns swizzle mask for accessing var in register
+ * \return register allocated, or -1
*/
GLint
-_slang_alloc_var(slang_var_table *t, GLint size)
+_slang_alloc_var(slang_var_table *t, GLint size, GLuint *swizzle)
{
int i = alloc_reg(t, size, GL_FALSE);
- if (dbg) printf("Alloc var %d (level %d)\n", i, t->level);
- return i;
-}
-
-
-void
-_slang_reserve_var(slang_var_table *t, GLint r, GLint size)
-{
- const GLint sz4 = (size + 3) / 4;
- GLint i;
- for (i = 0; i < sz4; i++) {
- t->temps[r + i] = VAR;
+ if (i < 0)
+ return -1;
+
+ if (size == 1) {
+ GLuint comp = i % 4;
+ *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ char swz = "xyzw"[comp];
+ if (dbg) printf("Alloc var sz %d at %d.%c (level %d)\n", size, i/4, swz, t->level);
}
+ else {
+ *swizzle = SWIZZLE_NOOP;
+ if (dbg) printf("Alloc var sz %d at %d.xyzw (level %d)\n", size, i/4, t->level);
+ }
+ return i / 4;
}
+
/**
* Allocate temp register(s) for storing an unnamed intermediate value.
*/
GLint
-_slang_alloc_temp(slang_var_table *t, GLint size)
+_slang_alloc_temp(slang_var_table *t, GLint size, GLuint *swizzle)
{
int i = alloc_reg(t, size, GL_TRUE);
- if (dbg) printf("Alloc temp %d (level %d)\n", i, t->level);
- return i;
+ if (i < 0)
+ return -1;
+
+ if (size == 1) {
+ GLuint comp = i % 4;
+ assert(comp < 4);
+ int swz = "xyzw"[comp];
+ *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ if (dbg) printf("Alloc temp sz %d at %d.%c (level %d)\n",
+ size, i/4, swz, t->level);
+ }
+ else {
+ *swizzle = SWIZZLE_NOOP;
+ if (dbg) printf("Alloc temp sz %d at %d.xyzw (level %d)\n",
+ size, i/4, t->level);
+ }
+ return i / 4;
}
void
-_slang_free_temp(slang_var_table *t, GLint r, GLint size)
+_slang_free_temp(slang_var_table *t, GLint r, GLint size, GLuint swizzle)
{
- const GLuint sz4 = (size + 3) / 4;
GLuint i;
assert(size > 0);
assert(r >= 0);
- assert(r < MAX_PROGRAM_TEMPS);
- if (dbg) printf("Free temp %d (level %d)\n", r, t->level);
- for (i = 0; i < sz4; i++) {
- assert(t->temps[r + i] == TEMP);
- t->temps[r + i] = FREE;
+ assert(r + size <= MAX_PROGRAM_TEMPS);
+ if (dbg) printf("Free temp sz %d at %d (level %d)\n", size, r, t->level);
+ if (size == 1) {
+ GLuint comp = GET_SWZ(swizzle, 0);
+ assert(swizzle == MAKE_SWIZZLE4(comp, comp, comp, comp));
+ assert(comp < 4);
+ assert(t->size[r * 4 + comp] == 1);
+ assert(t->temps[r * 4 + comp] == TEMP);
+ t->temps[r * 4 + comp] = FREE;
+ }
+ else {
+ assert(swizzle == SWIZZLE_NOOP);
+ assert(t->size[r*4] == size);
+ for (i = 0; i < size; i++) {
+ assert(t->temps[r * 4 + i] == TEMP);
+ t->temps[r * 4 + i] = FREE;
+ }
}
}
GLboolean
-_slang_is_temp(slang_var_table *t, GLint r)
+_slang_is_temp(slang_var_table *t, GLint r, GLuint swizzle)
{
assert(r >= 0);
assert(r < MAX_PROGRAM_TEMPS);
- if (t->temps[r] == TEMP)
+ GLuint comp;
+ if (swizzle == SWIZZLE_NOOP)
+ comp = 0;
+ else
+ comp = GET_SWZ(swizzle, 0);
+
+ if (t->temps[r * 4 + comp] == TEMP)
return GL_TRUE;
else
return GL_FALSE;
diff --git a/src/mesa/shader/slang/slang_vartable.h b/src/mesa/shader/slang/slang_vartable.h
index c8e37c94aa..86fa5d4a4a 100644
--- a/src/mesa/shader/slang/slang_vartable.h
+++ b/src/mesa/shader/slang/slang_vartable.h
@@ -20,19 +20,16 @@ extern struct slang_variable_ *
_slang_find_variable(const slang_var_table *t, slang_atom name);
extern GLint
-_slang_alloc_var(slang_var_table *t, GLint size);
-
-extern void
-_slang_reserve_var(slang_var_table *t, GLint r, GLint size);
+_slang_alloc_var(slang_var_table *t, GLint size, GLuint *swizzle);
extern GLint
-_slang_alloc_temp(slang_var_table *t, GLint size);
+_slang_alloc_temp(slang_var_table *t, GLint size, GLuint *swizzle);
extern void
-_slang_free_temp(slang_var_table *t, GLint r, GLint size);
+_slang_free_temp(slang_var_table *t, GLint r, GLint size, GLuint swizzle);
extern GLboolean
-_slang_is_temp(slang_var_table *t, GLint r);
+_slang_is_temp(slang_var_table *t, GLint r, GLuint swizzle);
#endif /* SLANG_VARTABLE_H */