From d6772f157a56bb53b754bb16c98e102b54c82a0e Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 27 Jan 2007 20:06:41 -0700 Subject: Improved register allocation: allow four 'float' vars or temporaries to share a single register. Clean-up needed. --- src/mesa/shader/slang/slang_emit.c | 50 ++++++++---- src/mesa/shader/slang/slang_vartable.c | 140 +++++++++++++++++++++++---------- src/mesa/shader/slang/slang_vartable.h | 11 +-- 3 files changed, 137 insertions(+), 64 deletions(-) (limited to 'src/mesa/shader') diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c index 82e8c0b158..c3167e36fb 100644 --- a/src/mesa/shader/slang/slang_emit.c +++ b/src/mesa/shader/slang/slang_emit.c @@ -40,7 +40,7 @@ #define PEEPHOLE_OPTIMIZATIONS 1 -#define ANNOTATE 0 +#define ANNOTATE 1 /** @@ -336,11 +336,14 @@ static void alloc_temp_storage(slang_var_table *vt, slang_ir_node *n, GLint size) { GLint indx; + GLuint swizzle; assert(!n->Var); assert(!n->Store); assert(size > 0); - indx = _slang_alloc_temp(vt, size); + indx = _slang_alloc_temp(vt, size, &swizzle); n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, indx, size); + if (n->Store) + n->Store->Swizzle = swizzle; } @@ -352,8 +355,8 @@ static void free_temp_storage(slang_var_table *vt, slang_ir_node *n) { if (n->Store->File == PROGRAM_TEMPORARY && n->Store->Index >= 0) { - if (_slang_is_temp(vt, n->Store->Index)) { - _slang_free_temp(vt, n->Store->Index, n->Store->Size); + if (_slang_is_temp(vt, n->Store->Index, n->Store->Swizzle)) { + _slang_free_temp(vt, n->Store->Index, n->Store->Size, n->Store->Swizzle); /* XXX free(store)? */ n->Store->Index = -1; n->Store->Size = -1; @@ -381,7 +384,15 @@ storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st, assert(st->File != PROGRAM_UNDEFINED); assert(st->Size >= 1); assert(st->Size <= 4); - dst->WriteMask = defaultWritemask[st->Size - 1] & writemask; + if (st->Size == 1) { + GLuint comp = GET_SWZ(st->Swizzle, 0); + assert(comp < 4); + assert(writemask & WRITEMASK_X); + dst->WriteMask = WRITEMASK_X << comp; + } + else { + dst->WriteMask = defaultWritemask[st->Size - 1] & writemask; + } } @@ -803,13 +814,15 @@ emit_move(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog) emit(vt, n->Children[0], prog); #if PEEPHOLE_OPTIMIZATIONS - if (inst && _slang_is_temp(vt, n->Children[1]->Store->Index)) { + if (inst && _slang_is_temp(vt, n->Children[1]->Store->Index, + n->Children[1]->Store->Swizzle)) { /* Peephole optimization: * Just modify the RHS to put its result into the dest of this * MOVE operation. Then, this MOVE is a no-op. */ _slang_free_temp(vt, n->Children[1]->Store->Index, - n->Children[1]->Store->Size); + n->Children[1]->Store->Size, + n->Children[1]->Store->Swizzle); *n->Children[1]->Store = *n->Children[0]->Store; /* fixup the prev (RHS) instruction */ assert(n->Children[0]->Store->Index >= 0); @@ -852,12 +865,7 @@ emit_move(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog) inst->Comment = instruction_annotation(inst->Opcode, dstAnnot, srcAnnot, NULL); } - /* XXX is this test correct? */ - if (_slang_is_temp(vt, n->Children[1]->Store->Index)) { - _slang_free_temp(vt, n->Children[1]->Store->Index, - n->Children[1]->Store->Size); - } - /*inst->Comment = _mesa_strdup("IR_MOVE");*/ + free_temp_storage(vt, n->Children[1]); assert(!n->Store); n->Store = n->Children[0]->Store; /*XXX new */ return inst; @@ -883,13 +891,17 @@ emit_cond(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog) /* This'll happen for things like "if (i) ..." where no code * is normally generated for the expression "i". * Generate a move instruction just to set condition codes. + * Note: must use full 4-component vector since all four + * condition codes must be set identically. */ - alloc_temp_storage(vt, n, 1); + alloc_temp_storage(vt, n, 4); inst = new_instruction(prog, OPCODE_MOV); inst->CondUpdate = GL_TRUE; storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store); - _slang_free_temp(vt, n->Store->Index, n->Store->Size); + _slang_free_temp(vt, n->Store->Index, n->Store->Size, + n->Store->Swizzle); + inst->Comment = _mesa_strdup("COND expr"); return inst; /* XXX or null? */ } } @@ -928,12 +940,16 @@ emit(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog) assert(n->Store->Index < 0); if (!n->Var || n->Var->isTemp) { /* a nameless/temporary variable, will be freed after first use */ - n->Store->Index = _slang_alloc_temp(vt, n->Store->Size); + GLuint swizzle; + n->Store->Index = _slang_alloc_temp(vt, n->Store->Size, &swizzle); + n->Store->Swizzle = swizzle; } else { /* a regular variable */ + GLuint swizzle; _slang_add_variable(vt, n->Var); - n->Store->Index = _slang_alloc_var(vt, n->Store->Size); + n->Store->Index = _slang_alloc_var(vt, n->Store->Size, &swizzle); + n->Store->Swizzle = swizzle; /* printf("IR_VAR_DECL %s %d store %p\n", (char*) n->Var->a_name, n->Store->Index, (void*) n->Store); diff --git a/src/mesa/shader/slang/slang_vartable.c b/src/mesa/shader/slang/slang_vartable.c index 6d2b4369d9..cadefda1bc 100644 --- a/src/mesa/shader/slang/slang_vartable.c +++ b/src/mesa/shader/slang/slang_vartable.c @@ -4,6 +4,7 @@ #include "slang_compile_variable.h" #include "slang_vartable.h" #include "slang_ir.h" +#include "prog_instruction.h" static int dbg = 0; @@ -23,7 +24,8 @@ struct slang_var_table_ int num_entries; slang_variable **vars; /* array [num_entries] */ - TempState temps[MAX_PROGRAM_TEMPS]; + TempState temps[MAX_PROGRAM_TEMPS * 4]; + int size[MAX_PROGRAM_TEMPS]; struct slang_var_table_ *parent; }; @@ -46,6 +48,7 @@ _slang_push_var_table(slang_var_table *parent) if (parent) { /* copy the info indicating which temp regs are in use */ memcpy(t->temps, parent->temps, sizeof(t->temps)); + memcpy(t->size, parent->size, sizeof(t->size)); } if (dbg) printf("Pushing level %d\n", t->level); } @@ -67,12 +70,22 @@ _slang_pop_var_table(slang_var_table *t) /* free the storage allocated for each variable */ for (i = 0; i < t->num_entries; i++) { slang_ir_storage *store = (slang_ir_storage *) t->vars[i]->aux; - GLint j, sz4 = (store->Size + 3) / 4; - if (dbg) printf(" Free var %s, size %d\n", - (char*) t->vars[i]->a_name, store->Size); - for (j = 0; j < sz4; j++) { - assert(t->temps[store->Index + j] == VAR); - t->temps[store->Index + j] = FREE; + GLint j; + const GLuint sz = store->Size; + GLuint comp; + if (dbg) printf(" Free var %s, size %d at %d\n", + (char*) t->vars[i]->a_name, store->Size, + store->Index); + + if (sz == 1) + comp = GET_SWZ(store->Swizzle, 0); + else + comp = 0; + + assert(store->Index >= 0); + for (j = 0; j < sz; j++) { + assert(t->temps[store->Index * 4 + j + comp] == VAR); + t->temps[store->Index * 4 + j + comp] = FREE; } store->Index = -1; } @@ -80,9 +93,9 @@ _slang_pop_var_table(slang_var_table *t) /* just verify that any remaining allocations in this scope * were for temps */ - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + for (i = 0; i < MAX_PROGRAM_TEMPS * 4; i++) { if (t->temps[i] && !t->parent->temps[i]) { - if (dbg) printf(" Free reg %d\n", i); + if (dbg) printf(" Free reg %d\n", i/4); assert(t->temps[i] == TEMP); } } @@ -131,16 +144,22 @@ _slang_find_variable(const slang_var_table *t, slang_atom name) } +/** + * Allocation helper. + * \param size var size in floats + * \return position for var, measured in floats + */ static GLint alloc_reg(slang_var_table *t, GLint size, GLboolean isTemp) { - const GLuint sz4 = (size + 3) / 4; + /* if size == 1, allocate anywhere, else, pos must be multiple of 4 */ + const GLuint step = (size == 1) ? 1 : 4; GLuint i, j; assert(size > 0); /* number of floats */ - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + for (i = 0; i < MAX_PROGRAM_TEMPS - size; i += step) { GLuint found = 0; - for (j = 0; j < sz4; j++) { + for (j = 0; j < size; j++) { if (i + j < MAX_PROGRAM_TEMPS && !t->temps[i + j]) { found++; } @@ -148,10 +167,14 @@ alloc_reg(slang_var_table *t, GLint size, GLboolean isTemp) break; } } - if (found == sz4) { - /* found block of size/4 free regs */ - for (j = 0; j < sz4; j++) + if (found == size) { + /* found block of size free regs */ + if (size > 1) + assert(i % 4 == 0); + for (j = 0; j < size; j++) t->temps[i + j] = isTemp ? TEMP : VAR; + printf("t->size[%d] = %d\n", i, size); + t->size[i] = size; return i; } } @@ -161,61 +184,98 @@ alloc_reg(slang_var_table *t, GLint size, GLboolean isTemp) /** * Allocate temp register(s) for storing a variable. + * \param size size needed, in floats + * \param swizzle returns swizzle mask for accessing var in register + * \return register allocated, or -1 */ GLint -_slang_alloc_var(slang_var_table *t, GLint size) +_slang_alloc_var(slang_var_table *t, GLint size, GLuint *swizzle) { int i = alloc_reg(t, size, GL_FALSE); - if (dbg) printf("Alloc var %d (level %d)\n", i, t->level); - return i; -} - - -void -_slang_reserve_var(slang_var_table *t, GLint r, GLint size) -{ - const GLint sz4 = (size + 3) / 4; - GLint i; - for (i = 0; i < sz4; i++) { - t->temps[r + i] = VAR; + if (i < 0) + return -1; + + if (size == 1) { + GLuint comp = i % 4; + *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + char swz = "xyzw"[comp]; + if (dbg) printf("Alloc var sz %d at %d.%c (level %d)\n", size, i/4, swz, t->level); } + else { + *swizzle = SWIZZLE_NOOP; + if (dbg) printf("Alloc var sz %d at %d.xyzw (level %d)\n", size, i/4, t->level); + } + return i / 4; } + /** * Allocate temp register(s) for storing an unnamed intermediate value. */ GLint -_slang_alloc_temp(slang_var_table *t, GLint size) +_slang_alloc_temp(slang_var_table *t, GLint size, GLuint *swizzle) { int i = alloc_reg(t, size, GL_TRUE); - if (dbg) printf("Alloc temp %d (level %d)\n", i, t->level); - return i; + if (i < 0) + return -1; + + if (size == 1) { + GLuint comp = i % 4; + assert(comp < 4); + int swz = "xyzw"[comp]; + *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + if (dbg) printf("Alloc temp sz %d at %d.%c (level %d)\n", + size, i/4, swz, t->level); + } + else { + *swizzle = SWIZZLE_NOOP; + if (dbg) printf("Alloc temp sz %d at %d.xyzw (level %d)\n", + size, i/4, t->level); + } + return i / 4; } void -_slang_free_temp(slang_var_table *t, GLint r, GLint size) +_slang_free_temp(slang_var_table *t, GLint r, GLint size, GLuint swizzle) { - const GLuint sz4 = (size + 3) / 4; GLuint i; assert(size > 0); assert(r >= 0); - assert(r < MAX_PROGRAM_TEMPS); - if (dbg) printf("Free temp %d (level %d)\n", r, t->level); - for (i = 0; i < sz4; i++) { - assert(t->temps[r + i] == TEMP); - t->temps[r + i] = FREE; + assert(r + size <= MAX_PROGRAM_TEMPS); + if (dbg) printf("Free temp sz %d at %d (level %d)\n", size, r, t->level); + if (size == 1) { + GLuint comp = GET_SWZ(swizzle, 0); + assert(swizzle == MAKE_SWIZZLE4(comp, comp, comp, comp)); + assert(comp < 4); + assert(t->size[r * 4 + comp] == 1); + assert(t->temps[r * 4 + comp] == TEMP); + t->temps[r * 4 + comp] = FREE; + } + else { + assert(swizzle == SWIZZLE_NOOP); + assert(t->size[r*4] == size); + for (i = 0; i < size; i++) { + assert(t->temps[r * 4 + i] == TEMP); + t->temps[r * 4 + i] = FREE; + } } } GLboolean -_slang_is_temp(slang_var_table *t, GLint r) +_slang_is_temp(slang_var_table *t, GLint r, GLuint swizzle) { assert(r >= 0); assert(r < MAX_PROGRAM_TEMPS); - if (t->temps[r] == TEMP) + GLuint comp; + if (swizzle == SWIZZLE_NOOP) + comp = 0; + else + comp = GET_SWZ(swizzle, 0); + + if (t->temps[r * 4 + comp] == TEMP) return GL_TRUE; else return GL_FALSE; diff --git a/src/mesa/shader/slang/slang_vartable.h b/src/mesa/shader/slang/slang_vartable.h index c8e37c94aa..86fa5d4a4a 100644 --- a/src/mesa/shader/slang/slang_vartable.h +++ b/src/mesa/shader/slang/slang_vartable.h @@ -20,19 +20,16 @@ extern struct slang_variable_ * _slang_find_variable(const slang_var_table *t, slang_atom name); extern GLint -_slang_alloc_var(slang_var_table *t, GLint size); - -extern void -_slang_reserve_var(slang_var_table *t, GLint r, GLint size); +_slang_alloc_var(slang_var_table *t, GLint size, GLuint *swizzle); extern GLint -_slang_alloc_temp(slang_var_table *t, GLint size); +_slang_alloc_temp(slang_var_table *t, GLint size, GLuint *swizzle); extern void -_slang_free_temp(slang_var_table *t, GLint r, GLint size); +_slang_free_temp(slang_var_table *t, GLint r, GLint size, GLuint swizzle); extern GLboolean -_slang_is_temp(slang_var_table *t, GLint r); +_slang_is_temp(slang_var_table *t, GLint r, GLuint swizzle); #endif /* SLANG_VARTABLE_H */ -- cgit v1.2.3