summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/tgsi/tgsi_exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/tgsi/tgsi_exec.c')
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c159
1 files changed, 95 insertions, 64 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 259894e72e..60ffa188df 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -304,14 +304,14 @@ tgsi_exec_machine_bind_shader(
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
- assert( size % 4 == 0 );
- assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
+ assert( size <= 4 );
+ assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
for( i = 0; i < size; i++ ) {
- mach->Imms[mach->ImmLimit + i / 4][i % 4] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ mach->Imms[mach->ImmLimit][i] =
+ parse.FullToken.FullImmediate.u[i].Float;
}
- mach->ImmLimit += size / 4;
+ mach->ImmLimit += 1;
}
break;
@@ -373,13 +373,18 @@ tgsi_exec_machine_bind_shader(
}
-void
-tgsi_exec_machine_init(
- struct tgsi_exec_machine *mach )
+struct tgsi_exec_machine *
+tgsi_exec_machine_create( void )
{
+ struct tgsi_exec_machine *mach;
uint i;
- mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
+ mach = align_malloc( sizeof *mach, 16 );
+ if (!mach)
+ goto fail;
+
+ memset(mach, 0, sizeof(*mach));
+
mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
/* Setup constants. */
@@ -401,22 +406,24 @@ tgsi_exec_machine_init(
(void) print_chan;
(void) print_temp;
#endif
+
+ return mach;
+
+fail:
+ align_free(mach);
+ return NULL;
}
void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
+tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
{
- if (mach->Instructions) {
+ if (mach) {
FREE(mach->Instructions);
- mach->Instructions = NULL;
- mach->NumInstructions = 0;
- }
- if (mach->Declarations) {
FREE(mach->Declarations);
- mach->Declarations = NULL;
- mach->NumDeclarations = 0;
}
+
+ align_free(mach);
}
@@ -2015,8 +2022,7 @@ exec_instruction(
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
+ case TGSI_OPCODE_FLR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
micro_flr( &r[0], &r[0] );
@@ -2299,8 +2305,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
+ case TGSI_OPCODE_LRP:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
@@ -2324,18 +2329,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_CND0:
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
- micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
+ case TGSI_OPCODE_DP2A:
FETCH( &r[0], 0, CHAN_X );
FETCH( &r[1], 1, CHAN_X );
micro_mul( &r[0], &r[0], &r[1] );
@@ -2353,18 +2347,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_INDEX:
- /* XXX: considered for removal */
- assert (0);
- break;
-
- case TGSI_OPCODE_NEGATE:
- /* XXX: considered for removal */
- assert (0);
- break;
-
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
+ case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
micro_frc( &r[0], &r[0] );
@@ -2392,8 +2375,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
+ case TGSI_OPCODE_EX2:
FETCH(&r[0], 0, CHAN_X);
#if FAST_MATH
@@ -2407,8 +2389,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
+ case TGSI_OPCODE_LG2:
FETCH( &r[0], 0, CHAN_X );
micro_lg2( &r[0], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -2416,8 +2397,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
+ case TGSI_OPCODE_POW:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 1, CHAN_X);
@@ -2428,8 +2408,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
+ case TGSI_OPCODE_XPD:
FETCH(&r[0], 0, CHAN_Y);
FETCH(&r[1], 1, CHAN_Z);
@@ -2471,11 +2450,6 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- /* XXX: considered for removal */
- assert (0);
- break;
-
case TGSI_OPCODE_ABS:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
@@ -3119,26 +3093,81 @@ exec_instruction(
mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
break;
- case TGSI_OPCODE_LOOP:
+ case TGSI_OPCODE_BGNFOR:
+ assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ for (chan_index = 0; chan_index < 3; chan_index++) {
+ FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
+ }
+ STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
+ ++mach->LoopCounterStackTop;
/* fall-through (for now) */
- case TGSI_OPCODE_BGNLOOP2:
+ case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+ assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
+ break;
+
+ case TGSI_OPCODE_ENDFOR:
+ assert(mach->LoopCounterStackTop > 0);
+ micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+ /* update LoopMask */
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) {
+ mach->LoopMask &= ~0x1;
+ }
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) {
+ mach->LoopMask &= ~0x2;
+ }
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) {
+ mach->LoopMask &= ~0x4;
+ }
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) {
+ mach->LoopMask &= ~0x8;
+ }
+ micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
+ assert(mach->LoopLabelStackTop > 0);
+ inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
+ STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
+ /* Restore ContMask, but don't pop */
+ assert(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
+ UPDATE_EXEC_MASK(mach);
+ if (mach->ExecMask) {
+ /* repeat loop: jump to instruction just past BGNLOOP */
+ assert(mach->LoopLabelStackTop > 0);
+ *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
+ }
+ else {
+ /* exit loop: pop LoopMask */
+ assert(mach->LoopStackTop > 0);
+ mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+ /* pop ContMask */
+ assert(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ assert(mach->LoopLabelStackTop > 0);
+ --mach->LoopLabelStackTop;
+ assert(mach->LoopCounterStackTop > 0);
+ --mach->LoopCounterStackTop;
+ }
+ UPDATE_EXEC_MASK(mach);
break;
-
+
case TGSI_OPCODE_ENDLOOP:
- /* fall-through (for now at least) */
- case TGSI_OPCODE_ENDLOOP2:
/* Restore ContMask, but don't pop */
assert(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
UPDATE_EXEC_MASK(mach);
if (mach->ExecMask) {
/* repeat loop: jump to instruction just past BGNLOOP */
- *pc = inst->InstructionExtLabel.Label + 1;
+ assert(mach->LoopLabelStackTop > 0);
+ *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
}
else {
/* exit loop: pop LoopMask */
@@ -3147,6 +3176,8 @@ exec_instruction(
/* pop ContMask */
assert(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ assert(mach->LoopLabelStackTop > 0);
+ --mach->LoopLabelStackTop;
}
UPDATE_EXEC_MASK(mach);
break;