summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian <brian.paul@tungstengraphics.com>2007-09-29 10:43:29 -0600
committerBrian <brian.paul@tungstengraphics.com>2007-09-29 10:43:29 -0600
commit78f3cd1e08d68111a2db308c000b94cc580b3cad (patch)
treede8fd7a55ec37e47512fb554988af87f7054aa7a
parent91550f0a17488f41da4829a627db90f0081250dc (diff)
Initial conditional execution support for loops and BRK instruction.
Also, instead of passing cond mask to each micro op, just apply it in the store_dest() function.
-rw-r--r--src/mesa/pipe/tgsi/exec/tgsi_exec.c188
-rw-r--r--src/mesa/pipe/tgsi/exec/tgsi_exec.h16
2 files changed, 110 insertions, 94 deletions
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.c b/src/mesa/pipe/tgsi/exec/tgsi_exec.c
index 0ddc3f2965..90da2ee2db 100644
--- a/src/mesa/pipe/tgsi/exec/tgsi_exec.c
+++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.c
@@ -54,6 +54,12 @@
FOR_EACH_CHANNEL( CHAN )\
if (IS_CHANNEL_ENABLED2( INST, CHAN ))
+
+/** The execution mask depends on the conditional mask and the loop mask */
+#define UPDATE_EXEC_MASK(MACH) \
+ MACH->ExecMask = MACH->CondMask & MACH->LoopMask
+
+
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
@@ -98,6 +104,8 @@ tgsi_exec_machine_init(
}
mach->CondMask = 0xf;
+ mach->LoopMask = 0xf;
+ mach->ExecMask = 0xf;
}
void
@@ -177,17 +185,12 @@ static void
micro_add(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- uint mask)
+ const union tgsi_exec_channel *src1 )
{
- if (mask & 0x1)
- dst->f[0] = src0->f[0] + src1->f[0];
- if (mask & 0x2)
- dst->f[1] = src0->f[1] + src1->f[1];
- if (mask & 0x4)
- dst->f[2] = src0->f[2] + src1->f[2];
- if (mask & 0x8)
- dst->f[3] = src0->f[3] + src1->f[3];
+ dst->f[0] = src0->f[0] + src1->f[0];
+ dst->f[1] = src0->f[1] + src1->f[1];
+ dst->f[2] = src0->f[2] + src1->f[2];
+ dst->f[3] = src0->f[3] + src1->f[3];
}
static void
@@ -313,7 +316,7 @@ micro_ieq(
static void
micro_exp2(
union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
+ const union tgsi_exec_channel *src)
{
dst->f[0] = (GLfloat) pow( 2.0, (GLdouble) src->f[0] );
dst->f[1] = (GLfloat) pow( 2.0, (GLdouble) src->f[1] );
@@ -531,17 +534,12 @@ static void
micro_mul(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- uint condMask)
+ const union tgsi_exec_channel *src1 )
{
- if (condMask & 0x1)
- dst->f[0] = src0->f[0] * src1->f[0];
- if (condMask & 0x2)
- dst->f[1] = src0->f[1] * src1->f[1];
- if (condMask & 0x4)
- dst->f[2] = src0->f[2] * src1->f[2];
- if (condMask & 0x8)
- dst->f[3] = src0->f[3] * src1->f[3];
+ dst->f[0] = src0->f[0] * src1->f[0];
+ dst->f[1] = src0->f[1] * src1->f[1];
+ dst->f[2] = src0->f[2] * src1->f[2];
+ dst->f[3] = src0->f[3] * src1->f[3];
}
static void
@@ -732,17 +730,12 @@ static void
micro_sub(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- uint mask)
+ const union tgsi_exec_channel *src1 )
{
- if (mask & 0x1)
- dst->f[0] = src0->f[0] - src1->f[0];
- if (mask & 0x2)
- dst->f[1] = src0->f[1] - src1->f[1];
- if (mask & 0x4)
- dst->f[2] = src0->f[2] - src1->f[2];
- if (mask & 0x8)
- dst->f[3] = src0->f[3] - src1->f[3];
+ dst->f[0] = src0->f[0] - src1->f[0];
+ dst->f[1] = src0->f[1] - src1->f[1];
+ dst->f[2] = src0->f[2] - src1->f[2];
+ dst->f[3] = src0->f[3] - src1->f[3];
}
static void
@@ -957,8 +950,7 @@ store_dest(
const union tgsi_exec_channel *chan,
const struct tgsi_full_dst_register *reg,
const struct tgsi_full_instruction *inst,
- GLuint chan_index,
- uint mask)
+ GLuint chan_index )
{
union tgsi_exec_channel *dst;
@@ -989,13 +981,13 @@ store_dest(
#if 0
*dst = *chan;
#else
- if (mask & 0x1)
+ if (mach->ExecMask & 0x1)
dst->i[0] = chan->i[0];
- if (mask & 0x2)
+ if (mach->ExecMask & 0x2)
dst->i[1] = chan->i[1];
- if (mask & 0x4)
+ if (mach->ExecMask & 0x4)
dst->i[2] = chan->i[2];
- if (mask & 0x8)
+ if (mach->ExecMask & 0x8)
dst->i[3] = chan->i[3];
#endif
break;
@@ -1018,10 +1010,8 @@ store_dest(
fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
#define STORE(VAL,INDEX,CHAN)\
- store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN, ~0)
+ store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
-#define STORE_MASKED(VAL,INDEX,CHAN,MASK) \
- store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN, MASK)
static void
exec_kil (struct tgsi_exec_machine *mach,
@@ -1242,7 +1232,7 @@ exec_instruction(
/* TGSI_OPCODE_SWZ */
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- STORE_MASKED( &r[0], 0, chan_index, mach->CondMask );
+ STORE( &r[0], 0, chan_index );
}
break;
@@ -1309,7 +1299,7 @@ exec_instruction(
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
- micro_mul( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[0], &r[0], &r[1] );
STORE(&r[0], 0, chan_index);
}
@@ -1319,7 +1309,7 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_add( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_add( &r[0], &r[0], &r[1] );
STORE( &r[0], 0, chan_index );
}
break;
@@ -1328,17 +1318,17 @@ exec_instruction(
/* TGSI_OPCODE_DOT3 */
FETCH( &r[0], 0, CHAN_X );
FETCH( &r[1], 1, CHAN_X );
- micro_mul( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[0], &r[0], &r[1] );
FETCH( &r[1], 0, CHAN_Y );
FETCH( &r[2], 1, CHAN_Y );
- micro_mul( &r[1], &r[1], &r[2], mach->CondMask );
- micro_add( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
FETCH( &r[1], 0, CHAN_Z );
FETCH( &r[2], 1, CHAN_Z );
- micro_mul( &r[1], &r[1], &r[2], mach->CondMask );
- micro_add( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( &r[0], 0, chan_index );
@@ -1350,25 +1340,25 @@ exec_instruction(
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 1, CHAN_X);
- micro_mul( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[0], &r[0], &r[1] );
FETCH(&r[1], 0, CHAN_Y);
FETCH(&r[2], 1, CHAN_Y);
- micro_mul( &r[1], &r[1], &r[2], mach->CondMask );
- micro_add( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
FETCH(&r[1], 0, CHAN_Z);
FETCH(&r[2], 1, CHAN_Z);
- micro_mul( &r[1], &r[1], &r[2], mach->CondMask );
- micro_add( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
FETCH(&r[1], 0, CHAN_W);
FETCH(&r[2], 1, CHAN_W);
- micro_mul( &r[1], &r[1], &r[2], mach->CondMask );
- micro_add( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( &r[0], 0, chan_index );
@@ -1383,7 +1373,7 @@ exec_instruction(
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
FETCH( &r[0], 0, CHAN_Y );
FETCH( &r[1], 1, CHAN_Y);
- micro_mul( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[0], &r[0], &r[1] );
STORE( &r[0], 0, CHAN_Y );
}
@@ -1416,7 +1406,7 @@ exec_instruction(
micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
- STORE(&r[0], 0, chan_index);
+ STORE(&r[0], 0, chan_index );
}
break;
@@ -1445,9 +1435,9 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_mul( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[0], &r[0], &r[1] );
FETCH( &r[1], 2, chan_index );
- micro_add( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_add( &r[0], &r[0], &r[1] );
STORE( &r[0], 0, chan_index );
}
break;
@@ -1457,7 +1447,7 @@ exec_instruction(
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
- micro_sub( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_sub( &r[0], &r[0], &r[1] );
STORE(&r[0], 0, chan_index);
}
@@ -1470,9 +1460,9 @@ exec_instruction(
FETCH(&r[1], 1, chan_index);
FETCH(&r[2], 2, chan_index);
- micro_sub( &r[1], &r[1], &r[2], mach->CondMask );
- micro_mul( &r[0], &r[0], &r[1], mach->CondMask );
- micro_add( &r[0], &r[0], &r[2], mach->CondMask );
+ micro_sub( &r[1], &r[1], &r[2] );
+ micro_mul( &r[0], &r[0], &r[1] );
+ micro_add( &r[0], &r[0], &r[2] );
STORE(&r[0], 0, chan_index);
}
@@ -1566,13 +1556,13 @@ exec_instruction(
FETCH(&r[0], 0, CHAN_Y);
FETCH(&r[1], 1, CHAN_Z);
- micro_mul( &r[2], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[2], &r[0], &r[1] );
FETCH(&r[3], 0, CHAN_Z);
FETCH(&r[4], 1, CHAN_Y);
- micro_mul( &r[5], &r[3], &r[4], mach->CondMask );
- micro_sub( &r[2], &r[2], &r[5], mach->CondMask );
+ micro_mul( &r[5], &r[3], &r[4] );
+ micro_sub( &r[2], &r[2], &r[5] );
if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
STORE( &r[2], 0, CHAN_X );
@@ -1580,20 +1570,20 @@ exec_instruction(
FETCH(&r[2], 1, CHAN_X);
- micro_mul( &r[3], &r[3], &r[2], mach->CondMask );
+ micro_mul( &r[3], &r[3], &r[2] );
FETCH(&r[5], 0, CHAN_X);
- micro_mul( &r[1], &r[1], &r[5], mach->CondMask );
- micro_sub( &r[3], &r[3], &r[1], mach->CondMask );
+ micro_mul( &r[1], &r[1], &r[5] );
+ micro_sub( &r[3], &r[3], &r[1] );
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
STORE( &r[3], 0, CHAN_Y );
}
- micro_mul( &r[5], &r[5], &r[4], mach->CondMask );
- micro_mul( &r[0], &r[0], &r[2], mach->CondMask );
- micro_sub( &r[5], &r[5], &r[0], mach->CondMask );
+ micro_mul( &r[5], &r[5], &r[4] );
+ micro_mul( &r[0], &r[0], &r[2] );
+ micro_sub( &r[5], &r[5], &r[0] );
if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
STORE( &r[5], 0, CHAN_Z );
@@ -1626,23 +1616,23 @@ exec_instruction(
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 1, CHAN_X);
- micro_mul( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[0], &r[0], &r[1] );
FETCH(&r[1], 0, CHAN_Y);
FETCH(&r[2], 1, CHAN_Y);
- micro_mul( &r[1], &r[1], &r[2], mach->CondMask );
- micro_add( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
FETCH(&r[1], 0, CHAN_Z);
FETCH(&r[2], 1, CHAN_Z);
- micro_mul( &r[1], &r[1], &r[2], mach->CondMask );
- micro_add( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
FETCH(&r[1], 1, CHAN_W);
- micro_add( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_add( &r[0], &r[0], &r[1] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( &r[0], 0, chan_index );
@@ -1925,12 +1915,12 @@ exec_instruction(
case TGSI_OPCODE_DP2:
FETCH( &r[0], 0, CHAN_X );
FETCH( &r[1], 1, CHAN_X );
- micro_mul( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[0], &r[0], &r[1] );
FETCH( &r[1], 0, CHAN_Y );
FETCH( &r[2], 1, CHAN_Y );
- micro_mul( &r[1], &r[1], &r[2], mach->CondMask );
- micro_add( &r[0], &r[0], &r[1], mach->CondMask );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( &r[0], 0, chan_index );
@@ -1942,12 +1932,15 @@ exec_instruction(
break;
case TGSI_OPCODE_BRK:
- assert (0);
+ /* turn off loop channels for each enabled exec channel */
+ mach->LoopMask &= ~mach->ExecMask;
+ UPDATE_EXEC_MASK(mach);
break;
case TGSI_OPCODE_IF:
/* push CondMask */
- mach->condStack[mach->CondStackTop++] = mach->CondMask;
+ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+ mach->CondStack[mach->CondStackTop++] = mach->CondMask;
FETCH( &r[0], 0, CHAN_X );
/* update CondMask */
if( ! r[0].u[0] ) {
@@ -1962,10 +1955,13 @@ exec_instruction(
if( ! r[0].u[3] ) {
mach->CondMask &= ~0x8;
}
+ UPDATE_EXEC_MASK(mach);
break;
case TGSI_OPCODE_LOOP:
- assert (0);
+ /* push LoopMask */
+ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
break;
case TGSI_OPCODE_REP:
@@ -1977,20 +1973,25 @@ exec_instruction(
{
uint prevMask;
assert(mach->CondStackTop > 0);
- prevMask = mach->condStack[mach->CondStackTop - 1];
+ prevMask = mach->CondStack[mach->CondStackTop - 1];
mach->CondMask = ~mach->CondMask & prevMask;
+ UPDATE_EXEC_MASK(mach);
}
break;
case TGSI_OPCODE_ENDIF:
- assert(mach->CondStackTop > 0);
/* pop CondMask */
- mach->CondMask = mach->condStack[--mach->CondStackTop];
+ assert(mach->CondStackTop > 0);
+ mach->CondMask = mach->CondStack[--mach->CondStackTop];
+ UPDATE_EXEC_MASK(mach);
break;
case TGSI_OPCODE_ENDLOOP:
- assert (0);
- break;
+ /* pop LoopMask */
+ assert(mach->LoopStackTop > 0);
+ mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+ UPDATE_EXEC_MASK(mach);
+ break;
case TGSI_OPCODE_ENDREP:
assert (0);
@@ -2109,7 +2110,9 @@ exec_instruction(
break;
case TGSI_OPCODE_BGNLOOP2:
- assert( 0 );
+ /* push LoopMask */
+ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
break;
case TGSI_OPCODE_BGNSUB:
@@ -2117,7 +2120,10 @@ exec_instruction(
break;
case TGSI_OPCODE_ENDLOOP2:
- assert( 0 );
+ /* pop LoopMask */
+ assert(mach->LoopStackTop > 0);
+ mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+ UPDATE_EXEC_MASK(mach);
break;
case TGSI_OPCODE_ENDSUB:
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.h b/src/mesa/pipe/tgsi/exec/tgsi_exec.h
index 54b38cb4b2..e67a8138e8 100644
--- a/src/mesa/pipe/tgsi/exec/tgsi_exec.h
+++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.h
@@ -98,8 +98,12 @@ struct tgsi_exec_labels
#define TGSI_EXEC_NUM_ADDRS 1
#define TGSI_EXEC_MAX_COND_NESTING 10
+#define TGSI_EXEC_MAX_LOOP_NESTING 10
+/**
+ * Run-time virtual machine state for executing TGSI shader.
+ */
struct tgsi_exec_machine
{
/*
@@ -132,12 +136,18 @@ struct tgsi_exec_machine
/* FRAGMENT processor only. */
const struct tgsi_interp_coef *InterpCoefs;
- /* Conditional execution mask */
+ /* Conditional execution masks */
uint CondMask;
+ uint LoopMask;
+ uint ExecMask; /**< = CondMask & LoopMask */
- /* Condition mask stack (for nested conditionals) */
- uint condStack[TGSI_EXEC_MAX_COND_NESTING];
+ /** Condition mask stack (for nested conditionals) */
+ uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
int CondStackTop;
+
+ /** Loop mask stack (for nested loops) */
+ uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
+ int LoopStackTop;
};