From 78f3cd1e08d68111a2db308c000b94cc580b3cad Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 29 Sep 2007 10:43:29 -0600 Subject: Initial conditional execution support for loops and BRK instruction. Also, instead of passing cond mask to each micro op, just apply it in the store_dest() function. --- src/mesa/pipe/tgsi/exec/tgsi_exec.c | 188 +++++++++++++++++++----------------- src/mesa/pipe/tgsi/exec/tgsi_exec.h | 16 ++- 2 files changed, 110 insertions(+), 94 deletions(-) (limited to 'src') diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.c b/src/mesa/pipe/tgsi/exec/tgsi_exec.c index 0ddc3f2965..90da2ee2db 100644 --- a/src/mesa/pipe/tgsi/exec/tgsi_exec.c +++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.c @@ -54,6 +54,12 @@ FOR_EACH_CHANNEL( CHAN )\ if (IS_CHANNEL_ENABLED2( INST, CHAN )) + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ + MACH->ExecMask = MACH->CondMask & MACH->LoopMask + + #define CHAN_X 0 #define CHAN_Y 1 #define CHAN_Z 2 @@ -98,6 +104,8 @@ tgsi_exec_machine_init( } mach->CondMask = 0xf; + mach->LoopMask = 0xf; + mach->ExecMask = 0xf; } void @@ -177,17 +185,12 @@ static void micro_add( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - uint mask) + const union tgsi_exec_channel *src1 ) { - if (mask & 0x1) - dst->f[0] = src0->f[0] + src1->f[0]; - if (mask & 0x2) - dst->f[1] = src0->f[1] + src1->f[1]; - if (mask & 0x4) - dst->f[2] = src0->f[2] + src1->f[2]; - if (mask & 0x8) - dst->f[3] = src0->f[3] + src1->f[3]; + dst->f[0] = src0->f[0] + src1->f[0]; + dst->f[1] = src0->f[1] + src1->f[1]; + dst->f[2] = src0->f[2] + src1->f[2]; + dst->f[3] = src0->f[3] + src1->f[3]; } static void @@ -313,7 +316,7 @@ micro_ieq( static void micro_exp2( union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) + const union tgsi_exec_channel *src) { dst->f[0] = (GLfloat) pow( 2.0, (GLdouble) src->f[0] ); dst->f[1] = (GLfloat) pow( 2.0, (GLdouble) src->f[1] ); @@ -531,17 +534,12 @@ static void micro_mul( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - uint condMask) + const union tgsi_exec_channel *src1 ) { - if (condMask & 0x1) - dst->f[0] = src0->f[0] * src1->f[0]; - if (condMask & 0x2) - dst->f[1] = src0->f[1] * src1->f[1]; - if (condMask & 0x4) - dst->f[2] = src0->f[2] * src1->f[2]; - if (condMask & 0x8) - dst->f[3] = src0->f[3] * src1->f[3]; + dst->f[0] = src0->f[0] * src1->f[0]; + dst->f[1] = src0->f[1] * src1->f[1]; + dst->f[2] = src0->f[2] * src1->f[2]; + dst->f[3] = src0->f[3] * src1->f[3]; } static void @@ -732,17 +730,12 @@ static void micro_sub( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - uint mask) + const union tgsi_exec_channel *src1 ) { - if (mask & 0x1) - dst->f[0] = src0->f[0] - src1->f[0]; - if (mask & 0x2) - dst->f[1] = src0->f[1] - src1->f[1]; - if (mask & 0x4) - dst->f[2] = src0->f[2] - src1->f[2]; - if (mask & 0x8) - dst->f[3] = src0->f[3] - src1->f[3]; + dst->f[0] = src0->f[0] - src1->f[0]; + dst->f[1] = src0->f[1] - src1->f[1]; + dst->f[2] = src0->f[2] - src1->f[2]; + dst->f[3] = src0->f[3] - src1->f[3]; } static void @@ -957,8 +950,7 @@ store_dest( const union tgsi_exec_channel *chan, const struct tgsi_full_dst_register *reg, const struct tgsi_full_instruction *inst, - GLuint chan_index, - uint mask) + GLuint chan_index ) { union tgsi_exec_channel *dst; @@ -989,13 +981,13 @@ store_dest( #if 0 *dst = *chan; #else - if (mask & 0x1) + if (mach->ExecMask & 0x1) dst->i[0] = chan->i[0]; - if (mask & 0x2) + if (mach->ExecMask & 0x2) dst->i[1] = chan->i[1]; - if (mask & 0x4) + if (mach->ExecMask & 0x4) dst->i[2] = chan->i[2]; - if (mask & 0x8) + if (mach->ExecMask & 0x8) dst->i[3] = chan->i[3]; #endif break; @@ -1018,10 +1010,8 @@ store_dest( fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN, ~0) + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) -#define STORE_MASKED(VAL,INDEX,CHAN,MASK) \ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN, MASK) static void exec_kil (struct tgsi_exec_machine *mach, @@ -1242,7 +1232,7 @@ exec_instruction( /* TGSI_OPCODE_SWZ */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - STORE_MASKED( &r[0], 0, chan_index, mach->CondMask ); + STORE( &r[0], 0, chan_index ); } break; @@ -1309,7 +1299,7 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - micro_mul( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[0], &r[0], &r[1] ); STORE(&r[0], 0, chan_index); } @@ -1319,7 +1309,7 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_add( &r[0], &r[0], &r[1], mach->CondMask ); + micro_add( &r[0], &r[0], &r[1] ); STORE( &r[0], 0, chan_index ); } break; @@ -1328,17 +1318,17 @@ exec_instruction( /* TGSI_OPCODE_DOT3 */ FETCH( &r[0], 0, CHAN_X ); FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[0], &r[0], &r[1] ); FETCH( &r[1], 0, CHAN_Y ); FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2], mach->CondMask ); - micro_add( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); FETCH( &r[1], 0, CHAN_Z ); FETCH( &r[2], 1, CHAN_Z ); - micro_mul( &r[1], &r[1], &r[2], mach->CondMask ); - micro_add( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); @@ -1350,25 +1340,25 @@ exec_instruction( FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 1, CHAN_X); - micro_mul( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[0], &r[0], &r[1] ); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 1, CHAN_Y); - micro_mul( &r[1], &r[1], &r[2], mach->CondMask ); - micro_add( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); FETCH(&r[1], 0, CHAN_Z); FETCH(&r[2], 1, CHAN_Z); - micro_mul( &r[1], &r[1], &r[2], mach->CondMask ); - micro_add( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); FETCH(&r[1], 0, CHAN_W); FETCH(&r[2], 1, CHAN_W); - micro_mul( &r[1], &r[1], &r[2], mach->CondMask ); - micro_add( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); @@ -1383,7 +1373,7 @@ exec_instruction( if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { FETCH( &r[0], 0, CHAN_Y ); FETCH( &r[1], 1, CHAN_Y); - micro_mul( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[0], &r[0], &r[1] ); STORE( &r[0], 0, CHAN_Y ); } @@ -1416,7 +1406,7 @@ exec_instruction( micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); - STORE(&r[0], 0, chan_index); + STORE(&r[0], 0, chan_index ); } break; @@ -1445,9 +1435,9 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_mul( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[0], &r[0], &r[1] ); FETCH( &r[1], 2, chan_index ); - micro_add( &r[0], &r[0], &r[1], mach->CondMask ); + micro_add( &r[0], &r[0], &r[1] ); STORE( &r[0], 0, chan_index ); } break; @@ -1457,7 +1447,7 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - micro_sub( &r[0], &r[0], &r[1], mach->CondMask ); + micro_sub( &r[0], &r[0], &r[1] ); STORE(&r[0], 0, chan_index); } @@ -1470,9 +1460,9 @@ exec_instruction( FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - micro_sub( &r[1], &r[1], &r[2], mach->CondMask ); - micro_mul( &r[0], &r[0], &r[1], mach->CondMask ); - micro_add( &r[0], &r[0], &r[2], mach->CondMask ); + micro_sub( &r[1], &r[1], &r[2] ); + micro_mul( &r[0], &r[0], &r[1] ); + micro_add( &r[0], &r[0], &r[2] ); STORE(&r[0], 0, chan_index); } @@ -1566,13 +1556,13 @@ exec_instruction( FETCH(&r[0], 0, CHAN_Y); FETCH(&r[1], 1, CHAN_Z); - micro_mul( &r[2], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[2], &r[0], &r[1] ); FETCH(&r[3], 0, CHAN_Z); FETCH(&r[4], 1, CHAN_Y); - micro_mul( &r[5], &r[3], &r[4], mach->CondMask ); - micro_sub( &r[2], &r[2], &r[5], mach->CondMask ); + micro_mul( &r[5], &r[3], &r[4] ); + micro_sub( &r[2], &r[2], &r[5] ); if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { STORE( &r[2], 0, CHAN_X ); @@ -1580,20 +1570,20 @@ exec_instruction( FETCH(&r[2], 1, CHAN_X); - micro_mul( &r[3], &r[3], &r[2], mach->CondMask ); + micro_mul( &r[3], &r[3], &r[2] ); FETCH(&r[5], 0, CHAN_X); - micro_mul( &r[1], &r[1], &r[5], mach->CondMask ); - micro_sub( &r[3], &r[3], &r[1], mach->CondMask ); + micro_mul( &r[1], &r[1], &r[5] ); + micro_sub( &r[3], &r[3], &r[1] ); if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { STORE( &r[3], 0, CHAN_Y ); } - micro_mul( &r[5], &r[5], &r[4], mach->CondMask ); - micro_mul( &r[0], &r[0], &r[2], mach->CondMask ); - micro_sub( &r[5], &r[5], &r[0], mach->CondMask ); + micro_mul( &r[5], &r[5], &r[4] ); + micro_mul( &r[0], &r[0], &r[2] ); + micro_sub( &r[5], &r[5], &r[0] ); if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { STORE( &r[5], 0, CHAN_Z ); @@ -1626,23 +1616,23 @@ exec_instruction( FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 1, CHAN_X); - micro_mul( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[0], &r[0], &r[1] ); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 1, CHAN_Y); - micro_mul( &r[1], &r[1], &r[2], mach->CondMask ); - micro_add( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); FETCH(&r[1], 0, CHAN_Z); FETCH(&r[2], 1, CHAN_Z); - micro_mul( &r[1], &r[1], &r[2], mach->CondMask ); - micro_add( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); FETCH(&r[1], 1, CHAN_W); - micro_add( &r[0], &r[0], &r[1], mach->CondMask ); + micro_add( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); @@ -1925,12 +1915,12 @@ exec_instruction( case TGSI_OPCODE_DP2: FETCH( &r[0], 0, CHAN_X ); FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[0], &r[0], &r[1] ); FETCH( &r[1], 0, CHAN_Y ); FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2], mach->CondMask ); - micro_add( &r[0], &r[0], &r[1], mach->CondMask ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); @@ -1942,12 +1932,15 @@ exec_instruction( break; case TGSI_OPCODE_BRK: - assert (0); + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_IF: /* push CondMask */ - mach->condStack[mach->CondStackTop++] = mach->CondMask; + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; FETCH( &r[0], 0, CHAN_X ); /* update CondMask */ if( ! r[0].u[0] ) { @@ -1962,10 +1955,13 @@ exec_instruction( if( ! r[0].u[3] ) { mach->CondMask &= ~0x8; } + UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_LOOP: - assert (0); + /* push LoopMask */ + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; break; case TGSI_OPCODE_REP: @@ -1977,20 +1973,25 @@ exec_instruction( { uint prevMask; assert(mach->CondStackTop > 0); - prevMask = mach->condStack[mach->CondStackTop - 1]; + prevMask = mach->CondStack[mach->CondStackTop - 1]; mach->CondMask = ~mach->CondMask & prevMask; + UPDATE_EXEC_MASK(mach); } break; case TGSI_OPCODE_ENDIF: - assert(mach->CondStackTop > 0); /* pop CondMask */ - mach->CondMask = mach->condStack[--mach->CondStackTop]; + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_ENDLOOP: - assert (0); - break; + /* pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + UPDATE_EXEC_MASK(mach); + break; case TGSI_OPCODE_ENDREP: assert (0); @@ -2109,7 +2110,9 @@ exec_instruction( break; case TGSI_OPCODE_BGNLOOP2: - assert( 0 ); + /* push LoopMask */ + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; break; case TGSI_OPCODE_BGNSUB: @@ -2117,7 +2120,10 @@ exec_instruction( break; case TGSI_OPCODE_ENDLOOP2: - assert( 0 ); + /* pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_ENDSUB: diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.h b/src/mesa/pipe/tgsi/exec/tgsi_exec.h index 54b38cb4b2..e67a8138e8 100644 --- a/src/mesa/pipe/tgsi/exec/tgsi_exec.h +++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.h @@ -98,8 +98,12 @@ struct tgsi_exec_labels #define TGSI_EXEC_NUM_ADDRS 1 #define TGSI_EXEC_MAX_COND_NESTING 10 +#define TGSI_EXEC_MAX_LOOP_NESTING 10 +/** + * Run-time virtual machine state for executing TGSI shader. + */ struct tgsi_exec_machine { /* @@ -132,12 +136,18 @@ struct tgsi_exec_machine /* FRAGMENT processor only. */ const struct tgsi_interp_coef *InterpCoefs; - /* Conditional execution mask */ + /* Conditional execution masks */ uint CondMask; + uint LoopMask; + uint ExecMask; /**< = CondMask & LoopMask */ - /* Condition mask stack (for nested conditionals) */ - uint condStack[TGSI_EXEC_MAX_COND_NESTING]; + /** Condition mask stack (for nested conditionals) */ + uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; int CondStackTop; + + /** Loop mask stack (for nested loops) */ + uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopStackTop; }; -- cgit v1.2.3