diff options
Diffstat (limited to 'src/gallium/auxiliary/tgsi/tgsi_exec.c')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 895 |
1 files changed, 607 insertions, 288 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index b7569e74d4..ba89f2fbc3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -62,9 +62,6 @@ #define FAST_MATH 1 -/** for tgsi_full_instruction::Flags */ -#define SOA_DEPENDENCY_FLAG 0x1 - #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 #define TILE_BOTTOM_LEFT 2 @@ -110,10 +107,10 @@ #define TEMP_P0 TGSI_EXEC_TEMP_P0 #define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ @@ -191,7 +188,7 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) { uint i, chan; - uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + uint writemask = inst->Dst[0].Register.WriteMask; if (writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_Y || writemask == TGSI_WRITEMASK_Z || @@ -203,15 +200,15 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) /* loop over src regs */ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - if ((inst->FullSrcRegisters[i].SrcRegister.File == - inst->FullDstRegisters[0].DstRegister.File) && - (inst->FullSrcRegisters[i].SrcRegister.Index == - inst->FullDstRegisters[0].DstRegister.Index)) { + if ((inst->Src[i].Register.File == + inst->Dst[0].Register.File) && + (inst->Src[i].Register.Index == + inst->Dst[0].Register.Index)) { /* loop over dest channels */ uint channelsWritten = 0x0; FOR_EACH_ENABLED_CHANNEL(*inst, chan) { /* check if we're reading a channel that's been written */ - uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan); + uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); if (channelsWritten & (1 << swizzle)) { return TRUE; } @@ -295,6 +292,14 @@ tgsi_exec_machine_bind_shader( * sizeof(struct tgsi_full_declaration)); maxDeclarations += 10; } + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { + unsigned reg; + for (reg = parse.FullToken.FullDeclaration.Range.First; + reg <= parse.FullToken.FullDeclaration.Range.Last; + ++reg) { + ++mach->NumOutputs; + } + } memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -332,20 +337,6 @@ tgsi_exec_machine_bind_shader( maxInstructions += 10; } - if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { - uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG; - /* XXX we only handle SOA dependencies properly for MOV/SWZ - * at this time! - */ - if (opcode != TGSI_OPCODE_MOV) { - debug_printf("Warning: SOA dependency in instruction" - " is not handled:\n"); - tgsi_dump_instruction(&parse.FullToken.FullInstruction, - numInstructions); - } - } - memcpy(instructions + numInstructions, &parse.FullToken.FullInstruction, sizeof(instructions[0])); @@ -353,6 +344,9 @@ tgsi_exec_machine_bind_shader( numInstructions++; break; + case TGSI_TOKEN_TYPE_PROPERTY: + break; + default: assert( 0 ); } @@ -386,6 +380,8 @@ tgsi_exec_machine_create( void ) memset(mach, 0, sizeof(*mach)); mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; + mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; + mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; /* Setup constants. */ for( i = 0; i < 4; i++ ) { @@ -517,7 +513,7 @@ micro_ddy( dst->f[0] = dst->f[1] = dst->f[2] = - dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; + dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; } static void @@ -595,6 +591,24 @@ micro_exp2( dst->f[2] = util_fast_exp2( src->f[2] ); dst->f[3] = util_fast_exp2( src->f[3] ); #else + +#if DEBUG + /* Inf is okay for this instruction, so clamp it to silence assertions. */ + uint i; + union tgsi_exec_channel clamped; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 127.99999f) { + clamped.f[i] = 127.99999f; + } else if (src->f[i] < -126.99999f) { + clamped.f[i] = -126.99999f; + } else { + clamped.f[i] = src->f[i]; + } + } + src = &clamped; +#endif + dst->f[0] = powf( 2.0f, src->f[0] ); dst->f[1] = powf( 2.0f, src->f[1] ); dst->f[2] = powf( 2.0f, src->f[2] ); @@ -1156,6 +1170,7 @@ fetch_src_file_channel( break; case TGSI_FILE_INPUT: + case TGSI_FILE_SYSTEM_VALUE: chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; @@ -1193,10 +1208,10 @@ fetch_src_file_channel( assert(index->i[1] < TGSI_EXEC_NUM_PREDS); assert(index->i[2] < TGSI_EXEC_NUM_PREDS); assert(index->i[3] < TGSI_EXEC_NUM_PREDS); - chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3]; + chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; + chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; + chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; + chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; break; case TGSI_FILE_OUTPUT: @@ -1231,13 +1246,13 @@ fetch_source( * * file[1], * where: - * file = SrcRegister.File - * [1] = SrcRegister.Index + * file = Register.File + * [1] = Register.Index */ index.i[0] = index.i[1] = index.i[2] = - index.i[3] = reg->SrcRegister.Index; + index.i[3] = reg->Register.Index; /* There is an extra source register that indirectly subscripts * a register file. The direct index now becomes an offset @@ -1245,11 +1260,11 @@ fetch_source( * * file[ind[2].x+1], * where: - * ind = SrcRegisterInd.File - * [2] = SrcRegisterInd.Index - * .x = SrcRegisterInd.SwizzleX + * ind = Indirect.File + * [2] = Indirect.Index + * .x = Indirect.SwizzleX */ - if (reg->SrcRegister.Indirect) { + if (reg->Register.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; @@ -1259,13 +1274,13 @@ fetch_source( index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterInd.Index; + index2.i[3] = reg->Indirect.Index; /* get current value of address register[swizzle] */ - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); fetch_src_file_channel( mach, - reg->SrcRegisterInd.File, + reg->Indirect.File, swizzle, &index2, &indir_index ); @@ -1291,15 +1306,16 @@ fetch_source( * * file[1][3] == file[1*sizeof(file[1])+3], * where: - * [3] = SrcRegisterDim.Index + * [3] = Dimension.Index */ - if (reg->SrcRegister.Dimension) { + if (reg->Register.Dimension) { /* The size of the first-order array depends on the register file type. * We need to multiply the index to the first array to get an effective, * "flat" index that points to the beginning of the second-order array. */ - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_INPUT: + case TGSI_FILE_SYSTEM_VALUE: index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; @@ -1315,10 +1331,10 @@ fetch_source( assert( 0 ); } - index.i[0] += reg->SrcRegisterDim.Index; - index.i[1] += reg->SrcRegisterDim.Index; - index.i[2] += reg->SrcRegisterDim.Index; - index.i[3] += reg->SrcRegisterDim.Index; + index.i[0] += reg->Dimension.Index; + index.i[1] += reg->Dimension.Index; + index.i[2] += reg->Dimension.Index; + index.i[3] += reg->Dimension.Index; /* Again, the second subscript index can be addressed indirectly * identically to the first one. @@ -1327,11 +1343,11 @@ fetch_source( * * file[1][ind[4].y+3], * where: - * ind = SrcRegisterDimInd.File - * [4] = SrcRegisterDimInd.Index - * .y = SrcRegisterDimInd.SwizzleX + * ind = DimIndirect.File + * [4] = DimIndirect.Index + * .y = DimIndirect.SwizzleX */ - if (reg->SrcRegisterDim.Indirect) { + if (reg->Dimension.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; @@ -1340,12 +1356,12 @@ fetch_source( index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterDimInd.Index; + index2.i[3] = reg->DimIndirect.Index; - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); fetch_src_file_channel( mach, - reg->SrcRegisterDimInd.File, + reg->DimIndirect.File, swizzle, &index2, &indir_index ); @@ -1365,7 +1381,7 @@ fetch_source( } /* If by any chance there was a need for a 3D array of register - * files, we would have to check whether SrcRegisterDim is followed + * files, we would have to check whether Dimension is followed * by a dimension register and continue the saga. */ } @@ -1373,7 +1389,7 @@ fetch_source( swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); fetch_src_file_channel( mach, - reg->SrcRegister.File, + reg->Register.File, swizzle, &index, chan ); @@ -1395,10 +1411,6 @@ fetch_source( case TGSI_UTIL_SIGN_KEEP: break; } - - if (reg->SrcRegisterExtMod.Complement) { - micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); - } } static void @@ -1426,11 +1438,11 @@ store_dest( * * file[ind[2].x+1], * where: - * ind = DstRegisterInd.File - * [2] = DstRegisterInd.Index - * .x = DstRegisterInd.SwizzleX + * ind = Indirect.File + * [2] = Indirect.Index + * .x = Indirect.SwizzleX */ - if (reg->DstRegister.Indirect) { + if (reg->Register.Indirect) { union tgsi_exec_channel index; union tgsi_exec_channel indir_index; uint swizzle; @@ -1439,15 +1451,15 @@ store_dest( index.i[0] = index.i[1] = index.i[2] = - index.i[3] = reg->DstRegisterInd.Index; + index.i[3] = reg->Indirect.Index; /* get current value of address register[swizzle] */ - swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); /* fetch values from the address/indirection register */ fetch_src_file_channel( mach, - reg->DstRegisterInd.File, + reg->Indirect.File, swizzle, &index, &indir_index ); @@ -1456,32 +1468,48 @@ store_dest( offset = (int) indir_index.f[0]; } - switch (reg->DstRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_NULL: dst = &null; break; case TGSI_FILE_OUTPUT: index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->DstRegister.Index; + + reg->Register.Index; dst = &mach->Outputs[offset + index].xyzw[chan_index]; +#if 0 + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask); + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) + fprintf(stderr, "%f, ", chan->f[i]); + fprintf(stderr, ")\n"); + } +#endif break; case TGSI_FILE_TEMPORARY: - index = reg->DstRegister.Index; + index = reg->Register.Index; assert( index < TGSI_EXEC_NUM_TEMPS ); dst = &mach->Temps[offset + index].xyzw[chan_index]; break; case TGSI_FILE_ADDRESS: - index = reg->DstRegister.Index; + index = reg->Register.Index; dst = &mach->Addrs[index].xyzw[chan_index]; break; + case TGSI_FILE_LOOP: + assert(reg->Register.Index == 0); + assert(mach->LoopCounterStackTop > 0); + assert(chan_index == CHAN_X); + dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; + break; + case TGSI_FILE_PREDICATE: - index = reg->DstRegister.Index; + index = reg->Register.Index; assert(index < TGSI_EXEC_NUM_PREDS); - dst = &mach->Addrs[index].xyzw[chan_index]; + dst = &mach->Predicates[index].xyzw[chan_index]; break; default: @@ -1489,6 +1517,47 @@ store_dest( return; } + if (inst->Instruction.Predicate) { + uint swizzle; + union tgsi_exec_channel *pred; + + switch (chan_index) { + case CHAN_X: + swizzle = inst->Predicate.SwizzleX; + break; + case CHAN_Y: + swizzle = inst->Predicate.SwizzleY; + break; + case CHAN_Z: + swizzle = inst->Predicate.SwizzleZ; + break; + case CHAN_W: + swizzle = inst->Predicate.SwizzleW; + break; + default: + assert(0); + return; + } + + assert(inst->Predicate.Index == 0); + + pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; + + if (inst->Predicate.Negate) { + for (i = 0; i < QUAD_SIZE; i++) { + if (pred->u[i]) { + execmask &= ~(1 << i); + } + } + } else { + for (i = 0; i < QUAD_SIZE; i++) { + if (!pred->u[i]) { + execmask &= ~(1 << i); + } + } + } + } + switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: for (i = 0; i < QUAD_SIZE; i++) @@ -1526,10 +1595,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) /** @@ -1555,7 +1624,7 @@ exec_kil(struct tgsi_exec_machine *mach, /* unswizzle channel */ swizzle = tgsi_util_get_full_src_register_swizzle ( - &inst->FullSrcRegisters[0], + &inst->Src[0], chan_index); /* check if the component has not been already tested */ @@ -1587,6 +1656,35 @@ exec_kilp(struct tgsi_exec_machine *mach, mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } +static void +emit_vertex(struct tgsi_exec_machine *mach) +{ + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + } +} + +static void +emit_primitive(struct tgsi_exec_machine *mach) +{ + unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + ++(*prim_count); + debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); + mach->Primitives[*prim_count] = 0; + } +} /* * Fetch a four texture samples using STR texture coordinates. @@ -1622,14 +1720,14 @@ exec_tex(struct tgsi_exec_machine *mach, boolean biasLod, boolean projected) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; union tgsi_exec_channel r[4]; uint chan_index; float lodBias; /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ - switch (inst->InstructionExtTexture.Texture) { + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -1715,6 +1813,64 @@ exec_tex(struct tgsi_exec_machine *mach, } } +static void +exec_txd(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[3].Register.Index; + union tgsi_exec_channel r[4]; + uint chan_index; + + /* + * XXX: This is fake TXD -- the derivatives are not taken into account, yet. + */ + + switch (inst->Texture.Texture) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + + FETCH(&r[0], 0, CHAN_X); + + fetch_texel(mach->Samplers[unit], + &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], 0.0f, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], 0.0f, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert(0); + } + + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[chan_index], 0, chan_index); + } +} + /** * Evaluate a constant-valued coefficient at the position of the @@ -1784,53 +1940,59 @@ typedef void (* eval_coef_func)( unsigned chan ); static void -exec_declaration( - struct tgsi_exec_machine *mach, - const struct tgsi_full_declaration *decl ) +exec_declaration(struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl) { - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - eval_coef_func eval; + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + if (decl->Declaration.File == TGSI_FILE_INPUT || + decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + uint first, last, mask; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - eval = eval_constant_coef; - break; + if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { + assert(decl->Semantic.Index == 0); + assert(first == last); + assert(mask = TGSI_WRITEMASK_XYZW); - case TGSI_INTERPOLATE_LINEAR: - eval = eval_linear_coef; - break; + mach->Inputs[first] = mach->QuadPos; + } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { + uint i; - case TGSI_INTERPOLATE_PERSPECTIVE: - eval = eval_perspective_coef; - break; + assert(decl->Semantic.Index == 0); + assert(first == last); - default: - assert( 0 ); - return; - } - - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - eval( mach, i, j ); - } + for (i = 0; i < QUAD_SIZE; i++) { + mach->Inputs[first].xyzw[0].f[i] = mach->Face; + } + } else { + eval_coef_func eval; + uint i, j; + + switch (decl->Declaration.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + eval = eval_constant_coef; + break; + + case TGSI_INTERPOLATE_LINEAR: + eval = eval_linear_coef; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + eval = eval_perspective_coef; + break; + + default: + assert(0); + return; } - } - else { - unsigned i, j; - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - eval( mach, i, j ); + for (j = 0; j < NUM_CHANNELS; j++) { + if (mask & (1 << j)) { + for (i = first; i <= last; i++) { + eval(mach, i, j); } } } @@ -1847,6 +2009,7 @@ exec_instruction( { uint chan_index; union tgsi_exec_channel r[10]; + union tgsi_exec_channel d[8]; (*pc)++; @@ -1855,42 +2018,27 @@ exec_instruction( case TGSI_OPCODE_FLR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_flr( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_flr(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_MOV: - if (inst->Flags & SOA_DEPENDENCY_FLAG) { - /* Do all fetches into temp regs, then do all stores to avoid - * intermediate/accidental clobbering. This could be done all the - * time for MOV but for other instructions we'll need more temps... - */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[chan_index], 0, chan_index ); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); - } + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&d[chan_index], 0, chan_index); } - else { - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - STORE( &r[0], 0, chan_index ); - } + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { FETCH( &r[0], 0, CHAN_X ); if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Y ); + micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { @@ -1901,11 +2049,19 @@ exec_instruction( micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); micro_pow( &r[1], &r[1], &r[2] ); - micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Z ); + micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); } - } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); + } + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } @@ -1973,14 +2129,13 @@ exec_instruction( break; case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) - { + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - - micro_mul( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_mul(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -1988,8 +2143,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_add(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2045,25 +2202,29 @@ exec_instruction( break; case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { FETCH( &r[0], 0, CHAN_Y ); FETCH( &r[1], 1, CHAN_Y); - micro_mul( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, CHAN_Y ); + micro_mul(&d[CHAN_Y], &r[0], &r[1]); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); + FETCH(&d[CHAN_Z], 0, CHAN_Z); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); + FETCH(&d[CHAN_W], 1, CHAN_W); + } + + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&d[CHAN_W], 0, CHAN_W); } break; @@ -2073,9 +2234,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); /* XXX use micro_min()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2085,9 +2247,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); /* XXX use micro_max()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); - - STORE(&r[0], 0, chan_index ); + micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2096,8 +2259,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2106,8 +2271,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2118,8 +2285,10 @@ exec_instruction( FETCH( &r[1], 1, chan_index ); micro_mul( &r[0], &r[0], &r[1] ); FETCH( &r[1], 2, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_add(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2127,10 +2296,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - - micro_sub( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_sub(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2139,12 +2308,12 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - micro_sub( &r[1], &r[1], &r[2] ); micro_mul( &r[0], &r[0], &r[1] ); - micro_add( &r[0], &r[0], &r[2] ); - - STORE(&r[0], 0, chan_index); + micro_add(&d[chan_index], &r[0], &r[2]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2153,8 +2322,10 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2179,8 +2350,10 @@ exec_instruction( case TGSI_OPCODE_FRC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_frc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_frc(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2190,8 +2363,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); micro_max(&r[0], &r[0], &r[1]); FETCH(&r[1], 2, chan_index); - micro_min(&r[0], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); + micro_min(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2199,19 +2374,17 @@ exec_instruction( case TGSI_OPCODE_ARR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_rnd( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_rnd(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_EX2: FETCH(&r[0], 0, CHAN_X); -#if FAST_MATH micro_exp2( &r[0], &r[0] ); -#else - micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); -#endif FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); @@ -2247,11 +2420,7 @@ exec_instruction( FETCH(&r[4], 1, CHAN_Y); micro_mul( &r[5], &r[3], &r[4] ); - micro_sub( &r[2], &r[2], &r[5] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[2], 0, CHAN_X ); - } + micro_sub(&d[CHAN_X], &r[2], &r[5]); FETCH(&r[2], 1, CHAN_X); @@ -2260,20 +2429,21 @@ exec_instruction( FETCH(&r[5], 0, CHAN_X); micro_mul( &r[1], &r[1], &r[5] ); - micro_sub( &r[3], &r[3], &r[1] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - STORE( &r[3], 0, CHAN_Y ); - } + micro_sub(&d[CHAN_Y], &r[3], &r[1]); micro_mul( &r[5], &r[5], &r[4] ); micro_mul( &r[0], &r[0], &r[2] ); - micro_sub( &r[5], &r[5], &r[0] ); + micro_sub(&d[CHAN_Z], &r[5], &r[0]); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[5], 0, CHAN_Z ); + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&d[CHAN_X], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } @@ -2282,11 +2452,11 @@ exec_instruction( case TGSI_OPCODE_ABS: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); - - micro_abs( &r[0], &r[0] ); - - STORE(&r[0], 0, chan_index); + micro_abs(&d[chan_index], &r[0]); } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); + } break; case TGSI_OPCODE_RCC: @@ -2338,16 +2508,20 @@ exec_instruction( case TGSI_OPCODE_DDX: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ddx( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ddx(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_DDY: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ddy( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ddy(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2428,10 +2602,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], - &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2445,8 +2619,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2462,8 +2638,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2471,8 +2649,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); + micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2502,7 +2682,7 @@ exec_instruction( /* src[1] = d[strq]/dx */ /* src[2] = d[strq]/dy */ /* src[3] = sampler unit */ - assert (0); + exec_txd(mach, inst); break; case TGSI_OPCODE_TXL: @@ -2546,13 +2726,8 @@ exec_instruction( micro_mul(&r[3], &r[3], &r[1]); micro_add(&r[2], &r[2], &r[3]); FETCH(&r[3], 0, CHAN_X); - micro_add(&r[2], &r[2], &r[3]); - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(&r[2], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&r[2], 0, CHAN_Z); - } + micro_add(&d[CHAN_X], &r[2], &r[3]); + } if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || IS_CHANNEL_ENABLED(*inst, CHAN_W)) { @@ -2562,13 +2737,20 @@ exec_instruction( micro_mul(&r[3], &r[3], &r[1]); micro_add(&r[2], &r[2], &r[3]); FETCH(&r[3], 0, CHAN_Y); - micro_add(&r[2], &r[2], &r[3]); - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&r[2], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&r[2], 0, CHAN_W); - } + micro_add(&d[CHAN_Y], &r[2], &r[3]); + + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&d[CHAN_X], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_X], 0, CHAN_Z); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&d[CHAN_Y], 0, CHAN_W); } break; @@ -2611,7 +2793,7 @@ exec_instruction( mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* Finally, jump to the subroutine */ - *pc = inst->InstructionExtLabel.Label; + *pc = inst->Label.Label; } break; @@ -2653,8 +2835,10 @@ exec_instruction( /* TGSI_OPCODE_SGN */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_sgn( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_sgn(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2663,10 +2847,10 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - - micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); - - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2841,32 +3025,40 @@ exec_instruction( case TGSI_OPCODE_CEIL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ceil( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ceil(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_I2F: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_i2f( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_i2f(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_NOT: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_not( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_not(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_TRUNC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_trunc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_trunc(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2874,8 +3066,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_shl( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_shl(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2883,8 +3077,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_ishr( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_ishr(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2892,8 +3088,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_and( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_and(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2901,8 +3099,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_or( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_or(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2914,8 +3114,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_xor( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_xor(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2932,13 +3134,11 @@ exec_instruction( break; case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + emit_vertex(mach); break; case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + emit_primitive(mach); break; case TGSI_OPCODE_BGNFOR: @@ -2946,8 +3146,23 @@ exec_instruction( for (chan_index = 0; chan_index < 3; chan_index++) { FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); } - STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); ++mach->LoopCounterStackTop; + STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); + /* update LoopMask */ + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { + mach->LoopMask &= ~0x1; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { + mach->LoopMask &= ~0x2; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { + mach->LoopMask &= ~0x4; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { + mach->LoopMask &= ~0x8; + } + /* TODO: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ @@ -2961,28 +3176,28 @@ exec_instruction( case TGSI_OPCODE_ENDFOR: assert(mach->LoopCounterStackTop > 0); - micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); /* update LoopMask */ - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { mach->LoopMask &= ~0x1; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { mach->LoopMask &= ~0x2; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { mach->LoopMask &= ~0x4; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { mach->LoopMask &= ~0x8; } - micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); + micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); assert(mach->LoopLabelStackTop > 0); inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; - STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; @@ -3049,17 +3264,59 @@ exec_instruction( break; case TGSI_OPCODE_ENDSUB: - /* no-op */ + /* + * XXX: This really should be a no-op. We should never reach this opcode. + */ + + assert(mach->CallStackTop > 0); + mach->CallStackTop--; + + mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; + mach->CondMask = mach->CondStack[mach->CondStackTop]; + + mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; + mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; + + mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; + mach->ContMask = mach->ContStack[mach->ContStackTop]; + + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; + + UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_NOP: break; + case TGSI_OPCODE_BREAKC: + FETCH(&r[0], 0, CHAN_X); + /* update CondMask */ + if (r[0].u[0] && (mach->ExecMask & 0x1)) { + mach->LoopMask &= ~0x1; + } + if (r[0].u[1] && (mach->ExecMask & 0x2)) { + mach->LoopMask &= ~0x2; + } + if (r[0].u[2] && (mach->ExecMask & 0x4)) { + mach->LoopMask &= ~0x4; + } + if (r[0].u[3] && (mach->ExecMask & 0x8)) { + mach->LoopMask &= ~0x8; + } + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + default: assert( 0 ); } } +#define DEBUG_EXECUTION 0 + /** * Run TGSI interpreter. @@ -3103,10 +3360,67 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) exec_declaration( mach, mach->Declarations+i ); } - /* execute instructions, until pc is set to -1 */ - while (pc != -1) { - assert(pc < (int) mach->NumInstructions); - exec_instruction( mach, mach->Instructions + pc, &pc ); + { +#if DEBUG_EXECUTION + struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; + struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; + uint inst = 1; + + memcpy(temps, mach->Temps, sizeof(temps)); + memcpy(outputs, mach->Outputs, sizeof(outputs)); +#endif + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + +#if DEBUG_EXECUTION + uint i; + + tgsi_dump_instruction(&mach->Instructions[pc], inst++); +#endif + + assert(pc < (int) mach->NumInstructions); + exec_instruction(mach, mach->Instructions + pc, &pc); + +#if DEBUG_EXECUTION + for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { + if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { + uint j; + + memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); + debug_printf("TEMP[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); + } + debug_printf("(%6f, %6f, %6f, %6f)\n", + temps[i].xyzw[0].f[j], + temps[i].xyzw[1].f[j], + temps[i].xyzw[2].f[j], + temps[i].xyzw[3].f[j]); + } + } + } + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { + uint j; + + memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); + debug_printf("OUT[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); + } + debug_printf("{%6f, %6f, %6f, %6f}\n", + outputs[i].xyzw[0].f[j], + outputs[i].xyzw[1].f[j], + outputs[i].xyzw[2].f[j], + outputs[i].xyzw[3].f[j]); + } + } + } +#endif + } } #if 0 @@ -3120,5 +3434,10 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) } #endif + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; } |