diff options
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt | 32 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_build.c | 166 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_build.h | 32 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 705 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.h | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_parse.c | 8 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_parse.h | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sanity.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.c | 53 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.h | 3 |
10 files changed, 647 insertions, 360 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index eb492076b7..080fd4c731 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -1129,3 +1129,35 @@ TGSI Instruction Specification target Label of target instruction. + +3 Other tokens +=============== + + +3.1 Declaration Semantic +------------------------- + + + Follows Declaration token if Semantic bit is set. + + Since its purpose is to link a shader with other stages of the pipeline, + it is valid to follow only those Declaration tokens that declare a register + either in INPUT or OUTPUT file. + + SemanticName field contains the semantic name of the register being declared. + There is no default value. + + SemanticIndex is an optional subscript that can be used to distinguish + different register declarations with the same semantic name. The default value + is 0. + + The meanings of the individual semantic names are explained in the following + sections. + + +3.1.1 FACE + + Valid only in a fragment shader INPUT declaration. + + FACE.x is negative when the primitive is back facing. FACE.x is positive + when the primitive is front facing. diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 4fa10e2f7e..9791e58db3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -419,6 +419,7 @@ tgsi_default_instruction( void ) instruction.NrTokens = 1; instruction.Opcode = TGSI_OPCODE_MOV; instruction.Saturate = TGSI_SAT_NONE; + instruction.Predicate = 0; instruction.NumDstRegs = 1; instruction.NumSrcRegs = 1; instruction.Padding = 0; @@ -428,12 +429,12 @@ tgsi_default_instruction( void ) } struct tgsi_instruction -tgsi_build_instruction( - unsigned opcode, - unsigned saturate, - unsigned num_dst_regs, - unsigned num_src_regs, - struct tgsi_header *header ) +tgsi_build_instruction(unsigned opcode, + unsigned saturate, + unsigned predicate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header) { struct tgsi_instruction instruction; @@ -445,6 +446,7 @@ tgsi_build_instruction( instruction = tgsi_default_instruction(); instruction.Opcode = opcode; instruction.Saturate = saturate; + instruction.Predicate = predicate; instruction.NumDstRegs = num_dst_regs; instruction.NumSrcRegs = num_src_regs; @@ -472,9 +474,9 @@ tgsi_default_full_instruction( void ) unsigned i; full_instruction.Instruction = tgsi_default_instruction(); + full_instruction.InstructionPredicate = tgsi_default_instruction_predicate(); full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label(); full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture(); - full_instruction.InstructionExtPredicate = tgsi_default_instruction_ext_predicate(); for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); } @@ -504,14 +506,34 @@ tgsi_build_full_instruction( instruction = (struct tgsi_instruction *) &tokens[size]; size++; - *instruction = tgsi_build_instruction( - full_inst->Instruction.Opcode, - full_inst->Instruction.Saturate, - full_inst->Instruction.NumDstRegs, - full_inst->Instruction.NumSrcRegs, - header ); + *instruction = tgsi_build_instruction(full_inst->Instruction.Opcode, + full_inst->Instruction.Saturate, + full_inst->Instruction.Predicate, + full_inst->Instruction.NumDstRegs, + full_inst->Instruction.NumSrcRegs, + header); prev_token = (struct tgsi_token *) instruction; + if (full_inst->Instruction.Predicate) { + struct tgsi_instruction_predicate *instruction_predicate; + + if (maxsize <= size) { + return 0; + } + instruction_predicate = (struct tgsi_instruction_predicate *)&tokens[size]; + size++; + + *instruction_predicate = + tgsi_build_instruction_predicate(full_inst->InstructionPredicate.Index, + full_inst->InstructionPredicate.Negate, + full_inst->InstructionPredicate.SwizzleX, + full_inst->InstructionPredicate.SwizzleY, + full_inst->InstructionPredicate.SwizzleZ, + full_inst->InstructionPredicate.SwizzleW, + instruction, + header); + } + if( tgsi_compare_instruction_ext_label( full_inst->InstructionExtLabel, tgsi_default_instruction_ext_label() ) ) { @@ -550,29 +572,6 @@ tgsi_build_full_instruction( prev_token = (struct tgsi_token *) instruction_ext_texture; } - if (tgsi_compare_instruction_ext_predicate(full_inst->InstructionExtPredicate, - tgsi_default_instruction_ext_predicate())) { - struct tgsi_instruction_ext_predicate *instruction_ext_predicate; - - if (maxsize <= size) { - return 0; - } - instruction_ext_predicate = (struct tgsi_instruction_ext_predicate *)&tokens[size]; - size++; - - *instruction_ext_predicate = - tgsi_build_instruction_ext_predicate(full_inst->InstructionExtPredicate.SrcIndex, - full_inst->InstructionExtPredicate.Negate, - full_inst->InstructionExtPredicate.SwizzleX, - full_inst->InstructionExtPredicate.SwizzleY, - full_inst->InstructionExtPredicate.SwizzleZ, - full_inst->InstructionExtPredicate.SwizzleW, - prev_token, - instruction, - header); - prev_token = (struct tgsi_token *)instruction_ext_predicate; - } - for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i]; struct tgsi_dst_register *dst_register; @@ -746,6 +745,47 @@ tgsi_build_full_instruction( return size; } +struct tgsi_instruction_predicate +tgsi_default_instruction_predicate(void) +{ + struct tgsi_instruction_predicate instruction_predicate; + + instruction_predicate.SwizzleX = TGSI_SWIZZLE_X; + instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y; + instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z; + instruction_predicate.SwizzleW = TGSI_SWIZZLE_W; + instruction_predicate.Negate = 0; + instruction_predicate.Index = 0; + instruction_predicate.Padding = 0; + + return instruction_predicate; +} + +struct tgsi_instruction_predicate +tgsi_build_instruction_predicate(int index, + unsigned negate, + unsigned swizzleX, + unsigned swizzleY, + unsigned swizzleZ, + unsigned swizzleW, + struct tgsi_instruction *instruction, + struct tgsi_header *header) +{ + struct tgsi_instruction_predicate instruction_predicate; + + instruction_predicate = tgsi_default_instruction_predicate(); + instruction_predicate.SwizzleX = swizzleX; + instruction_predicate.SwizzleY = swizzleY; + instruction_predicate.SwizzleZ = swizzleZ; + instruction_predicate.SwizzleW = swizzleW; + instruction_predicate.Negate = negate; + instruction_predicate.Index = index; + + instruction_grow(instruction, header); + + return instruction_predicate; +} + /** test for inequality of 32-bit values pointed to by a and b */ static INLINE boolean compare32(const void *a, const void *b) @@ -835,60 +875,6 @@ tgsi_build_instruction_ext_texture( return instruction_ext_texture; } -struct tgsi_instruction_ext_predicate -tgsi_default_instruction_ext_predicate(void) -{ - struct tgsi_instruction_ext_predicate instruction_ext_predicate; - - instruction_ext_predicate.Type = TGSI_INSTRUCTION_EXT_TYPE_PREDICATE; - instruction_ext_predicate.SwizzleX = TGSI_SWIZZLE_X; - instruction_ext_predicate.SwizzleY = TGSI_SWIZZLE_Y; - instruction_ext_predicate.SwizzleZ = TGSI_SWIZZLE_Z; - instruction_ext_predicate.SwizzleW = TGSI_SWIZZLE_W; - instruction_ext_predicate.Negate = 0; - instruction_ext_predicate.SrcIndex = 0; - instruction_ext_predicate.Padding = 0; - instruction_ext_predicate.Extended = 0; - - return instruction_ext_predicate; -} - -unsigned -tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a, - struct tgsi_instruction_ext_predicate b) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_instruction_ext_predicate -tgsi_build_instruction_ext_predicate(unsigned index, - unsigned negate, - unsigned swizzleX, - unsigned swizzleY, - unsigned swizzleZ, - unsigned swizzleW, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header) -{ - struct tgsi_instruction_ext_predicate instruction_ext_predicate; - - instruction_ext_predicate = tgsi_default_instruction_ext_predicate(); - instruction_ext_predicate.SwizzleX = swizzleX; - instruction_ext_predicate.SwizzleY = swizzleY; - instruction_ext_predicate.SwizzleZ = swizzleZ; - instruction_ext_predicate.SwizzleW = swizzleW; - instruction_ext_predicate.Negate = negate; - instruction_ext_predicate.SrcIndex = index; - - prev_token->Extended = 1; - instruction_grow(instruction, header); - - return instruction_ext_predicate; -} - struct tgsi_src_register tgsi_default_src_register( void ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 669712eb8f..0fe5f229d3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -143,6 +143,7 @@ struct tgsi_instruction tgsi_build_instruction( unsigned opcode, unsigned saturate, + unsigned predicate, unsigned num_dst_regs, unsigned num_src_regs, struct tgsi_header *header ); @@ -157,6 +158,19 @@ tgsi_build_full_instruction( struct tgsi_header *header, unsigned maxsize ); +struct tgsi_instruction_predicate +tgsi_default_instruction_predicate(void); + +struct tgsi_instruction_predicate +tgsi_build_instruction_predicate(int index, + unsigned negate, + unsigned swizzleX, + unsigned swizzleY, + unsigned swizzleZ, + unsigned swizzleW, + struct tgsi_instruction *instruction, + struct tgsi_header *header); + struct tgsi_instruction_ext_label tgsi_default_instruction_ext_label( void ); @@ -187,24 +201,6 @@ tgsi_build_instruction_ext_texture( struct tgsi_instruction *instruction, struct tgsi_header *header ); -struct tgsi_instruction_ext_predicate -tgsi_default_instruction_ext_predicate(void); - -unsigned -tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a, - struct tgsi_instruction_ext_predicate b); - -struct tgsi_instruction_ext_predicate -tgsi_build_instruction_ext_predicate(unsigned index, - unsigned negate, - unsigned swizzleX, - unsigned swizzleY, - unsigned swizzleZ, - unsigned swizzleW, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header); - struct tgsi_src_register tgsi_default_src_register( void ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index b7569e74d4..e22a1643c8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -62,9 +62,6 @@ #define FAST_MATH 1 -/** for tgsi_full_instruction::Flags */ -#define SOA_DEPENDENCY_FLAG 0x1 - #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 #define TILE_BOTTOM_LEFT 2 @@ -332,20 +329,6 @@ tgsi_exec_machine_bind_shader( maxInstructions += 10; } - if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { - uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG; - /* XXX we only handle SOA dependencies properly for MOV/SWZ - * at this time! - */ - if (opcode != TGSI_OPCODE_MOV) { - debug_printf("Warning: SOA dependency in instruction" - " is not handled:\n"); - tgsi_dump_instruction(&parse.FullToken.FullInstruction, - numInstructions); - } - } - memcpy(instructions + numInstructions, &parse.FullToken.FullInstruction, sizeof(instructions[0])); @@ -386,6 +369,7 @@ tgsi_exec_machine_create( void ) memset(mach, 0, sizeof(*mach)); mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; + mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; /* Setup constants. */ for( i = 0; i < 4; i++ ) { @@ -517,7 +501,7 @@ micro_ddy( dst->f[0] = dst->f[1] = dst->f[2] = - dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; + dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; } static void @@ -595,6 +579,24 @@ micro_exp2( dst->f[2] = util_fast_exp2( src->f[2] ); dst->f[3] = util_fast_exp2( src->f[3] ); #else + +#if DEBUG + /* Inf is okay for this instruction, so clamp it to silence assertions. */ + uint i; + union tgsi_exec_channel clamped; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 127.99999f) { + clamped.f[i] = 127.99999f; + } else if (src->f[i] < -126.99999f) { + clamped.f[i] = -126.99999f; + } else { + clamped.f[i] = src->f[i]; + } + } + src = &clamped; +#endif + dst->f[0] = powf( 2.0f, src->f[0] ); dst->f[1] = powf( 2.0f, src->f[1] ); dst->f[2] = powf( 2.0f, src->f[2] ); @@ -1193,10 +1195,10 @@ fetch_src_file_channel( assert(index->i[1] < TGSI_EXEC_NUM_PREDS); assert(index->i[2] < TGSI_EXEC_NUM_PREDS); assert(index->i[3] < TGSI_EXEC_NUM_PREDS); - chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3]; + chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; + chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; + chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; + chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; break; case TGSI_FILE_OUTPUT: @@ -1478,10 +1480,17 @@ store_dest( dst = &mach->Addrs[index].xyzw[chan_index]; break; + case TGSI_FILE_LOOP: + assert(reg->DstRegister.Index == 0); + assert(mach->LoopCounterStackTop > 0); + assert(chan_index == CHAN_X); + dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; + break; + case TGSI_FILE_PREDICATE: index = reg->DstRegister.Index; assert(index < TGSI_EXEC_NUM_PREDS); - dst = &mach->Addrs[index].xyzw[chan_index]; + dst = &mach->Predicates[index].xyzw[chan_index]; break; default: @@ -1489,6 +1498,47 @@ store_dest( return; } + if (inst->Instruction.Predicate) { + uint swizzle; + union tgsi_exec_channel *pred; + + switch (chan_index) { + case CHAN_X: + swizzle = inst->InstructionPredicate.SwizzleX; + break; + case CHAN_Y: + swizzle = inst->InstructionPredicate.SwizzleY; + break; + case CHAN_Z: + swizzle = inst->InstructionPredicate.SwizzleZ; + break; + case CHAN_W: + swizzle = inst->InstructionPredicate.SwizzleW; + break; + default: + assert(0); + return; + } + + assert(inst->InstructionPredicate.Index == 0); + + pred = &mach->Predicates[inst->InstructionPredicate.Index].xyzw[swizzle]; + + if (inst->InstructionPredicate.Negate) { + for (i = 0; i < QUAD_SIZE; i++) { + if (pred->u[i]) { + execmask &= ~(1 << i); + } + } + } else { + for (i = 0; i < QUAD_SIZE; i++) { + if (!pred->u[i]) { + execmask &= ~(1 << i); + } + } + } + } + switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: for (i = 0; i < QUAD_SIZE; i++) @@ -1715,6 +1765,64 @@ exec_tex(struct tgsi_exec_machine *mach, } } +static void +exec_txd(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index; + union tgsi_exec_channel r[4]; + uint chan_index; + + /* + * XXX: This is fake TXD -- the derivatives are not taken into account, yet. + */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + + FETCH(&r[0], 0, CHAN_X); + + fetch_texel(mach->Samplers[unit], + &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], 0.0f, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], 0.0f, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert(0); + } + + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[chan_index], 0, chan_index); + } +} + /** * Evaluate a constant-valued coefficient at the position of the @@ -1784,53 +1892,58 @@ typedef void (* eval_coef_func)( unsigned chan ); static void -exec_declaration( - struct tgsi_exec_machine *mach, - const struct tgsi_full_declaration *decl ) +exec_declaration(struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl) { - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - eval_coef_func eval; + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + if (decl->Declaration.File == TGSI_FILE_INPUT) { + uint first, last, mask; first = decl->DeclarationRange.First; last = decl->DeclarationRange.Last; mask = decl->Declaration.UsageMask; - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - eval = eval_constant_coef; - break; - - case TGSI_INTERPOLATE_LINEAR: - eval = eval_linear_coef; - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - eval = eval_perspective_coef; - break; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + assert(decl->Semantic.SemanticIndex == 0); + assert(first == last); + assert(mask = TGSI_WRITEMASK_XYZW); - default: - assert( 0 ); - return; - } + mach->Inputs[first] = mach->QuadPos; + } else if (decl->Semantic.SemanticName == TGSI_SEMANTIC_FACE) { + uint i; - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; + assert(decl->Semantic.SemanticIndex == 0); + assert(first == last); - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - eval( mach, i, j ); - } + for (i = 0; i < QUAD_SIZE; i++) { + mach->Inputs[first].xyzw[0].f[i] = mach->Face; + } + } else { + eval_coef_func eval; + uint i, j; + + switch (decl->Declaration.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + eval = eval_constant_coef; + break; + + case TGSI_INTERPOLATE_LINEAR: + eval = eval_linear_coef; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + eval = eval_perspective_coef; + break; + + default: + assert(0); + return; } - } - else { - unsigned i, j; - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - eval( mach, i, j ); + for (j = 0; j < NUM_CHANNELS; j++) { + if (mask & (1 << j)) { + for (i = first; i <= last; i++) { + eval(mach, i, j); } } } @@ -1847,6 +1960,7 @@ exec_instruction( { uint chan_index; union tgsi_exec_channel r[10]; + union tgsi_exec_channel d[8]; (*pc)++; @@ -1855,42 +1969,27 @@ exec_instruction( case TGSI_OPCODE_FLR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_flr( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_flr(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_MOV: - if (inst->Flags & SOA_DEPENDENCY_FLAG) { - /* Do all fetches into temp regs, then do all stores to avoid - * intermediate/accidental clobbering. This could be done all the - * time for MOV but for other instructions we'll need more temps... - */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[chan_index], 0, chan_index ); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); - } + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&d[chan_index], 0, chan_index); } - else { - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - STORE( &r[0], 0, chan_index ); - } + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { FETCH( &r[0], 0, CHAN_X ); if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Y ); + micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { @@ -1901,11 +2000,19 @@ exec_instruction( micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); micro_pow( &r[1], &r[1], &r[2] ); - micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Z ); + micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); } - } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); + } + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } @@ -1973,14 +2080,13 @@ exec_instruction( break; case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) - { + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - - micro_mul( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_mul(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -1988,8 +2094,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_add(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2045,25 +2153,29 @@ exec_instruction( break; case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { FETCH( &r[0], 0, CHAN_Y ); FETCH( &r[1], 1, CHAN_Y); - micro_mul( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, CHAN_Y ); + micro_mul(&d[CHAN_Y], &r[0], &r[1]); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); + FETCH(&d[CHAN_Z], 0, CHAN_Z); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); + FETCH(&d[CHAN_W], 1, CHAN_W); + } + + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&d[CHAN_W], 0, CHAN_W); } break; @@ -2073,9 +2185,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); /* XXX use micro_min()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2085,9 +2198,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); /* XXX use micro_max()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); - - STORE(&r[0], 0, chan_index ); + micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2096,8 +2210,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2106,8 +2222,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2118,8 +2236,10 @@ exec_instruction( FETCH( &r[1], 1, chan_index ); micro_mul( &r[0], &r[0], &r[1] ); FETCH( &r[1], 2, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_add(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2127,10 +2247,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - - micro_sub( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_sub(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2139,12 +2259,12 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - micro_sub( &r[1], &r[1], &r[2] ); micro_mul( &r[0], &r[0], &r[1] ); - micro_add( &r[0], &r[0], &r[2] ); - - STORE(&r[0], 0, chan_index); + micro_add(&d[chan_index], &r[0], &r[2]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2153,8 +2273,10 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2179,8 +2301,10 @@ exec_instruction( case TGSI_OPCODE_FRC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_frc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_frc(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2190,8 +2314,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); micro_max(&r[0], &r[0], &r[1]); FETCH(&r[1], 2, chan_index); - micro_min(&r[0], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); + micro_min(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2199,19 +2325,17 @@ exec_instruction( case TGSI_OPCODE_ARR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_rnd( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_rnd(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_EX2: FETCH(&r[0], 0, CHAN_X); -#if FAST_MATH micro_exp2( &r[0], &r[0] ); -#else - micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); -#endif FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); @@ -2247,11 +2371,7 @@ exec_instruction( FETCH(&r[4], 1, CHAN_Y); micro_mul( &r[5], &r[3], &r[4] ); - micro_sub( &r[2], &r[2], &r[5] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[2], 0, CHAN_X ); - } + micro_sub(&d[CHAN_X], &r[2], &r[5]); FETCH(&r[2], 1, CHAN_X); @@ -2260,20 +2380,21 @@ exec_instruction( FETCH(&r[5], 0, CHAN_X); micro_mul( &r[1], &r[1], &r[5] ); - micro_sub( &r[3], &r[3], &r[1] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - STORE( &r[3], 0, CHAN_Y ); - } + micro_sub(&d[CHAN_Y], &r[3], &r[1]); micro_mul( &r[5], &r[5], &r[4] ); micro_mul( &r[0], &r[0], &r[2] ); - micro_sub( &r[5], &r[5], &r[0] ); + micro_sub(&d[CHAN_Z], &r[5], &r[0]); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[5], 0, CHAN_Z ); + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&d[CHAN_X], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } @@ -2282,11 +2403,11 @@ exec_instruction( case TGSI_OPCODE_ABS: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); - - micro_abs( &r[0], &r[0] ); - - STORE(&r[0], 0, chan_index); + micro_abs(&d[chan_index], &r[0]); } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); + } break; case TGSI_OPCODE_RCC: @@ -2338,16 +2459,20 @@ exec_instruction( case TGSI_OPCODE_DDX: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ddx( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ddx(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_DDY: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ddy( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ddy(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2428,10 +2553,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], - &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2445,8 +2570,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2462,8 +2589,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2471,8 +2600,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); + micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2502,7 +2633,7 @@ exec_instruction( /* src[1] = d[strq]/dx */ /* src[2] = d[strq]/dy */ /* src[3] = sampler unit */ - assert (0); + exec_txd(mach, inst); break; case TGSI_OPCODE_TXL: @@ -2546,13 +2677,8 @@ exec_instruction( micro_mul(&r[3], &r[3], &r[1]); micro_add(&r[2], &r[2], &r[3]); FETCH(&r[3], 0, CHAN_X); - micro_add(&r[2], &r[2], &r[3]); - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(&r[2], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&r[2], 0, CHAN_Z); - } + micro_add(&d[CHAN_X], &r[2], &r[3]); + } if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || IS_CHANNEL_ENABLED(*inst, CHAN_W)) { @@ -2562,13 +2688,20 @@ exec_instruction( micro_mul(&r[3], &r[3], &r[1]); micro_add(&r[2], &r[2], &r[3]); FETCH(&r[3], 0, CHAN_Y); - micro_add(&r[2], &r[2], &r[3]); - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&r[2], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&r[2], 0, CHAN_W); - } + micro_add(&d[CHAN_Y], &r[2], &r[3]); + + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&d[CHAN_X], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_X], 0, CHAN_Z); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&d[CHAN_Y], 0, CHAN_W); } break; @@ -2653,8 +2786,10 @@ exec_instruction( /* TGSI_OPCODE_SGN */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_sgn( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_sgn(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2663,10 +2798,10 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - - micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); - - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2841,32 +2976,40 @@ exec_instruction( case TGSI_OPCODE_CEIL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ceil( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ceil(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_I2F: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_i2f( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_i2f(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_NOT: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_not( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_not(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_TRUNC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_trunc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_trunc(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2874,8 +3017,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_shl( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_shl(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2883,8 +3028,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_ishr( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_ishr(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2892,8 +3039,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_and( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_and(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2901,8 +3050,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_or( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_or(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2914,8 +3065,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_xor( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_xor(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2946,8 +3099,23 @@ exec_instruction( for (chan_index = 0; chan_index < 3; chan_index++) { FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); } - STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); ++mach->LoopCounterStackTop; + STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); + /* update LoopMask */ + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { + mach->LoopMask &= ~0x1; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { + mach->LoopMask &= ~0x2; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { + mach->LoopMask &= ~0x4; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { + mach->LoopMask &= ~0x8; + } + /* TODO: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ @@ -2961,28 +3129,28 @@ exec_instruction( case TGSI_OPCODE_ENDFOR: assert(mach->LoopCounterStackTop > 0); - micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); /* update LoopMask */ - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { mach->LoopMask &= ~0x1; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { mach->LoopMask &= ~0x2; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { mach->LoopMask &= ~0x4; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { mach->LoopMask &= ~0x8; } - micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); + micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); assert(mach->LoopLabelStackTop > 0); inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; - STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; @@ -3049,7 +3217,28 @@ exec_instruction( break; case TGSI_OPCODE_ENDSUB: - /* no-op */ + /* + * XXX: This really should be a no-op. We should never reach this opcode. + */ + + assert(mach->CallStackTop > 0); + mach->CallStackTop--; + + mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; + mach->CondMask = mach->CondStack[mach->CondStackTop]; + + mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; + mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; + + mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; + mach->ContMask = mach->ContStack[mach->ContStackTop]; + + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; + + UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_NOP: @@ -3060,6 +3249,8 @@ exec_instruction( } } +#define DEBUG_EXECUTION 0 + /** * Run TGSI interpreter. @@ -3103,10 +3294,67 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) exec_declaration( mach, mach->Declarations+i ); } - /* execute instructions, until pc is set to -1 */ - while (pc != -1) { - assert(pc < (int) mach->NumInstructions); - exec_instruction( mach, mach->Instructions + pc, &pc ); + { +#if DEBUG_EXECUTION + struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; + struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; + uint inst = 1; + + memcpy(temps, mach->Temps, sizeof(temps)); + memcpy(outputs, mach->Outputs, sizeof(outputs)); +#endif + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + +#if DEBUG_EXECUTION + uint i; + + tgsi_dump_instruction(&mach->Instructions[pc], inst++); +#endif + + assert(pc < (int) mach->NumInstructions); + exec_instruction(mach, mach->Instructions + pc, &pc); + +#if DEBUG_EXECUTION + for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { + if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { + uint j; + + memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); + debug_printf("TEMP[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); + } + debug_printf("(%6f, %6f, %6f, %6f)\n", + temps[i].xyzw[0].f[j], + temps[i].xyzw[1].f[j], + temps[i].xyzw[2].f[j], + temps[i].xyzw[3].f[j]); + } + } + } + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { + uint j; + + memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); + debug_printf("OUT[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); + } + debug_printf("{%6f, %6f, %6f, %6f}\n", + outputs[i].xyzw[0].f[j], + outputs[i].xyzw[1].f[j], + outputs[i].xyzw[2].f[j], + outputs[i].xyzw[3].f[j]); + } + } + } +#endif + } } #if 0 @@ -3120,5 +3368,10 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) } #endif + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 471f591dd6..fd94c1bc44 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -218,6 +218,7 @@ struct tgsi_exec_machine struct tgsi_exec_vector Outputs[PIPE_MAX_ATTRIBS]; struct tgsi_exec_vector *Addrs; + struct tgsi_exec_vector *Predicates; struct tgsi_sampler **Samplers; @@ -232,6 +233,7 @@ struct tgsi_exec_machine /* FRAGMENT processor only. */ const struct tgsi_interp_coef *InterpCoefs; struct tgsi_exec_vector QuadPos; + float Face; /**< +1 if front facing, -1 if back facing */ /* Conditional execution masks */ uint CondMask; /**< For IF/ELSE/ENDIF */ @@ -252,7 +254,7 @@ struct tgsi_exec_machine uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopLabelStackTop; - /** Loop counter stack (x = count, y = current, z = step) */ + /** Loop counter stack (x = index, y = counter, z = step) */ struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopCounterStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 83f9df1183..9ca2993452 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -175,6 +175,10 @@ tgsi_parse_token( copy_token(&inst->Instruction, &token); extended = inst->Instruction.Extended; + if (inst->Instruction.Predicate) { + next_token(ctx, &inst->InstructionPredicate); + } + while( extended ) { struct tgsi_src_register_ext token; @@ -189,10 +193,6 @@ tgsi_parse_token( copy_token(&inst->InstructionExtTexture, &token); break; - case TGSI_INSTRUCTION_EXT_TYPE_PREDICATE: - copy_token(&inst->InstructionExtPredicate, &token); - break; - default: assert( 0 ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 76f1676d85..cb4772ade8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -80,9 +80,9 @@ struct tgsi_full_immediate struct tgsi_full_instruction { struct tgsi_instruction Instruction; + struct tgsi_instruction_predicate InstructionPredicate; struct tgsi_instruction_ext_label InstructionExtLabel; struct tgsi_instruction_ext_texture InstructionExtTexture; - struct tgsi_instruction_ext_predicate InstructionExtPredicate; struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS]; struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS]; uint Flags; /**< user-defined usage */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 36e27ea52f..4d8145032b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -34,7 +34,7 @@ typedef uint reg_flag; #define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) -#define MAX_REGISTERS 256 +#define MAX_REGISTERS 1024 #define MAX_REG_FLAGS ((MAX_REGISTERS + BITS_IN_REG_FLAG - 1) / BITS_IN_REG_FLAG) struct sanity_check_ctx diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 3f752e9352..bd963267cc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -47,9 +47,9 @@ union tgsi_any_token { struct tgsi_immediate imm; union tgsi_immediate_data imm_data; struct tgsi_instruction insn; + struct tgsi_instruction_predicate insn_predicate; struct tgsi_instruction_ext_label insn_ext_label; struct tgsi_instruction_ext_texture insn_ext_texture; - struct tgsi_instruction_ext_predicate insn_ext_predicate; struct tgsi_src_register src; struct tgsi_src_register_ext_mod src_ext_mod; struct tgsi_dimension dim; @@ -72,6 +72,7 @@ struct ureg_tokens { #define UREG_MAX_IMMEDIATE 32 #define UREG_MAX_TEMP 256 #define UREG_MAX_ADDR 2 +#define UREG_MAX_LOOP 1 #define UREG_MAX_PRED 1 #define DOMAIN_DECL 0 @@ -117,6 +118,7 @@ struct ureg_program unsigned nr_addrs; unsigned nr_preds; + unsigned nr_loops; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -350,6 +352,7 @@ struct ureg_src ureg_DECL_constant(struct ureg_program *ureg, i = ureg->nr_constant_ranges++; ureg->constant_range[i].first = index; ureg->constant_range[i].last = index; + goto out; } /* Collapse all ranges down to one: @@ -417,6 +420,19 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); } +/* Allocate a new loop register. + */ +struct ureg_dst +ureg_DECL_loop(struct ureg_program *ureg) +{ + if (ureg->nr_loops < UREG_MAX_LOOP) { + return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++); + } + + assert(0); + return ureg_dst_register(TGSI_FILE_LOOP, 0); +} + /* Allocate a new predicate register. */ struct ureg_dst @@ -661,35 +677,27 @@ ureg_emit_insn(struct ureg_program *ureg, validate( opcode, num_dst, num_src ); out = get_tokens( ureg, DOMAIN_INSN, count ); - out[0].value = 0; - out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION; - out[0].insn.NrTokens = 0; + out[0].insn = tgsi_default_instruction(); out[0].insn.Opcode = opcode; out[0].insn.Saturate = saturate; out[0].insn.NumDstRegs = num_dst; out[0].insn.NumSrcRegs = num_src; - out[0].insn.Padding = 0; result.insn_token = ureg->domain[DOMAIN_INSN].count - count; + result.extended_token = result.insn_token; if (predicate) { - out[0].insn.Extended = 1; - out[1].insn_ext_predicate = tgsi_default_instruction_ext_predicate(); - out[1].insn_ext_predicate.Negate = pred_negate; - out[1].insn_ext_predicate.SwizzleX = pred_swizzle_x; - out[1].insn_ext_predicate.SwizzleY = pred_swizzle_y; - out[1].insn_ext_predicate.SwizzleZ = pred_swizzle_z; - out[1].insn_ext_predicate.SwizzleW = pred_swizzle_w; - - result.extended_token = result.insn_token + 1; - } else { - out[0].insn.Extended = 0; - - result.extended_token = result.insn_token; + out[0].insn.Predicate = 1; + out[1].insn_predicate = tgsi_default_instruction_predicate(); + out[1].insn_predicate.Negate = pred_negate; + out[1].insn_predicate.SwizzleX = pred_swizzle_x; + out[1].insn_predicate.SwizzleY = pred_swizzle_y; + out[1].insn_predicate.SwizzleZ = pred_swizzle_z; + out[1].insn_predicate.SwizzleW = pred_swizzle_w; } ureg->nr_instructions++; - + return result; } @@ -1023,6 +1031,13 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_addrs ); } + if (ureg->nr_loops) { + emit_decl_range(ureg, + TGSI_FILE_LOOP, + 0, + ureg->nr_loops); + } + if (ureg->nr_preds) { emit_decl_range(ureg, TGSI_FILE_PREDICATE, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index dae4291194..94cc70a208 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -158,6 +158,9 @@ struct ureg_dst ureg_DECL_address( struct ureg_program * ); struct ureg_dst +ureg_DECL_loop( struct ureg_program * ); + +struct ureg_dst ureg_DECL_predicate(struct ureg_program *); /* Supply an index to the sampler declaration as this is the hook to |