diff options
Diffstat (limited to 'src/gallium/auxiliary/tgsi/tgsi_exec.c')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 123 |
1 files changed, 105 insertions, 18 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index f98b66dc0b..94589cf79f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -133,7 +133,7 @@ tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, uint numSamplers, - struct tgsi_sampler *samplers) + struct tgsi_sampler **samplers) { uint k; struct tgsi_parse_context parse; @@ -202,7 +202,7 @@ tgsi_exec_machine_bind_shader( case TGSI_TOKEN_TYPE_IMMEDIATE: { - uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; assert( size % 4 == 0 ); assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); @@ -320,6 +320,7 @@ micro_add( dst->f[3] = src0->f[3] + src1->f[3]; } +#if 0 static void micro_iadd( union tgsi_exec_channel *dst, @@ -331,6 +332,7 @@ micro_iadd( dst->i[2] = src0->i[2] + src1->i[2]; dst->i[3] = src0->i[3] + src1->i[3]; } +#endif static void micro_and( @@ -408,6 +410,7 @@ micro_div( } } +#if 0 static void micro_udiv( union tgsi_exec_channel *dst, @@ -419,6 +422,7 @@ micro_udiv( dst->u[2] = src0->u[2] / src1->u[2]; dst->u[3] = src0->u[3] / src1->u[3]; } +#endif static void micro_eq( @@ -434,6 +438,7 @@ micro_eq( dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; } +#if 0 static void micro_ieq( union tgsi_exec_channel *dst, @@ -447,6 +452,7 @@ micro_ieq( dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; } +#endif static void micro_exp2( @@ -466,6 +472,7 @@ micro_exp2( #endif } +#if 0 static void micro_f2ut( union tgsi_exec_channel *dst, @@ -476,6 +483,7 @@ micro_f2ut( dst->u[2] = (uint) src->f[2]; dst->u[3] = (uint) src->f[3]; } +#endif static void micro_flr( @@ -570,6 +578,7 @@ micro_lt( dst->f[3] = src0->f[3] < src1->f[3] ? 
src2->f[3] : src3->f[3]; } +#if 0 static void micro_ilt( union tgsi_exec_channel *dst, @@ -583,7 +592,9 @@ micro_ilt( dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; } +#endif +#if 0 static void micro_ult( union tgsi_exec_channel *dst, @@ -597,6 +608,7 @@ micro_ult( dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; } +#endif static void micro_max( @@ -610,6 +622,7 @@ micro_max( dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; } +#if 0 static void micro_imax( union tgsi_exec_channel *dst, @@ -621,7 +634,9 @@ micro_imax( dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; } +#endif +#if 0 static void micro_umax( union tgsi_exec_channel *dst, @@ -633,6 +648,7 @@ micro_umax( dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; } +#endif static void micro_min( @@ -646,6 +662,7 @@ micro_min( dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } +#if 0 static void micro_imin( union tgsi_exec_channel *dst, @@ -657,7 +674,9 @@ micro_imin( dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; } +#endif +#if 0 static void micro_umin( union tgsi_exec_channel *dst, @@ -669,7 +688,9 @@ micro_umin( dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; dst->u[3] = src0->u[3] < src1->u[3] ? 
src0->u[3] : src1->u[3]; } +#endif +#if 0 static void micro_umod( union tgsi_exec_channel *dst, @@ -681,6 +702,7 @@ micro_umod( dst->u[2] = src0->u[2] % src1->u[2]; dst->u[3] = src0->u[3] % src1->u[3]; } +#endif static void micro_mul( @@ -694,6 +716,7 @@ micro_mul( dst->f[3] = src0->f[3] * src1->f[3]; } +#if 0 static void micro_imul( union tgsi_exec_channel *dst, @@ -705,7 +728,9 @@ micro_imul( dst->i[2] = src0->i[2] * src1->i[2]; dst->i[3] = src0->i[3] * src1->i[3]; } +#endif +#if 0 static void micro_imul64( union tgsi_exec_channel *dst0, @@ -722,7 +747,9 @@ micro_imul64( dst0->i[2] = 0; dst0->i[3] = 0; } +#endif +#if 0 static void micro_umul64( union tgsi_exec_channel *dst0, @@ -739,7 +766,10 @@ micro_umul64( dst0->u[2] = 0; dst0->u[3] = 0; } +#endif + +#if 0 static void micro_movc( union tgsi_exec_channel *dst, @@ -752,6 +782,7 @@ micro_movc( dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; } +#endif static void micro_neg( @@ -764,6 +795,7 @@ micro_neg( dst->f[3] = -src->f[3]; } +#if 0 static void micro_ineg( union tgsi_exec_channel *dst, @@ -774,6 +806,7 @@ micro_ineg( dst->i[2] = -src->i[2]; dst->i[3] = -src->i[3]; } +#endif static void micro_not( @@ -874,6 +907,7 @@ micro_trunc( dst->f[3] = (float) (int) src0->f[3]; } +#if 0 static void micro_ushr( union tgsi_exec_channel *dst, @@ -885,6 +919,7 @@ micro_ushr( dst->u[2] = src0->u[2] >> src1->u[2]; dst->u[3] = src0->u[3] >> src1->u[3]; } +#endif static void micro_sin( @@ -919,6 +954,7 @@ micro_sub( dst->f[3] = src0->f[3] - src1->f[3]; } +#if 0 static void micro_u2f( union tgsi_exec_channel *dst, @@ -929,6 +965,7 @@ micro_u2f( dst->f[2] = (float) src->u[2]; dst->f[3] = (float) src->u[3]; } +#endif static void micro_xor( @@ -1045,11 +1082,28 @@ fetch_source( union tgsi_exec_channel index; uint swizzle; + /* We start with a direct index into a register file. 
+ * + * file[1], + * where: + * file = SrcRegister.File + * [1] = SrcRegister.Index + */ index.i[0] = index.i[1] = index.i[2] = index.i[3] = reg->SrcRegister.Index; + /* There is an extra source register that indirectly subscripts + * a register file. The direct index now becomes an offset + * that is being added to the indirect register. + * + * file[ind[2].x+1], + * where: + * ind = SrcRegisterInd.File + * [2] = SrcRegisterInd.Index + * .x = SrcRegisterInd.SwizzleX + */ if (reg->SrcRegister.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; @@ -1086,19 +1140,31 @@ fetch_source( } } - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { + /* There is an extra source register that is a second + * subscript to a register file. Effectively it means that + * the register file is actually a 2D array of registers. + * + * file[1][3] == file[1*sizeof(file[1])+3], + * where: + * [3] = SrcRegisterDim.Index + */ + if (reg->SrcRegister.Dimension) { + /* The size of the first-order array depends on the register file type. + * We need to multiply the index to the first array to get an effective, + * "flat" index that points to the beginning of the second-order array. 
+ */ switch (reg->SrcRegister.File) { case TGSI_FILE_INPUT: - index.i[0] *= 17; - index.i[1] *= 17; - index.i[2] *= 17; - index.i[3] *= 17; + index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; break; case TGSI_FILE_CONSTANT: - index.i[0] *= 4096; - index.i[1] *= 4096; - index.i[2] *= 4096; - index.i[3] *= 4096; + index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; break; default: assert( 0 ); @@ -1109,6 +1175,17 @@ fetch_source( index.i[2] += reg->SrcRegisterDim.Index; index.i[3] += reg->SrcRegisterDim.Index; + /* Again, the second subscript index can be addressed indirectly + * identically to the first one. + * Nothing stops us from indirectly addressing the indirect register, + * but there is no need for that, so we won't exercise it. + * + * file[1][ind[4].y+3], + * where: + * ind = SrcRegisterDimInd.File + * [4] = SrcRegisterDimInd.Index + * .y = SrcRegisterDimInd.SwizzleX + */ if (reg->SrcRegisterDim.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; @@ -1141,6 +1218,11 @@ fetch_source( index.i[i] = 0; } } + + /* If by any chance there was a need for a 3D array of register + * files, we would have to check whether SrcRegisterDim is followed + * by a dimension register and continue the saga. + */ } swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); @@ -1490,7 +1572,7 @@ exec_kilp(struct tgsi_exec_machine *mach, /* - * Fetch a texel using STR texture coordinates. + * Fetch four texture samples using STR texture coordinates. 
*/ static void fetch_texel( struct tgsi_sampler *sampler, @@ -1524,7 +1606,7 @@ exec_tex(struct tgsi_exec_machine *mach, boolean projected) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; - union tgsi_exec_channel r[8]; + union tgsi_exec_channel r[4]; uint chan_index; float lodBias; @@ -1532,6 +1614,7 @@ exec_tex(struct tgsi_exec_machine *mach, switch (inst->InstructionExtTexture.Texture) { case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: FETCH(&r[0], 0, CHAN_X); @@ -1547,13 +1630,15 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); @@ -1573,7 +1658,7 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], &r[1], &r[2], lodBias, /* inputs */ &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -1599,7 +1684,7 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], &r[1], &r[2], lodBias, &r[0], &r[1], &r[2], &r[3]); break; @@ -1709,6 +1794,7 @@ exec_declaration( break; default: + eval = NULL; assert( 0 ); } @@ -1751,7 +1837,7 @@ exec_instruction( case TGSI_OPCODE_ARL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_trunc( &r[0], &r[0] ); + micro_flr( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; @@ -1806,6 +1892,7 @@ exec_instruction( case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( &r[0], 0, CHAN_X ); + micro_abs( &r[0], &r[0] ); micro_sqrt( &r[0], &r[0] ); micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { |