summary | refs | log | tree | commit | diff
path: root/src/gallium/auxiliary/tgsi/tgsi_exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/tgsi/tgsi_exec.c')
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_exec.c 123
1 files changed, 105 insertions, 18 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index f98b66dc0b..94589cf79f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -133,7 +133,7 @@ tgsi_exec_machine_bind_shader(
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
uint numSamplers,
- struct tgsi_sampler *samplers)
+ struct tgsi_sampler **samplers)
{
uint k;
struct tgsi_parse_context parse;
@@ -202,7 +202,7 @@ tgsi_exec_machine_bind_shader(
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
- uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+ uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
assert( size % 4 == 0 );
assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
@@ -320,6 +320,7 @@ micro_add(
dst->f[3] = src0->f[3] + src1->f[3];
}
+#if 0
static void
micro_iadd(
union tgsi_exec_channel *dst,
@@ -331,6 +332,7 @@ micro_iadd(
dst->i[2] = src0->i[2] + src1->i[2];
dst->i[3] = src0->i[3] + src1->i[3];
}
+#endif
static void
micro_and(
@@ -408,6 +410,7 @@ micro_div(
}
}
+#if 0
static void
micro_udiv(
union tgsi_exec_channel *dst,
@@ -419,6 +422,7 @@ micro_udiv(
dst->u[2] = src0->u[2] / src1->u[2];
dst->u[3] = src0->u[3] / src1->u[3];
}
+#endif
static void
micro_eq(
@@ -434,6 +438,7 @@ micro_eq(
dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
}
+#if 0
static void
micro_ieq(
union tgsi_exec_channel *dst,
@@ -447,6 +452,7 @@ micro_ieq(
dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
}
+#endif
static void
micro_exp2(
@@ -466,6 +472,7 @@ micro_exp2(
#endif
}
+#if 0
static void
micro_f2ut(
union tgsi_exec_channel *dst,
@@ -476,6 +483,7 @@ micro_f2ut(
dst->u[2] = (uint) src->f[2];
dst->u[3] = (uint) src->f[3];
}
+#endif
static void
micro_flr(
@@ -570,6 +578,7 @@ micro_lt(
dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
}
+#if 0
static void
micro_ilt(
union tgsi_exec_channel *dst,
@@ -583,7 +592,9 @@ micro_ilt(
dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
}
+#endif
+#if 0
static void
micro_ult(
union tgsi_exec_channel *dst,
@@ -597,6 +608,7 @@ micro_ult(
dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
}
+#endif
static void
micro_max(
@@ -610,6 +622,7 @@ micro_max(
dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
}
+#if 0
static void
micro_imax(
union tgsi_exec_channel *dst,
@@ -621,7 +634,9 @@ micro_imax(
dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
}
+#endif
+#if 0
static void
micro_umax(
union tgsi_exec_channel *dst,
@@ -633,6 +648,7 @@ micro_umax(
dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
}
+#endif
static void
micro_min(
@@ -646,6 +662,7 @@ micro_min(
dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
}
+#if 0
static void
micro_imin(
union tgsi_exec_channel *dst,
@@ -657,7 +674,9 @@ micro_imin(
dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
}
+#endif
+#if 0
static void
micro_umin(
union tgsi_exec_channel *dst,
@@ -669,7 +688,9 @@ micro_umin(
dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
}
+#endif
+#if 0
static void
micro_umod(
union tgsi_exec_channel *dst,
@@ -681,6 +702,7 @@ micro_umod(
dst->u[2] = src0->u[2] % src1->u[2];
dst->u[3] = src0->u[3] % src1->u[3];
}
+#endif
static void
micro_mul(
@@ -694,6 +716,7 @@ micro_mul(
dst->f[3] = src0->f[3] * src1->f[3];
}
+#if 0
static void
micro_imul(
union tgsi_exec_channel *dst,
@@ -705,7 +728,9 @@ micro_imul(
dst->i[2] = src0->i[2] * src1->i[2];
dst->i[3] = src0->i[3] * src1->i[3];
}
+#endif
+#if 0
static void
micro_imul64(
union tgsi_exec_channel *dst0,
@@ -722,7 +747,9 @@ micro_imul64(
dst0->i[2] = 0;
dst0->i[3] = 0;
}
+#endif
+#if 0
static void
micro_umul64(
union tgsi_exec_channel *dst0,
@@ -739,7 +766,10 @@ micro_umul64(
dst0->u[2] = 0;
dst0->u[3] = 0;
}
+#endif
+
+#if 0
static void
micro_movc(
union tgsi_exec_channel *dst,
@@ -752,6 +782,7 @@ micro_movc(
dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
}
+#endif
static void
micro_neg(
@@ -764,6 +795,7 @@ micro_neg(
dst->f[3] = -src->f[3];
}
+#if 0
static void
micro_ineg(
union tgsi_exec_channel *dst,
@@ -774,6 +806,7 @@ micro_ineg(
dst->i[2] = -src->i[2];
dst->i[3] = -src->i[3];
}
+#endif
static void
micro_not(
@@ -874,6 +907,7 @@ micro_trunc(
dst->f[3] = (float) (int) src0->f[3];
}
+#if 0
static void
micro_ushr(
union tgsi_exec_channel *dst,
@@ -885,6 +919,7 @@ micro_ushr(
dst->u[2] = src0->u[2] >> src1->u[2];
dst->u[3] = src0->u[3] >> src1->u[3];
}
+#endif
static void
micro_sin(
@@ -919,6 +954,7 @@ micro_sub(
dst->f[3] = src0->f[3] - src1->f[3];
}
+#if 0
static void
micro_u2f(
union tgsi_exec_channel *dst,
@@ -929,6 +965,7 @@ micro_u2f(
dst->f[2] = (float) src->u[2];
dst->f[3] = (float) src->u[3];
}
+#endif
static void
micro_xor(
@@ -1045,11 +1082,28 @@ fetch_source(
union tgsi_exec_channel index;
uint swizzle;
+ /* We start with a direct index into a register file.
+ *
+ * file[1],
+ * where:
+ * file = SrcRegister.File
+ * [1] = SrcRegister.Index
+ */
index.i[0] =
index.i[1] =
index.i[2] =
index.i[3] = reg->SrcRegister.Index;
+ /* There is an extra source register that indirectly subscripts
+ * a register file. The direct index now becomes an offset
+ * that is being added to the indirect register.
+ *
+ * file[ind[2].x+1],
+ * where:
+ * ind = SrcRegisterInd.File
+ * [2] = SrcRegisterInd.Index
+ * .x = SrcRegisterInd.SwizzleX
+ */
if (reg->SrcRegister.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
@@ -1086,19 +1140,31 @@ fetch_source(
}
}
- if( reg->SrcRegister.Dimension ) {
- switch( reg->SrcRegister.File ) {
+ /* There is an extra source register that is a second
+ * subscript to a register file. Effectively it means that
+ * the register file is actually a 2D array of registers.
+ *
+ * file[1][3] == file[1*sizeof(file[1])+3],
+ * where:
+ * [3] = SrcRegisterDim.Index
+ */
+ if (reg->SrcRegister.Dimension) {
+ /* The size of the first-order array depends on the register file type.
+ * We need to multiply the index to the first array to get an effective,
+ * "flat" index that points to the beginning of the second-order array.
+ */
+ switch (reg->SrcRegister.File) {
case TGSI_FILE_INPUT:
- index.i[0] *= 17;
- index.i[1] *= 17;
- index.i[2] *= 17;
- index.i[3] *= 17;
+ index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
break;
case TGSI_FILE_CONSTANT:
- index.i[0] *= 4096;
- index.i[1] *= 4096;
- index.i[2] *= 4096;
- index.i[3] *= 4096;
+ index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
break;
default:
assert( 0 );
@@ -1109,6 +1175,17 @@ fetch_source(
index.i[2] += reg->SrcRegisterDim.Index;
index.i[3] += reg->SrcRegisterDim.Index;
+ /* Again, the second subscript index can be addressed indirectly
+ * identically to the first one.
+ * Nothing stops us from indirectly addressing the indirect register,
+ * but there is no need for that, so we won't exercise it.
+ *
+ * file[1][ind[4].y+3],
+ * where:
+ * ind = SrcRegisterDimInd.File
+ * [4] = SrcRegisterDimInd.Index
+ * .y = SrcRegisterDimInd.SwizzleX
+ */
if (reg->SrcRegisterDim.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
@@ -1141,6 +1218,11 @@ fetch_source(
index.i[i] = 0;
}
}
+
+ /* If by any chance there was a need for a 3D array of register
+ * files, we would have to check whether SrcRegisterDim is followed
+ * by a dimension register and continue the saga.
+ */
}
swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
@@ -1490,7 +1572,7 @@ exec_kilp(struct tgsi_exec_machine *mach,
/*
- * Fetch a texel using STR texture coordinates.
+ * Fetch four texture samples using STR texture coordinates.
*/
static void
fetch_texel( struct tgsi_sampler *sampler,
@@ -1524,7 +1606,7 @@ exec_tex(struct tgsi_exec_machine *mach,
boolean projected)
{
const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
- union tgsi_exec_channel r[8];
+ union tgsi_exec_channel r[4];
uint chan_index;
float lodBias;
@@ -1532,6 +1614,7 @@ exec_tex(struct tgsi_exec_machine *mach,
switch (inst->InstructionExtTexture.Texture) {
case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
FETCH(&r[0], 0, CHAN_X);
@@ -1547,13 +1630,15 @@ exec_tex(struct tgsi_exec_machine *mach,
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
+ fetch_texel(mach->Samplers[unit],
&r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 0, CHAN_Y);
@@ -1573,7 +1658,7 @@ exec_tex(struct tgsi_exec_machine *mach,
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
+ fetch_texel(mach->Samplers[unit],
&r[0], &r[1], &r[2], lodBias, /* inputs */
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
@@ -1599,7 +1684,7 @@ exec_tex(struct tgsi_exec_machine *mach,
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
+ fetch_texel(mach->Samplers[unit],
&r[0], &r[1], &r[2], lodBias,
&r[0], &r[1], &r[2], &r[3]);
break;
@@ -1709,6 +1794,7 @@ exec_declaration(
break;
default:
+ eval = NULL;
assert( 0 );
}
@@ -1751,7 +1837,7 @@ exec_instruction(
case TGSI_OPCODE_ARL:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_trunc( &r[0], &r[0] );
+ micro_flr( &r[0], &r[0] );
STORE( &r[0], 0, chan_index );
}
break;
@@ -1806,6 +1892,7 @@ exec_instruction(
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
FETCH( &r[0], 0, CHAN_X );
+ micro_abs( &r[0], &r[0] );
micro_sqrt( &r[0], &r[0] );
micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {