summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/tgsi/tgsi_exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/tgsi/tgsi_exec.c')
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c141
1 files changed, 100 insertions, 41 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 41dffc3dba..96567772b7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -467,17 +467,6 @@ micro_exp2(
}
static void
-micro_f2it(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->i[0] = (int) src->f[0];
- dst->i[1] = (int) src->f[1];
- dst->i[2] = (int) src->f[2];
- dst->i[3] = (int) src->f[3];
-}
-
-static void
micro_f2ut(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
@@ -958,14 +947,22 @@ fetch_src_file_channel(
switch( file ) {
case TGSI_FILE_CONSTANT:
assert(mach->Consts);
- assert(index->i[0] >= 0);
- assert(index->i[1] >= 0);
- assert(index->i[2] >= 0);
- assert(index->i[3] >= 0);
- chan->f[0] = mach->Consts[index->i[0]][swizzle];
- chan->f[1] = mach->Consts[index->i[1]][swizzle];
- chan->f[2] = mach->Consts[index->i[2]][swizzle];
- chan->f[3] = mach->Consts[index->i[3]][swizzle];
+ if (index->i[0] < 0)
+ chan->f[0] = 0.0f;
+ else
+ chan->f[0] = mach->Consts[index->i[0]][swizzle];
+ if (index->i[1] < 0)
+ chan->f[1] = 0.0f;
+ else
+ chan->f[1] = mach->Consts[index->i[1]][swizzle];
+ if (index->i[2] < 0)
+ chan->f[2] = 0.0f;
+ else
+ chan->f[2] = mach->Consts[index->i[2]][swizzle];
+ if (index->i[3] < 0)
+ chan->f[3] = 0.0f;
+ else
+ chan->f[3] = mach->Consts[index->i[3]][swizzle];
break;
case TGSI_FILE_INPUT:
@@ -1037,11 +1034,28 @@ fetch_source(
union tgsi_exec_channel index;
uint swizzle;
+ /* We start with a direct index into a register file.
+ *
+ * file[1],
+ * where:
+ * file = SrcRegister.File
+ * [1] = SrcRegister.Index
+ */
index.i[0] =
index.i[1] =
index.i[2] =
index.i[3] = reg->SrcRegister.Index;
+ /* There is an extra source register that indirectly subscripts
+ * a register file. The direct index now becomes an offset
+ * that is being added to the indirect register.
+ *
+ * file[ind[2].x+1],
+ * where:
+ * ind = SrcRegisterInd.File
+ * [2] = SrcRegisterInd.Index
+ * .x = SrcRegisterInd.SwizzleX
+ */
if (reg->SrcRegister.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
@@ -1064,10 +1078,10 @@ fetch_source(
&indir_index );
/* add value of address register to the offset */
- index.i[0] += indir_index.i[0];
- index.i[1] += indir_index.i[1];
- index.i[2] += indir_index.i[2];
- index.i[3] += indir_index.i[3];
+ index.i[0] += (int) indir_index.f[0];
+ index.i[1] += (int) indir_index.f[1];
+ index.i[2] += (int) indir_index.f[2];
+ index.i[3] += (int) indir_index.f[3];
/* for disabled execution channels, zero-out the index to
* avoid using a potential garbage value.
@@ -1078,19 +1092,31 @@ fetch_source(
}
}
- if( reg->SrcRegister.Dimension ) {
- switch( reg->SrcRegister.File ) {
+ /* There is an extra source register that is a second
+ * subscript to a register file. Effectively it means that
+ * the register file is actually a 2D array of registers.
+ *
+ * file[1][3] == file[1*sizeof(file[1])+3],
+ * where:
+ * [3] = SrcRegisterDim.Index
+ */
+ if (reg->SrcRegister.Dimension) {
+ /* The size of the first-order array depends on the register file type.
+ * We need to multiply the index to the first array to get an effective,
+ * "flat" index that points to the beginning of the second-order array.
+ */
+ switch (reg->SrcRegister.File) {
case TGSI_FILE_INPUT:
- index.i[0] *= 17;
- index.i[1] *= 17;
- index.i[2] *= 17;
- index.i[3] *= 17;
+ index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
break;
case TGSI_FILE_CONSTANT:
- index.i[0] *= 4096;
- index.i[1] *= 4096;
- index.i[2] *= 4096;
- index.i[3] *= 4096;
+ index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
break;
default:
assert( 0 );
@@ -1101,6 +1127,17 @@ fetch_source(
index.i[2] += reg->SrcRegisterDim.Index;
index.i[3] += reg->SrcRegisterDim.Index;
+ /* Again, the second subscript index can be addressed indirectly
+ * identically to the first one.
+ * Nothing stops us from indirectly addressing the indirect register,
+ * but there is no need for that, so we won't exercise it.
+ *
+ * file[1][ind[4].y+3],
+ * where:
+ * ind = SrcRegisterDimInd.File
+ * [4] = SrcRegisterDimInd.Index
+ * .y = SrcRegisterDimInd.SwizzleX
+ */
if (reg->SrcRegisterDim.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
@@ -1120,10 +1157,10 @@ fetch_source(
&index2,
&indir_index );
- index.i[0] += indir_index.i[0];
- index.i[1] += indir_index.i[1];
- index.i[2] += indir_index.i[2];
- index.i[3] += indir_index.i[3];
+ index.i[0] += (int) indir_index.f[0];
+ index.i[1] += (int) indir_index.f[1];
+ index.i[2] += (int) indir_index.f[2];
+ index.i[3] += (int) indir_index.f[3];
/* for disabled execution channels, zero-out the index to
* avoid using a potential garbage value.
@@ -1133,6 +1170,11 @@ fetch_source(
index.i[i] = 0;
}
}
+
+ /* If by any chance there was a need for a 3D array of register
+ * files, we would have to check whether SrcRegisterDim is followed
+ * by a dimension register and continue the saga.
+ */
}
swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
@@ -1701,6 +1743,7 @@ exec_declaration(
break;
default:
+ eval = NULL;
assert( 0 );
}
@@ -1743,7 +1786,7 @@ exec_instruction(
case TGSI_OPCODE_ARL:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_f2it( &r[0], &r[0] );
+ micro_trunc( &r[0], &r[0] );
STORE( &r[0], 0, chan_index );
}
break;
@@ -2033,7 +2076,21 @@ exec_instruction(
case TGSI_OPCODE_DOT2ADD:
/* TGSI_OPCODE_DP2A */
- assert (0);
+ FETCH( &r[0], 0, CHAN_X );
+ FETCH( &r[1], 1, CHAN_X );
+ micro_mul( &r[0], &r[0], &r[1] );
+
+ FETCH( &r[1], 0, CHAN_Y );
+ FETCH( &r[2], 1, CHAN_Y );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
+
+ FETCH( &r[2], 2, CHAN_X );
+ micro_add( &r[0], &r[0], &r[2] );
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
break;
case TGSI_OPCODE_INDEX:
@@ -2478,7 +2535,8 @@ exec_instruction(
micro_mul( &dot, &r[2], &r[2] );
micro_add( &tmp, &tmp, &dot );
- /* tmp = 1 / tmp */
+ /* tmp = 1 / sqrt(tmp) */
+ micro_sqrt( &tmp, &tmp );
micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
/* note: w channel is undefined */
@@ -2511,7 +2569,8 @@ exec_instruction(
micro_mul( &dot, &r[3], &r[3] );
micro_add( &tmp, &tmp, &dot );
- /* tmp = 1 / tmp */
+ /* tmp = 1 / sqrt(tmp) */
+ micro_sqrt( &tmp, &tmp );
micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {