summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
author: Alex Deucher <alexdeucher@gmail.com> 2009-06-12 12:09:34 -0400
committer: Alex Deucher <alexdeucher@gmail.com> 2009-06-12 12:09:34 -0400
commit: 1036ef2bf468611d37b5df06fc4424f2002e3837 (patch)
tree: f0859a6d903c2570a0a00c918da88139f8f7d065 /src/gallium/auxiliary/tgsi
parent: 917f8bc1a85e61311cef6478127b387df70fba14 (diff)
parent: 1cd0afffc9edbcac690f8ab436aecfced26b0aba (diff)
Merge master and fix conflicts
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_exec.c 62
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_exec.h 2
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_sse2.c 133
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_transform.c 27
4 files changed, 197 insertions, 27 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index e8bd7cda3b..aba7a3f937 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -53,6 +53,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
@@ -169,6 +170,56 @@ print_temp(const struct tgsi_exec_machine *mach, uint index)
#endif
+/**
+ * Check if there's a potential src/dst register data dependency when
+ * using SOA execution.
+ * Example:
+ * MOV T, T.yxwz;
+ * This would expand into:
+ * MOV t0, t1;
+ * MOV t1, t0;
+ * MOV t2, t3;
+ * MOV t3, t2;
+ * The second instruction will have the wrong value for t0 if executed as-is.
+ *
+ * Only destination register 0 is examined.  A source register is tested
+ * only when both its File and its Index equal those of that destination.
+ *
+ * \param inst  the instruction to examine
+ * \return TRUE if, for some channel visited by the channel loop, the
+ *         source swizzle selects a channel that an earlier iteration of
+ *         that loop already recorded as written; FALSE otherwise.
+ */
+static boolean
+tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
+{
+ uint i, chan;
+
+ uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ if (writemask == TGSI_WRITEMASK_X ||
+ writemask == TGSI_WRITEMASK_Y ||
+ writemask == TGSI_WRITEMASK_Z ||
+ writemask == TGSI_WRITEMASK_W ||
+ writemask == TGSI_WRITEMASK_NONE) {
+ /* At most one channel is written, so no later channel write can
+ * observe a value clobbered by an earlier one:
+ * no chance of data dependency */
+ return FALSE;
+ }
+
+ /* loop over src regs, looking for ones that alias dst register 0 */
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if ((inst->FullSrcRegisters[i].SrcRegister.File ==
+ inst->FullDstRegisters[0].DstRegister.File) &&
+ (inst->FullSrcRegisters[i].SrcRegister.Index ==
+ inst->FullDstRegisters[0].DstRegister.Index)) {
+ /* loop over dest channels; channelsWritten is a bitmask (bit n =
+ * channel n) of the channels visited so far for this source.
+ * NOTE(review): FOR_EACH_ENABLED_CHANNEL is defined outside this
+ * view; presumably it visits the writemask-enabled channels of
+ * dst 0 in X..W order -- confirm. */
+ uint channelsWritten = 0x0;
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
+ /* check if we're reading a channel that's been written.
+ * Swizzle values above TGSI_SWIZZLE_W are ignored here
+ * (presumably extended swizzles that do not read a register
+ * channel -- confirm against the extswizzle helper). */
+ uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan);
+ if (swizzle <= TGSI_SWIZZLE_W &&
+ (channelsWritten & (1 << swizzle))) {
+ return TRUE;
+ }
+
+ channelsWritten |= (1 << chan); /* recorded only after the read check */
+ }
+ }
+ }
+ return FALSE;
+}
+
/**
* Initialize machine state by expanding tokens to full instructions,
@@ -280,6 +331,17 @@ tgsi_exec_machine_bind_shader(
memcpy(instructions + numInstructions,
&parse.FullToken.FullInstruction,
sizeof(instructions[0]));
+
+#if 0
+ if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
+ debug_printf("SOA dependency in instruction:\n");
+ tgsi_dump_instruction(&parse.FullToken.FullInstruction,
+ numInstructions);
+ }
+#else
+ (void) tgsi_check_soa_dependencies;
+#endif
+
numInstructions++;
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 0b4b2a6fb6..da22baad3e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -34,7 +34,7 @@
extern "C" {
#endif
-#define MAX_LABELS 1024
+#define MAX_LABELS (4 * 1024) /**< basically, max instructions */
#define NUM_CHANNELS 4 /* R,G,B,A */
#define QUAD_SIZE 4 /* 4 pixel/quad */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index d70bcd03c5..ba2bfdef06 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1466,6 +1466,31 @@ emit_cmp(
}
}
+
+/**
+ * Check if inst src/dest regs use indirect addressing into temporary
+ * register file.
+ *
+ * \param inst  the instruction to examine
+ * \return TRUE if any of the first NumSrcRegs source registers or the
+ *         first NumDstRegs destination registers has File equal to
+ *         TGSI_FILE_TEMPORARY and a non-zero Indirect field;
+ *         FALSE otherwise.
+ */
+static boolean
+indirect_temp_reference(const struct tgsi_full_instruction *inst)
+{
+ uint i;
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { /* source operands */
+ const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
+ if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
+ reg->SrcRegister.Indirect)
+ return TRUE;
+ }
+ for (i = 0; i < inst->Instruction.NumDstRegs; i++) { /* destination operands */
+ const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
+ if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
+ reg->DstRegister.Indirect)
+ return TRUE;
+ }
+ return FALSE; /* no indirect temporary access found */
+}
+
+
static int
emit_instruction(
struct x86_function *func,
@@ -1473,10 +1498,15 @@ emit_instruction(
{
unsigned chan_index;
+ /* we can't handle indirect addressing into temp register file yet */
+ if (indirect_temp_reference(inst))
+ return FALSE;
+
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
+ emit_flr(func, 0, 0);
emit_f2it( func, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1553,7 +1583,7 @@ emit_instruction(
func,
make_xmm( 2 ),
make_xmm( 0 ),
- cc_LessThanEqual );
+ cc_LessThan );
sse_andps(
func,
make_xmm( 2 ),
@@ -2177,32 +2207,83 @@ emit_instruction(
/* 3 or 4-component normalization */
{
uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
- /* note: cannot use xmm regs 2/3 here (see emit_rsqrt() above) */
- FETCH( func, *inst, 4, 0, CHAN_X ); /* xmm4 = src[0].x */
- FETCH( func, *inst, 5, 0, CHAN_Y ); /* xmm5 = src[0].y */
- FETCH( func, *inst, 6, 0, CHAN_Z ); /* xmm6 = src[0].z */
- if (dims == 4) {
- FETCH( func, *inst, 7, 0, CHAN_W ); /* xmm7 = src[0].w */
- }
- emit_MOV( func, 0, 4 ); /* xmm0 = xmm3 */
- emit_mul( func, 0, 4 ); /* xmm0 *= xmm3 */
- emit_MOV( func, 1, 5 ); /* xmm1 = xmm4 */
- emit_mul( func, 1, 5 ); /* xmm1 *= xmm4 */
- emit_add( func, 0, 1 ); /* xmm0 += xmm1 */
- emit_MOV( func, 1, 6 ); /* xmm1 = xmm5 */
- emit_mul( func, 1, 6 ); /* xmm1 *= xmm5 */
- emit_add( func, 0, 1 ); /* xmm0 += xmm1 */
- if (dims == 4) {
- emit_MOV( func, 1, 7 ); /* xmm1 = xmm7 */
- emit_mul( func, 1, 7 ); /* xmm1 *= xmm7 */
- emit_add( func, 0, 0 ); /* xmm0 += xmm1 */
- }
- emit_rsqrt( func, 1, 0 ); /* xmm1 = 1/sqrt(xmm0) */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- if (chan_index < dims) {
- emit_mul( func, 4+chan_index, 1); /* xmm[4+ch] *= xmm1 */
- STORE( func, *inst, 4+chan_index, 0, chan_index );
+
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
+ IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+ IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) ||
+ (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4)) {
+
+ /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
+
+ /* xmm4 = src.x */
+ /* xmm0 = src.x * src.x */
+ FETCH(func, *inst, 0, 0, CHAN_X);
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ emit_MOV(func, 4, 0);
+ }
+ emit_mul(func, 0, 0);
+
+ /* xmm5 = src.y */
+ /* xmm0 = xmm0 + src.y * src.y */
+ FETCH(func, *inst, 1, 0, CHAN_Y);
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ emit_MOV(func, 5, 1);
+ }
+ emit_mul(func, 1, 1);
+ emit_add(func, 0, 1);
+
+ /* xmm6 = src.z */
+ /* xmm0 = xmm0 + src.z * src.z */
+ FETCH(func, *inst, 1, 0, CHAN_Z);
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ emit_MOV(func, 6, 1);
+ }
+ emit_mul(func, 1, 1);
+ emit_add(func, 0, 1);
+
+ if (dims == 4) {
+ /* xmm7 = src.w */
+ /* xmm0 = xmm0 + src.w * src.w */
+ FETCH(func, *inst, 1, 0, CHAN_W);
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ emit_MOV(func, 7, 1);
+ }
+ emit_mul(func, 1, 1);
+ emit_add(func, 0, 1);
}
+
+ /* xmm1 = 1 / sqrt(xmm0) */
+ emit_rsqrt(func, 1, 0);
+
+ /* dst.x = xmm1 * src.x */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ emit_mul(func, 4, 1);
+ STORE(func, *inst, 4, 0, CHAN_X);
+ }
+
+ /* dst.y = xmm1 * src.y */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ emit_mul(func, 5, 1);
+ STORE(func, *inst, 5, 0, CHAN_Y);
+ }
+
+ /* dst.z = xmm1 * src.z */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ emit_mul(func, 6, 1);
+ STORE(func, *inst, 6, 0, CHAN_Z);
+ }
+
+ /* dst.w = xmm1 * src.w (4-component case only).
+ * The guard must test the W writemask bit: xmm7 is loaded with src.w
+ * above only when CHAN_W is enabled.  Testing CHAN_X here would store
+ * an unset xmm7 into dst.w when X is enabled but W is not, and would
+ * skip the dst.w write when W is enabled but X is not. */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4) {
+ emit_mul(func, 7, 1);
+ STORE(func, *inst, 7, 0, CHAN_W);
+ }
+ }
+
+ /* dst0.w = 1.0 */
+ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) {
+ emit_tempf(func, 0, TEMP_ONE_I, TEMP_ONE_C);
+ STORE(func, *inst, 0, 0, CHAN_W);
}
}
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c
index 062c1be938..bc9c18fd4a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c
@@ -198,3 +198,30 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
return ctx->ti;
}
+
+
+#include "tgsi_text.h"
+
+extern int tgsi_transform_foo( struct tgsi_token *tokens_out,
+ uint max_tokens_out );
+
+/* This function exists only so that tgsi_text_translate() doesn't get
+ * magic-ed out of the libtgsi.a archive by the build system. Don't
+ * remove unless you know this has been fixed - check on mingw/scons
+ * builds as well.
+ *
+ * It passes a small hard-coded fragment shader (a single MOV from IN[0]
+ * to OUT[0]) to tgsi_text_translate() together with the caller's
+ * tokens_out buffer and max_tokens_out, and returns that call's result
+ * unchanged.
+ */
+int
+tgsi_transform_foo( struct tgsi_token *tokens_out,
+ uint max_tokens_out )
+{
+ const char *text =
+ "FRAG1.1\n"
+ "DCL IN[0], COLOR, CONSTANT\n"
+ "DCL OUT[0], COLOR\n"
+ " 0: MOV OUT[0], IN[0]\n"
+ " 1: END";
+
+ return tgsi_text_translate( text,
+ tokens_out,
+ max_tokens_out );
+}