From c54b2199759e688ae89530f39106557c762b2fa4 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 9 Apr 2009 22:38:07 +0200 Subject: tgsi/sse2: Fix LIT instruction. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index d70bcd03c5..e399b9b9d5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1553,7 +1553,7 @@ emit_instruction( func, make_xmm( 2 ), make_xmm( 0 ), - cc_LessThanEqual ); + cc_LessThan ); sse_andps( func, make_xmm( 2 ), -- cgit v1.2.3 From 790a18f2c2a4df78cf4b4f88e55036d54324497a Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 9 Apr 2009 22:47:06 +0200 Subject: tgsi/sse2: Fix ARL instruction. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index e399b9b9d5..98994d8494 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1477,6 +1477,7 @@ emit_instruction( case TGSI_OPCODE_ARL: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); + emit_flr(func, 0); emit_f2it( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } -- cgit v1.2.3 From c27c670ad8fc9f8276caeaff69eeb75e929dac9a Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 9 Apr 2009 23:24:57 +0200 Subject: tgsi/sse2: Fix build. 
--- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 98994d8494..31225ba430 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1477,7 +1477,7 @@ emit_instruction( case TGSI_OPCODE_ARL: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_flr(func, 0); + emit_flr(func, 0, 0); emit_f2it( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } -- cgit v1.2.3 From 5fae9514c235cc5590f15fd802bd762107fc689d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 10 Apr 2009 11:58:49 +0200 Subject: tgsi/sse2: Cleanup NRM/NRM4 implementation. Fix comments. Make sure .w is set to 1.0 for NRM. Optimise for non-.xyzw writemasks. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 101 +++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 25 deletions(-) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 31225ba430..4b4e34b29e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2178,32 +2178,83 @@ emit_instruction( /* 3 or 4-component normalization */ { uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 
3 : 4; - /* note: cannot use xmm regs 2/3 here (see emit_rsqrt() above) */ - FETCH( func, *inst, 4, 0, CHAN_X ); /* xmm4 = src[0].x */ - FETCH( func, *inst, 5, 0, CHAN_Y ); /* xmm5 = src[0].y */ - FETCH( func, *inst, 6, 0, CHAN_Z ); /* xmm6 = src[0].z */ - if (dims == 4) { - FETCH( func, *inst, 7, 0, CHAN_W ); /* xmm7 = src[0].w */ - } - emit_MOV( func, 0, 4 ); /* xmm0 = xmm3 */ - emit_mul( func, 0, 4 ); /* xmm0 *= xmm3 */ - emit_MOV( func, 1, 5 ); /* xmm1 = xmm4 */ - emit_mul( func, 1, 5 ); /* xmm1 *= xmm4 */ - emit_add( func, 0, 1 ); /* xmm0 += xmm1 */ - emit_MOV( func, 1, 6 ); /* xmm1 = xmm5 */ - emit_mul( func, 1, 6 ); /* xmm1 *= xmm5 */ - emit_add( func, 0, 1 ); /* xmm0 += xmm1 */ - if (dims == 4) { - emit_MOV( func, 1, 7 ); /* xmm1 = xmm7 */ - emit_mul( func, 1, 7 ); /* xmm1 *= xmm7 */ - emit_add( func, 0, 0 ); /* xmm0 += xmm1 */ - } - emit_rsqrt( func, 1, 0 ); /* xmm1 = 1/sqrt(xmm0) */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - if (chan_index < dims) { - emit_mul( func, 4+chan_index, 1); /* xmm[4+ch] *= xmm1 */ - STORE( func, *inst, 4+chan_index, 0, chan_index ); + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) || + (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4)) { + + /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). 
*/ + + /* xmm4 = src.x */ + /* xmm0 = src.x * src.x */ + FETCH(func, *inst, 0, 0, CHAN_X); + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + emit_MOV(func, 4, 0); } + emit_mul(func, 0, 0); + + /* xmm5 = src.y */ + /* xmm0 = xmm0 + src.y * src.y */ + FETCH(func, *inst, 1, 0, CHAN_Y); + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + emit_MOV(func, 5, 1); + } + emit_mul(func, 1, 1); + emit_add(func, 0, 1); + + /* xmm6 = src.z */ + /* xmm0 = xmm0 + src.z * src.z */ + FETCH(func, *inst, 1, 0, CHAN_Z); + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + emit_MOV(func, 6, 1); + } + emit_mul(func, 1, 1); + emit_add(func, 0, 1); + + if (dims == 4) { + /* xmm7 = src.w */ + /* xmm0 = xmm0 + src.w * src.w */ + FETCH(func, *inst, 1, 0, CHAN_W); + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + emit_MOV(func, 7, 1); + } + emit_mul(func, 1, 1); + emit_add(func, 0, 1); + } + + /* xmm1 = 1 / sqrt(xmm0) */ + emit_rsqrt(func, 1, 0); + + /* dst.x = xmm1 * src.x */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + emit_mul(func, 4, 1); + STORE(func, *inst, 4, 0, CHAN_X); + } + + /* dst.y = xmm1 * src.y */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + emit_mul(func, 5, 1); + STORE(func, *inst, 5, 0, CHAN_Y); + } + + /* dst.z = xmm1 * src.z */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + emit_mul(func, 6, 1); + STORE(func, *inst, 6, 0, CHAN_Z); + } + + /* dst.w = xmm1 * src.w (NRM4 only) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4) { + emit_mul(func, 7, 1); + STORE(func, *inst, 7, 0, CHAN_W); + } + } + + /* dst0.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) { + emit_tempf(func, 0, TEMP_ONE_I, TEMP_ONE_C); + STORE(func, *inst, 0, 0, CHAN_W); } } break; -- cgit v1.2.3 From 91eb8baaca21d24bfd3640c9f6b316610a7c5910 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 24 Apr 2009 17:08:59 -0600 Subject: tgsi: SSE code generator doesn't yet support indirect addressing of temp regs Fall back to interpreter in this case. 
--- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 4b4e34b29e..ba2bfdef06 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1466,6 +1466,31 @@ emit_cmp( } } + +/** + * Check if inst src/dest regs use indirect addressing into temporary + * register file. + */ +static boolean +indirect_temp_reference(const struct tgsi_full_instruction *inst) +{ + uint i; + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i]; + if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && + reg->SrcRegister.Indirect) + return TRUE; + } + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i]; + if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && + reg->DstRegister.Indirect) + return TRUE; + } + return FALSE; +} + + static int emit_instruction( struct x86_function *func, @@ -1473,6 +1498,10 @@ emit_instruction( { unsigned chan_index; + /* we can't handle indirect addressing into temp register file yet */ + if (indirect_temp_reference(inst)) + return FALSE; + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -- cgit v1.2.3 From 8fa6c1ac9299402c1faf75b264cf70b1b83d1eff Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Apr 2009 11:56:57 -0600 Subject: tgsi: added tgsi_check_soa_dependencies() and related debug code (disabled) The TGSI interpreter operates in SOA style. We need to check for data dependencies in instructions which read from and write to the same register. For now just adding some debug code to detect that condition. Actual fixes to follow. 
--- src/gallium/auxiliary/tgsi/tgsi_exec.c | 62 ++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index e8bd7cda3b..aba7a3f937 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -53,6 +53,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" @@ -169,6 +170,56 @@ print_temp(const struct tgsi_exec_machine *mach, uint index) #endif +/** + * Check if there's a potential src/dst register data dependency when + * using SOA execution. + * Example: + * MOV T, T.yxwz; + * This would expand into: + * MOV t0, t1; + * MOV t1, t0; + * MOV t2, t3; + * MOV t3, t2; + * The second instruction will have the wrong value for t0 if executed as-is. + */ +static boolean +tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) +{ + uint i, chan; + + uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + if (writemask == TGSI_WRITEMASK_X || + writemask == TGSI_WRITEMASK_Y || + writemask == TGSI_WRITEMASK_Z || + writemask == TGSI_WRITEMASK_W || + writemask == TGSI_WRITEMASK_NONE) { + /* no chance of data dependency */ + return FALSE; + } + + /* loop over src regs */ + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + if ((inst->FullSrcRegisters[i].SrcRegister.File == + inst->FullDstRegisters[0].DstRegister.File) && + (inst->FullSrcRegisters[i].SrcRegister.Index == + inst->FullDstRegisters[0].DstRegister.Index)) { + /* loop over dest channels */ + uint channelsWritten = 0x0; + FOR_EACH_ENABLED_CHANNEL(*inst, chan) { + /* check if we're reading a channel that's been written */ + uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan); + if (swizzle <= TGSI_SWIZZLE_W && + 
(channelsWritten & (1 << swizzle))) { + return TRUE; + } + + channelsWritten |= (1 << chan); + } + } + } + return FALSE; +} + /** * Initialize machine state by expanding tokens to full instructions, @@ -280,6 +331,17 @@ tgsi_exec_machine_bind_shader( memcpy(instructions + numInstructions, &parse.FullToken.FullInstruction, sizeof(instructions[0])); + +#if 0 + if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { + debug_printf("SOA dependency in instruction:\n"); + tgsi_dump_instruction(&parse.FullToken.FullInstruction, + numInstructions); + } +#else + (void) tgsi_check_soa_dependencies; +#endif + numInstructions++; break; -- cgit v1.2.3 From f628d7f5eebe9743f85ea8edf7c09b32cf393e4a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 30 Apr 2009 13:09:34 +0100 Subject: gallium/tgsi: hack around linker/archiver breakage Add a dummy function which exists only so that tgsi_text_translate() doesn't get magic-ed out of the libtgsi.a archive by the build system. Don't remove unless you know this has been fixed - check on mingw/scons builds as well. --- src/gallium/auxiliary/tgsi/tgsi_transform.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c index 062c1be938..bc9c18fd4a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -198,3 +198,30 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in, return ctx->ti; } + + +#include "tgsi_text.h" + +extern int tgsi_transform_foo( struct tgsi_token *tokens_out, + uint max_tokens_out ); + +/* This function exists only so that tgsi_text_translate() doesn't get + * magic-ed out of the libtgsi.a archive by the build system. Don't + * remove unless you know this has been fixed - check on mingw/scons + * builds as well. 
+ */ +int +tgsi_transform_foo( struct tgsi_token *tokens_out, + uint max_tokens_out ) +{ + const char *text = + "FRAG1.1\n" + "DCL IN[0], COLOR, CONSTANT\n" + "DCL OUT[0], COLOR\n" + " 0: MOV OUT[0], IN[0]\n" + " 1: END"; + + return tgsi_text_translate( text, + tokens_out, + max_tokens_out ); +} -- cgit v1.2.3 From 45e744dddc8a8f3b42610bfa512bc296bd5264bc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Jun 2009 13:08:52 -0600 Subject: tgsi: increase MAX_LABELS to 4096 --- src/gallium/auxiliary/tgsi/tgsi_exec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary/tgsi') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 0b4b2a6fb6..da22baad3e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -34,7 +34,7 @@ extern "C" { #endif -#define MAX_LABELS 1024 +#define MAX_LABELS (4 * 1024) /**< basically, max instructions */ #define NUM_CHANNELS 4 /* R,G,B,A */ #define QUAD_SIZE 4 /* 4 pixel/quad */ -- cgit v1.2.3