diff options
author | Alex Deucher <alexdeucher@gmail.com> | 2009-06-12 12:09:34 -0400 |
---|---|---|
committer | Alex Deucher <alexdeucher@gmail.com> | 2009-06-12 12:09:34 -0400 |
commit | 1036ef2bf468611d37b5df06fc4424f2002e3837 (patch) | |
tree | f0859a6d903c2570a0a00c918da88139f8f7d065 /src/gallium/auxiliary/tgsi | |
parent | 917f8bc1a85e61311cef6478127b387df70fba14 (diff) | |
parent | 1cd0afffc9edbcac690f8ab436aecfced26b0aba (diff) |
Merge master and fix conflicts
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 62 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.h | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 133 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_transform.c | 27 |
4 files changed, 197 insertions, 27 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index e8bd7cda3b..aba7a3f937 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -53,6 +53,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" @@ -169,6 +170,56 @@ print_temp(const struct tgsi_exec_machine *mach, uint index) #endif +/** + * Check if there's a potential src/dst register data dependency when + * using SOA execution. + * Example: + * MOV T, T.yxwz; + * This would expand into: + * MOV t0, t1; + * MOV t1, t0; + * MOV t2, t3; + * MOV t3, t2; + * The second instruction will have the wrong value for t0 if executed as-is. + */ +static boolean +tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) +{ + uint i, chan; + + uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + if (writemask == TGSI_WRITEMASK_X || + writemask == TGSI_WRITEMASK_Y || + writemask == TGSI_WRITEMASK_Z || + writemask == TGSI_WRITEMASK_W || + writemask == TGSI_WRITEMASK_NONE) { + /* no chance of data dependency */ + return FALSE; + } + + /* loop over src regs */ + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + if ((inst->FullSrcRegisters[i].SrcRegister.File == + inst->FullDstRegisters[0].DstRegister.File) && + (inst->FullSrcRegisters[i].SrcRegister.Index == + inst->FullDstRegisters[0].DstRegister.Index)) { + /* loop over dest channels */ + uint channelsWritten = 0x0; + FOR_EACH_ENABLED_CHANNEL(*inst, chan) { + /* check if we're reading a channel that's been written */ + uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan); + if (swizzle <= TGSI_SWIZZLE_W && + (channelsWritten & (1 << swizzle))) { + return TRUE; + } + + channelsWritten |= (1 << chan); + } + } + } + return FALSE; +} + /** * Initialize machine state by expanding tokens to full instructions, @@ -280,6 +331,17 @@ tgsi_exec_machine_bind_shader( memcpy(instructions + numInstructions, &parse.FullToken.FullInstruction, sizeof(instructions[0])); + +#if 0 + if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { + debug_printf("SOA dependency in instruction:\n"); + tgsi_dump_instruction(&parse.FullToken.FullInstruction, + numInstructions); + } +#else + (void) tgsi_check_soa_dependencies; +#endif + numInstructions++; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 0b4b2a6fb6..da22baad3e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -34,7 +34,7 @@ extern "C" { #endif -#define MAX_LABELS 1024 +#define MAX_LABELS (4 * 1024) /**< basically, max instructions */ #define NUM_CHANNELS 4 /* R,G,B,A */ #define QUAD_SIZE 4 /* 4 pixel/quad */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index d70bcd03c5..ba2bfdef06 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1466,6 +1466,31 @@ emit_cmp( } } + +/** + * Check if inst src/dest regs use indirect addressing into temporary + * register file. + */ +static boolean +indirect_temp_reference(const struct tgsi_full_instruction *inst) +{ + uint i; + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i]; + if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && + reg->SrcRegister.Indirect) + return TRUE; + } + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i]; + if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && + reg->DstRegister.Indirect) + return TRUE; + } + return FALSE; +} + + static int emit_instruction( struct x86_function *func, @@ -1473,10 +1498,15 @@ emit_instruction( { unsigned chan_index; + /* we can't handle indirect addressing into temp register file yet */ + if (indirect_temp_reference(inst)) + return FALSE; + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); + emit_flr(func, 0, 0); emit_f2it( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } @@ -1553,7 +1583,7 @@ emit_instruction( func, make_xmm( 2 ), make_xmm( 0 ), - cc_LessThanEqual ); + cc_LessThan ); sse_andps( func, make_xmm( 2 ), @@ -2177,32 +2207,83 @@ emit_instruction( /* 3 or 4-component normalization */ { uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; - /* note: cannot use xmm regs 2/3 here (see emit_rsqrt() above) */ - FETCH( func, *inst, 4, 0, CHAN_X ); /* xmm4 = src[0].x */ - FETCH( func, *inst, 5, 0, CHAN_Y ); /* xmm5 = src[0].y */ - FETCH( func, *inst, 6, 0, CHAN_Z ); /* xmm6 = src[0].z */ - if (dims == 4) { - FETCH( func, *inst, 7, 0, CHAN_W ); /* xmm7 = src[0].w */ - } - emit_MOV( func, 0, 4 ); /* xmm0 = xmm3 */ - emit_mul( func, 0, 4 ); /* xmm0 *= xmm3 */ - emit_MOV( func, 1, 5 ); /* xmm1 = xmm4 */ - emit_mul( func, 1, 5 ); /* xmm1 *= xmm4 */ - emit_add( func, 0, 1 ); /* xmm0 += xmm1 */ - emit_MOV( func, 1, 6 ); /* xmm1 = xmm5 */ - emit_mul( func, 1, 6 ); /* xmm1 *= xmm5 */ - emit_add( func, 0, 1 ); /* xmm0 += xmm1 */ - if (dims == 4) { - emit_MOV( func, 1, 7 ); /* xmm1 = xmm7 */ - emit_mul( func, 1, 7 ); /* xmm1 *= xmm7 */ - emit_add( func, 0, 0 ); /* xmm0 += xmm1 */ - } - emit_rsqrt( func, 1, 0 ); /* xmm1 = 1/sqrt(xmm0) */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - if (chan_index < dims) { - emit_mul( func, 4+chan_index, 1); /* xmm[4+ch] *= xmm1 */ - STORE( func, *inst, 4+chan_index, 0, chan_index ); + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) || + (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4)) { + + /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ + + /* xmm4 = src.x */ + /* xmm0 = src.x * src.x */ + FETCH(func, *inst, 0, 0, CHAN_X); + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + emit_MOV(func, 4, 0); + } + emit_mul(func, 0, 0); + + /* xmm5 = src.y */ + /* xmm0 = xmm0 + src.y * src.y */ + FETCH(func, *inst, 1, 0, CHAN_Y); + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + emit_MOV(func, 5, 1); + } + emit_mul(func, 1, 1); + emit_add(func, 0, 1); + + /* xmm6 = src.z */ + /* xmm0 = xmm0 + src.z * src.z */ + FETCH(func, *inst, 1, 0, CHAN_Z); + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + emit_MOV(func, 6, 1); + } + emit_mul(func, 1, 1); + emit_add(func, 0, 1); + + if (dims == 4) { + /* xmm7 = src.w */ + /* xmm0 = xmm0 + src.w * src.w */ + FETCH(func, *inst, 1, 0, CHAN_W); + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + emit_MOV(func, 7, 1); + } + emit_mul(func, 1, 1); + emit_add(func, 0, 1); } + + /* xmm1 = 1 / sqrt(xmm0) */ + emit_rsqrt(func, 1, 0); + + /* dst.x = xmm1 * src.x */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + emit_mul(func, 4, 1); + STORE(func, *inst, 4, 0, CHAN_X); + } + + /* dst.y = xmm1 * src.y */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + emit_mul(func, 5, 1); + STORE(func, *inst, 5, 0, CHAN_Y); + } + + /* dst.z = xmm1 * src.z */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + emit_mul(func, 6, 1); + STORE(func, *inst, 6, 0, CHAN_Z); + } + + /* dst.w = xmm1 * src.w */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && dims == 4) { + emit_mul(func, 7, 1); + STORE(func, *inst, 7, 0, CHAN_W); + } + } + + /* dst0.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) { + emit_tempf(func, 0, TEMP_ONE_I, TEMP_ONE_C); + STORE(func, *inst, 0, 0, CHAN_W); } } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c index 062c1be938..bc9c18fd4a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -198,3 +198,30 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in, return ctx->ti; } + + +#include "tgsi_text.h" + +extern int tgsi_transform_foo( struct tgsi_token *tokens_out, + uint max_tokens_out ); + +/* This function exists only so that tgsi_text_translate() doesn't get + * magic-ed out of the libtgsi.a archive by the build system. Don't + * remove unless you know this has been fixed - check on mingw/scons + * builds as well. + */ +int +tgsi_transform_foo( struct tgsi_token *tokens_out, + uint max_tokens_out ) +{ + const char *text = + "FRAG1.1\n" + "DCL IN[0], COLOR, CONSTANT\n" + "DCL OUT[0], COLOR\n" + " 0: MOV OUT[0], IN[0]\n" + " 1: END"; + + return tgsi_text_translate( text, + tokens_out, + max_tokens_out ); +} |