diff options
author | Keith Whitwell <keithw@vmware.com> | 2009-09-13 11:59:24 -0700 |
---|---|---|
committer | Keith Whitwell <keithw@vmware.com> | 2009-09-13 11:59:24 -0700 |
commit | 66a7eedaa2f66e5e941cea0303c5ec348e9cc641 (patch) | |
tree | cc5f22903fea5f28f68f6f41eed17eaabe13c97a /src/gallium/auxiliary | |
parent | 86226d5ea186d3fc6013bc40a341e0c0a891de39 (diff) |
tgsi: handle some src/dst aliasing in tgsi_sse2.c
Src/Dst aliasing (aka SOA dependencies) requires some care to ensure
intermediate results do not overwrite yet-to-be read source registers.
This change ensures that MOV/SWZ handle this correctly, which is poor but
no worse than the current tgsi_exec.c path. Remove the fallback as there
is nothing to be gained correctness-wise between the two implementations now.
Fixing this properly looks like a bit of work in this code, but might be
easily achieved by sending destination writes to temporary storage.
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 31 |
1 files changed, 23 insertions, 8 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 53e3f746ee..501fc05e72 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -39,8 +39,9 @@ #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" -#include "tgsi_exec.h" -#include "tgsi_sse2.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_sse2.h" #include "rtasm/rtasm_x86sse.h" @@ -1760,10 +1761,6 @@ emit_instruction( if (indirect_temp_reference(inst)) return FALSE; - /* need to use extra temps to fix SOA dependencies : */ - if (tgsi_check_soa_dependencies(inst)) - return FALSE; - switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { @@ -1777,8 +1774,10 @@ emit_instruction( case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - STORE( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 4 + chan_index, 0, chan_index ); + } + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 4 + chan_index, 0, chan_index ); } break; @@ -2938,6 +2937,22 @@ tgsi_emit_sse2( parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? "vertex shader" : "fragment shader"); } + + if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { + uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; + + /* XXX: we only handle src/dst aliasing in a few opcodes + * currently. Need to use an additional temporay to hold + * the result in the cases where the code is too opaque to + * fix. + */ + if (opcode != TGSI_OPCODE_MOV && + opcode != TGSI_OPCODE_SWZ) { + debug_printf("Warning: src/dst aliasing in instruction" + " is not handled:\n"); + tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1); + } + } break; case TGSI_TOKEN_TYPE_IMMEDIATE: |