diff options
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_dump.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 32 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.h | 22 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_info.c | 15 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_info.h | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ppc.c | 8 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_scan.c | 7 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_scan.h | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 81 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.c | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.h | 4 |
11 files changed, 124 insertions, 57 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 1ad183b8dd..35480076ed 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -56,7 +56,7 @@ dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...) va_list ap; (void)ctx; va_start(ap, format); - debug_vprintf(format, ap); + _debug_vprintf(format, ap); va_end(ap); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 9b1ca7fa85..c15d970b57 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -621,12 +621,10 @@ tgsi_exec_machine_bind_shader( { uint k; struct tgsi_parse_context parse; - struct tgsi_exec_labels *labels = &mach->Labels; struct tgsi_full_instruction *instructions; struct tgsi_full_declaration *declarations; uint maxInstructions = 10, numInstructions = 0; uint maxDeclarations = 10, numDeclarations = 0; - uint instno = 0; #if 0 tgsi_dump(tokens, 0); @@ -637,6 +635,23 @@ tgsi_exec_machine_bind_shader( mach->Tokens = tokens; mach->Samplers = samplers; + if (!tokens) { + /* unbind and free all */ + if (mach->Declarations) { + FREE( mach->Declarations ); + } + mach->Declarations = NULL; + mach->NumDeclarations = 0; + + if (mach->Instructions) { + FREE( mach->Instructions ); + } + mach->Instructions = NULL; + mach->NumInstructions = 0; + + return; + } + k = tgsi_parse_init (&parse, mach->Tokens); if (k != TGSI_PARSE_OK) { debug_printf( "Problem parsing!\n" ); @@ -645,7 +660,6 @@ tgsi_exec_machine_bind_shader( mach->Processor = parse.FullHeader.Processor.Processor; mach->ImmLimit = 0; - labels->count = 0; declarations = (struct tgsi_full_declaration *) MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); @@ -663,7 +677,6 @@ tgsi_exec_machine_bind_shader( } while( !tgsi_parse_end_of_tokens( &parse ) ) { - uint pointer = parse.Position; uint i; tgsi_parse_token( &parse ); @@ -707,11 +720,6 @@ tgsi_exec_machine_bind_shader( break; case TGSI_TOKEN_TYPE_INSTRUCTION: - assert( labels->count < MAX_LABELS ); - - labels->labels[labels->count][0] = instno; - labels->labels[labels->count][1] = pointer; - labels->count++; /* save expanded instruction */ if (numInstructions == maxInstructions) { @@ -801,7 +809,9 @@ void tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) { if (mach) { - FREE(mach->Instructions); + if (mach->Instructions) + FREE(mach->Instructions); + if (mach->Declarations) FREE(mach->Declarations); } @@ -3126,7 +3136,7 @@ exec_instruction( break; case TGSI_OPCODE_DIV: - assert( 0 ); + exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DP2: diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 11bbaf6722..3caf820af6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -37,8 +37,6 @@ extern "C" { #endif -#define MAX_LABELS (4 * 1024) /**< basically, max instructions */ - #define NUM_CHANNELS 4 /* R,G,B,A */ #define QUAD_SIZE 4 /* 4 pixel/quad */ @@ -93,16 +91,6 @@ struct tgsi_sampler float rgba[NUM_CHANNELS][QUAD_SIZE]); }; -/** - * For branching/calling subroutines. - */ -struct tgsi_exec_labels -{ - unsigned labels[MAX_LABELS][2]; - unsigned count; -}; - - #define TGSI_EXEC_NUM_TEMPS 128 #define TGSI_EXEC_NUM_IMMEDIATES 256 @@ -186,10 +174,11 @@ struct tgsi_exec_labels -#define TGSI_EXEC_MAX_COND_NESTING 32 -#define TGSI_EXEC_MAX_LOOP_NESTING 32 -#define TGSI_EXEC_MAX_SWITCH_NESTING 32 -#define TGSI_EXEC_MAX_CALL_NESTING 32 +#define TGSI_EXEC_MAX_NESTING 32 +#define TGSI_EXEC_MAX_COND_NESTING TGSI_EXEC_MAX_NESTING +#define TGSI_EXEC_MAX_LOOP_NESTING TGSI_EXEC_MAX_NESTING +#define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING +#define TGSI_EXEC_MAX_CALL_NESTING TGSI_EXEC_MAX_NESTING /* The maximum number of input attributes per vertex. For 2D * input register files, this is the stride between two 1D @@ -324,7 +313,6 @@ struct tgsi_exec_machine struct tgsi_full_declaration *Declarations; uint NumDeclarations; - struct tgsi_exec_labels Labels; }; struct tgsi_exec_machine * diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index cfa2f631bd..e59e964ffa 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -205,3 +205,18 @@ tgsi_get_opcode_name( uint opcode ) return info->mnemonic; } + +const char * +tgsi_get_processor_name( uint processor ) +{ + switch (processor) { + case TGSI_PROCESSOR_VERTEX: + return "vertex shader"; + case TGSI_PROCESSOR_FRAGMENT: + return "fragment shader"; + case TGSI_PROCESSOR_GEOMETRY: + return "geometry shader"; + default: + return "unknown shader type!"; + } +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h index 74713c3b98..50248884fd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.h +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h @@ -52,6 +52,9 @@ tgsi_get_opcode_info( uint opcode ); const char * tgsi_get_opcode_name( uint opcode ); +const char * +tgsi_get_processor_name( uint processor ); + #if defined __cplusplus } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index ad553c71a5..3521847b61 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -1366,4 +1366,12 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, return ok; } +#else + +void ppc_dummy_func(void); + +void ppc_dummy_func(void) +{ +} + #endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 232fc537c1..e0c5d3d3d6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -88,10 +88,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); info->opcode_count[fullinst->Instruction.Opcode]++; - /* special case: scan fragment shaders for use of the fog - * input/attribute. The X component is fog, the Y component - * is the front/back-face flag. - */ + /* check if we read the frag shader FOG or FACE inputs */ if (procType == TGSI_PROCESSOR_FRAGMENT) { uint i; for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { @@ -109,6 +106,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } } } + + info->num_instructions++; } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 741aa7d5c4..27de33f799 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -54,6 +54,7 @@ struct tgsi_shader_info int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ uint immediate_count; /**< number of immediates declared */ + uint num_instructions; uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 1071298b49..d5061f8b51 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1929,20 +1929,32 @@ emit_instruction( break; case TGSI_OPCODE_MUL: + /* do all fetches and adds, storing results in temp regs */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - emit_mul( func, 0, 1 ); - STORE( func, *inst, 0, 0, chan_index ); + int r = chan_index + 1; + FETCH( func, *inst, 0, 0, chan_index ); /* load xmm[0] */ + FETCH( func, *inst, r, 1, chan_index ); /* load xmm[r] */ + emit_mul( func, r, 0 ); /* xmm[r] = xmm[r] * xmm[0] */ + } + /* do all stores of the temp regs */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + int r = chan_index + 1; + STORE( func, *inst, r, 0, chan_index ); /* store xmm[r] */ } break; case TGSI_OPCODE_ADD: + /* do all fetches and adds, storing results in temp regs */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - emit_add( func, 0, 1 ); - STORE( func, *inst, 0, 0, chan_index ); + int r = chan_index + 1; + FETCH( func, *inst, 0, 0, chan_index ); /* load xmm[0] */ + FETCH( func, *inst, r, 1, chan_index ); /* load xmm[r] */ + emit_add( func, r, 0 ); /* xmm[r] = xmm[r] + xmm[0] */ + } + /* do all stores of the temp regs */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + int r = chan_index + 1; + STORE( func, *inst, r, 0, chan_index ); /* store xmm[r] */ } break; @@ -2816,6 +2828,40 @@ static void soa_to_aos( struct x86_function *func, x86_pop( func, x86_make_reg( file_REG32, reg_BX ) ); } + +/** + * Check if the instructions dst register is the same as any src + * register and warn if there's a posible SOA dependency. + */ +static void +check_soa_dependencies(const struct tgsi_full_instruction *inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_XPD: + /* OK - these opcodes correctly handle SOA dependencies */ + break; + default: + if (tgsi_check_soa_dependencies(inst)) { + uint opcode = inst->Instruction.Opcode; + + /* XXX: we only handle src/dst aliasing in a few opcodes + * currently. Need to use an additional temporay to hold + * the result in the cases where the code is too opaque to + * fix. + */ + if (opcode != TGSI_OPCODE_MOV) { + debug_printf("Warning: src/dst aliasing in instruction" + " is not handled:\n"); + tgsi_dump_instruction(inst, 1); + } + } + } +} + + /** * Translate a TGSI vertex/fragment shader to SSE2 code. * Slightly different things are done for vertex vs. fragment shaders. @@ -2905,27 +2951,14 @@ tgsi_emit_sse2( if (!ok) { uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; + uint proc = parse.FullHeader.Processor.Processor; debug_printf("failed to translate tgsi opcode %d (%s) to SSE (%s)\n", opcode, tgsi_get_opcode_name(opcode), - parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? - "vertex shader" : "fragment shader"); + tgsi_get_processor_name(proc)); } - if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { - uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - - /* XXX: we only handle src/dst aliasing in a few opcodes - * currently. Need to use an additional temporay to hold - * the result in the cases where the code is too opaque to - * fix. - */ - if (opcode != TGSI_OPCODE_MOV) { - debug_printf("Warning: src/dst aliasing in instruction" - " is not handled:\n"); - tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1); - } - } + check_soa_dependencies(&parse.FullToken.FullInstruction); break; case TGSI_TOKEN_TYPE_IMMEDIATE: diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 49b854b123..5fda808dbe 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -1467,6 +1467,12 @@ const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg, } +void ureg_free_tokens( const struct tgsi_token *tokens ) +{ + FREE((struct tgsi_token *)tokens); +} + + struct ureg_program *ureg_create( unsigned processor ) { struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index f32420dd87..055545f3d2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -104,6 +104,10 @@ ureg_get_tokens( struct ureg_program *ureg, unsigned *nr_tokens ); +/* Free the tokens created by ureg_get_tokens() */ +void ureg_free_tokens( const struct tgsi_token *tokens ); + + void ureg_destroy( struct ureg_program * ); |