summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c32
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h22
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c15
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.h3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c7
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c81
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h4
11 files changed, 124 insertions, 57 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 1ad183b8dd..35480076ed 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -56,7 +56,7 @@ dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
va_list ap;
(void)ctx;
va_start(ap, format);
- debug_vprintf(format, ap);
+ _debug_vprintf(format, ap);
va_end(ap);
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 9b1ca7fa85..c15d970b57 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -621,12 +621,10 @@ tgsi_exec_machine_bind_shader(
{
uint k;
struct tgsi_parse_context parse;
- struct tgsi_exec_labels *labels = &mach->Labels;
struct tgsi_full_instruction *instructions;
struct tgsi_full_declaration *declarations;
uint maxInstructions = 10, numInstructions = 0;
uint maxDeclarations = 10, numDeclarations = 0;
- uint instno = 0;
#if 0
tgsi_dump(tokens, 0);
@@ -637,6 +635,23 @@ tgsi_exec_machine_bind_shader(
mach->Tokens = tokens;
mach->Samplers = samplers;
+ if (!tokens) {
+ /* unbind and free all */
+ if (mach->Declarations) {
+ FREE( mach->Declarations );
+ }
+ mach->Declarations = NULL;
+ mach->NumDeclarations = 0;
+
+ if (mach->Instructions) {
+ FREE( mach->Instructions );
+ }
+ mach->Instructions = NULL;
+ mach->NumInstructions = 0;
+
+ return;
+ }
+
k = tgsi_parse_init (&parse, mach->Tokens);
if (k != TGSI_PARSE_OK) {
debug_printf( "Problem parsing!\n" );
@@ -645,7 +660,6 @@ tgsi_exec_machine_bind_shader(
mach->Processor = parse.FullHeader.Processor.Processor;
mach->ImmLimit = 0;
- labels->count = 0;
declarations = (struct tgsi_full_declaration *)
MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
@@ -663,7 +677,6 @@ tgsi_exec_machine_bind_shader(
}
while( !tgsi_parse_end_of_tokens( &parse ) ) {
- uint pointer = parse.Position;
uint i;
tgsi_parse_token( &parse );
@@ -707,11 +720,6 @@ tgsi_exec_machine_bind_shader(
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- assert( labels->count < MAX_LABELS );
-
- labels->labels[labels->count][0] = instno;
- labels->labels[labels->count][1] = pointer;
- labels->count++;
/* save expanded instruction */
if (numInstructions == maxInstructions) {
@@ -801,7 +809,9 @@ void
tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
{
if (mach) {
- FREE(mach->Instructions);
+ if (mach->Instructions)
+ FREE(mach->Instructions);
+ if (mach->Declarations)
FREE(mach->Declarations);
}
@@ -3126,7 +3136,7 @@ exec_instruction(
break;
case TGSI_OPCODE_DIV:
- assert( 0 );
+ exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_DP2:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 11bbaf6722..3caf820af6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -37,8 +37,6 @@ extern "C" {
#endif
-#define MAX_LABELS (4 * 1024) /**< basically, max instructions */
-
#define NUM_CHANNELS 4 /* R,G,B,A */
#define QUAD_SIZE 4 /* 4 pixel/quad */
@@ -93,16 +91,6 @@ struct tgsi_sampler
float rgba[NUM_CHANNELS][QUAD_SIZE]);
};
-/**
- * For branching/calling subroutines.
- */
-struct tgsi_exec_labels
-{
- unsigned labels[MAX_LABELS][2];
- unsigned count;
-};
-
-
#define TGSI_EXEC_NUM_TEMPS 128
#define TGSI_EXEC_NUM_IMMEDIATES 256
@@ -186,10 +174,11 @@ struct tgsi_exec_labels
-#define TGSI_EXEC_MAX_COND_NESTING 32
-#define TGSI_EXEC_MAX_LOOP_NESTING 32
-#define TGSI_EXEC_MAX_SWITCH_NESTING 32
-#define TGSI_EXEC_MAX_CALL_NESTING 32
+#define TGSI_EXEC_MAX_NESTING 32
+#define TGSI_EXEC_MAX_COND_NESTING TGSI_EXEC_MAX_NESTING
+#define TGSI_EXEC_MAX_LOOP_NESTING TGSI_EXEC_MAX_NESTING
+#define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING
+#define TGSI_EXEC_MAX_CALL_NESTING TGSI_EXEC_MAX_NESTING
/* The maximum number of input attributes per vertex. For 2D
* input register files, this is the stride between two 1D
@@ -324,7 +313,6 @@ struct tgsi_exec_machine
struct tgsi_full_declaration *Declarations;
uint NumDeclarations;
- struct tgsi_exec_labels Labels;
};
struct tgsi_exec_machine *
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index cfa2f631bd..e59e964ffa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -205,3 +205,18 @@ tgsi_get_opcode_name( uint opcode )
return info->mnemonic;
}
+
+const char *
+tgsi_get_processor_name( uint processor )
+{
+ switch (processor) {
+ case TGSI_PROCESSOR_VERTEX:
+ return "vertex shader";
+ case TGSI_PROCESSOR_FRAGMENT:
+ return "fragment shader";
+ case TGSI_PROCESSOR_GEOMETRY:
+ return "geometry shader";
+ default:
+ return "unknown shader type!";
+ }
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
index 74713c3b98..50248884fd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -52,6 +52,9 @@ tgsi_get_opcode_info( uint opcode );
const char *
tgsi_get_opcode_name( uint opcode );
+const char *
+tgsi_get_processor_name( uint processor );
+
#if defined __cplusplus
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index ad553c71a5..3521847b61 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -1366,4 +1366,12 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
return ok;
}
+#else
+
+void ppc_dummy_func(void);
+
+void ppc_dummy_func(void)
+{
+}
+
#endif /* PIPE_ARCH_PPC */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 232fc537c1..e0c5d3d3d6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -88,10 +88,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
info->opcode_count[fullinst->Instruction.Opcode]++;
- /* special case: scan fragment shaders for use of the fog
- * input/attribute. The X component is fog, the Y component
- * is the front/back-face flag.
- */
+ /* check if we read the frag shader FOG or FACE inputs */
if (procType == TGSI_PROCESSOR_FRAGMENT) {
uint i;
for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
@@ -109,6 +106,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
}
}
}
+
+ info->num_instructions++;
}
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 741aa7d5c4..27de33f799 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -54,6 +54,7 @@ struct tgsi_shader_info
int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */
uint immediate_count; /**< number of immediates declared */
+ uint num_instructions;
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 1071298b49..d5061f8b51 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1929,20 +1929,32 @@ emit_instruction(
break;
case TGSI_OPCODE_MUL:
+ /* do all fetches and adds, storing results in temp regs */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_mul( func, 0, 1 );
- STORE( func, *inst, 0, 0, chan_index );
+ int r = chan_index + 1;
+ FETCH( func, *inst, 0, 0, chan_index ); /* load xmm[0] */
+ FETCH( func, *inst, r, 1, chan_index ); /* load xmm[r] */
+ emit_mul( func, r, 0 ); /* xmm[r] = xmm[r] * xmm[0] */
+ }
+ /* do all stores of the temp regs */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ int r = chan_index + 1;
+ STORE( func, *inst, r, 0, chan_index ); /* store xmm[r] */
}
break;
case TGSI_OPCODE_ADD:
+ /* do all fetches and adds, storing results in temp regs */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_add( func, 0, 1 );
- STORE( func, *inst, 0, 0, chan_index );
+ int r = chan_index + 1;
+ FETCH( func, *inst, 0, 0, chan_index ); /* load xmm[0] */
+ FETCH( func, *inst, r, 1, chan_index ); /* load xmm[r] */
+ emit_add( func, r, 0 ); /* xmm[r] = xmm[r] + xmm[0] */
+ }
+ /* do all stores of the temp regs */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ int r = chan_index + 1;
+ STORE( func, *inst, r, 0, chan_index ); /* store xmm[r] */
}
break;
@@ -2816,6 +2828,40 @@ static void soa_to_aos( struct x86_function *func,
x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
}
+
+/**
+ * Check if the instructions dst register is the same as any src
+ * register and warn if there's a posible SOA dependency.
+ */
+static void
+check_soa_dependencies(const struct tgsi_full_instruction *inst)
+{
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_XPD:
+ /* OK - these opcodes correctly handle SOA dependencies */
+ break;
+ default:
+ if (tgsi_check_soa_dependencies(inst)) {
+ uint opcode = inst->Instruction.Opcode;
+
+ /* XXX: we only handle src/dst aliasing in a few opcodes
+ * currently. Need to use an additional temporay to hold
+ * the result in the cases where the code is too opaque to
+ * fix.
+ */
+ if (opcode != TGSI_OPCODE_MOV) {
+ debug_printf("Warning: src/dst aliasing in instruction"
+ " is not handled:\n");
+ tgsi_dump_instruction(inst, 1);
+ }
+ }
+ }
+}
+
+
/**
* Translate a TGSI vertex/fragment shader to SSE2 code.
* Slightly different things are done for vertex vs. fragment shaders.
@@ -2905,27 +2951,14 @@ tgsi_emit_sse2(
if (!ok) {
uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
+ uint proc = parse.FullHeader.Processor.Processor;
debug_printf("failed to translate tgsi opcode %d (%s) to SSE (%s)\n",
opcode,
tgsi_get_opcode_name(opcode),
- parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
- "vertex shader" : "fragment shader");
+ tgsi_get_processor_name(proc));
}
- if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
- uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
-
- /* XXX: we only handle src/dst aliasing in a few opcodes
- * currently. Need to use an additional temporay to hold
- * the result in the cases where the code is too opaque to
- * fix.
- */
- if (opcode != TGSI_OPCODE_MOV) {
- debug_printf("Warning: src/dst aliasing in instruction"
- " is not handled:\n");
- tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1);
- }
- }
+ check_soa_dependencies(&parse.FullToken.FullInstruction);
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 49b854b123..5fda808dbe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -1467,6 +1467,12 @@ const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
}
+void ureg_free_tokens( const struct tgsi_token *tokens )
+{
+ FREE((struct tgsi_token *)tokens);
+}
+
+
struct ureg_program *ureg_create( unsigned processor )
{
struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index f32420dd87..055545f3d2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -104,6 +104,10 @@ ureg_get_tokens( struct ureg_program *ureg,
unsigned *nr_tokens );
+/* Free the tokens created by ureg_get_tokens() */
+void ureg_free_tokens( const struct tgsi_token *tokens );
+
+
void
ureg_destroy( struct ureg_program * );