diff options
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r-- | src/gallium/auxiliary/tgsi/Makefile | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/SConscript | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt | 28 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_build.c | 23 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_build.h | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_dump.c | 13 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 8 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 67 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_info.c | 258 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_info.h | 7 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 173 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_parse.c | 13 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_parse.h | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ppc.c | 25 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sanity.c | 29 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 55 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_text.c | 18 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.c | 797 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ureg.h | 439 |
19 files changed, 1691 insertions, 272 deletions
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index b4900e8dba..5f0a580b09 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -16,6 +16,7 @@ C_SOURCES = \ tgsi_sse2.c \ tgsi_text.c \ tgsi_transform.c \ + tgsi_ureg.c \ tgsi_util.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 8200cce42f..b6bc2924f0 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -16,6 +16,7 @@ tgsi = env.ConvenienceLibrary( 'tgsi_sse2.c', 'tgsi_text.c', 'tgsi_transform.c', + 'tgsi_ureg.c', 'tgsi_util.c', ]) diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index a3f4947c73..802ec37118 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -665,9 +665,18 @@ TGSI Instruction Specification TBD -1.9.8 LOOP - Loop +1.9.8 BGNFOR - Begin a For-Loop - TBD + dst.x = floor(src.x) + dst.y = floor(src.y) + dst.z = floor(src.z) + + if (dst.y <= 0) + pc = [matching ENDFOR] + 1 + endif + + Note: The destination must be a loop register. + The source must be a constant register. 1.9.9 REP - Repeat @@ -685,9 +694,16 @@ TGSI Instruction Specification TBD -1.9.12 ENDLOOP - End Loop +1.9.12 ENDFOR - End a For-Loop - TBD + dst.x = dst.x + dst.z + dst.y = dst.y - 1.0 + + if (dst.y > 0) + pc = [matching BGNFOR instruction] + 1 + endif + + Note: The destination must be a loop register. 1.9.13 ENDREP - End Repeat @@ -840,7 +856,7 @@ TGSI Instruction Specification ---------- -1.13.1 BGNLOOP2 - Begin Loop +1.13.1 BGNLOOP - Begin a Loop TBD @@ -850,7 +866,7 @@ TGSI Instruction Specification TBD -1.13.3 ENDLOOP2 - End Loop +1.13.3 ENDLOOP - End a Loop TBD diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index d272533d63..010d501c60 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -335,7 +335,10 @@ tgsi_default_full_immediate( void ) struct tgsi_full_immediate fullimm; fullimm.Immediate = tgsi_default_immediate(); - fullimm.u.Pointer = (void *) 0; + fullimm.u[0].Float = 0.0f; + fullimm.u[1].Float = 0.0f; + fullimm.u[2].Float = 0.0f; + fullimm.u[3].Float = 0.0f; return fullimm; } @@ -352,19 +355,19 @@ immediate_grow( header_bodysize_grow( header ); } -struct tgsi_immediate_float32 +union tgsi_immediate_data tgsi_build_immediate_float32( float value, struct tgsi_immediate *immediate, struct tgsi_header *header ) { - struct tgsi_immediate_float32 immediate_float32; + union tgsi_immediate_data immediate_data; - immediate_float32.Float = value; + immediate_data.Float = value; immediate_grow( immediate, header ); - return immediate_float32; + return immediate_data; } unsigned @@ -384,16 +387,18 @@ tgsi_build_full_immediate( *immediate = tgsi_build_immediate( header ); + assert( full_imm->Immediate.NrTokens <= 4 + 1 ); + for( i = 0; i < full_imm->Immediate.NrTokens - 1; i++ ) { - struct tgsi_immediate_float32 *if32; + union tgsi_immediate_data *data; if( maxsize <= size ) return 0; - if32 = (struct tgsi_immediate_float32 *) &tokens[size]; + data = (union tgsi_immediate_data *) &tokens[size]; size++; - *if32 = tgsi_build_immediate_float32( - full_imm->u.ImmediateFloat32[i].Float, + *data = tgsi_build_immediate_float32( + full_imm->u[i].Float, immediate, header ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 9a3a077cf2..17d977b059 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -119,7 +119,7 @@ tgsi_build_immediate( struct tgsi_full_immediate tgsi_default_full_immediate( void ); -struct tgsi_immediate_float32 +union tgsi_immediate_data tgsi_build_immediate_float32( float value, struct tgsi_immediate *immediate, diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index a6994ecd48..f36b1114a9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -82,7 +82,7 @@ static const char *processor_type_names[] = "GEOM" }; -static const char *file_names[] = +static const char *file_names[TGSI_FILE_COUNT] = { "NULL", "CONST", @@ -91,7 +91,8 @@ static const char *file_names[] = "TEMP", "SAMP", "ADDR", - "IMM" + "IMM", + "LOOP" }; static const char *interpolate_names[] = @@ -295,10 +296,12 @@ iter_immediate( ENM( imm->Immediate.DataType, immediate_type_names ); TXT( " { " ); + + assert( imm->Immediate.NrTokens <= 4 + 1 ); for (i = 0; i < imm->Immediate.NrTokens - 1; i++) { switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: - FLT( imm->u.ImmediateFloat32[i].Float ); + FLT( imm->u[i].Float ); break; default: assert( 0 ); @@ -470,8 +473,8 @@ iter_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_IF: case TGSI_OPCODE_ELSE: - case TGSI_OPCODE_BGNLOOP2: - case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_CAL: TXT( " :" ); UID( inst->InstructionExtLabel.Label ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 3dc61c48ca..4a9c02b141 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -69,7 +69,7 @@ static const char *TGSI_TOKEN_TYPES[] = "TOKEN_TYPE_INSTRUCTION" }; -static const char *TGSI_FILES[] = +static const char *TGSI_FILES[TGSI_FILE_COUNT] = { "FILE_NULL", "FILE_CONSTANT", @@ -78,7 +78,8 @@ static const char *TGSI_FILES[] = "FILE_TEMPORARY", "FILE_SAMPLER", "FILE_ADDRESS", - "FILE_IMMEDIATE" + "FILE_IMMEDIATE", + "FILE_LOOP" }; static const char *TGSI_INTERPOLATES[] = @@ -283,12 +284,13 @@ dump_immediate_verbose( UIX( imm->Immediate.Padding ); } + assert( imm->Immediate.NrTokens <= 4 + 1 ); for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) { EOL(); switch( imm->Immediate.DataType ) { case TGSI_IMM_FLOAT32: TXT( "\nFloat: " ); - FLT( imm->u.ImmediateFloat32[i].Float ); + FLT( imm->u[i].Float ); break; default: diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index fe571a86bc..951ecfd552 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -301,14 +301,14 @@ tgsi_exec_machine_bind_shader( case TGSI_TOKEN_TYPE_IMMEDIATE: { uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; - assert( size % 4 == 0 ); - assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); + assert( size <= 4 ); + assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); for( i = 0; i < size; i++ ) { - mach->Imms[mach->ImmLimit + i / 4][i % 4] = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + mach->Imms[mach->ImmLimit][i] = + parse.FullToken.FullImmediate.u[i].Float; } - mach->ImmLimit += size / 4; + mach->ImmLimit += 1; } break; @@ -375,15 +375,9 @@ tgsi_exec_machine_create( void ) if (!mach) goto fail; - mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; + memset(mach, 0, sizeof(*mach)); - mach->Samplers = NULL; - mach->Consts = NULL; - mach->Tokens = NULL; - mach->Primitives = NULL; - mach->InterpCoefs = NULL; - mach->Instructions = NULL; - mach->Declarations = NULL; + mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; /* Setup constants. */ for( i = 0; i < 4; i++ ) { @@ -2020,8 +2014,7 @@ exec_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ + case TGSI_OPCODE_FLR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_flr( &r[0], &r[0] ); @@ -2290,8 +2283,7 @@ exec_instruction( } break; - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ + case TGSI_OPCODE_LRP: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); @@ -2325,8 +2317,7 @@ exec_instruction( } break; - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ + case TGSI_OPCODE_DP2A: FETCH( &r[0], 0, CHAN_X ); FETCH( &r[1], 1, CHAN_X ); micro_mul( &r[0], &r[0], &r[1] ); @@ -2344,18 +2335,7 @@ exec_instruction( } break; - case TGSI_OPCODE_INDEX: - /* XXX: considered for removal */ - assert (0); - break; - - case TGSI_OPCODE_NEGATE: - /* XXX: considered for removal */ - assert (0); - break; - - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ + case TGSI_OPCODE_FRC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_frc( &r[0], &r[0] ); @@ -2383,8 +2363,7 @@ exec_instruction( } break; - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ + case TGSI_OPCODE_EX2: FETCH(&r[0], 0, CHAN_X); #if FAST_MATH @@ -2398,8 +2377,7 @@ exec_instruction( } break; - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ + case TGSI_OPCODE_LG2: FETCH( &r[0], 0, CHAN_X ); micro_lg2( &r[0], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { @@ -2407,8 +2385,7 @@ exec_instruction( } break; - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ + case TGSI_OPCODE_POW: FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 1, CHAN_X); @@ -2419,8 +2396,7 @@ exec_instruction( } break; - case TGSI_OPCODE_CROSSPRODUCT: - /* TGSI_OPCODE_XPD */ + case TGSI_OPCODE_XPD: FETCH(&r[0], 0, CHAN_Y); FETCH(&r[1], 1, CHAN_Z); @@ -2462,11 +2438,6 @@ exec_instruction( } break; - case TGSI_OPCODE_MULTIPLYMATRIX: - /* XXX: considered for removal */ - assert (0); - break; - case TGSI_OPCODE_ABS: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); @@ -3110,9 +3081,9 @@ exec_instruction( mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; break; - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: /* fall-through (for now) */ - case TGSI_OPCODE_BGNLOOP2: + case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; @@ -3120,9 +3091,9 @@ exec_instruction( mach->ContStack[mach->ContStackTop++] = mach->ContMask; break; - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDFOR: /* fall-through (for now at least) */ - case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 37f2b66d1f..ccf4b205ff 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -26,136 +26,156 @@ **************************************************************************/ #include "util/u_debug.h" +#include "util/u_memory.h" #include "tgsi_info.h" static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { - { 1, 1, 0, 0, "ARL", NULL, NULL }, - { 1, 1, 0, 0, "MOV", NULL, NULL }, - { 1, 1, 0, 0, "LIT", NULL, NULL }, - { 1, 1, 0, 0, "RCP", "RECIP", NULL }, - { 1, 1, 0, 0, "RSQ", "RECIPSQRT", NULL }, - { 1, 1, 0, 0, "EXP", "EXPP", NULL }, - { 1, 1, 0, 0, "LOG", NULL, NULL }, - { 1, 2, 0, 0, "MUL", NULL, NULL }, - { 1, 2, 0, 0, "ADD", NULL, NULL }, - { 1, 2, 0, 0, "DP3", "DOT3", NULL }, - { 1, 2, 0, 0, "DP4", "DOT4", NULL }, - { 1, 2, 0, 0, "DST", NULL, NULL }, - { 1, 2, 0, 0, "MIN", NULL, NULL }, - { 1, 2, 0, 0, "MAX", NULL, NULL }, - { 1, 2, 0, 0, "SLT", "SETLT", NULL }, - { 1, 2, 0, 0, "SGE", "SETGE", NULL }, - { 1, 3, 0, 0, "MAD", "MADD", NULL }, - { 1, 2, 0, 0, "SUB", NULL, NULL }, - { 1, 3, 0, 0, "LRP", "LERP", NULL }, - { 1, 3, 0, 0, "CND", NULL, NULL }, - { 1, 3, 0, 0, "CND0", NULL, NULL }, - { 1, 3, 0, 0, "DP2A", "DP2ADD", "DOT2ADD" }, - { 1, 2, 0, 0, "INDEX", NULL, NULL }, - { 1, 1, 0, 0, "NEGATE", NULL, NULL }, - { 1, 1, 0, 0, "FRC", "FRAC", NULL }, - { 1, 3, 0, 0, "CLAMP", NULL, NULL }, - { 1, 1, 0, 0, "FLR", "FLOOR", NULL }, - { 1, 1, 0, 0, "ROUND", NULL, NULL }, - { 1, 1, 0, 0, "EX2", "EXPBASE2", NULL }, - { 1, 1, 0, 0, "LG2", "LOGBASE2", "LOGP" }, - { 1, 2, 0, 0, "POW", "POWER", NULL }, - { 1, 2, 0, 0, "XPD", "CRS", "CROSSPRODUCT" }, - { 1, 2, 0, 0, "M4X4", "MULTIPLYMATRIX", NULL }, - { 1, 1, 0, 0, "ABS", NULL, NULL }, - { 1, 1, 0, 0, "RCC", NULL, NULL }, - { 1, 2, 0, 0, "DPH", NULL, NULL }, - { 1, 1, 0, 0, "COS", NULL, NULL }, - { 1, 1, 0, 0, "DDX", "DSX", NULL }, - { 1, 1, 0, 0, "DDY", "DSY", NULL }, - { 0, 0, 0, 0, "KILP", NULL, NULL }, - { 1, 1, 0, 0, "PK2H", NULL, NULL }, - { 1, 1, 0, 0, "PK2US", NULL, NULL }, - { 1, 1, 0, 0, "PK4B", NULL, NULL }, - { 1, 1, 0, 0, "PK4UB", NULL, NULL }, - { 1, 2, 0, 0, "RFL", NULL, NULL }, - { 1, 2, 0, 0, "SEQ", NULL, NULL }, - { 1, 2, 0, 0, "SFL", NULL, NULL }, - { 1, 2, 0, 0, "SGT", NULL, NULL }, - { 1, 1, 0, 0, "SIN", NULL, NULL }, - { 1, 2, 0, 0, "SLE", NULL, NULL }, - { 1, 2, 0, 0, "SNE", NULL, NULL }, - { 1, 2, 0, 0, "STR", NULL, NULL }, - { 1, 2, 1, 0, "TEX", "TEXLD", NULL }, - { 1, 4, 1, 0, "TXD", "TEXLDD", NULL }, - { 1, 2, 1, 0, "TXP", NULL, NULL }, - { 1, 1, 0, 0, "UP2H", NULL, NULL }, - { 1, 1, 0, 0, "UP2US", NULL, NULL }, - { 1, 1, 0, 0, "UP4B", NULL, NULL }, - { 1, 1, 0, 0, "UP4UB", NULL, NULL }, - { 1, 3, 0, 0, "X2D", NULL, NULL }, - { 1, 1, 0, 0, "ARA", NULL, NULL }, - { 1, 1, 0, 0, "ARR", "MOVA", NULL }, - { 0, 1, 0, 0, "BRA", NULL, NULL }, - { 0, 0, 0, 1, "CAL", "CALL", NULL }, - { 0, 0, 0, 0, "RET", NULL, NULL }, - { 1, 1, 0, 0, "SGN", "SSG", NULL }, - { 1, 3, 0, 0, "CMP", NULL, NULL }, - { 1, 1, 0, 0, "SCS", "SINCOS", NULL }, - { 1, 2, 1, 0, "TXB", "TEXLDB", NULL }, - { 1, 1, 0, 0, "NRM", NULL, NULL }, - { 1, 2, 0, 0, "DIV", NULL, NULL }, - { 1, 2, 0, 0, "DP2", NULL, NULL }, - { 1, 2, 1, 0, "TXL", NULL, NULL }, - { 0, 0, 0, 0, "BRK", "BREAK", NULL }, - { 0, 1, 0, 1, "IF", NULL, NULL }, - { 0, 0, 0, 0, "LOOP", NULL, NULL }, - { 0, 1, 0, 0, "REP", NULL, NULL }, - { 0, 0, 0, 1, "ELSE", NULL, NULL }, - { 0, 0, 0, 0, "ENDIF", NULL, NULL }, - { 0, 0, 0, 0, "ENDLOOP", NULL, NULL }, - { 0, 0, 0, 0, "ENDREP", NULL, NULL }, - { 0, 1, 0, 0, "PUSHA", NULL, NULL }, - { 1, 0, 0, 0, "POPA", NULL, NULL }, - { 1, 1, 0, 0, "CEIL", NULL, NULL }, - { 1, 1, 0, 0, "I2F", NULL, NULL }, - { 1, 1, 0, 0, "NOT", NULL, NULL }, - { 1, 1, 0, 0, "INT", "TRUNC", NULL }, - { 1, 2, 0, 0, "SHL", NULL, NULL }, - { 1, 2, 0, 0, "SHR", NULL, NULL }, - { 1, 2, 0, 0, "AND", NULL, NULL }, - { 1, 2, 0, 0, "OR", NULL, NULL }, - { 1, 2, 0, 0, "MOD", NULL, NULL }, - { 1, 2, 0, 0, "XOR", NULL, NULL }, - { 1, 3, 0, 0, "SAD", NULL, NULL }, - { 1, 2, 1, 0, "TXF", NULL, NULL }, - { 1, 2, 1, 0, "TXQ", NULL, NULL }, - { 0, 0, 0, 0, "CONT", NULL, NULL }, - { 0, 0, 0, 0, "EMIT", NULL, NULL }, - { 0, 0, 0, 0, "ENDPRIM", NULL, NULL }, - { 0, 0, 0, 1, "BGNLOOP2", NULL, NULL }, - { 0, 0, 0, 0, "BGNSUB", NULL, NULL }, - { 0, 0, 0, 1, "ENDLOOP2", NULL, NULL }, - { 0, 0, 0, 0, "ENDSUB", NULL, NULL }, - { 1, 1, 0, 0, "NOISE1", NULL, NULL }, - { 1, 1, 0, 0, "NOISE2", NULL, NULL }, - { 1, 1, 0, 0, "NOISE3", NULL, NULL }, - { 1, 1, 0, 0, "NOISE4", NULL, NULL }, - { 0, 0, 0, 0, "NOP", NULL, NULL }, - { 1, 2, 0, 0, "M4X3", NULL, NULL }, - { 1, 2, 0, 0, "M3X4", NULL, NULL }, - { 1, 2, 0, 0, "M3X3", NULL, NULL }, - { 1, 2, 0, 0, "M3X2", NULL, NULL }, - { 1, 1, 0, 0, "NRM4", NULL, NULL }, - { 0, 1, 0, 0, "CALLNZ", NULL, NULL }, - { 0, 1, 0, 0, "IFC", NULL, NULL }, - { 0, 1, 0, 0, "BREAKC", NULL, NULL }, - { 0, 1, 0, 0, "KIL", "TEXKILL", NULL }, - { 0, 0, 0, 0, "END", NULL, NULL }, - { 1, 1, 0, 0, "SWZ", NULL, NULL } + { 1, 1, 0, 0, "ARL", TGSI_OPCODE_ARL }, + { 1, 1, 0, 0, "MOV", TGSI_OPCODE_MOV }, + { 1, 1, 0, 0, "LIT", TGSI_OPCODE_LIT }, + { 1, 1, 0, 0, "RCP", TGSI_OPCODE_RCP }, + { 1, 1, 0, 0, "RSQ", TGSI_OPCODE_RSQ }, + { 1, 1, 0, 0, "EXP", TGSI_OPCODE_EXP }, + { 1, 1, 0, 0, "LOG", TGSI_OPCODE_LOG }, + { 1, 2, 0, 0, "MUL", TGSI_OPCODE_MUL }, + { 1, 2, 0, 0, "ADD", TGSI_OPCODE_ADD }, + { 1, 2, 0, 0, "DP3", TGSI_OPCODE_DP3 }, + { 1, 2, 0, 0, "DP4", TGSI_OPCODE_DP4 }, + { 1, 2, 0, 0, "DST", TGSI_OPCODE_DST }, + { 1, 2, 0, 0, "MIN", TGSI_OPCODE_MIN }, + { 1, 2, 0, 0, "MAX", TGSI_OPCODE_MAX }, + { 1, 2, 0, 0, "SLT", TGSI_OPCODE_SLT }, + { 1, 2, 0, 0, "SGE", TGSI_OPCODE_SGE }, + { 1, 3, 0, 0, "MAD", TGSI_OPCODE_MAD }, + { 1, 2, 0, 0, "SUB", TGSI_OPCODE_SUB }, + { 1, 3, 0, 0, "LRP", TGSI_OPCODE_LRP }, + { 1, 3, 0, 0, "CND", TGSI_OPCODE_CND }, + { 1, 3, 0, 0, "CND0", TGSI_OPCODE_CND0 }, + { 1, 3, 0, 0, "DP2A", TGSI_OPCODE_DP2A }, + { 0, 0, 0, 0, "", 22 }, /* removed */ + { 0, 0, 0, 0, "", 23 }, /* removed */ + { 1, 1, 0, 0, "FRC", TGSI_OPCODE_FRC }, + { 1, 3, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP }, + { 1, 1, 0, 0, "FLR", TGSI_OPCODE_FLR }, + { 1, 1, 0, 0, "ROUND", TGSI_OPCODE_ROUND }, + { 1, 1, 0, 0, "EX2", TGSI_OPCODE_EX2 }, + { 1, 1, 0, 0, "LG2", TGSI_OPCODE_LG2 }, + { 1, 2, 0, 0, "POW", TGSI_OPCODE_POW }, + { 1, 2, 0, 0, "XPD", TGSI_OPCODE_XPD }, + { 0, 0, 0, 0, "", 32 }, /* removed */ + { 1, 1, 0, 0, "ABS", TGSI_OPCODE_ABS }, + { 1, 1, 0, 0, "RCC", TGSI_OPCODE_RCC }, + { 1, 2, 0, 0, "DPH", TGSI_OPCODE_DPH }, + { 1, 1, 0, 0, "COS", TGSI_OPCODE_COS }, + { 1, 1, 0, 0, "DDX", TGSI_OPCODE_DDX }, + { 1, 1, 0, 0, "DDY", TGSI_OPCODE_DDY }, + { 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP }, + { 1, 1, 0, 0, "PK2H", TGSI_OPCODE_PK2H }, + { 1, 1, 0, 0, "PK2US", TGSI_OPCODE_PK2US }, + { 1, 1, 0, 0, "PK4B", TGSI_OPCODE_PK4B }, + { 1, 1, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB }, + { 1, 2, 0, 0, "RFL", TGSI_OPCODE_RFL }, + { 1, 2, 0, 0, "SEQ", TGSI_OPCODE_SEQ }, + { 1, 2, 0, 0, "SFL", TGSI_OPCODE_SFL }, + { 1, 2, 0, 0, "SGT", TGSI_OPCODE_SGT }, + { 1, 1, 0, 0, "SIN", TGSI_OPCODE_SIN }, + { 1, 2, 0, 0, "SLE", TGSI_OPCODE_SLE }, + { 1, 2, 0, 0, "SNE", TGSI_OPCODE_SNE }, + { 1, 2, 0, 0, "STR", TGSI_OPCODE_STR }, + { 1, 2, 1, 0, "TEX", TGSI_OPCODE_TEX }, + { 1, 4, 1, 0, "TXD", TGSI_OPCODE_TXD }, + { 1, 2, 1, 0, "TXP", TGSI_OPCODE_TXP }, + { 1, 1, 0, 0, "UP2H", TGSI_OPCODE_UP2H }, + { 1, 1, 0, 0, "UP2US", TGSI_OPCODE_UP2US }, + { 1, 1, 0, 0, "UP4B", TGSI_OPCODE_UP4B }, + { 1, 1, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB }, + { 1, 3, 0, 0, "X2D", TGSI_OPCODE_X2D }, + { 1, 1, 0, 0, "ARA", TGSI_OPCODE_ARA }, + { 1, 1, 0, 0, "ARR", TGSI_OPCODE_ARR }, + { 0, 1, 0, 0, "BRA", TGSI_OPCODE_BRA }, + { 0, 0, 0, 1, "CAL", TGSI_OPCODE_CAL }, + { 0, 0, 0, 0, "RET", TGSI_OPCODE_RET }, + { 1, 1, 0, 0, "SSG", TGSI_OPCODE_SSG }, + { 1, 3, 0, 0, "CMP", TGSI_OPCODE_CMP }, + { 1, 1, 0, 0, "SCS", TGSI_OPCODE_SCS }, + { 1, 2, 1, 0, "TXB", TGSI_OPCODE_TXB }, + { 1, 1, 0, 0, "NRM", TGSI_OPCODE_NRM }, + { 1, 2, 0, 0, "DIV", TGSI_OPCODE_DIV }, + { 1, 2, 0, 0, "DP2", TGSI_OPCODE_DP2 }, + { 1, 2, 1, 0, "TXL", TGSI_OPCODE_TXL }, + { 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, + { 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, + { 1, 1, 0, 0, "BGNFOR", TGSI_OPCODE_BGNFOR }, + { 0, 1, 0, 0, "REP", TGSI_OPCODE_REP }, + { 0, 0, 0, 1, "ELSE", TGSI_OPCODE_ELSE }, + { 0, 0, 0, 0, "ENDIF", TGSI_OPCODE_ENDIF }, + { 1, 0, 0, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, + { 0, 0, 0, 0, "ENDREP", TGSI_OPCODE_ENDREP }, + { 0, 1, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, + { 1, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, + { 1, 1, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, + { 1, 1, 0, 0, "I2F", TGSI_OPCODE_I2F }, + { 1, 1, 0, 0, "NOT", TGSI_OPCODE_NOT }, + { 1, 1, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, + { 1, 2, 0, 0, "SHL", TGSI_OPCODE_SHL }, + { 1, 2, 0, 0, "SHR", TGSI_OPCODE_SHR }, + { 1, 2, 0, 0, "AND", TGSI_OPCODE_AND }, + { 1, 2, 0, 0, "OR", TGSI_OPCODE_OR }, + { 1, 2, 0, 0, "MOD", TGSI_OPCODE_MOD }, + { 1, 2, 0, 0, "XOR", TGSI_OPCODE_XOR }, + { 1, 3, 0, 0, "SAD", TGSI_OPCODE_SAD }, + { 1, 2, 1, 0, "TXF", TGSI_OPCODE_TXF }, + { 1, 2, 1, 0, "TXQ", TGSI_OPCODE_TXQ }, + { 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT }, + { 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT }, + { 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, + { 0, 0, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, + { 0, 0, 0, 0, "BGNSUB", TGSI_OPCODE_BGNSUB }, + { 0, 0, 0, 1, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, + { 0, 0, 0, 0, "ENDSUB", TGSI_OPCODE_ENDSUB }, + { 1, 1, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, + { 1, 1, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, + { 1, 1, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 }, + { 1, 1, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 }, + { 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP }, + { 0, 0, 0, 0, "", 108 }, /* removed */ + { 0, 0, 0, 0, "", 109 }, /* removed */ + { 0, 0, 0, 0, "", 110 }, /* removed */ + { 0, 0, 0, 0, "", 111 }, /* removed */ + { 1, 1, 0, 0, "NRM4", TGSI_OPCODE_NRM4 }, + { 0, 1, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ }, + { 0, 1, 0, 0, "IFC", TGSI_OPCODE_IFC }, + { 0, 1, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, + { 0, 1, 0, 0, "KIL", TGSI_OPCODE_KIL }, + { 0, 0, 0, 0, "END", TGSI_OPCODE_END }, + { 1, 1, 0, 0, "SWZ", TGSI_OPCODE_SWZ } }; const struct tgsi_opcode_info * tgsi_get_opcode_info( uint opcode ) { + static boolean firsttime = 1; + + if (firsttime) { + unsigned i; + firsttime = 0; + for (i = 0; i < Elements(opcode_info); i++) + assert(opcode_info[i].opcode == i); + } + if (opcode < TGSI_OPCODE_LAST) return &opcode_info[opcode]; + assert( 0 ); return NULL; } + + +const char * +tgsi_get_opcode_name( uint opcode ) +{ + const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); + return info->mnemonic; +} + diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h index 077e25acd7..b2375c6971 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.h +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h @@ -41,13 +41,16 @@ struct tgsi_opcode_info boolean is_tex; boolean is_branch; const char *mnemonic; - const char *alt_mnemonic1; - const char *alt_mnemonic2; + uint opcode; }; const struct tgsi_opcode_info * tgsi_get_opcode_info( uint opcode ); +const char * +tgsi_get_opcode_name( uint opcode ); + + #if defined __cplusplus } #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h new file mode 100644 index 0000000000..ed594a3e2c --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -0,0 +1,173 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#ifndef OP12_TEX +#define OP12_TEX(a) OP12(a) +#endif + +#ifndef OP14_TEX +#define OP14_TEX(a) OP14(a) +#endif + +#ifndef OP00_LBL +#define OP00_LBL(a) OP00(a) +#endif + +#ifndef OP01_LBL +#define OP01_LBL(a) OP01(a) +#endif + +OP11(ARL) +OP11(MOV) +OP11(LIT) +OP11(RCP) +OP11(RSQ) +OP11(EXP) +OP11(LOG) +OP12(MUL) +OP12(ADD) +OP12(DP3) +OP12(DP4) +OP12(DST) +OP12(MIN) +OP12(MAX) +OP12(SLT) +OP12(SGE) +OP13(MAD) +OP12(SUB) +OP13(LRP) +OP13(CND) +OP13(CND0) +OP13(DP2A) +OP11(FRC) +OP13(CLAMP) +OP11(FLR) +OP11(ROUND) +OP11(EX2) +OP11(LG2) +OP12(POW) +OP12(XPD) +OP11(ABS) +OP11(RCC) +OP12(DPH) +OP11(COS) +OP11(DDX) +OP11(DDY) +OP00(KILP) +OP11(PK2H) +OP11(PK2US) +OP11(PK4B) +OP11(PK4UB) +OP12(RFL) +OP12(SEQ) +OP12(SFL) +OP12(SGT) +OP11(SIN) +OP12(SLE) +OP12(SNE) +OP12(STR) +OP12_TEX(TEX) +OP14_TEX(TXD) +OP12_TEX(TXP) +OP11(UP2H) +OP11(UP2US) +OP11(UP4B) +OP11(UP4UB) +OP13(X2D) +OP11(ARA) +OP11(ARR) +OP01(BRA) +OP00_LBL(CAL) +OP00(RET) +OP11(SSG) +OP13(CMP) +OP11(SCS) +OP12_TEX(TXB) +OP11(NRM) +OP12(DIV) +OP12(DP2) +OP12_TEX(TXL) +OP00(BRK) +OP01_LBL(IF) +OP11(BGNFOR) +OP01(REP) +OP00_LBL(ELSE) +OP00(ENDIF) +OP10(ENDFOR) +OP00(ENDREP) +OP01(PUSHA) +OP10(POPA) +OP11(CEIL) +OP11(I2F) +OP11(NOT) +OP11(TRUNC) +OP12(SHL) +OP12(SHR) +OP12(AND) +OP12(OR) +OP12(MOD) +OP12(XOR) +OP13(SAD) +OP12_TEX(TXF) +OP12_TEX(TXQ) +OP00(CONT) +OP00(EMIT) +OP00(ENDPRIM) +OP00_LBL(BGNLOOP) +OP00(BGNSUB) +OP00_LBL(ENDLOOP) +OP00(ENDSUB) +OP11(NOISE1) +OP11(NOISE2) +OP11(NOISE3) +OP11(NOISE4) +OP00(NOP) +OP11(NRM4) +OP01(CALLNZ) +OP01(IFC) +OP01(BREAKC) +OP01(KIL) +OP00(END) +OP11(SWZ) + + +#undef OP00 +#undef OP01 +#undef OP10 +#undef OP11 +#undef OP12 +#undef OP13 + +#ifdef OP14 +#undef OP14 +#endif + +#undef OP00_LBL +#undef OP01_LBL + +#undef OP12_TEX +#undef OP14_TEX + diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 7f2cfb7988..4870f82b6b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -42,9 +42,6 @@ void tgsi_full_token_free( union tgsi_full_token *full_token ) { - if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) { - FREE( (void *) full_token->FullImmediate.u.Pointer ); - } } unsigned @@ -156,14 +153,8 @@ tgsi_parse_token( case TGSI_IMM_FLOAT32: { uint imm_count = imm->Immediate.NrTokens - 1; - struct tgsi_immediate_float32 *data; - - data = (struct tgsi_immediate_float32 *) MALLOC(sizeof(struct tgsi_immediate_float32) * imm_count); - if (data) { - for (i = 0; i < imm_count; i++) { - next_token(ctx, &data[i]); - } - imm->u.ImmediateFloat32 = data; + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i]); } } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index a289e26e3a..1035bda1a8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -73,11 +73,7 @@ struct tgsi_full_declaration struct tgsi_full_immediate { struct tgsi_immediate Immediate; - union - { - const void *Pointer; - const struct tgsi_immediate_float32 *ImmediateFloat32; - } u; + union tgsi_immediate_data u[4]; }; #define TGSI_FULL_MAX_DST_REGISTERS 2 diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 0c64ae5713..2d6ad12ffb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -38,6 +38,7 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_sse.h" +#include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_dump.h" @@ -619,17 +620,17 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */ } break; - case TGSI_OPCODE_FLOOR: + case TGSI_OPCODE_FLR: ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ break; - case TGSI_OPCODE_FRAC: + case TGSI_OPCODE_FRC: ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */ ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */ break; - case TGSI_OPCODE_EXPBASE2: + case TGSI_OPCODE_EX2: ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */ break; - case TGSI_OPCODE_LOGBASE2: + case TGSI_OPCODE_LG2: /* XXX this may be broken! */ ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ break; @@ -1111,10 +1112,10 @@ emit_instruction(struct gen_context *gen, case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: case TGSI_OPCODE_ABS: - case TGSI_OPCODE_FLOOR: - case TGSI_OPCODE_FRAC: - case TGSI_OPCODE_EXPBASE2: - case TGSI_OPCODE_LOGBASE2: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_FRC: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: emit_unaryop(gen, inst); break; case TGSI_OPCODE_RSQ: @@ -1317,8 +1318,10 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); if (!ok) { - debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n", - parse.FullToken.FullInstruction.Instruction.Opcode, + uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; + debug_printf("failed to translate tgsi opcode %d (%s) to PPC (%s)\n", + opcode, + tgsi_get_opcode_name(opcode), parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? "vertex shader" : "fragment shader"); } @@ -1333,7 +1336,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); for (i = 0; i < size; i++) { immediates[num_immediates][i] = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + parse.FullToken.FullImmediate.u[i].Float; } num_immediates++; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 6f1f5c2b4b..4fe8553c42 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -131,7 +131,7 @@ is_register_used( return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; } -static const char *file_names[] = +static const char *file_names[TGSI_FILE_COUNT] = { "NULL", "CONST", @@ -140,7 +140,8 @@ static const char *file_names[] = "TEMP", "SAMP", "ADDR", - "IMM" + "IMM", + "LOOP" }; static boolean @@ -234,9 +235,29 @@ iter_instruction( index, "indirect", FALSE ); - if (file != TGSI_FILE_ADDRESS || index != 0) - report_warning( ctx, "Indirect register not ADDR[0]" ); + if (!(file == TGSI_FILE_ADDRESS || file == TGSI_FILE_LOOP) || index != 0) { + report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); + } + } + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_BGNFOR: + case TGSI_OPCODE_ENDFOR: + if (inst->FullDstRegisters[0].DstRegister.File != TGSI_FILE_LOOP || + inst->FullDstRegisters[0].DstRegister.Index != 0) { + report_error(ctx, "Destination register must be LOOP[0]"); + } + break; + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_BGNFOR: + if (inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_CONSTANT && + inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) { + report_error(ctx, "Source register file must be either CONST or IMM"); } + break; } ctx->num_instructions++; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 4c3343d26c..cfec5cfc01 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -36,6 +36,7 @@ #if defined(PIPE_ARCH_SSE) #include "util/u_sse.h" #endif +#include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" @@ -1467,15 +1468,15 @@ emit_tex( struct x86_function *func, switch (inst->InstructionExtTexture.Texture) { case TGSI_TEXTURE_1D: - case TGSI_TEXTURE_SHADOW1D: count = 1; break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: count = 2; break; + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: count = 3; @@ -2064,8 +2065,7 @@ emit_instruction( } break; - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ + case TGSI_OPCODE_LRP: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); FETCH( func, *inst, 1, 1, chan_index ); @@ -2085,8 +2085,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ + case TGSI_OPCODE_DP2A: FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */ @@ -2101,16 +2100,7 @@ emit_instruction( } break; - case TGSI_OPCODE_INDEX: - return 0; - break; - - case TGSI_OPCODE_NEGATE: - return 0; - break; - - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ + case TGSI_OPCODE_FRC: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); emit_frc( func, 0, 0 ); @@ -2122,8 +2112,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ + case TGSI_OPCODE_FLR: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); emit_flr( func, 0, 0 ); @@ -2139,8 +2128,7 @@ emit_instruction( } break; - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ + case TGSI_OPCODE_EX2: FETCH( func, *inst, 0, 0, CHAN_X ); emit_ex2( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { @@ -2148,8 +2136,7 @@ emit_instruction( } break; - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ + case TGSI_OPCODE_LG2: FETCH( func, *inst, 0, 0, CHAN_X ); emit_lg2( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { @@ -2157,8 +2144,7 @@ emit_instruction( } break; - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ + case TGSI_OPCODE_POW: FETCH( func, *inst, 0, 0, CHAN_X ); FETCH( func, *inst, 1, 1, CHAN_X ); emit_pow( func, 0, 0, 0, 1 ); @@ -2167,8 +2153,7 @@ emit_instruction( } break; - case TGSI_OPCODE_CROSSPRODUCT: - /* TGSI_OPCODE_XPD */ + case TGSI_OPCODE_XPD: if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { FETCH( func, *inst, 1, 1, CHAN_Z ); @@ -2214,10 +2199,6 @@ emit_instruction( } break; - case TGSI_OPCODE_MULTIPLYMATRIX: - return 0; - break; - case TGSI_OPCODE_ABS: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); @@ -2551,7 +2532,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: return 0; break; @@ -2567,7 +2548,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDFOR: return 0; break; @@ -2937,8 +2918,10 @@ tgsi_emit_sse2( &parse.FullToken.FullInstruction ); if (!ok) { - debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", - parse.FullToken.FullInstruction.Instruction.Opcode, + uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; + debug_printf("failed to translate tgsi opcode %d (%s) to SSE (%s)\n", + opcode, + tgsi_get_opcode_name(opcode), parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? "vertex shader" : "fragment shader"); } @@ -2953,7 +2936,7 @@ tgsi_emit_sse2( assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); for( i = 0; i < size; i++ ) { immediates[num_immediates][i] = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + parse.FullToken.FullImmediate.u[i].Float; } #if 0 debug_printf("SSE FS immediate[%d] = %f %f %f %f\n", diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index a76bbc9140..d438450b1e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -231,7 +231,8 @@ static const char *file_names[TGSI_FILE_COUNT] = "TEMP", "SAMP", "ADDR", - "IMM" + "IMM", + "LOOP" }; static boolean @@ -789,16 +790,6 @@ match_inst_mnemonic(const char **pcur, if (str_match_no_case(pcur, info->mnemonic)) { return TRUE; } - if (info->alt_mnemonic1) { - if (str_match_no_case(pcur, info->alt_mnemonic1)) { - return TRUE; - } - if (info->alt_mnemonic2) { - if (str_match_no_case(pcur, info->alt_mnemonic2)) { - return TRUE; - } - } - } return FALSE; } @@ -1091,7 +1082,10 @@ static boolean parse_immediate( struct translate_ctx *ctx ) imm = tgsi_default_full_immediate(); imm.Immediate.NrTokens += 4; imm.Immediate.DataType = TGSI_IMM_FLOAT32; - imm.u.Pointer = values; + imm.u[0].Float = values[0]; + imm.u[1].Float = values[1]; + imm.u[2].Float = values[2]; + imm.u[3].Float = values[3]; advance = tgsi_build_full_immediate( &imm, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c new file mode 100644 index 0000000000..ba84a82b2b --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -0,0 +1,797 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_dump.h" +#include "util/u_memory.h" + +union tgsi_any_token { + struct tgsi_version version; + struct tgsi_header header; + struct tgsi_processor processor; + struct tgsi_token token; + struct tgsi_declaration decl; + struct tgsi_declaration_range decl_range; + struct tgsi_declaration_semantic decl_semantic; + struct tgsi_immediate imm; + union tgsi_immediate_data imm_data; + struct tgsi_instruction insn; + struct tgsi_instruction_ext_nv insn_ext_nv; + struct tgsi_instruction_ext_label insn_ext_label; + struct tgsi_instruction_ext_texture insn_ext_texture; + struct tgsi_instruction_ext_predicate insn_ext_predicate; + struct tgsi_src_register src; + struct tgsi_src_register_ext_swz src_ext_swz; + struct tgsi_src_register_ext_mod src_ext_mod; + struct tgsi_dimension dim; + struct tgsi_dst_register dst; + struct tgsi_dst_register_ext_concode dst_ext_code; + struct tgsi_dst_register_ext_modulate dst_ext_mod; + struct tgsi_dst_register_ext_predicate dst_ext_pred; + unsigned value; +}; + + +struct ureg_tokens { + union tgsi_any_token *tokens; + unsigned size; + unsigned order; + unsigned count; +}; + +#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_IMMEDIATE 32 +#define UREG_MAX_TEMP 256 + +#define DOMAIN_DECL 0 +#define DOMAIN_INSN 1 + +struct ureg_program +{ + unsigned processor; + struct pipe_context *pipe; + + struct { + unsigned semantic_name; + unsigned semantic_index; + unsigned interp; + } input[UREG_MAX_INPUT]; + unsigned nr_inputs; + + struct { + unsigned semantic_name; + unsigned semantic_index; + } output[UREG_MAX_OUTPUT]; + unsigned nr_outputs; + + struct { + float v[4]; + unsigned nr; + } immediate[UREG_MAX_IMMEDIATE]; + unsigned nr_immediates; + + unsigned temps_active[UREG_MAX_TEMP / 32]; + unsigned nr_temps; + + unsigned nr_constants; + unsigned nr_samplers; + + struct ureg_tokens domain[2]; +}; + +static union tgsi_any_token error_tokens[32]; + +static void tokens_error( struct ureg_tokens *tokens ) +{ + tokens->tokens = error_tokens; + tokens->size = Elements(error_tokens); + tokens->count = 0; +} + + +static void tokens_expand( struct ureg_tokens *tokens, + unsigned count ) +{ + unsigned old_size = tokens->size * sizeof(unsigned); + + if (tokens->tokens == error_tokens) + goto fail; + + while (tokens->count + count > tokens->size) { + tokens->size = (1 << ++tokens->order); + } + + tokens->tokens = REALLOC(tokens->tokens, + old_size, + tokens->size * sizeof(unsigned)); + if (tokens->tokens == NULL) + goto fail; + + return; + +fail: + tokens_error(tokens); +} + +static void set_bad( struct ureg_program *ureg ) +{ + tokens_error(&ureg->domain[0]); +} + + + +static union tgsi_any_token *get_tokens( struct ureg_program *ureg, + unsigned domain, + unsigned count ) +{ + struct ureg_tokens *tokens = &ureg->domain[domain]; + union tgsi_any_token *result; + + if (tokens->count + count > tokens->size) + tokens_expand(tokens, count); + + result = &tokens->tokens[tokens->count]; + tokens->count += count; + return result; +} + + +static union tgsi_any_token *retrieve_token( struct ureg_program *ureg, + unsigned domain, + unsigned nr ) +{ + if (ureg->domain[domain].tokens == error_tokens) + return &error_tokens[0]; + + return &ureg->domain[domain].tokens[nr]; +} + + + +static INLINE struct ureg_dst +ureg_dst_register( unsigned file, + unsigned index ) +{ + struct ureg_dst dst; + + dst.File = file; + dst.WriteMask = TGSI_WRITEMASK_XYZW; + dst.Indirect = 0; + dst.Saturate = 0; + dst.Index = index; + dst.Pad1 = 0; + dst.Pad2 = 0; + + return dst; +} + +static INLINE struct ureg_src +ureg_src_register( unsigned file, + unsigned index ) +{ + struct ureg_src src; + + src.File = file; + src.SwizzleX = TGSI_SWIZZLE_X; + src.SwizzleY = TGSI_SWIZZLE_Y; + src.SwizzleZ = TGSI_SWIZZLE_Z; + src.SwizzleW = TGSI_SWIZZLE_W; + src.Pad = 0; + src.Indirect = 0; + src.Absolute = 0; + src.Index = index; + src.Negate = 0; + + return src; +} + + + + +static struct ureg_src +ureg_DECL_input( struct ureg_program *ureg, + unsigned name, + unsigned index, + unsigned interp_mode ) +{ + unsigned i; + + for (i = 0; i < ureg->nr_inputs; i++) { + if (ureg->input[i].semantic_name == name && + ureg->input[i].semantic_index == index) + goto out; + } + + if (ureg->nr_inputs < UREG_MAX_INPUT) { + ureg->input[i].semantic_name = name; + ureg->input[i].semantic_index = index; + ureg->input[i].interp = interp_mode; + ureg->nr_inputs++; + } + else { + set_bad( ureg ); + } + +out: + return ureg_src_register( TGSI_FILE_INPUT, i ); +} + + + +struct ureg_src +ureg_DECL_fs_input( struct ureg_program *ureg, + unsigned name, + unsigned index, + unsigned interp ) +{ + return ureg_DECL_input( ureg, name, index, interp ); +} + + +struct ureg_src +ureg_DECL_vs_input( struct ureg_program *ureg, + unsigned name, + unsigned index ) +{ + return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT ); +} + + +struct ureg_dst +ureg_DECL_output( struct ureg_program *ureg, + unsigned name, + unsigned index ) +{ + unsigned i; + + for (i = 0; i < ureg->nr_outputs; i++) { + if (ureg->output[i].semantic_name == name && + ureg->output[i].semantic_index == index) + goto out; + } + + if (ureg->nr_outputs < UREG_MAX_OUTPUT) { + ureg->output[i].semantic_name = name; + ureg->output[i].semantic_index = index; + ureg->nr_outputs++; + } + else { + set_bad( ureg ); + } + +out: + return ureg_dst_register( TGSI_FILE_OUTPUT, i ); +} + + +/* Returns a new constant register. Keep track of which have been + * referred to so that we can emit decls later. + * + * There is nothing in this code to bind this constant to any tracked + * value or manage any constant_buffer contents -- that's the + * resposibility of the calling code. + */ +struct ureg_src ureg_DECL_constant(struct ureg_program *ureg ) +{ + return ureg_src_register( TGSI_FILE_TEMPORARY, ureg->nr_constants++ ); +} + + +/* Allocate a new temporary. Temporaries greater than UREG_MAX_TEMP + * are legal, but will not be released. + */ +struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg ) +{ + unsigned i; + + for (i = 0; i < UREG_MAX_TEMP; i += 32) { + int bit = ffs(~ureg->temps_active[i/32]); + if (bit != 0) { + i += bit - 1; + goto out; + } + } + + /* No reusable temps, so allocate a new one: + */ + i = ureg->nr_temps++; + +out: + if (i < UREG_MAX_TEMP) + ureg->temps_active[i/32] |= 1 << (i % 32); + + if (i >= ureg->nr_temps) + ureg->nr_temps = i + 1; + + return ureg_dst_register( TGSI_FILE_TEMPORARY, i ); +} + + +void ureg_release_temporary( struct ureg_program *ureg, + struct ureg_dst tmp ) +{ + if (tmp.Index < UREG_MAX_TEMP) + ureg->temps_active[tmp.Index/32] &= ~(1 << (tmp.Index % 32)); +} + + +/* Allocate a new sampler. + */ +struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg ) +{ + return ureg_src_register( TGSI_FILE_SAMPLER, ureg->nr_samplers++ ); +} + + + + +static int match_or_expand_immediate( const float *v, + unsigned nr, + float *v2, + unsigned *nr2, + unsigned *swizzle ) +{ + unsigned i, j; + + for (i = 0; i < nr; i++) { + boolean found = FALSE; + + for (j = 0; j < *nr2 && !found; j++) { + if (v[i] == v2[j]) { + *swizzle |= j << (i * 2); + found = TRUE; + } + } + + if (!found) { + if (*nr2 >= 4) + return FALSE; + + v2[*nr2] = v[i]; + *swizzle |= *nr2 << (i * 2); + (*nr2)++; + } + } + + return TRUE; +} + + + + +struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, + const float *v, + unsigned nr ) +{ + unsigned i; + unsigned swizzle; + + /* Could do a first pass where we examine all existing immediates + * without expanding. + */ + + for (i = 0; i < ureg->nr_immediates; i++) { + if (match_or_expand_immediate( v, + nr, + ureg->immediate[i].v, + &ureg->immediate[i].nr, + &swizzle )) + goto out; + } + + if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) { + i = ureg->nr_immediates++; + if (match_or_expand_immediate( v, + nr, + ureg->immediate[i].v, + &ureg->immediate[i].nr, + &swizzle )) + goto out; + } + + set_bad( ureg ); + +out: + return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), + (swizzle >> 0) & 0x3, + (swizzle >> 2) & 0x3, + (swizzle >> 4) & 0x3, + (swizzle >> 6) & 0x3); +} + + +void +ureg_emit_src( struct ureg_program *ureg, + struct ureg_src src ) +{ + unsigned size = (1 + + (src.Absolute ? 1 : 0) + + (src.Indirect ? 1 : 0)); + + union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); + unsigned n = 0; + + out[n].value = 0; + out[n].src.File = src.File; + out[n].src.SwizzleX = src.SwizzleX; + out[n].src.SwizzleY = src.SwizzleY; + out[n].src.SwizzleZ = src.SwizzleZ; + out[n].src.SwizzleW = src.SwizzleW; + out[n].src.Indirect = src.Indirect; + out[n].src.Index = src.Index; + n++; + + if (src.Absolute) { + out[n].value = 0; + out[n].src_ext_mod.Absolute = 1; + n++; + } + + if (src.Indirect) { + out[n].value = 0; + out[n].src.File = TGSI_FILE_ADDRESS; + out[n].src.SwizzleX = TGSI_SWIZZLE_X; + out[n].src.SwizzleY = TGSI_SWIZZLE_X; + out[n].src.SwizzleZ = TGSI_SWIZZLE_X; + out[n].src.SwizzleW = TGSI_SWIZZLE_X; + out[n].src.Indirect = 0; + out[n].src.Index = 0; + n++; + } + + assert(n == size); +} + + +void +ureg_emit_dst( struct ureg_program *ureg, + struct ureg_dst dst ) +{ + unsigned size = (1 + + (dst.Indirect ? 1 : 0)); + + union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); + unsigned n = 0; + + out[n].value = 0; + out[n].dst.File = dst.File; + out[n].dst.WriteMask = dst.WriteMask; + out[n].dst.Indirect = dst.Indirect; + out[n].dst.Index = dst.Index; + n++; + + if (dst.Indirect) { + out[n].value = 0; + out[n].src.File = TGSI_FILE_ADDRESS; + out[n].src.SwizzleX = TGSI_SWIZZLE_X; + out[n].src.SwizzleY = TGSI_SWIZZLE_X; + out[n].src.SwizzleZ = TGSI_SWIZZLE_X; + out[n].src.SwizzleW = TGSI_SWIZZLE_X; + out[n].src.Indirect = 0; + out[n].src.Index = 0; + n++; + } + + assert(n == size); +} + + + +unsigned +ureg_emit_insn(struct ureg_program *ureg, + unsigned opcode, + boolean saturate, + unsigned num_dst, + unsigned num_src ) +{ + union tgsi_any_token *out; + + out = get_tokens( ureg, DOMAIN_INSN, 1 ); + out[0].value = 0; + out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION; + out[0].insn.NrTokens = 0; + out[0].insn.Opcode = opcode; + out[0].insn.Saturate = saturate; + out[0].insn.NrTokens = 0; + out[0].insn.NumDstRegs = num_dst; + out[0].insn.NumSrcRegs = num_src; + out[0].insn.Padding = 0; + out[0].insn.Extended = 0; + + return ureg->domain[DOMAIN_INSN].count - 1; +} + + +void +ureg_emit_label(struct ureg_program *ureg, + unsigned insn_token, + unsigned *label_token ) +{ + union tgsi_any_token *out, *insn; + + out = get_tokens( ureg, DOMAIN_INSN, 1 ); + insn = retrieve_token( ureg, DOMAIN_INSN, insn_token ); + + insn->insn.Extended = 1; + + out[0].value = 0; + out[0].insn_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; +} + + +void +ureg_emit_texture(struct ureg_program *ureg, + unsigned insn_token, + unsigned target ) +{ + union tgsi_any_token *out, *insn; + + out = get_tokens( ureg, DOMAIN_INSN, 1 ); + insn = retrieve_token( ureg, DOMAIN_INSN, insn_token ); + + insn->insn.Extended = 1; + + out[0].value = 0; + out[0].insn_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; + out[0].insn_ext_texture.Texture = target; +} + + +void +ureg_fixup_insn_size(struct ureg_program *ureg, + unsigned insn ) +{ + union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn ); + + out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1; +} + + + + + +static void emit_decl( struct ureg_program *ureg, + unsigned file, + unsigned index, + unsigned semantic_name, + unsigned semantic_index, + unsigned interp ) +{ + union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 ); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 3; + out[0].decl.File = file; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */ + out[0].decl.Interpolate = interp; + out[0].decl.Semantic = 1; + + out[1].value = 0; + out[1].decl_range.First = + out[1].decl_range.Last = index; + + out[2].value = 0; + out[2].decl_semantic.SemanticName = semantic_name; + out[2].decl_semantic.SemanticIndex = semantic_index; + +} + + +static void emit_decl_range( struct ureg_program *ureg, + unsigned file, + unsigned first, + unsigned count ) +{ + union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 2; + out[0].decl.File = file; + out[0].decl.UsageMask = 0xf; + out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; + out[0].decl.Semantic = 0; + + out[1].value = 0; + out[1].decl_range.First = first; + out[1].decl_range.Last = first + count - 1; +} + +static void emit_immediate( struct ureg_program *ureg, + const float *v ) +{ + union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); + + out[0].value = 0; + out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; + out[0].imm.NrTokens = 5; + out[0].imm.DataType = TGSI_IMM_FLOAT32; + out[0].imm.Padding = 0; + out[0].imm.Extended = 0; + + out[1].imm_data.Float = v[0]; + out[2].imm_data.Float = v[1]; + out[3].imm_data.Float = v[2]; + out[4].imm_data.Float = v[3]; +} + + + + +static void emit_decls( struct ureg_program *ureg ) +{ + unsigned i; + + for (i = 0; i < ureg->nr_inputs; i++) { + emit_decl( ureg, + TGSI_FILE_INPUT, + i, + ureg->input[i].semantic_name, + ureg->input[i].semantic_index, + ureg->input[i].interp ); + } + + for (i = 0; i < ureg->nr_outputs; i++) { + emit_decl( ureg, + TGSI_FILE_OUTPUT, + i, + ureg->output[i].semantic_name, + ureg->output[i].semantic_index, + TGSI_INTERPOLATE_CONSTANT ); + } + + if (ureg->nr_samplers) { + emit_decl_range( ureg, + TGSI_FILE_SAMPLER, + 0, ureg->nr_samplers ); + } + + if (ureg->nr_constants) { + emit_decl_range( ureg, + TGSI_FILE_CONSTANT, + 0, ureg->nr_constants ); + } + + if (ureg->nr_temps) { + emit_decl_range( ureg, + TGSI_FILE_TEMPORARY, + 0, ureg->nr_temps ); + } + + for (i = 0; i < ureg->nr_immediates; i++) { + emit_immediate( ureg, + ureg->immediate[i].v ); + } +} + +/* Append the instruction tokens onto the declarations to build a + * contiguous stream suitable to send to the driver. + */ +static void copy_instructions( struct ureg_program *ureg ) +{ + unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count; + union tgsi_any_token *out = get_tokens( ureg, + DOMAIN_DECL, + nr_tokens ); + + memcpy(out, + ureg->domain[DOMAIN_INSN].tokens, + nr_tokens * sizeof out[0] ); +} + + +static void +fixup_header_size(struct ureg_program *ureg, + unsigned insn ) +{ + union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 1 ); + + out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 3; +} + + +static void +emit_header( struct ureg_program *ureg ) +{ + union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 ); + + out[0].version.MajorVersion = 1; + out[0].version.MinorVersion = 1; + out[0].version.Padding = 0; + + out[1].header.HeaderSize = 2; + out[1].header.BodySize = 0; + + out[2].processor.Processor = ureg->processor; + out[2].processor.Padding = 0; +} + + +void *ureg_create_shader( struct ureg_program *ureg ) +{ + struct pipe_shader_state state; + unsigned insn; + + emit_header( ureg ); + emit_decls( ureg ); + copy_instructions( ureg ); + fixup_header_size( ureg, insn ); + + if (ureg->domain[0].tokens == error_tokens || + ureg->domain[1].tokens == error_tokens) { + debug_printf("%s: error in generated shader\n", __FUNCTION__); + assert(0); + return NULL; + } + + state.tokens = (const struct tgsi_token *)ureg->domain[DOMAIN_DECL].tokens; + + if (0) { + debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__, + ureg->domain[DOMAIN_DECL].count); + tgsi_dump( state.tokens, 0 ); + } + + if (ureg->processor == TGSI_PROCESSOR_VERTEX) + return ureg->pipe->create_vs_state( ureg->pipe, &state ); + else + return ureg->pipe->create_fs_state( ureg->pipe, &state ); +} + + + + +struct ureg_program *ureg_create( struct pipe_context *pipe, + unsigned processor ) +{ + struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); + if (ureg == NULL) + return NULL; + + ureg->pipe = pipe; + ureg->processor = processor; + return ureg; +} + + +void ureg_destroy( struct ureg_program *ureg ) +{ + unsigned i; + + for (i = 0; i < Elements(ureg->domain); i++) { + if (ureg->domain[i].tokens && + ureg->domain[i].tokens != error_tokens) + FREE(ureg->domain[i].tokens); + } + + FREE(ureg); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h new file mode 100644 index 0000000000..0a976fd63b --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -0,0 +1,439 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_UREG_H +#define TGSI_UREG_H + +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" + +struct ureg_program; + +/* Almost a tgsi_src_register, but we need to pull in the Absolute + * flag from the _ext token. Indirect flag always implies ADDR[0]. + */ +struct ureg_src +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ + unsigned Pad : 1; /* BOOL */ + unsigned Indirect : 1; /* BOOL */ + unsigned Absolute : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned Negate : 1; /* BOOL */ +}; + +/* Very similar to a tgsi_dst_register, removing unsupported fields + * and adding a Saturate flag. It's easier to push saturate into the + * destination register than to try and create a _SAT varient of each + * instruction function. + */ +struct ureg_dst +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned WriteMask : 4; /* TGSI_WRITEMASK_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Saturate : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned Pad1 : 5; + unsigned Pad2 : 1; /* BOOL */ +}; + +struct pipe_context; + +struct ureg_program * +ureg_create( struct pipe_context *pipe, + unsigned processor ); + +void * +ureg_create_shader( struct ureg_program * ); + +void +ureg_destroy( struct ureg_program * ); + + +/*********************************************************************** + * Convenience routine: + */ +static INLINE void *ureg_create_shader_and_destroy( struct ureg_program *p ) +{ + void *result = ureg_create_shader( p ); + ureg_destroy( p ); + return result; +} + + + +/*********************************************************************** + * Build shader declarations: + */ + +struct ureg_src +ureg_DECL_fs_input( struct ureg_program *, + unsigned semantic_name, + unsigned semantic_index, + unsigned interp_mode ); + +struct ureg_src +ureg_DECL_vs_input( struct ureg_program *, + unsigned semantic_name, + unsigned semantic_index ); + +struct ureg_dst +ureg_DECL_output( struct ureg_program *, + unsigned semantic_name, + unsigned semantic_index ); + +struct ureg_src +ureg_DECL_immediate( struct ureg_program *, + const float *v, + unsigned nr ); + +struct ureg_src +ureg_DECL_constant( struct ureg_program * ); + +struct ureg_dst +ureg_DECL_temporary( struct ureg_program * ); + +void +ureg_release_temporary( struct ureg_program *ureg, + struct ureg_dst tmp ); + +struct ureg_src +ureg_DECL_sampler( struct ureg_program * ); + + +static INLINE struct ureg_src +ureg_DECL_immediate4f( struct ureg_program *ureg, + float a, float b, + float c, float d) +{ + float v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_DECL_immediate3f( struct ureg_program *ureg, + float a, float b, + float c) +{ + float v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_DECL_immediate2f( struct ureg_program *ureg, + float a, float b) +{ + float v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_DECL_immediate1f( struct ureg_program *ureg, + float a) +{ + float v[1]; + v[0] = a; + return ureg_DECL_immediate( ureg, v, 1 ); +} + +/*********************************************************************** + * Internal instruction helpers, don't call these directly: + */ + +unsigned +ureg_emit_insn(struct ureg_program *ureg, + unsigned opcode, + boolean saturate, + unsigned num_dst, + unsigned num_src ); + +void +ureg_emit_label(struct ureg_program *ureg, + unsigned insn_token, + unsigned *label_token ); + +void +ureg_emit_texture(struct ureg_program *ureg, + unsigned insn_token, + unsigned target ); + +void +ureg_emit_dst( struct ureg_program *ureg, + struct ureg_dst dst ); + +void +ureg_emit_src( struct ureg_program *ureg, + struct ureg_src src ); + +void +ureg_fixup_insn_size(struct ureg_program *ureg, + unsigned insn ); + + +#define OP00( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 0 ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + +#define OP01( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_src src ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 1 ); \ + ureg_emit_src( ureg, src ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + +#define OP00_LBL( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + unsigned *label_token ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 0 ); \ + ureg_emit_label( ureg, insn, label_token ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + +#define OP01_LBL( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_src src, \ + unsigned *label_token ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 1 ); \ + ureg_emit_label( ureg, insn, label_token ); \ + ureg_emit_src( ureg, src ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + +#define OP10( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_dst dst ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 0 ); \ + ureg_emit_dst( ureg, dst ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + + +#define OP11( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_dst dst, \ + struct ureg_src src ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 1 ); \ + ureg_emit_dst( ureg, dst ); \ + ureg_emit_src( ureg, src ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + +#define OP12( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_dst dst, \ + struct ureg_src src0, \ + struct ureg_src src1 ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 2 ); \ + ureg_emit_dst( ureg, dst ); \ + ureg_emit_src( ureg, src0 ); \ + ureg_emit_src( ureg, src1 ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + +#define OP12_TEX( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_dst dst, \ + unsigned target, \ + struct ureg_src src0, \ + struct ureg_src src1 ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 2 ); \ + ureg_emit_texture( ureg, insn, target ); \ + ureg_emit_dst( ureg, dst ); \ + ureg_emit_src( ureg, src0 ); \ + ureg_emit_src( ureg, src1 ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + +#define OP13( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_dst dst, \ + struct ureg_src src0, \ + struct ureg_src src1, \ + struct ureg_src src2 ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 3 ); \ + ureg_emit_dst( ureg, dst ); \ + ureg_emit_src( ureg, src0 ); \ + ureg_emit_src( ureg, src1 ); \ + ureg_emit_src( ureg, src2 ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + +#define OP14_TEX( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_dst dst, \ + unsigned target, \ + struct ureg_src src0, \ + struct ureg_src src1, \ + struct ureg_src src2, \ + struct ureg_src src3 ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 4 ); \ + ureg_emit_texture( ureg, insn, target ); \ + ureg_emit_dst( ureg, dst ); \ + ureg_emit_src( ureg, src0 ); \ + ureg_emit_src( ureg, src1 ); \ + ureg_emit_src( ureg, src2 ); \ + ureg_emit_src( ureg, src3 ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + + +/* Use a template include to generate a correctly-typed ureg_OP() + * function for each TGSI opcode: + */ +#include "tgsi_opcode_tmp.h" + + +/*********************************************************************** + * Inline helpers for manipulating register structs: + */ +static INLINE struct ureg_src +ureg_negate( struct ureg_src reg ) +{ + reg.Negate ^= 1; + return reg; +} + +static INLINE struct ureg_src +ureg_abs( struct ureg_src reg ) +{ + reg.Absolute = 1; + reg.Negate = 0; + return reg; +} + +static INLINE struct ureg_src +ureg_swizzle( struct ureg_src reg, + int x, int y, int z, int w ) +{ + unsigned swz = ( (reg.SwizzleX << 0) | + (reg.SwizzleY << 2) | + (reg.SwizzleZ << 4) | + (reg.SwizzleW << 6)); + + reg.SwizzleX = (swz >> (x*2)) & 0x3; + reg.SwizzleY = (swz >> (y*2)) & 0x3; + reg.SwizzleZ = (swz >> (z*2)) & 0x3; + reg.SwizzleW = (swz >> (w*2)) & 0x3; + return reg; +} + +static INLINE struct ureg_src +ureg_scalar( struct ureg_src reg, int x ) +{ + return ureg_swizzle(reg, x, x, x, x); +} + +static INLINE struct ureg_dst +ureg_writemask( struct ureg_dst reg, + unsigned writemask ) +{ + reg.WriteMask &= writemask; + return reg; +} + +static INLINE struct ureg_dst +ureg_saturate( struct ureg_dst reg ) +{ + reg.Saturate = 1; + return reg; +} + +static INLINE struct ureg_dst +ureg_dst( struct ureg_src src ) +{ + struct ureg_dst dst; + + dst.File = src.File; + dst.WriteMask = TGSI_WRITEMASK_XYZW; + dst.Indirect = src.Indirect; + dst.Saturate = 0; + dst.Index = src.Index; + dst.Pad1 = 0; + dst.Pad2 = 0; + + return dst; +} + +static INLINE struct ureg_src +ureg_src( struct ureg_dst dst ) +{ + struct ureg_src src; + + src.File = dst.File; + src.SwizzleX = TGSI_SWIZZLE_X; + src.SwizzleY = TGSI_SWIZZLE_Y; + src.SwizzleZ = TGSI_SWIZZLE_Z; + src.SwizzleW = TGSI_SWIZZLE_W; + src.Pad = 0; + src.Indirect = dst.Indirect; + src.Absolute = 0; + src.Index = dst.Index; + src.Negate = 0; + + return src; +} + + + +#endif |