diff options
Diffstat (limited to 'src/gallium/auxiliary')
26 files changed, 734 insertions, 244 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index 802ec37118..eb492076b7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -187,11 +187,7 @@ TGSI Instruction Specification 1.2.6 CND0 - Condition Zero - dst.x = (src2.x >= 0.0) ? src0.x : src1.x - dst.y = (src2.y >= 0.0) ? src0.y : src1.y - dst.z = (src2.z >= 0.0) ? src0.z : src1.z - dst.w = (src2.w >= 0.0) ? src0.w : src1.w - + Removed. Use (CMP src2, src1, src0) instead. 1.2.7 DOT2ADD - 2-component Dot Product And Add @@ -1031,12 +1027,12 @@ TGSI Instruction Specification 1.18.1 EXPP - Approximate Exponential Base 2 - Alias for EXP. + Use EXP. See also 1.19.3. 1.18.2 LOGP - Logarithm Base 2 - Alias for LG2. + Use LOG. See also 1.19.4. 1.19 vs_2_0 @@ -1053,6 +1049,16 @@ TGSI Instruction Specification Alias for ARR. +1.19.3 EXPP - Approximate Exponential Base 2 + + Use EX2. + + +1.19.4 LOGP - Logarithm Base 2 + + Use LG2. 
+ + 2 Explanation of symbols used ============================== diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 05b07a3a73..111d95b666 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -43,6 +43,7 @@ struct dump_ctx struct tgsi_iterate_context iter; uint instno; + int indent; uint indentation; @@ -335,14 +336,6 @@ tgsi_dump_immediate( iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm ); } -static void -indent(struct dump_ctx *ctx) -{ - uint i; - for (i = 0; i < ctx->indentation; i++) - TXT(" "); -} - static boolean iter_instruction( struct tgsi_iterate_context *iter, @@ -350,22 +343,19 @@ iter_instruction( { struct dump_ctx *ctx = (struct dump_ctx *) iter; uint instno = ctx->instno++; - + const struct tgsi_opcode_info *info = tgsi_get_opcode_info( inst->Instruction.Opcode ); uint i; boolean first_reg = TRUE; INSTID( instno ); TXT( ": " ); - - /* update indentation */ - if (inst->Instruction.Opcode == TGSI_OPCODE_ENDIF || - inst->Instruction.Opcode == TGSI_OPCODE_ENDFOR || - inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) { - ctx->indentation -= indent_spaces; - } - indent(ctx); - - TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic ); + + ctx->indent -= info->pre_dedent; + for(i = 0; (int)i < ctx->indent; ++i) + TXT( " " ); + ctx->indent += info->post_indent; + + TXT( info->mnemonic ); switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: @@ -526,6 +516,7 @@ tgsi_dump_instruction( struct dump_ctx ctx; ctx.instno = instno; + ctx.indent = 0; ctx.printf = dump_ctx_printf; ctx.indentation = 0; @@ -559,6 +550,7 @@ tgsi_dump( ctx.iter.epilog = NULL; ctx.instno = 0; + ctx.indent = 0; ctx.printf = dump_ctx_printf; ctx.indentation = 0; @@ -612,6 +604,7 @@ tgsi_dump_str( ctx.base.iter.epilog = NULL; ctx.base.instno = 0; + ctx.base.indent = 0; ctx.base.printf = &str_dump_ctx_printf; ctx.base.indentation = 0; diff --git 
a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 711e86d6ed..c79c56debd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2329,16 +2329,6 @@ exec_instruction( } break; - case TGSI_OPCODE_CND0: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); - } - break; - case TGSI_OPCODE_DP2A: FETCH( &r[0], 0, CHAN_X ); FETCH( &r[1], 1, CHAN_X ); @@ -2766,19 +2756,32 @@ exec_instruction( if (mach->ExecMask) { /* do the call */ - /* push the Cond, Loop, Cont stacks */ + /* First, record the depths of the execution stacks. + * This is important for deeply nested/looped return statements. + * We have to unwind the stacks by the correct amount. For a + * real code generator, we could determine the number of entries + * to pop off each stack with simple static analysis and avoid + * implementing this data structure at run time. 
+ */ + mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; + mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; + mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; + + mach->CallStackTop++; + + /* Second, push the Cond, Loop, Cont, Func stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; - assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - /* note that PC was already incremented above */ - mach->CallStack[mach->CallStackTop++] = *pc; + /* Finally, jump to the subroutine */ *pc = inst->InstructionExtLabel.Label; } break; @@ -2795,18 +2798,24 @@ exec_instruction( *pc = -1; return; } - *pc = mach->CallStack[--mach->CallStackTop]; - /* pop the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->CallStackTop > 0); + mach->CallStackTop--; + + mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; + mach->CondMask = mach->CondStack[mach->CondStackTop]; + + mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; + mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; + + mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; + mach->ContMask = mach->ContStack[mach->ContStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = 
mach->FuncStack[--mach->FuncStackTop]; + *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; + UPDATE_EXEC_MASK(mach); } break; @@ -3104,6 +3113,12 @@ exec_instruction( break; case TGSI_OPCODE_BGNFOR: + assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + for (chan_index = 0; chan_index < 3; chan_index++) { + FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); + } + STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + ++mach->LoopCounterStackTop; /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ @@ -3111,10 +3126,58 @@ exec_instruction( mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; + assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; break; case TGSI_OPCODE_ENDFOR: - /* fall-through (for now at least) */ + assert(mach->LoopCounterStackTop > 0); + micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + /* update LoopMask */ + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { + mach->LoopMask &= ~0x1; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { + mach->LoopMask &= ~0x2; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { + mach->LoopMask &= ~0x4; + } + if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { + mach->LoopMask &= ~0x8; + } + micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); + 
assert(mach->LoopLabelStackTop > 0); + inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; + STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + /* Restore ContMask, but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + UPDATE_EXEC_MASK(mach); + if (mach->ExecMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + assert(mach->LoopLabelStackTop > 0); + *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->LoopLabelStackTop > 0); + --mach->LoopLabelStackTop; + assert(mach->LoopCounterStackTop > 0); + --mach->LoopCounterStackTop; + } + UPDATE_EXEC_MASK(mach); + break; + case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); @@ -3122,7 +3185,8 @@ exec_instruction( UPDATE_EXEC_MASK(mach); if (mach->ExecMask) { /* repeat loop: jump to instruction just past BGNLOOP */ - *pc = inst->InstructionExtLabel.Label + 1; + assert(mach->LoopLabelStackTop > 0); + *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; } else { /* exit loop: pop LoopMask */ @@ -3131,6 +3195,8 @@ exec_instruction( /* pop ContMask */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->LoopLabelStackTop > 0); + --mach->LoopLabelStackTop; } UPDATE_EXEC_MASK(mach); break; @@ -3198,7 +3264,6 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; - mach->CondStackTop = 0; /* temporarily subvert this assertion */ assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
b/src/gallium/auxiliary/tgsi/tgsi_exec.h index fd9ef6f35d..c72f76809d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -186,6 +186,17 @@ struct tgsi_exec_labels */ #define TGSI_EXEC_MAX_CONST_BUFFER 4096 + +/** function call/activation record */ +struct tgsi_call_record +{ + uint CondStackTop; + uint LoopStackTop; + uint ContStackTop; + uint ReturnAddr; +}; + + /** * Run-time virtual machine state for executing TGSI shader. */ @@ -232,6 +243,14 @@ struct tgsi_exec_machine uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopStackTop; + /** Loop label stack */ + uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopLabelStackTop; + + /** Loop counter stack (x = count, y = current, z = step) */ + struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopCounterStackTop; + /** Loop continue mask stack (see comments in tgsi_exec.c) */ uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; int ContStackTop; @@ -241,7 +260,7 @@ struct tgsi_exec_machine int FuncStackTop; /** Function call stack for saving/restoring the program counter */ - uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + struct tgsi_call_record CallStack[TGSI_EXEC_MAX_CALL_NESTING]; int CallStackTop; struct tgsi_full_instruction *Instructions; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index ccf4b205ff..17af4cb7ad 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -31,125 +31,125 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { - { 1, 1, 0, 0, "ARL", TGSI_OPCODE_ARL }, - { 1, 1, 0, 0, "MOV", TGSI_OPCODE_MOV }, - { 1, 1, 0, 0, "LIT", TGSI_OPCODE_LIT }, - { 1, 1, 0, 0, "RCP", TGSI_OPCODE_RCP }, - { 1, 1, 0, 0, "RSQ", TGSI_OPCODE_RSQ }, - { 1, 1, 0, 0, "EXP", TGSI_OPCODE_EXP }, - { 1, 1, 0, 0, "LOG", TGSI_OPCODE_LOG }, - { 1, 2, 0, 0, "MUL", TGSI_OPCODE_MUL }, - { 1, 2, 0, 0, "ADD", TGSI_OPCODE_ADD }, - { 1, 2, 0, 0, "DP3", 
TGSI_OPCODE_DP3 }, - { 1, 2, 0, 0, "DP4", TGSI_OPCODE_DP4 }, - { 1, 2, 0, 0, "DST", TGSI_OPCODE_DST }, - { 1, 2, 0, 0, "MIN", TGSI_OPCODE_MIN }, - { 1, 2, 0, 0, "MAX", TGSI_OPCODE_MAX }, - { 1, 2, 0, 0, "SLT", TGSI_OPCODE_SLT }, - { 1, 2, 0, 0, "SGE", TGSI_OPCODE_SGE }, - { 1, 3, 0, 0, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 0, 0, "SUB", TGSI_OPCODE_SUB }, - { 1, 3, 0, 0, "LRP", TGSI_OPCODE_LRP }, - { 1, 3, 0, 0, "CND", TGSI_OPCODE_CND }, - { 1, 3, 0, 0, "CND0", TGSI_OPCODE_CND0 }, - { 1, 3, 0, 0, "DP2A", TGSI_OPCODE_DP2A }, - { 0, 0, 0, 0, "", 22 }, /* removed */ - { 0, 0, 0, 0, "", 23 }, /* removed */ - { 1, 1, 0, 0, "FRC", TGSI_OPCODE_FRC }, - { 1, 3, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP }, - { 1, 1, 0, 0, "FLR", TGSI_OPCODE_FLR }, - { 1, 1, 0, 0, "ROUND", TGSI_OPCODE_ROUND }, - { 1, 1, 0, 0, "EX2", TGSI_OPCODE_EX2 }, - { 1, 1, 0, 0, "LG2", TGSI_OPCODE_LG2 }, - { 1, 2, 0, 0, "POW", TGSI_OPCODE_POW }, - { 1, 2, 0, 0, "XPD", TGSI_OPCODE_XPD }, - { 0, 0, 0, 0, "", 32 }, /* removed */ - { 1, 1, 0, 0, "ABS", TGSI_OPCODE_ABS }, - { 1, 1, 0, 0, "RCC", TGSI_OPCODE_RCC }, - { 1, 2, 0, 0, "DPH", TGSI_OPCODE_DPH }, - { 1, 1, 0, 0, "COS", TGSI_OPCODE_COS }, - { 1, 1, 0, 0, "DDX", TGSI_OPCODE_DDX }, - { 1, 1, 0, 0, "DDY", TGSI_OPCODE_DDY }, - { 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP }, - { 1, 1, 0, 0, "PK2H", TGSI_OPCODE_PK2H }, - { 1, 1, 0, 0, "PK2US", TGSI_OPCODE_PK2US }, - { 1, 1, 0, 0, "PK4B", TGSI_OPCODE_PK4B }, - { 1, 1, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB }, - { 1, 2, 0, 0, "RFL", TGSI_OPCODE_RFL }, - { 1, 2, 0, 0, "SEQ", TGSI_OPCODE_SEQ }, - { 1, 2, 0, 0, "SFL", TGSI_OPCODE_SFL }, - { 1, 2, 0, 0, "SGT", TGSI_OPCODE_SGT }, - { 1, 1, 0, 0, "SIN", TGSI_OPCODE_SIN }, - { 1, 2, 0, 0, "SLE", TGSI_OPCODE_SLE }, - { 1, 2, 0, 0, "SNE", TGSI_OPCODE_SNE }, - { 1, 2, 0, 0, "STR", TGSI_OPCODE_STR }, - { 1, 2, 1, 0, "TEX", TGSI_OPCODE_TEX }, - { 1, 4, 1, 0, "TXD", TGSI_OPCODE_TXD }, - { 1, 2, 1, 0, "TXP", TGSI_OPCODE_TXP }, - { 1, 1, 0, 0, "UP2H", TGSI_OPCODE_UP2H }, - { 1, 1, 0, 0, "UP2US", 
TGSI_OPCODE_UP2US }, - { 1, 1, 0, 0, "UP4B", TGSI_OPCODE_UP4B }, - { 1, 1, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB }, - { 1, 3, 0, 0, "X2D", TGSI_OPCODE_X2D }, - { 1, 1, 0, 0, "ARA", TGSI_OPCODE_ARA }, - { 1, 1, 0, 0, "ARR", TGSI_OPCODE_ARR }, - { 0, 1, 0, 0, "BRA", TGSI_OPCODE_BRA }, - { 0, 0, 0, 1, "CAL", TGSI_OPCODE_CAL }, - { 0, 0, 0, 0, "RET", TGSI_OPCODE_RET }, - { 1, 1, 0, 0, "SSG", TGSI_OPCODE_SSG }, - { 1, 3, 0, 0, "CMP", TGSI_OPCODE_CMP }, - { 1, 1, 0, 0, "SCS", TGSI_OPCODE_SCS }, - { 1, 2, 1, 0, "TXB", TGSI_OPCODE_TXB }, - { 1, 1, 0, 0, "NRM", TGSI_OPCODE_NRM }, - { 1, 2, 0, 0, "DIV", TGSI_OPCODE_DIV }, - { 1, 2, 0, 0, "DP2", TGSI_OPCODE_DP2 }, - { 1, 2, 1, 0, "TXL", TGSI_OPCODE_TXL }, - { 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, - { 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, - { 1, 1, 0, 0, "BGNFOR", TGSI_OPCODE_BGNFOR }, - { 0, 1, 0, 0, "REP", TGSI_OPCODE_REP }, - { 0, 0, 0, 1, "ELSE", TGSI_OPCODE_ELSE }, - { 0, 0, 0, 0, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 0, 0, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, - { 0, 0, 0, 0, "ENDREP", TGSI_OPCODE_ENDREP }, - { 0, 1, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, - { 1, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, - { 1, 1, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, - { 1, 1, 0, 0, "I2F", TGSI_OPCODE_I2F }, - { 1, 1, 0, 0, "NOT", TGSI_OPCODE_NOT }, - { 1, 1, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, - { 1, 2, 0, 0, "SHL", TGSI_OPCODE_SHL }, - { 1, 2, 0, 0, "SHR", TGSI_OPCODE_SHR }, - { 1, 2, 0, 0, "AND", TGSI_OPCODE_AND }, - { 1, 2, 0, 0, "OR", TGSI_OPCODE_OR }, - { 1, 2, 0, 0, "MOD", TGSI_OPCODE_MOD }, - { 1, 2, 0, 0, "XOR", TGSI_OPCODE_XOR }, - { 1, 3, 0, 0, "SAD", TGSI_OPCODE_SAD }, - { 1, 2, 1, 0, "TXF", TGSI_OPCODE_TXF }, - { 1, 2, 1, 0, "TXQ", TGSI_OPCODE_TXQ }, - { 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT }, - { 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT }, - { 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, - { 0, 0, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, - { 0, 0, 0, 0, "BGNSUB", TGSI_OPCODE_BGNSUB }, - { 0, 0, 0, 1, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, - { 0, 0, 0, 0, 
"ENDSUB", TGSI_OPCODE_ENDSUB }, - { 1, 1, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, - { 1, 1, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, - { 1, 1, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 }, - { 1, 1, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 }, - { 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP }, - { 0, 0, 0, 0, "", 108 }, /* removed */ - { 0, 0, 0, 0, "", 109 }, /* removed */ - { 0, 0, 0, 0, "", 110 }, /* removed */ - { 0, 0, 0, 0, "", 111 }, /* removed */ - { 1, 1, 0, 0, "NRM4", TGSI_OPCODE_NRM4 }, - { 0, 1, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ }, - { 0, 1, 0, 0, "IFC", TGSI_OPCODE_IFC }, - { 0, 1, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, - { 0, 1, 0, 0, "KIL", TGSI_OPCODE_KIL }, - { 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 1, 1, 0, 0, "SWZ", TGSI_OPCODE_SWZ } + { 1, 1, 0, 0, 0, 0, "ARL", TGSI_OPCODE_ARL }, + { 1, 1, 0, 0, 0, 0, "MOV", TGSI_OPCODE_MOV }, + { 1, 1, 0, 0, 0, 0, "LIT", TGSI_OPCODE_LIT }, + { 1, 1, 0, 0, 0, 0, "RCP", TGSI_OPCODE_RCP }, + { 1, 1, 0, 0, 0, 0, "RSQ", TGSI_OPCODE_RSQ }, + { 1, 1, 0, 0, 0, 0, "EXP", TGSI_OPCODE_EXP }, + { 1, 1, 0, 0, 0, 0, "LOG", TGSI_OPCODE_LOG }, + { 1, 2, 0, 0, 0, 0, "MUL", TGSI_OPCODE_MUL }, + { 1, 2, 0, 0, 0, 0, "ADD", TGSI_OPCODE_ADD }, + { 1, 2, 0, 0, 0, 0, "DP3", TGSI_OPCODE_DP3 }, + { 1, 2, 0, 0, 0, 0, "DP4", TGSI_OPCODE_DP4 }, + { 1, 2, 0, 0, 0, 0, "DST", TGSI_OPCODE_DST }, + { 1, 2, 0, 0, 0, 0, "MIN", TGSI_OPCODE_MIN }, + { 1, 2, 0, 0, 0, 0, "MAX", TGSI_OPCODE_MAX }, + { 1, 2, 0, 0, 0, 0, "SLT", TGSI_OPCODE_SLT }, + { 1, 2, 0, 0, 0, 0, "SGE", TGSI_OPCODE_SGE }, + { 1, 3, 0, 0, 0, 0, "MAD", TGSI_OPCODE_MAD }, + { 1, 2, 0, 0, 0, 0, "SUB", TGSI_OPCODE_SUB }, + { 1, 3, 0, 0, 0, 0, "LRP", TGSI_OPCODE_LRP }, + { 1, 3, 0, 0, 0, 0, "CND", TGSI_OPCODE_CND }, + { 0, 0, 0, 0, 0, 0, "", 20 }, /* removed */ + { 1, 3, 0, 0, 0, 0, "DP2A", TGSI_OPCODE_DP2A }, + { 0, 0, 0, 0, 0, 0, "", 22 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 23 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "FRC", TGSI_OPCODE_FRC }, + { 1, 3, 0, 0, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP }, + { 1, 1, 0, 
0, 0, 0, "FLR", TGSI_OPCODE_FLR }, + { 1, 1, 0, 0, 0, 0, "ROUND", TGSI_OPCODE_ROUND }, + { 1, 1, 0, 0, 0, 0, "EX2", TGSI_OPCODE_EX2 }, + { 1, 1, 0, 0, 0, 0, "LG2", TGSI_OPCODE_LG2 }, + { 1, 2, 0, 0, 0, 0, "POW", TGSI_OPCODE_POW }, + { 1, 2, 0, 0, 0, 0, "XPD", TGSI_OPCODE_XPD }, + { 0, 0, 0, 0, 0, 0, "", 32 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "ABS", TGSI_OPCODE_ABS }, + { 1, 1, 0, 0, 0, 0, "RCC", TGSI_OPCODE_RCC }, + { 1, 2, 0, 0, 0, 0, "DPH", TGSI_OPCODE_DPH }, + { 1, 1, 0, 0, 0, 0, "COS", TGSI_OPCODE_COS }, + { 1, 1, 0, 0, 0, 0, "DDX", TGSI_OPCODE_DDX }, + { 1, 1, 0, 0, 0, 0, "DDY", TGSI_OPCODE_DDY }, + { 0, 0, 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP }, + { 1, 1, 0, 0, 0, 0, "PK2H", TGSI_OPCODE_PK2H }, + { 1, 1, 0, 0, 0, 0, "PK2US", TGSI_OPCODE_PK2US }, + { 1, 1, 0, 0, 0, 0, "PK4B", TGSI_OPCODE_PK4B }, + { 1, 1, 0, 0, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB }, + { 1, 2, 0, 0, 0, 0, "RFL", TGSI_OPCODE_RFL }, + { 1, 2, 0, 0, 0, 0, "SEQ", TGSI_OPCODE_SEQ }, + { 1, 2, 0, 0, 0, 0, "SFL", TGSI_OPCODE_SFL }, + { 1, 2, 0, 0, 0, 0, "SGT", TGSI_OPCODE_SGT }, + { 1, 1, 0, 0, 0, 0, "SIN", TGSI_OPCODE_SIN }, + { 1, 2, 0, 0, 0, 0, "SLE", TGSI_OPCODE_SLE }, + { 1, 2, 0, 0, 0, 0, "SNE", TGSI_OPCODE_SNE }, + { 1, 2, 0, 0, 0, 0, "STR", TGSI_OPCODE_STR }, + { 1, 2, 1, 0, 0, 0, "TEX", TGSI_OPCODE_TEX }, + { 1, 4, 1, 0, 0, 0, "TXD", TGSI_OPCODE_TXD }, + { 1, 2, 1, 0, 0, 0, "TXP", TGSI_OPCODE_TXP }, + { 1, 1, 0, 0, 0, 0, "UP2H", TGSI_OPCODE_UP2H }, + { 1, 1, 0, 0, 0, 0, "UP2US", TGSI_OPCODE_UP2US }, + { 1, 1, 0, 0, 0, 0, "UP4B", TGSI_OPCODE_UP4B }, + { 1, 1, 0, 0, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB }, + { 1, 3, 0, 0, 0, 0, "X2D", TGSI_OPCODE_X2D }, + { 1, 1, 0, 0, 0, 0, "ARA", TGSI_OPCODE_ARA }, + { 1, 1, 0, 0, 0, 0, "ARR", TGSI_OPCODE_ARR }, + { 0, 1, 0, 0, 0, 0, "BRA", TGSI_OPCODE_BRA }, + { 0, 0, 0, 1, 0, 0, "CAL", TGSI_OPCODE_CAL }, + { 0, 0, 0, 0, 0, 0, "RET", TGSI_OPCODE_RET }, + { 1, 1, 0, 0, 0, 0, "SSG", TGSI_OPCODE_SSG }, + { 1, 3, 0, 0, 0, 0, "CMP", TGSI_OPCODE_CMP }, + { 1, 1, 0, 0, 
0, 0, "SCS", TGSI_OPCODE_SCS }, + { 1, 2, 1, 0, 0, 0, "TXB", TGSI_OPCODE_TXB }, + { 1, 1, 0, 0, 0, 0, "NRM", TGSI_OPCODE_NRM }, + { 1, 2, 0, 0, 0, 0, "DIV", TGSI_OPCODE_DIV }, + { 1, 2, 0, 0, 0, 0, "DP2", TGSI_OPCODE_DP2 }, + { 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL }, + { 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, + { 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, + { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR }, + { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP }, + { 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE }, + { 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF }, + { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, + { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP }, + { 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, + { 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, + { 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, + { 1, 1, 0, 0, 0, 0, "I2F", TGSI_OPCODE_I2F }, + { 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT }, + { 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, + { 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL }, + { 1, 2, 0, 0, 0, 0, "SHR", TGSI_OPCODE_SHR }, + { 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND }, + { 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR }, + { 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD }, + { 1, 2, 0, 0, 0, 0, "XOR", TGSI_OPCODE_XOR }, + { 1, 3, 0, 0, 0, 0, "SAD", TGSI_OPCODE_SAD }, + { 1, 2, 1, 0, 0, 0, "TXF", TGSI_OPCODE_TXF }, + { 1, 2, 1, 0, 0, 0, "TXQ", TGSI_OPCODE_TXQ }, + { 0, 0, 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT }, + { 0, 0, 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT }, + { 0, 0, 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, + { 0, 0, 0, 1, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, + { 0, 0, 0, 0, 0, 1, "BGNSUB", TGSI_OPCODE_BGNSUB }, + { 0, 0, 0, 1, 1, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, + { 0, 0, 0, 0, 1, 0, "ENDSUB", TGSI_OPCODE_ENDSUB }, + { 1, 1, 0, 0, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, + { 1, 1, 0, 0, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, + { 1, 1, 0, 0, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 }, + { 1, 1, 0, 0, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 }, 
+ { 0, 0, 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP }, + { 0, 0, 0, 0, 0, 0, "", 108 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 109 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 110 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 111 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "NRM4", TGSI_OPCODE_NRM4 }, + { 0, 1, 0, 0, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ }, + { 0, 1, 0, 0, 0, 0, "IFC", TGSI_OPCODE_IFC }, + { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, + { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, + { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, + { 1, 1, 0, 0, 0, 0, "SWZ", TGSI_OPCODE_SWZ } }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h index b2375c6971..74713c3b98 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.h +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h @@ -36,10 +36,12 @@ extern "C" { struct tgsi_opcode_info { - uint num_dst; - uint num_src; - boolean is_tex; - boolean is_branch; + unsigned num_dst:3; + unsigned num_src:3; + unsigned is_tex:1; + unsigned is_branch:1; + int pre_dedent:2; + int post_indent:2; const char *mnemonic; uint opcode; }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index ed594a3e2c..e7bcf4bf75 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -60,7 +60,6 @@ OP13(MAD) OP12(SUB) OP13(LRP) OP13(CND) -OP13(CND0) OP13(DP2A) OP11(FRC) OP13(CLAMP) diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 4fe8553c42..8a13885da9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -199,10 +199,10 @@ iter_instruction( } if (info->num_dst != inst->Instruction.NumDstRegs) { - report_error( ctx, "Invalid number of destination operands, should be %u", info->num_dst ); + report_error( ctx, "%s: Invalid number of destination operands, should be %u", info->mnemonic, 
info->num_dst ); } if (info->num_src != inst->Instruction.NumSrcRegs) { - report_error( ctx, "Invalid number of source operands, should be %u", info->num_src ); + report_error( ctx, "%s: Invalid number of source operands, should be %u", info->mnemonic, info->num_src ); } /* Check destination and source registers' validity. diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 46f2387c15..3cdf8b9f35 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2089,10 +2089,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_CND0: - return 0; - break; - case TGSI_OPCODE_DP2A: FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index c0a0627e0b..f7096bd8e2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -29,6 +29,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_info.h" #include "tgsi/tgsi_dump.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -71,6 +72,7 @@ struct ureg_tokens { #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS #define UREG_MAX_IMMEDIATE 32 #define UREG_MAX_TEMP 256 +#define UREG_MAX_ADDR 2 #define DOMAIN_DECL 0 #define DOMAIN_INSN 1 @@ -99,11 +101,15 @@ struct ureg_program } immediate[UREG_MAX_IMMEDIATE]; unsigned nr_immediates; + struct ureg_src sampler[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned temps_active[UREG_MAX_TEMP / 32]; unsigned nr_temps; + unsigned nr_addrs; + unsigned nr_constants; - unsigned nr_samplers; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -187,6 +193,8 @@ ureg_dst_register( unsigned file, dst.File = file; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = 0; + dst.IndirectIndex = 0; + dst.IndirectSwizzle = 0; dst.Saturate = 0; dst.Index = index; 
dst.Pad1 = 0; @@ -208,6 +216,8 @@ ureg_src_register( unsigned file, src.SwizzleW = TGSI_SWIZZLE_W; src.Pad = 0; src.Indirect = 0; + src.IndirectIndex = 0; + src.IndirectSwizzle = 0; src.Absolute = 0; src.Index = index; src.Negate = 0; @@ -254,6 +264,7 @@ ureg_DECL_fs_input( struct ureg_program *ureg, unsigned index, unsigned interp ) { + assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); return ureg_DECL_input( ureg, name, index, interp ); } @@ -263,6 +274,7 @@ ureg_DECL_vs_input( struct ureg_program *ureg, unsigned name, unsigned index ) { + assert(ureg->processor == TGSI_PROCESSOR_VERTEX); return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT ); } @@ -346,11 +358,36 @@ void ureg_release_temporary( struct ureg_program *ureg, } +/* Allocate a new address register. + */ +struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) +{ + if (ureg->nr_addrs < UREG_MAX_ADDR) + return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ ); + + assert( 0 ); + return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); +} + /* Allocate a new sampler. 
*/ -struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg ) +struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, + unsigned nr ) { - return ureg_src_register( TGSI_FILE_SAMPLER, ureg->nr_samplers++ ); + unsigned i; + + for (i = 0; i < ureg->nr_samplers; i++) + if (ureg->sampler[i].Index == nr) + return ureg->sampler[i]; + + if (i < PIPE_MAX_SAMPLERS) { + ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr ); + ureg->nr_samplers++; + return ureg->sampler[i]; + } + + assert( 0 ); + return ureg->sampler[0]; } @@ -363,6 +400,8 @@ static int match_or_expand_immediate( const float *v, unsigned *swizzle ) { unsigned i, j; + + *swizzle = 0; for (i = 0; i < nr; i++) { boolean found = FALSE; @@ -394,8 +433,8 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, const float *v, unsigned nr ) { - unsigned i; - unsigned swizzle = 0; + unsigned i, j; + unsigned swizzle; /* Could do a first pass where we examine all existing immediates * without expanding. @@ -423,6 +462,12 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, set_bad( ureg ); out: + /* Make sure that all referenced elements are from this immediate. + * Has the effect of making size-one immediates into scalars. 
+ */ + for (j = nr; j < 4; j++) + swizzle |= (swizzle & 0x3) << (j * 2); + return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), (swizzle >> 0) & 0x3, (swizzle >> 2) & 0x3, @@ -442,31 +487,39 @@ ureg_emit_src( struct ureg_program *ureg, union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; + assert(src.File != TGSI_FILE_NULL); + assert(src.File != TGSI_FILE_OUTPUT); + assert(src.File < TGSI_FILE_COUNT); + out[n].value = 0; out[n].src.File = src.File; out[n].src.SwizzleX = src.SwizzleX; out[n].src.SwizzleY = src.SwizzleY; out[n].src.SwizzleZ = src.SwizzleZ; out[n].src.SwizzleW = src.SwizzleW; - out[n].src.Indirect = src.Indirect; out[n].src.Index = src.Index; + out[n].src.Negate = src.Negate; n++; if (src.Absolute) { + out[0].src.Extended = 1; + out[0].src.Negate = 0; out[n].value = 0; + out[n].src_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; out[n].src_ext_mod.Absolute = 1; + out[n].src_ext_mod.Negate = src.Negate; n++; } if (src.Indirect) { + out[0].src.Indirect = 1; out[n].value = 0; out[n].src.File = TGSI_FILE_ADDRESS; - out[n].src.SwizzleX = TGSI_SWIZZLE_X; - out[n].src.SwizzleY = TGSI_SWIZZLE_X; - out[n].src.SwizzleZ = TGSI_SWIZZLE_X; - out[n].src.SwizzleW = TGSI_SWIZZLE_X; - out[n].src.Indirect = 0; - out[n].src.Index = 0; + out[n].src.SwizzleX = src.IndirectSwizzle; + out[n].src.SwizzleY = src.IndirectSwizzle; + out[n].src.SwizzleZ = src.IndirectSwizzle; + out[n].src.SwizzleW = src.IndirectSwizzle; + out[n].src.Index = src.IndirectIndex; n++; } @@ -484,6 +537,13 @@ ureg_emit_dst( struct ureg_program *ureg, union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; + assert(dst.File != TGSI_FILE_NULL); + assert(dst.File != TGSI_FILE_CONSTANT); + assert(dst.File != TGSI_FILE_INPUT); + assert(dst.File != TGSI_FILE_SAMPLER); + assert(dst.File != TGSI_FILE_IMMEDIATE); + assert(dst.File < TGSI_FILE_COUNT); + out[n].value = 0; out[n].dst.File = dst.File; out[n].dst.WriteMask = dst.WriteMask; @@ 
-494,12 +554,11 @@ ureg_emit_dst( struct ureg_program *ureg, if (dst.Indirect) { out[n].value = 0; out[n].src.File = TGSI_FILE_ADDRESS; - out[n].src.SwizzleX = TGSI_SWIZZLE_X; - out[n].src.SwizzleY = TGSI_SWIZZLE_X; - out[n].src.SwizzleZ = TGSI_SWIZZLE_X; - out[n].src.SwizzleW = TGSI_SWIZZLE_X; - out[n].src.Indirect = 0; - out[n].src.Index = 0; + out[n].src.SwizzleX = dst.IndirectSwizzle; + out[n].src.SwizzleY = dst.IndirectSwizzle; + out[n].src.SwizzleZ = dst.IndirectSwizzle; + out[n].src.SwizzleW = dst.IndirectSwizzle; + out[n].src.Index = dst.IndirectIndex; n++; } @@ -523,7 +582,6 @@ ureg_emit_insn(struct ureg_program *ureg, out[0].insn.NrTokens = 0; out[0].insn.Opcode = opcode; out[0].insn.Saturate = saturate; - out[0].insn.NrTokens = 0; out[0].insn.NumDstRegs = num_dst; out[0].insn.NumSrcRegs = num_src; out[0].insn.Padding = 0; @@ -542,6 +600,9 @@ ureg_emit_label(struct ureg_program *ureg, { union tgsi_any_token *out, *insn; + if(!label_token) + return; + out = get_tokens( ureg, DOMAIN_INSN, 1 ); insn = retrieve_token( ureg, DOMAIN_INSN, insn_token ); @@ -617,6 +678,17 @@ ureg_insn(struct ureg_program *ureg, unsigned insn, i; boolean saturate; +#ifdef DEBUG + { + const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); + assert(info); + if(info) { + assert(nr_dst == info->num_dst); + assert(nr_src == info->num_src); + } + } +#endif + saturate = nr_dst ? 
dst[0].Saturate : FALSE; insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src ); @@ -723,10 +795,10 @@ static void emit_decls( struct ureg_program *ureg ) TGSI_INTERPOLATE_CONSTANT ); } - if (ureg->nr_samplers) { + for (i = 0; i < ureg->nr_samplers; i++) { emit_decl_range( ureg, TGSI_FILE_SAMPLER, - 0, ureg->nr_samplers ); + ureg->sampler[i].Index, 1 ); } if (ureg->nr_constants) { @@ -741,6 +813,12 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_temps ); } + if (ureg->nr_addrs) { + emit_decl_range( ureg, + TGSI_FILE_ADDRESS, + 0, ureg->nr_addrs ); + } + for (i = 0; i < ureg->nr_immediates; i++) { emit_immediate( ureg, ureg->immediate[i].v ); @@ -764,7 +842,7 @@ static void copy_instructions( struct ureg_program *ureg ) static void -fixup_header_size(struct ureg_program *ureg ) +fixup_header_size(struct ureg_program *ureg) { union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 1 ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 8836a1ea0e..acbca59040 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -31,6 +31,10 @@ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" +#ifdef __cplusplus +extern "C" { +#endif + struct ureg_program; /* Almost a tgsi_src_register, but we need to pull in the Absolute @@ -48,6 +52,8 @@ struct ureg_src unsigned Absolute : 1; /* BOOL */ int Index : 16; /* SINT */ unsigned Negate : 1; /* BOOL */ + int IndirectIndex : 16; /* SINT */ + int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ }; /* Very similar to a tgsi_dst_register, removing unsupported fields @@ -64,6 +70,8 @@ struct ureg_dst int Index : 16; /* SINT */ unsigned Pad1 : 5; unsigned Pad2 : 1; /* BOOL */ + int IndirectIndex : 16; /* SINT */ + int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ }; struct pipe_context; @@ -131,12 +139,21 @@ void ureg_release_temporary( struct ureg_program *ureg, struct ureg_dst tmp ); +struct ureg_dst 
+ureg_DECL_address( struct ureg_program * ); + +/* Supply an index to the sampler declaration as this is the hook to + * the external pipe_sampler state. Users of this function probably + * don't want just any sampler, but a specific one which they've set + * up state for in the context. + */ struct ureg_src -ureg_DECL_sampler( struct ureg_program * ); +ureg_DECL_sampler( struct ureg_program *, + unsigned index ); static INLINE struct ureg_src -ureg_DECL_immediate4f( struct ureg_program *ureg, +ureg_imm4f( struct ureg_program *ureg, float a, float b, float c, float d) { @@ -149,7 +166,7 @@ ureg_DECL_immediate4f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate3f( struct ureg_program *ureg, +ureg_imm3f( struct ureg_program *ureg, float a, float b, float c) { @@ -161,7 +178,7 @@ ureg_DECL_immediate3f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate2f( struct ureg_program *ureg, +ureg_imm2f( struct ureg_program *ureg, float a, float b) { float v[2]; @@ -171,7 +188,7 @@ ureg_DECL_immediate2f( struct ureg_program *ureg, } static INLINE struct ureg_src -ureg_DECL_immediate1f( struct ureg_program *ureg, +ureg_imm1f( struct ureg_program *ureg, float a) { float v[1]; @@ -392,6 +409,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ static INLINE struct ureg_src ureg_negate( struct ureg_src reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Negate ^= 1; return reg; } @@ -399,6 +417,7 @@ ureg_negate( struct ureg_src reg ) static INLINE struct ureg_src ureg_abs( struct ureg_src reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Absolute = 1; reg.Negate = 0; return reg; @@ -413,6 +432,12 @@ ureg_swizzle( struct ureg_src reg, (reg.SwizzleZ << 4) | (reg.SwizzleW << 6)); + assert(reg.File != TGSI_FILE_NULL); + assert(x < 4); + assert(y < 4); + assert(z < 4); + assert(w < 4); + reg.SwizzleX = (swz >> (x*2)) & 0x3; reg.SwizzleY = (swz >> (y*2)) & 0x3; reg.SwizzleZ = (swz >> (z*2)) & 0x3; @@ -430,6 +455,7 @@ 
static INLINE struct ureg_dst ureg_writemask( struct ureg_dst reg, unsigned writemask ) { + assert(reg.File != TGSI_FILE_NULL); reg.WriteMask &= writemask; return reg; } @@ -437,10 +463,33 @@ ureg_writemask( struct ureg_dst reg, static INLINE struct ureg_dst ureg_saturate( struct ureg_dst reg ) { + assert(reg.File != TGSI_FILE_NULL); reg.Saturate = 1; return reg; } +static INLINE struct ureg_dst +ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr ) +{ + assert(reg.File != TGSI_FILE_NULL); + assert(addr.File == TGSI_FILE_ADDRESS); + reg.Indirect = 1; + reg.IndirectIndex = addr.Index; + reg.IndirectSwizzle = addr.SwizzleX; + return reg; +} + +static INLINE struct ureg_src +ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) +{ + assert(reg.File != TGSI_FILE_NULL); + assert(addr.File == TGSI_FILE_ADDRESS); + reg.Indirect = 1; + reg.IndirectIndex = addr.Index; + reg.IndirectSwizzle = addr.SwizzleX; + return reg; +} + static INLINE struct ureg_dst ureg_dst( struct ureg_src src ) { @@ -449,6 +498,8 @@ ureg_dst( struct ureg_src src ) dst.File = src.File; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = src.Indirect; + dst.IndirectIndex = src.IndirectIndex; + dst.IndirectSwizzle = src.IndirectSwizzle; dst.Saturate = 0; dst.Index = src.Index; dst.Pad1 = 0; @@ -469,6 +520,8 @@ ureg_src( struct ureg_dst dst ) src.SwizzleW = TGSI_SWIZZLE_W; src.Pad = 0; src.Indirect = dst.Indirect; + src.IndirectIndex = dst.IndirectIndex; + src.IndirectSwizzle = dst.IndirectSwizzle; src.Absolute = 0; src.Index = dst.Index; src.Negate = 0; @@ -478,4 +531,60 @@ ureg_src( struct ureg_dst dst ) +static INLINE struct ureg_dst +ureg_dst_undef( void ) +{ + struct ureg_dst dst; + + dst.File = TGSI_FILE_NULL; + dst.WriteMask = 0; + dst.Indirect = 0; + dst.IndirectIndex = 0; + dst.IndirectSwizzle = 0; + dst.Saturate = 0; + dst.Index = 0; + dst.Pad1 = 0; + dst.Pad2 = 0; + + return dst; +} + +static INLINE struct ureg_src +ureg_src_undef( void ) +{ + struct ureg_src src; + + 
src.File = TGSI_FILE_NULL; + src.SwizzleX = 0; + src.SwizzleY = 0; + src.SwizzleZ = 0; + src.SwizzleW = 0; + src.Pad = 0; + src.Indirect = 0; + src.IndirectIndex = 0; + src.IndirectSwizzle = 0; + src.Absolute = 0; + src.Index = 0; + src.Negate = 0; + + return src; +} + +static INLINE boolean +ureg_src_is_undef( struct ureg_src src ) +{ + return src.File == TGSI_FILE_NULL; +} + +static INLINE boolean +ureg_dst_is_undef( struct ureg_dst dst ) +{ + return dst.File == TGSI_FILE_NULL; +} + + +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h index 7877f34558..21eb656327 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -32,6 +32,10 @@ extern "C" { #endif +struct tgsi_src_register; +struct tgsi_src_register_ext_swz; +struct tgsi_full_src_register; + void * tgsi_align_128bit( void *unaligned ); diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index cda6dbd46d..c516317d70 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -62,7 +62,7 @@ struct blit_state struct pipe_viewport_state viewport; void *vs; - void *fs; + void *fs[TGSI_WRITEMASK_XYZW + 1]; struct pipe_buffer *vbuf; /**< quad vertices */ unsigned vbuf_slot; @@ -125,7 +125,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) } /* fragment shader */ - ctx->fs = util_make_fragment_tex_shader(pipe); + ctx->fs[TGSI_WRITEMASK_XYZW] = util_make_fragment_tex_shader(pipe); ctx->vbuf = NULL; /* init vertex data that doesn't change */ @@ -146,9 +146,13 @@ void util_destroy_blit(struct blit_state *ctx) { struct pipe_context *pipe = ctx->pipe; + unsigned i; pipe->delete_vs_state(pipe, ctx->vs); - pipe->delete_fs_state(pipe, ctx->fs); + + for (i = 0; i < Elements(ctx->fs); i++) + if (ctx->fs[i]) + pipe->delete_fs_state(pipe, ctx->fs[i]); pipe_buffer_reference(&ctx->vbuf, NULL); @@ -299,14 +303,15 @@ 
regions_overlap(int srcX0, int srcY0, * XXX need some control over blitting Z and/or stencil. */ void -util_blit_pixels(struct blit_state *ctx, - struct pipe_surface *src, - int srcX0, int srcY0, - int srcX1, int srcY1, - struct pipe_surface *dst, - int dstX0, int dstY0, - int dstX1, int dstY1, - float z, uint filter) +util_blit_pixels_writemask(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter, + uint writemask) { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; @@ -426,8 +431,11 @@ util_blit_pixels(struct blit_state *ctx, /* texture */ cso_set_sampler_textures(ctx->cso, 1, &tex); + if (ctx->fs[writemask] == NULL) + ctx->fs[writemask] = util_make_fragment_tex_shader_writemask(pipe, writemask); + /* shaders */ - cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]); cso_set_vertex_shader_handle(ctx->cso, ctx->vs); /* drawing dest */ @@ -462,6 +470,27 @@ util_blit_pixels(struct blit_state *ctx, } +void +util_blit_pixels(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter ) +{ + util_blit_pixels_writemask( ctx, src, + srcX0, srcY0, + srcX1, srcY1, + dst, + dstX0, dstY0, + dstX1, dstY1, + z, filter, + TGSI_WRITEMASK_XYZW ); +} + + /* Release vertex buffer at end of frame to avoid synchronous * rendering. 
*/ @@ -535,7 +564,7 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_set_sampler_textures(ctx->cso, 1, &tex); /* shaders */ - cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_fragment_shader_handle(ctx->cso, ctx->fs[TGSI_WRITEMASK_XYZW]); cso_set_vertex_shader_handle(ctx->cso, ctx->vs); /* drawing dest */ diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index c35beceda8..a102021529 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -60,6 +60,17 @@ util_blit_pixels(struct blit_state *ctx, int dstX1, int dstY1, float z, uint filter); +void +util_blit_pixels_writemask(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter, + uint writemask); + extern void util_blit_pixels_tex(struct blit_state *ctx, struct pipe_texture *tex, diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index d42b65ce28..1380d98d7e 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -88,6 +88,7 @@ _debug_printf(const char *format, ...) * - avoid outputing large strings (512 bytes is the current maximum length * that is guaranteed to be printed in all platforms) */ +#if !defined(PIPE_OS_HAIKU) static INLINE void debug_printf(const char *format, ...) { @@ -101,6 +102,7 @@ debug_printf(const char *format, ...) #endif } +#endif /* !PIPE_OS_HAIKU */ /* * ... isn't portable so we need to pass arguments in parentheses. 
diff --git a/src/gallium/auxiliary/util/u_fifo.h b/src/gallium/auxiliary/util/u_fifo.h new file mode 100644 index 0000000000..9e007de1ad --- /dev/null +++ b/src/gallium/auxiliary/util/u_fifo.h @@ -0,0 +1,94 @@ +/************************************************************************** + * + * Copyright © 2009 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef U_FIFO_H +#define U_FIFO_H + +#include "util/u_memory.h" + +struct util_fifo +{ + size_t head; + size_t tail; + size_t num; + size_t size; +}; + +static INLINE struct util_fifo * +u_fifo_create(size_t size) +{ + struct util_fifo *fifo; + fifo = MALLOC(sizeof(*fifo) + size * sizeof(void*)); + + fifo->head = 0; + fifo->tail = 0; + fifo->num = 0; + fifo->size = size; + + return fifo; +} + +static INLINE boolean +u_fifo_add(struct util_fifo *fifo, void *ptr) +{ + void **array = (void**)&fifo[1]; + if (fifo->num >= fifo->size) + return FALSE; + + if (++fifo->head >= fifo->size) + fifo->head = 0; + + array[fifo->head] = ptr; + + ++fifo->num; + + return TRUE; +} + +static INLINE boolean +u_fifo_pop(struct util_fifo *fifo, void **ptr) +{ + void **array = (void**)&fifo[1]; + + if (!fifo->num) + return FALSE; + + if (++fifo->tail >= fifo->size) + fifo->tail = 0; + + *ptr = array[fifo->tail]; + + --fifo->num; /* one element was removed; u_fifo_add is what increments num */ + + return TRUE; +} + +static INLINE void +u_fifo_destroy(struct util_fifo *fifo) +{ + FREE(fifo); +} + +#endif diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 00a46d0cc4..6e82983e58 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -14,10 +14,10 @@ PIPE_FORMAT_L16_UNORM , arith , 1, 1, un16, , , , xxx1, PIPE_FORMAT_Z16_UNORM , array , 1, 1, un16, , , , x___, zs PIPE_FORMAT_Z32_UNORM , array , 1, 1, un32, , , , x___, zs PIPE_FORMAT_Z32_FLOAT , array , 1, 1, f32 , , , , x___, zs -PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs -PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs -PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un8 , un24, , , y___, zs -PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un24, un8 , , , x___, zs +PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs +PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs 
+PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un24, un8 , , , x___, zs +PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un8 , un24, , , y___, zs PIPE_FORMAT_S8_UNORM , array , 1, 1, un8 , , , , _x__, zs PIPE_FORMAT_R64_FLOAT , array , 1, 1, f64 , , , , x001, rgb PIPE_FORMAT_R64G64_FLOAT , array , 1, 1, f64 , f64 , , , xy01, rgb diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b0807c1339..b12c97dfb4 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -341,6 +341,16 @@ util_is_inf_or_nan(float x) /** + * Test whether x is a power of two. + */ +static INLINE boolean +util_is_pot(unsigned x) +{ + return (x & (x - 1)) == 0; +} + + +/** * Find first bit set in word. Least significant bit is 1. * Return 0 if no bits set. */ @@ -374,6 +384,10 @@ unsigned ffs( unsigned u ) #define ffs __builtin_ffs #endif +#ifdef __MINGW32__ +#define ffs __builtin_ffs +#endif + /* Could also binary search for the highest bit. */ diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index 07d804ecdb..bc4b758406 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include <winsock2.h> # include <windows.h> -#elif defined(PIPE_OS_LINUX) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) # include <sys/socket.h> # include <netinet/in.h> # include <unistd.h> @@ -54,7 +54,7 @@ u_socket_close(int s) if (s < 0) return; -#if defined(PIPE_OS_LINUX) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) shutdown(s, SHUT_RDWR); close(s); #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) @@ -169,7 +169,7 @@ u_socket_listen_on_port(uint16_t portnum) void u_socket_block(int s, boolean block) { -#if defined(PIPE_OS_LINUX) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) int old = fcntl(s, F_GETFL, 0); if (old == -1) return; diff --git a/src/gallium/auxiliary/util/u_network.h 
b/src/gallium/auxiliary/util/u_network.h index 14d3884427..8c778f492c 100644 --- a/src/gallium/auxiliary/util/u_network.h +++ b/src/gallium/auxiliary/util/u_network.h @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define PIPE_HAVE_SOCKETS -#elif defined(PIPE_OS_LINUX) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) # define PIPE_HAVE_SOCKETS #endif diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index d54a1d8c74..ab754296fa 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -88,11 +88,14 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, /** * Make simple fragment texture shader: - * TEX OUT[0], IN[0], SAMP[0], 2D; + * IMM {0,0,0,1} // (if writemask != 0xf) + * MOV OUT[0], IMM[0] // (if writemask != 0xf) + * TEX OUT[0].writemask, IN[0], SAMP[0], 2D; * END; */ void * -util_make_fragment_tex_shader(struct pipe_context *pipe) +util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, + unsigned writemask ) { struct ureg_program *ureg; struct ureg_src sampler; @@ -103,7 +106,7 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) if (ureg == NULL) return NULL; - sampler = ureg_DECL_sampler( ureg ); + sampler = ureg_DECL_sampler( ureg, 0 ); tex = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_GENERIC, 0, @@ -119,7 +122,12 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) return ureg_create_shader_and_destroy( ureg, pipe ); } - +void * +util_make_fragment_tex_shader(struct pipe_context *pipe ) +{ + return util_make_fragment_tex_shader_writemask( pipe, + TGSI_WRITEMASK_XYZW ); +} diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h index 6f8d96af9b..d2e80d6eb4 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -50,6 +50,10 @@ util_make_vertex_passthrough_shader(struct pipe_context 
*pipe, extern void * +util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, + unsigned writemask ); + +extern void * util_make_fragment_tex_shader(struct pipe_context *pipe); diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/util/u_stream_stdc.c index d8f648e5dd..5cd05b2904 100644 --- a/src/gallium/auxiliary/util/u_stream_stdc.c +++ b/src/gallium/auxiliary/util/u_stream_stdc.c @@ -32,7 +32,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) #include <stdio.h> diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 1235a67d26..0d6489c26e 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -170,7 +170,7 @@ x8r8g8b8_get_tile_rgba(const unsigned *src, pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); - pRow[3] = ubyte_to_float(0xff); + pRow[3] = 1.0F; } p += dst_stride; } @@ -394,6 +394,52 @@ r5g6b5_put_tile_rgba(ushort *dst, +/*** PIPE_FORMAT_R8G8B8_UNORM ***/ + +static void +r8g8b8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = ubyte_to_float(src[0]); + pRow[1] = ubyte_to_float(src[1]); + pRow[2] = ubyte_to_float(src[2]); + pRow[3] = 1.0f; + src += 3; + } + p += dst_stride; + } +} + + +static void +r8g8b8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + dst[0] = float_to_ubyte(pRow[0]); + dst[1] 
= float_to_ubyte(pRow[1]); + dst[2] = float_to_ubyte(pRow[2]); + dst += 3; + } + p += src_stride; + } +} + + + /*** PIPE_FORMAT_Z16_UNORM ***/ /** @@ -1106,6 +1152,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_R8G8B8_UNORM: + r8g8b8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_L8_UNORM: l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); break; @@ -1222,6 +1271,9 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; + case PIPE_FORMAT_R8G8B8_UNORM: + r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; case PIPE_FORMAT_R8G8B8A8_UNORM: assert(0); break; diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index c16cdd0b22..b958a98635 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -35,7 +35,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) #include <sys/time.h> #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) #include <windows.h> @@ -77,7 +77,7 @@ util_time_get_frequency(void) void util_time_get(struct util_time *t) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) gettimeofday(&t->tv, NULL); #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) LONGLONG temp; @@ -102,7 +102,7 @@ util_time_add(const struct util_time *t1, int64_t usecs, struct util_time *t2) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || 
defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) @@ -124,7 +124,7 @@ int64_t util_time_diff(const struct util_time *t1, const struct util_time *t2) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) return (t2->tv.tv_usec - t1->tv.tv_usec) + (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) @@ -144,7 +144,7 @@ util_time_micros( void ) util_time_get(&t1); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) return t1.tv.tv_usec + t1.tv.tv_sec*1000000LL; #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) util_time_get_frequency(); @@ -166,7 +166,7 @@ static INLINE int util_time_compare(const struct util_time *t1, const struct util_time *t2) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) if (t1->tv.tv_sec < t2->tv.tv_sec) return -1; else if(t1->tv.tv_sec > t2->tv.tv_sec) diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index 
7a5c54d9b2..a6189a247b 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -43,6 +43,11 @@ #include <unistd.h> /* usleep */ #endif +#if defined(PIPE_OS_HAIKU) +#include <sys/time.h> /* timeval */ +#include <unistd.h> +#endif + #include "pipe/p_compiler.h" @@ -58,7 +63,7 @@ extern "C" { */ struct util_time { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) struct timeval tv; #else int64_t counter; @@ -89,7 +94,7 @@ util_time_timeout(const struct util_time *start, const struct util_time *end, const struct util_time *curr); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) #define util_time_sleep usleep #else void |