diff options
author | Eric Anholt <eric@anholt.net> | 2010-07-26 17:47:59 -0700 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2010-07-26 17:53:27 -0700 |
commit | afe125e0a18ac3886c45c7e6b02b122fb2d327b5 (patch) | |
tree | 78621707e71154c0b388b0baacffc26432b7e992 /src/mesa/drivers/dri/r300/compiler | |
parent | d64343f1ae84979bd154475badf11af8a9bfc2eb (diff) | |
parent | 5403ca79b225605c79f49866a6497c97da53be3b (diff) |
Merge remote branch 'origin/master' into glsl2
This pulls in multiple i965 driver fixes which will help ensure better
testing coverage during development, and also gets past the conflicts
of the src/mesa/shader -> src/mesa/program move.
Conflicts:
src/mesa/Makefile
src/mesa/main/shaderapi.c
src/mesa/main/shaderobj.h
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
19 files changed, 701 insertions, 156 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index ff3801dc67..3167d49bca 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -23,6 +23,7 @@ C_SOURCES = \ radeon_dataflow_deadcode.c \ radeon_dataflow_swizzles.c \ radeon_optimize.c \ + radeon_rename_regs.c \ r3xx_fragprog.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript index 50d9cdb7f2..c6f47a6f8a 100755 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -22,6 +22,7 @@ r300compiler = env.ConvenienceLibrary( 'radeon_pair_schedule.c', 'radeon_pair_regalloc.c', 'radeon_optimize.c', + 'radeon_rename_regs.c', 'radeon_emulate_branches.c', 'radeon_emulate_loops.c', 'radeon_dataflow.c', diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 38312658d6..a326ee4c4f 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -29,6 +29,7 @@ #include "radeon_emulate_loops.h" #include "radeon_program_alu.h" #include "radeon_program_tex.h" +#include "radeon_rename_regs.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" #include "r500_fragprog.h" @@ -97,25 +98,27 @@ static void debug_program_log(struct r300_fragment_program_compiler* c, const ch void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { + struct emulate_loop_state loop_state; + rewrite_depth_out(c); + /* This transformation needs to be done before any of the IF + * instructions are modified. */ + radeonTransformKILP(&c->Base); + debug_program_log(c, "before compilation"); - /* XXX Ideally this should be done only for r3xx, but since - * we don't have branching support for r5xx, we use the emulation - * on all chipsets. */ - - if(c->Base.is_r500){ - rc_emulate_loops(&c->Base, R500_PFS_MAX_INST); + if (c->Base.is_r500){ + r500_transform_unroll_loops(&c->Base, &loop_state); + debug_program_log(c, "after r500 transform loops"); } else{ - rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST); + rc_transform_unroll_loops(&c->Base, &loop_state); + debug_program_log(c, "after transform loops"); + + rc_emulate_branches(&c->Base); + debug_program_log(c, "after emulate branches"); } - debug_program_log(c, "after emulate loops"); - - rc_emulate_branches(&c->Base); - - debug_program_log(c, "after emulate branches"); if (c->Base.is_r500) { struct radeon_program_transformation transformations[] = { @@ -162,6 +165,11 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after deadcode"); + if(!c->Base.is_r500){ + rc_emulate_loops(&loop_state, R300_PFS_MAX_ALU_INST); + debug_program_log(c, "after emulate loops"); + } + rc_optimize(&c->Base); debug_program_log(c, "after dataflow optimize"); @@ -172,6 +180,16 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after dataflow passes"); + if(!c->Base.is_r500) { + /* This pass makes it easier for the scheduler to group TEX + * instructions and reduces the chances of creating too + * many texture indirections.*/ + rc_rename_regs(&c->Base); + if (c->Base.Error) + return; + debug_program_log(c, "after register rename"); + } + rc_pair_translate(c); if (c->Base.Error) return; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 507b2e532f..d347b4df9c 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -30,6 +30,7 @@ #include "radeon_program_alu.h" #include "radeon_swizzle.h" #include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" /* * Take an already-setup and valid source then swizzle it appropriately to @@ -145,7 +146,8 @@ static unsigned long t_src(struct r300_vertex_program_code *vp, t_swizzle(GET_SWZ(src->Swizzle, 2)), t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), - src->Negate) | (src->RelAddr << 4); + src->Negate) | + (src->RelAddr << 4) | (src->Abs << 3); } static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, @@ -161,7 +163,7 @@ static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | - (src->RelAddr << 4); + (src->RelAddr << 4) | (src->Abs << 3); } static int valid_dst(struct r300_vertex_program_code *vp, @@ -348,7 +350,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi if (!valid_dst(compiler->code, &vpi->DstReg)) continue; - if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS || + (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) { rc_error(&compiler->Base, "Vertex program has too many instructions\n"); return; } @@ -404,7 +407,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c { struct rc_instruction *inst; unsigned int num_orig_temps = 0; - char hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + char hwtemps[R300_VS_MAX_TEMPS]; struct temporary_allocation * ta; unsigned int i, j; @@ -463,11 +466,11 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c unsigned int orig = inst->U.I.DstReg.Index; if (!ta[orig].Allocated) { - for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { + for(j = 0; j < R300_VS_MAX_TEMPS; ++j) { if (!hwtemps[j]) break; } - if (j >= VSF_MAX_FRAGMENT_TEMPS) { + if (j >= R300_VS_MAX_TEMPS) { fprintf(stderr, "Out of hw temporaries\n"); } else { ta[orig].Allocated = 1; @@ -485,6 +488,44 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c } } +/** + * R3xx-R4xx vertex engine does not support the Absolute source operand modifier + * and the Saturate opcode modifier. Only Absolute is currently transformed. + */ +static int transform_nonnative_modifiers( + struct radeon_compiler *c, + struct rc_instruction *inst, + void* unused) +{ + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + /* Transform ABS(a) to MAX(a, -a). */ + for (i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].Abs) { + struct rc_instruction *new_inst; + unsigned temp; + + inst->U.I.SrcReg[i].Abs = 0; + + temp = rc_find_free_temporary(c); + + new_inst = rc_insert_new_instruction(c, inst->Prev); + new_inst->U.I.Opcode = RC_OPCODE_MAX; + new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + new_inst->U.I.DstReg.Index = temp; + new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + + memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = temp; + inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; + } + } + return 1; +} /** * Vertex engine cannot read two inputs or two constants at the same time. @@ -591,6 +632,8 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { + struct emulate_loop_state loop_state; + compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; addArtificialOutputs(compiler); @@ -600,19 +643,48 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) /* XXX Ideally this should be done only for r3xx, but since * we don't have branching support for r5xx, we use the emulation * on all chipsets. */ + rc_transform_unroll_loops(&compiler->Base, &loop_state); + + debug_program_log(compiler, "after transform loops"); + + if (compiler->Base.is_r500){ + rc_emulate_loops(&loop_state, R500_VS_MAX_ALU); + } else { + rc_emulate_loops(&loop_state, R300_VS_MAX_ALU); + } + debug_program_log(compiler, "after emulate loops"); + rc_emulate_branches(&compiler->Base); debug_program_log(compiler, "after emulate branches"); - { + if (compiler->Base.is_r500) { struct radeon_program_transformation transformations[] = { { &r300_transform_vertex_alu, 0 }, { &r300_transform_trig_scale_vertex, 0 } }; radeonLocalTransform(&compiler->Base, 2, transformations); - } - debug_program_log(compiler, "after native rewrite"); + debug_program_log(compiler, "after native rewrite"); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_vertex_alu, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(&compiler->Base, 2, transformations); + + debug_program_log(compiler, "after native rewrite"); + + /* Note: This pass has to be done seperately from ALU rewrite, + * because it needs to check every instruction. + */ + struct radeon_program_transformation transformations2[] = { + { &transform_nonnative_modifiers, 0 }, + }; + radeonLocalTransform(&compiler->Base, 1, transformations2); + + debug_program_log(compiler, "after emulate modifiers"); + } { /* Note: This pass has to be done seperately from ALU rewrite, diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 632f0bcf4f..e6b5522c5b 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -30,6 +30,7 @@ #include <stdio.h> #include "../r300_reg.h" +#include "radeon_emulate_loops.h" /** * Rewrite IF instructions to use the ALU result special register. @@ -59,6 +60,31 @@ int r500_transform_IF( return 1; } +/** + * Rewrite loops to make them easier to emit. This is not a local + * transformation, because it modifies and reorders an entire block of code. + */ +void r500_transform_unroll_loops(struct radeon_compiler * c, + struct emulate_loop_state *s) +{ + int i; + + rc_transform_unroll_loops(c, s); + + for( i = s->LoopCount - 1; i >= 0; i-- ){ + struct rc_instruction * inst_continue; + if(!s->Loops[i].EndLoop){ + continue; + } + /* Insert a continue instruction at the end of the loop. This + * is required in order to emit loops correctly. */ + inst_continue = rc_insert_new_instruction(c, + s->Loops[i].EndIf->Prev); + inst_continue->U.I.Opcode = RC_OPCODE_CONTINUE; + } + +} + static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { unsigned int relevant; @@ -252,7 +278,7 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c) struct r500_fragment_program_code *code = &c->code.r500; fprintf(stderr, "R500 Fragment Program:\n--------\n"); - int n; + int n, i; uint32_t inst; uint32_t inst0; char *str = NULL; @@ -275,8 +301,8 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c) to_mask((inst >> 15) & 0xf)); switch(inst0 & 0x3) { - case 0: - case 1: + case R500_INST_TYPE_ALU: + case R500_INST_TYPE_OUT: fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); inst = code->inst[n].inst1; @@ -319,9 +345,87 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c) (inst >> 23) & 0x3, (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); break; - case 2: + case R500_INST_TYPE_FC: + fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + /* JUMP_FUNC JUMP_ANY*/ + fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff, + (inst & R500_FC_JUMP_ANY) >> 5); + + /* OP */ + switch(inst & 0x7){ + case R500_FC_OP_JUMP: + fprintf(stderr, "JUMP"); + break; + case R500_FC_OP_LOOP: + fprintf(stderr, "LOOP"); + break; + case R500_FC_OP_ENDLOOP: + fprintf(stderr, "ENDLOOP"); + break; + case R500_FC_OP_REP: + fprintf(stderr, "REP"); + break; + case R500_FC_OP_ENDREP: + fprintf(stderr, "ENDREP"); + break; + case R500_FC_OP_BREAKLOOP: + fprintf(stderr, "BREAKLOOP"); + break; + case R500_FC_OP_BREAKREP: + fprintf(stderr, "BREAKREP"); + break; + case R500_FC_OP_CONTINUE: + fprintf(stderr, "CONTINUE"); + break; + } + fprintf(stderr," "); + /* A_OP */ + switch(inst & (0x3 << 6)){ + case R500_FC_A_OP_NONE: + fprintf(stderr, "NONE"); + break; + case R500_FC_A_OP_POP: + fprintf(stderr, "POP"); + break; + case R500_FC_A_OP_PUSH: + fprintf(stderr, "PUSH"); + break; + } + /* B_OP0 B_OP1 */ + for(i=0; i<2; i++){ + fprintf(stderr, " "); + switch(inst & (0x3 << (24 + (i * 2)))){ + /* R500_FC_B_OP0_NONE + * R500_FC_B_OP1_NONE */ + case 0: + fprintf(stderr, "NONE"); + break; + case R500_FC_B_OP0_DECR: + case R500_FC_B_OP1_DECR: + fprintf(stderr, "DECR"); + break; + case R500_FC_B_OP0_INCR: + case R500_FC_B_OP1_INCR: + fprintf(stderr, "INCR"); + break; + } + } + /*POP_CNT B_ELSE */ + fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4); + inst = code->inst[n].inst3; + /* JUMP_ADDR */ + fprintf(stderr, " %d", inst >> 16); + + if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){ + fprintf(stderr, " IGN_UNC"); + } + inst = code->inst[n].inst3; + fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst); + fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n", + inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); break; - case 3: + case R500_INST_TYPE_TEX: inst = code->inst[n].inst1; fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 4efbae7ba6..0d005a794f 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -36,6 +36,8 @@ #include "radeon_compiler.h" #include "radeon_swizzle.h" +struct emulate_loop_state; + extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); @@ -47,4 +49,6 @@ extern int r500_transform_IF( struct rc_instruction * inst, void* data); +void r500_transform_unroll_loops(struct radeon_compiler * c, + struct emulate_loop_state * s); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index fb2d8b5a9c..0bd8f0a239 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -45,6 +45,8 @@ #include "radeon_program_pair.h" +#define MAX_BRANCH_DEPTH_FULL 32 +#define MAX_BRANCH_DEPTH_PARTIAL 4 #define PROG_CODE \ struct r500_fragment_program_code *code = &c->code->code.r500 @@ -61,6 +63,10 @@ struct branch_info { int Endif; }; +struct loop_info { + int LoopStart; +}; + struct emit_state { struct radeon_compiler * C; struct r500_fragment_program_code * Code; @@ -69,7 +75,12 @@ struct emit_state { unsigned int CurrentBranchDepth; unsigned int BranchesReserved; + struct loop_info * Loops; + unsigned int CurrentLoopDepth; + unsigned int LoopsReserved; + unsigned int MaxBranchDepth; + }; static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) @@ -359,16 +370,49 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; - if (inst->U.I.Opcode == RC_OPCODE_IF) { - if (s->CurrentBranchDepth >= 32) { + switch(inst->U.I.Opcode){ + struct branch_info * branch; + struct loop_info * loop; + case RC_OPCODE_BGNLOOP: + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); + + loop = &s->Loops[s->CurrentLoopDepth++]; + + /* We don't emit an instruction for BGNLOOP, so we need to + * decrement the instruction counter, but first we need to + * set LoopStart to the current value of inst_end, which + * will end up being the first real instruction in the loop.*/ + loop->LoopStart = s->Code->inst_end--; + break; + + case RC_OPCODE_BRK: + /* Don't emit an instruction for BRK */ + s->Code->inst_end--; + break; + + case RC_OPCODE_CONTINUE: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + s->Code->inst[newip].inst2 = R500_FC_OP_JUMP | + R500_FC_JUMP_FUNC(0xff); + s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart); + break; + + case RC_OPCODE_ENDLOOP: + /* Don't emit an instruction for ENDLOOP */ + s->Code->inst_end--; + s->CurrentLoopDepth--; + break; + + case RC_OPCODE_IF: + if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) { rc_error(s->C, "Branch depth exceeds hardware limit"); return; } - memory_pool_array_reserve(&s->C->Pool, struct branch_info, s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); - struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++]; + branch = &s->Branches[s->CurrentBranchDepth++]; branch->If = newip; branch->Else = -1; branch->Endif = -1; @@ -377,29 +421,50 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->MaxBranchDepth = s->CurrentBranchDepth; /* actual instruction is filled in at ENDIF time */ - } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) { + break; + + case RC_OPCODE_ELSE: if (!s->CurrentBranchDepth) { rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); return; } - struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch = &s->Branches[s->CurrentBranchDepth - 1]; branch->Else = newip; /* actual instruction is filled in at ENDIF time */ - } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + break; + + case RC_OPCODE_ENDIF: if (!s->CurrentBranchDepth) { rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); return; } - struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; - branch->Endif = newip; - + branch = &s->Branches[s->CurrentBranchDepth - 1]; + + if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){ + branch->Endif = --s->Code->inst_end; + s->Code->inst[branch->Endif].inst2 |= + R500_FC_B_OP0_DECR; + } + else{ + branch->Endif = newip; + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */ | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ | R500_FC_B_OP0_INCR /* increment branch counter if stay */ + | R500_FC_IGNORE_UNCOVERED ; if (branch->Else >= 0) { @@ -421,17 +486,10 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); } - s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ - | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ - | R500_FC_B_OP1_NONE /* no branch counter if stay */ - | R500_FC_B_POP_CNT(1) - ; - s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); s->CurrentBranchDepth--; - } else { + break; + default: rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name); } } @@ -486,6 +544,10 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } + /* Use FULL flow control mode if branches are nested deep enough. + * We don not need to enable FULL flow control mode for loops, becasue + * we aren't using the hardware loop instructions. + */ if (s.MaxBranchDepth >= 4) { if (code->max_temp_idx < 1) code->max_temp_idx = 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 1979e7e4e4..d03689763b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -235,8 +235,11 @@ struct rX00_fragment_program_code { }; -#define VSF_MAX_FRAGMENT_LENGTH (255*4) -#define VSF_MAX_FRAGMENT_TEMPS (14) +#define R300_VS_MAX_ALU 256 +#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4) +#define R500_VS_MAX_ALU 1024 +#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) +#define R300_VS_MAX_TEMPS 32 #define VSF_MAX_INPUTS 32 #define VSF_MAX_OUTPUTS 32 @@ -244,8 +247,8 @@ struct rX00_fragment_program_code { struct r300_vertex_program_code { int length; union { - uint32_t d[VSF_MAX_FRAGMENT_LENGTH]; - float f[VSF_MAX_FRAGMENT_LENGTH]; + uint32_t d[R500_VS_MAX_ALU_DWORDS]; + float f[R500_VS_MAX_ALU_DWORDS]; } body; int pos_end; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index e3c2c83c0c..fbb4235c22 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -202,32 +202,65 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f inst = inst->Prev) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (opcode->IsFlowControl) { - if (opcode->Opcode == RC_OPCODE_ENDIF) { - push_branch(&s); - } else { - if (s.BranchStackSize) { - struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; - - if (opcode->Opcode == RC_OPCODE_IF) { - or_updatemasks(&s.R, - &s.R, - branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); - - s.BranchStackSize--; - } else if (opcode->Opcode == RC_OPCODE_ELSE) { - if (branch->HaveElse) { - rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); - } else { - memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); - memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); - branch->HaveElse = 1; - } + switch(opcode->Opcode){ + /* Mark all sources in the loop body as used before doing + * normal deadcode analysis. This is probably not optimal. + */ + case RC_OPCODE_ENDLOOP: + { + int endloops = 1; + struct rc_instruction *ptr; + for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){ + opcode = rc_get_opcode_info(ptr->U.I.Opcode); + if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + endloops--; + continue; + } + if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){ + endloops++; + continue; + } + if(opcode->HasDstReg){ + int src = 0; + unsigned int srcmasks[3]; + rc_compute_sources_for_writemask(ptr, + ptr->U.I.DstReg.WriteMask, srcmasks); + for(src=0; src < opcode->NumSrcRegs; src++){ + mark_used(&s, + ptr->U.I.SrcReg[src].File, + ptr->U.I.SrcReg[src].Index, + srcmasks[src]); + } + } + } + break; + } + case RC_OPCODE_CONTINUE: + case RC_OPCODE_BRK: + case RC_OPCODE_BGNLOOP: + break; + case RC_OPCODE_ENDIF: + push_branch(&s); + break; + default: + if (opcode->IsFlowControl && s.BranchStackSize) { + struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; + if (opcode->Opcode == RC_OPCODE_IF) { + or_updatemasks(&s.R, + &s.R, + branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); + + s.BranchStackSize--; + } else if (opcode->Opcode == RC_OPCODE_ELSE) { + if (branch->HaveElse) { + rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); } else { - rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); + memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); + memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); + branch->HaveElse = 1; } } else { - rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__); + rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); } } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index 4c5d29f421..131e9e7436 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -38,22 +38,6 @@ #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) -struct emulate_loop_state { - struct radeon_compiler * C; - struct loop_info * Loops; - unsigned int LoopCount; - unsigned int LoopReserved; -}; - -struct loop_info { - struct rc_instruction * BeginLoop; - struct rc_instruction * Cond; - struct rc_instruction * If; - struct rc_instruction * Brk; - struct rc_instruction * EndIf; - struct rc_instruction * EndLoop; -}; - struct const_value { struct radeon_compiler * C; @@ -94,22 +78,13 @@ static int src_reg_is_immediate(struct rc_src_register * src, c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; } -static unsigned int loop_count_instructions(struct loop_info * loop) +static unsigned int loop_calc_iterations(struct emulate_loop_state *s, + struct loop_info * loop, unsigned int max_instructions) { - unsigned int count = 0; - struct rc_instruction * inst = loop->BeginLoop->Next; - while(inst != loop->EndLoop){ - count++; - inst = inst->Next; - } - return count; -} - -static unsigned int loop_calc_iterations(struct loop_info * loop, - unsigned int loop_count, unsigned int max_instructions) -{ - unsigned int icount = loop_count_instructions(loop); - return max_instructions / (loop_count * icount); + unsigned int total_i = rc_recompute_ips(s->C); + unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; + /* +1 because the program already has one iteration of the loop. */ + return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i)); } static void loop_unroll(struct emulate_loop_state * s, @@ -214,8 +189,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, } static int transform_const_loop(struct emulate_loop_state * s, - struct loop_info * loop, - struct rc_instruction * cond) + struct loop_info * loop) { int end_loops = 1; int iterations; @@ -228,13 +202,13 @@ static int transform_const_loop(struct emulate_loop_state * s, /* Find the counter and the upper limit */ - if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){ - limit = &cond->U.I.SrcReg[0]; - counter = &cond->U.I.SrcReg[1]; + if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){ + limit = &loop->Cond->U.I.SrcReg[0]; + counter = &loop->Cond->U.I.SrcReg[1]; } - else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){ - limit = &cond->U.I.SrcReg[1]; - counter = &cond->U.I.SrcReg[0]; + else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){ + limit = &loop->Cond->U.I.SrcReg[1]; + counter = &loop->Cond->U.I.SrcReg[0]; } else{ DBG("No constant limit.\n"); @@ -293,8 +267,22 @@ static int transform_const_loop(struct emulate_loop_state * s, * simple, since we only support increment and decrement loops. */ limit_value = get_constant_value(s->C, limit, 0); - iterations = (int) ((limit_value - counter_value.Value) / + DBG("Limit is %f.\n", limit_value); + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGT: + case RC_OPCODE_SLT: + iterations = (int) ceilf((limit_value - counter_value.Value) / count_inst.Amount); + break; + + case RC_OPCODE_SLE: + case RC_OPCODE_SGE: + iterations = (int) floorf((limit_value - counter_value.Value) / + count_inst.Amount) + 1; + break; + default: + return 0; + } DBG("Loop will have %d iterations.\n", iterations); @@ -414,7 +402,7 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, } /* Check if the number of loops is known at compile time. */ - if(transform_const_loop(s, loop, ptr)){ + if(transform_const_loop(s, loop)){ return loop->BeginLoop->Next; } @@ -425,9 +413,14 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, return loop->EndLoop; } -static void rc_transform_loops(struct emulate_loop_state * s) +void rc_transform_unroll_loops(struct radeon_compiler *c, + struct emulate_loop_state * s) { - struct rc_instruction * ptr = s->C->Program.Instructions.Next; + struct rc_instruction * ptr; + + memset(s, 0, sizeof(struct emulate_loop_state)); + s->C = c; + ptr = s->C->Program.Instructions.Next; while(ptr != &s->C->Program.Instructions) { if(ptr->Type == RC_INSTRUCTION_NORMAL && ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ @@ -440,7 +433,7 @@ static void rc_transform_loops(struct emulate_loop_state * s) } } -static void rc_unroll_loops(struct emulate_loop_state *s, +void rc_emulate_loops(struct emulate_loop_state *s, unsigned int max_instructions) { int i; @@ -451,24 +444,8 @@ static void rc_unroll_loops(struct emulate_loop_state *s, if(!s->Loops[i].EndLoop){ continue; } - unsigned int iterations = loop_calc_iterations(&s->Loops[i], - s->LoopCount, max_instructions); + unsigned int iterations = loop_calc_iterations(s, &s->Loops[i], + max_instructions); loop_unroll(s, &s->Loops[i], iterations); } } - -void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions) -{ - struct emulate_loop_state s; - - memset(&s, 0, sizeof(struct emulate_loop_state)); - s.C = c; - - /* We may need to move these two operations to r3xx_(vert|frag)prog.c - * and run the optimization passes between them in order to increase - * the number of unrolls we can do for each loop. - */ - rc_transform_loops(&s); - - rc_unroll_loops(&s, max_instructions); -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h index ddcf1c0fab..7748813c4e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -7,6 +7,26 @@ struct radeon_compiler; -void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions); +struct loop_info { + struct rc_instruction * BeginLoop; + struct rc_instruction * Cond; + struct rc_instruction * If; + struct rc_instruction * Brk; + struct rc_instruction * EndIf; + struct rc_instruction * EndLoop; +}; + +struct emulate_loop_state { + struct radeon_compiler * C; + struct loop_info * Loops; + unsigned int LoopCount; + unsigned int LoopReserved; +}; + +void rc_transform_unroll_loops(struct radeon_compiler *c, + struct emulate_loop_state * s); + +void rc_emulate_loops(struct emulate_loop_state *s, + unsigned int max_instructions); #endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index 1dc16855dc..04f234f11d 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -386,6 +386,12 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .NumSrcRegs = 0, }, { + .Opcode = RC_OPCODE_CONTINUE, + .Name = "CONTINUE", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { .Opcode = RC_OPCODE_REPL_ALPHA, .Name = "REPL_ALPHA", .HasDstReg = 1 @@ -393,6 +399,10 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { { .Opcode = RC_OPCODE_BEGIN_TEX, .Name = "BEGIN_TEX" + }, + { + .Opcode = RC_OPCODE_KILP, + .Name = "KILP", } }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 91c82ac089..8b9fa07dde 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -187,6 +187,8 @@ typedef enum { RC_OPCODE_ENDLOOP, + RC_OPCODE_CONTINUE, + /** special instruction, used in R300-R500 fragment program pair instructions * indicates that the result of the alpha operation shall be replicated * across all other channels */ @@ -197,6 +199,9 @@ typedef enum { * can run simultaneously. */ RC_OPCODE_BEGIN_TEX, + /** Stop execution of the shader (GLSL discard) */ + RC_OPCODE_KILP, + MAX_RC_OPCODE } rc_opcode; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 21d7210888..eca0651536 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -75,6 +75,15 @@ struct peephole_state { int BranchDepth; }; +/** + * This is a callback function that is meant to be passed to + * rc_for_all_reads_mask. This function will be called once for each source + * register in inst. + * @param inst The instruction that the source register belongs to. + * @param file The register file of the source register. + * @param index The index of the source register. + * @param mask The components of the source register that are being read from. + */ static void peephole_scan_read(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { @@ -153,6 +162,11 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo for(struct rc_instruction * inst = inst_mov->Next; inst != &c->Program.Instructions; inst = inst->Next) { + /* XXX In the future we might be able to make the optimizer + * smart enough to handle loops. */ + if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP){ + return; + } rc_for_all_reads_mask(inst, peephole_scan_read, &s); rc_for_all_writes_mask(inst, peephole_scan_write, &s); if (s.Conflict) @@ -161,7 +175,8 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo if (s.BranchDepth >= 0) { if (inst->U.I.Opcode == RC_OPCODE_IF) { s.BranchDepth++; - } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF + || inst->U.I.Opcode == RC_OPCODE_ELSE) { s.BranchDepth--; if (s.BranchDepth < 0) { s.DefinedMask &= ~s.MovMask; @@ -208,7 +223,8 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo if (s.BranchDepth >= 0) { if (inst->U.I.Opcode == RC_OPCODE_IF) { s.BranchDepth++; - } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF + || inst->U.I.Opcode == RC_OPCODE_ELSE) { s.BranchDepth--; if (s.BranchDepth < 0) break; /* no more readers after this point */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index a279549ff8..fc540496c4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -141,12 +141,28 @@ static void add_inst_to_list(struct schedule_instruction ** list, struct schedul *list = inst; } +static void add_inst_to_list_end(struct schedule_instruction ** list, + struct schedule_instruction * inst) +{ + if(!*list){ + *list = inst; + }else{ + struct schedule_instruction * temp = *list; + while(temp->NextReady){ + temp = temp->NextReady; + } + temp->NextReady = inst; + } +} + static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) { DBG("%i is now ready\n", sinst->Instruction->IP); + /* Adding Ready TEX instructions to the end of the "Ready List" helps + * us emit TEX instructions in blocks without losing our place. */ if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) - add_inst_to_list(&s->ReadyTEX, sinst); + add_inst_to_list_end(&s->ReadyTEX, sinst); else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) add_inst_to_list(&s->ReadyRGB, sinst); else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) @@ -163,11 +179,14 @@ static void decrease_dependencies(struct schedule_state * s, struct schedule_ins instruction_ready(s, sinst); } -static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst) -{ - DBG("%i: commit\n", sinst->Instruction->IP); - - for(unsigned int i = 0; i < sinst->NumReadValues; ++i) { +/** + * This function decreases the dependencies of the next instruction that + * wants to write to each of sinst's read values. + */ +static void commit_update_reads(struct schedule_state * s, + struct schedule_instruction * sinst){ + unsigned int i; + for(i = 0; i < sinst->NumReadValues; ++i) { struct reg_value * v = sinst->ReadValues[i]; assert(v->NumReaders > 0); v->NumReaders--; @@ -176,8 +195,12 @@ static void commit_instruction(struct schedule_state * s, struct schedule_instru decrease_dependencies(s, v->Next->Writer); } } +} - for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) { +static void commit_update_writes(struct schedule_state * s, + struct schedule_instruction * sinst){ + unsigned int i; + for(i = 0; i < sinst->NumWriteValues; ++i) { struct reg_value * v = sinst->WriteValues[i]; if (v->NumReaders) { for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { @@ -196,6 +219,15 @@ static void commit_instruction(struct schedule_state * s, struct schedule_instru } } +static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i: commit\n", sinst->Instruction->IP); + + commit_update_reads(s, sinst); + + commit_update_writes(s, sinst); +} + /** * Emit all ready texture instructions in a single block. * @@ -208,21 +240,37 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo assert(s->ReadyTEX); - /* Don't let the ready list change under us! */ - readytex = s->ReadyTEX; - s->ReadyTEX = 0; - /* Node marker for R300 */ struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev); inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; /* Link texture instructions back in */ + readytex = s->ReadyTEX; while(readytex) { - struct schedule_instruction * tex = readytex; + rc_insert_instruction(before->Prev, readytex->Instruction); + DBG("%i: commit TEX reads\n", readytex->Instruction->IP); + + /* All of the TEX instructions in the same TEX block have + * their source registers read from before any of the + * instructions in that block write to their destination + * registers. This means that when we commit a TEX + * instruction, any other TEX instruction that wants to write + * to one of the committed instruction's source register can be + * marked as ready and should be emitted in the same TEX + * block. This prevents the following sequence from being + * emitted in two different TEX blocks: + * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; + * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; + */ + commit_update_reads(s, readytex); + readytex = readytex->NextReady; + } + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + while(readytex){ + DBG("%i: commit TEX writes\n", readytex->Instruction->IP); + commit_update_writes(s, readytex); readytex = readytex->NextReady; - - rc_insert_instruction(before->Prev, tex->Instruction); - commit_instruction(s, tex); } } @@ -328,7 +376,7 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor } rc_insert_instruction(before->Prev, sinst->Instruction); - commit_instruction(s, sinst); + commit_alu_instruction(s, sinst); } else { struct schedule_instruction **prgb; struct schedule_instruction **palpha; @@ -346,8 +394,8 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor *prgb = (*prgb)->NextReady; *palpha = (*palpha)->NextReady; rc_insert_instruction(before->Prev, psirgb->Instruction); - commit_instruction(s, psirgb); - commit_instruction(s, psialpha); + commit_alu_instruction(s, psirgb); + commit_alu_instruction(s, psialpha); goto success; } } @@ -357,7 +405,7 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor s->ReadyRGB = s->ReadyRGB->NextReady; rc_insert_instruction(before->Prev, sinst->Instruction); - commit_instruction(s, sinst); + commit_alu_instruction(s, sinst); success: ; } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index c922d3d9a4..3cc2897293 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -973,3 +973,32 @@ int radeonTransformDeriv(struct radeon_compiler* c, return 1; } + +/** + * IF Temp[0].x -\ + * KILP - > KIL -abs(Temp[0].x) + * ENDIF -/ + * + * This needs to be done in its own pass, because it modifies the instructions + * before and after KILP. + */ +void radeonTransformKILP(struct radeon_compiler * c) +{ + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode != RC_OPCODE_KILP + || inst->Prev->U.I.Opcode != RC_OPCODE_IF + || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { + continue; + } + inst->U.I.Opcode = RC_OPCODE_KIL; + inst->U.I.SrcReg[0] = negate(absolute(inst->Prev->U.I.SrcReg[0])); + + /* Remove IF */ + rc_remove_instruction(inst->Prev); + /* Remove ENDIF */ + rc_remove_instruction(inst->Next); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h index 77d444476f..e6e2cc20c5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h @@ -60,4 +60,6 @@ int radeonTransformDeriv( struct rc_instruction * inst, void*); +void radeonTransformKILP(struct radeon_compiler * c); + #endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c new file mode 100644 index 0000000000..31c9866883 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -0,0 +1,131 @@ +/* + * Copyright 2010 Tom Stellard <tstellar@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_rename_regs.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +struct reg_rename { + int old_index; + int new_index; + int temp_index; +}; + +static void rename_reg(void * data, struct rc_instruction * inst, + rc_register_file * file, unsigned int * index) +{ + struct reg_rename *r = data; + + if(r->old_index == *index && *file == RC_FILE_TEMPORARY) { + *index = r->new_index; + } + else if(r->new_index == *index && *file == RC_FILE_TEMPORARY) { + *index = r->temp_index; + } +} + +static void rename_all( + struct radeon_compiler *c, + struct rc_instruction * start, + unsigned int old, + unsigned int new, + unsigned int temp) +{ + struct rc_instruction * inst; + struct reg_rename r; + r.old_index = old; + r.new_index = new; + r.temp_index = temp; + for(inst = start; inst != &c->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, rename_reg, &r); + } +} + +/** + * This function renames registers in an attempt to get the code close to + * SSA form. After this function has completed, most of the register are only + * written to one time, with a few exceptions. For example, this block of code + * will not be modified by this function: + * Mov Temp[0].x Const[0].x + * Mov Temp[0].y Const[0].y + * Basically, destination registers will be renamed if: + * 1. There have been no previous writes to that register + * or + * 2. If the instruction is writting to the exact components (no more, no less) + * of a register that has been written to by previous instructions. + * + * This function assumes all the instructions are still of type + * RC_INSTRUCTION_NORMAL. + */ +void rc_rename_regs(struct radeon_compiler * c) +{ + unsigned int cur_index = 0; + unsigned int icount; + struct rc_instruction * inst; + unsigned int * masks; + + /* The number of instructions in the program is also the maximum + * number of temp registers that could potentially be used. */ + icount = rc_recompute_ips(c); + masks = memory_pool_malloc(&c->Pool, icount * sizeof(unsigned int)); + memset(masks, 0, icount * sizeof(unsigned int)); + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * info; + if(inst->Type != RC_INSTRUCTION_NORMAL) { + rc_error(c, "%s only works with normal instructions.", + __FUNCTION__); + return; + } + unsigned int old_index, temp_index; + struct rc_dst_register * dst = &inst->U.I.DstReg; + info = rc_get_opcode_info(inst->U.I.Opcode); + if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) { + continue; + } + if(dst->Index >= icount || !masks[dst->Index] || + masks[dst->Index] == dst->WriteMask) { + old_index = dst->Index; + /* We need to set dst->Index here so get free temporary + * will work. */ + dst->Index = cur_index++; + temp_index = rc_find_free_temporary(c); + rename_all(c, inst->Next, old_index, + dst->Index, temp_index); + } + assert(dst->Index < icount); + masks[dst->Index] |= dst->WriteMask; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.h b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.h new file mode 100644 index 0000000000..4323b995d8 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.h @@ -0,0 +1,9 @@ + +#ifndef RADEON_RENAME_REGS_H +#define RADEON_RENAME_REGS_H + +struct radeon_compiler; + +void rc_rename_regs(struct radeon_compiler * c); + +#endif /* RADEON_RENAME_REGS_H */ |