diff options
5 files changed, 112 insertions, 77 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index bbdfa0d56f..31f556a96a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -97,6 +97,8 @@ static void debug_program_log(struct r300_fragment_program_compiler* c, const ch void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { + struct emulate_loop_state loop_state; + rewrite_depth_out(c); debug_program_log(c, "before compilation"); @@ -104,14 +106,11 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) /* XXX Ideally this should be done only for r3xx, but since * we don't have branching support for r5xx, we use the emulation * on all chipsets. */ - - if (c->Base.is_r500) { - rc_emulate_loops(&c->Base, R500_PFS_MAX_INST); - } else { - rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST); - } - debug_program_log(c, "after emulate loops"); + rc_transform_unroll_loops(&c->Base, &loop_state); + + debug_program_log(c, "after transform loops"); + rc_emulate_branches(&c->Base); debug_program_log(c, "after emulate branches"); @@ -161,6 +160,15 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after deadcode"); + if(c->Base.is_r500){ + rc_emulate_loops(&loop_state, R500_PFS_MAX_INST); + } + else{ + rc_emulate_loops(&loop_state, R300_PFS_MAX_ALU_INST); + } + + debug_program_log(c, "after emulate looops"); + rc_optimize(&c->Base); debug_program_log(c, "after dataflow optimize"); diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index e984797e2d..bd8d63246a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -593,6 +593,8 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { + struct emulate_loop_state loop_state; + compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; addArtificialOutputs(compiler); @@ -602,10 +604,14 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) /* XXX Ideally this should be done only for r3xx, but since * we don't have branching support for r5xx, we use the emulation * on all chipsets. */ + rc_transform_unroll_loops(&compiler->Base, &loop_state); + + debug_program_log(compiler, "after transform loops"); + if (compiler->Base.is_r500){ - rc_emulate_loops(&compiler->Base, R500_VS_MAX_ALU); + rc_emulate_loops(&loop_state, R500_VS_MAX_ALU); } else { - rc_emulate_loops(&compiler->Base, R300_VS_MAX_ALU); + rc_emulate_loops(&loop_state, R300_VS_MAX_ALU); } debug_program_log(compiler, "after emulate loops"); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index e3c2c83c0c..f8bced2532 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -202,32 +202,61 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f inst = inst->Prev) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (opcode->IsFlowControl) { - if (opcode->Opcode == RC_OPCODE_ENDIF) { - push_branch(&s); - } else { - if (s.BranchStackSize) { - struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; - - if (opcode->Opcode == RC_OPCODE_IF) { - or_updatemasks(&s.R, - &s.R, - branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); - - s.BranchStackSize--; - } else if (opcode->Opcode == RC_OPCODE_ELSE) { - if (branch->HaveElse) { - rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); - } else { - memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); - memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); - branch->HaveElse = 1; - } + switch(opcode->Opcode){ + /* Mark all sources in the loop body as used before doing + * normal deadcode analysis. This is probably not optimal. + */ + case RC_OPCODE_ENDLOOP: + { + int endloops = 1; + struct rc_instruction *ptr; + for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){ + opcode = rc_get_opcode_info(ptr->U.I.Opcode); + if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + endloops--; + continue; + } + if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){ + endloops++; + continue; + } + if(opcode->HasDstReg){ + int src = 0; + unsigned int srcmasks[3]; + rc_compute_sources_for_writemask(ptr, + ptr->U.I.DstReg.WriteMask, srcmasks); + for(src=0; src < opcode->NumSrcRegs; src++){ + mark_used(&s, + ptr->U.I.SrcReg[src].File, + ptr->U.I.SrcReg[src].Index, + srcmasks[src]); + } + } + } + break; + } + case RC_OPCODE_ENDIF: + push_branch(&s); + break; + default: + if (opcode->IsFlowControl && s.BranchStackSize) { + struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; + if (opcode->Opcode == RC_OPCODE_IF) { + or_updatemasks(&s.R, + &s.R, + branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); + + s.BranchStackSize--; + } else if (opcode->Opcode == RC_OPCODE_ELSE) { + if (branch->HaveElse) { + rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); } else { - rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); + memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); + memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); + branch->HaveElse = 1; } } else { - rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__); + rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); } } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index 4c5d29f421..1aaaa6cccd 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -38,22 +38,6 @@ #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) -struct emulate_loop_state { - struct radeon_compiler * C; - struct loop_info * Loops; - unsigned int LoopCount; - unsigned int LoopReserved; -}; - -struct loop_info { - struct rc_instruction * BeginLoop; - struct rc_instruction * Cond; - struct rc_instruction * If; - struct rc_instruction * Brk; - struct rc_instruction * EndIf; - struct rc_instruction * EndLoop; -}; - struct const_value { struct radeon_compiler * C; @@ -214,8 +198,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, } static int transform_const_loop(struct emulate_loop_state * s, - struct loop_info * loop, - struct rc_instruction * cond) + struct loop_info * loop) { int end_loops = 1; int iterations; @@ -228,13 +211,13 @@ static int transform_const_loop(struct emulate_loop_state * s, /* Find the counter and the upper limit */ - if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){ - limit = &cond->U.I.SrcReg[0]; - counter = &cond->U.I.SrcReg[1]; + if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){ + limit = &loop->Cond->U.I.SrcReg[0]; + counter = &loop->Cond->U.I.SrcReg[1]; } - else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){ - limit = &cond->U.I.SrcReg[1]; - counter = &cond->U.I.SrcReg[0]; + else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){ + limit = &loop->Cond->U.I.SrcReg[1]; + counter = &loop->Cond->U.I.SrcReg[0]; } else{ DBG("No constant limit.\n"); @@ -414,7 +397,7 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, } /* Check if the number of loops is known at compile time. */ - if(transform_const_loop(s, loop, ptr)){ + if(transform_const_loop(s, loop)){ return loop->BeginLoop->Next; } @@ -425,9 +408,14 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, return loop->EndLoop; } -static void rc_transform_loops(struct emulate_loop_state * s) +void rc_transform_unroll_loops(struct radeon_compiler *c, + struct emulate_loop_state * s) { - struct rc_instruction * ptr = s->C->Program.Instructions.Next; + struct rc_instruction * ptr; + + memset(s, 0, sizeof(struct emulate_loop_state)); + s->C = c; + ptr = s->C->Program.Instructions.Next; while(ptr != &s->C->Program.Instructions) { if(ptr->Type == RC_INSTRUCTION_NORMAL && ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ @@ -440,7 +428,7 @@ static void rc_transform_loops(struct emulate_loop_state * s) } } -static void rc_unroll_loops(struct emulate_loop_state *s, +void rc_emulate_loops(struct emulate_loop_state *s, unsigned int max_instructions) { int i; @@ -456,19 +444,3 @@ static void rc_unroll_loops(struct emulate_loop_state *s, loop_unroll(s, &s->Loops[i], iterations); } } - -void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions) -{ - struct emulate_loop_state s; - - memset(&s, 0, sizeof(struct emulate_loop_state)); - s.C = c; - - /* We may need to move these two operations to r3xx_(vert|frag)prog.c - * and run the optimization passes between them in order to increase - * the number of unrolls we can do for each loop. - */ - rc_transform_loops(&s); - - rc_unroll_loops(&s, max_instructions); -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h index ddcf1c0fab..7748813c4e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -7,6 +7,26 @@ struct radeon_compiler; -void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions); +struct loop_info { + struct rc_instruction * BeginLoop; + struct rc_instruction * Cond; + struct rc_instruction * If; + struct rc_instruction * Brk; + struct rc_instruction * EndIf; + struct rc_instruction * EndLoop; +}; + +struct emulate_loop_state { + struct radeon_compiler * C; + struct loop_info * Loops; + unsigned int LoopCount; + unsigned int LoopReserved; +}; + +void rc_transform_unroll_loops(struct radeon_compiler *c, + struct emulate_loop_state * s); + +void rc_emulate_loops(struct emulate_loop_state *s, + unsigned int max_instructions); #endif /* RADEON_EMULATE_LOOPS_H */ |