summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c 22
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c 10
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c 75
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c 60
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h 22
5 files changed, 112 insertions, 77 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index bbdfa0d56f..31f556a96a 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -97,6 +97,8 @@ static void debug_program_log(struct r300_fragment_program_compiler* c, const ch
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
+ struct emulate_loop_state loop_state;
+
rewrite_depth_out(c);
debug_program_log(c, "before compilation");
@@ -104,14 +106,11 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
/* XXX Ideally this should be done only for r3xx, but since
* we don't have branching support for r5xx, we use the emulation
* on all chipsets. */
-
- if (c->Base.is_r500) {
- rc_emulate_loops(&c->Base, R500_PFS_MAX_INST);
- } else {
- rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
- }
- debug_program_log(c, "after emulate loops");
+ rc_transform_unroll_loops(&c->Base, &loop_state);
+
+ debug_program_log(c, "after transform loops");
+
rc_emulate_branches(&c->Base);
debug_program_log(c, "after emulate branches");
@@ -161,6 +160,15 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "after deadcode");
+ if(c->Base.is_r500){ /* pass the per-chipset instruction limit to the unroller */
+ rc_emulate_loops(&loop_state, R500_PFS_MAX_INST);
+ }
+ else{
+ rc_emulate_loops(&loop_state, R300_PFS_MAX_ALU_INST);
+ }
+
+ debug_program_log(c, "after emulate loops");
+
rc_optimize(&c->Base);
debug_program_log(c, "after dataflow optimize");
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index e984797e2d..bd8d63246a 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -593,6 +593,8 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
+ struct emulate_loop_state loop_state;
+
compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
addArtificialOutputs(compiler);
@@ -602,10 +604,14 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
/* XXX Ideally this should be done only for r3xx, but since
* we don't have branching support for r5xx, we use the emulation
* on all chipsets. */
+ rc_transform_unroll_loops(&compiler->Base, &loop_state);
+
+ debug_program_log(compiler, "after transform loops");
+
if (compiler->Base.is_r500){
- rc_emulate_loops(&compiler->Base, R500_VS_MAX_ALU);
+ rc_emulate_loops(&loop_state, R500_VS_MAX_ALU);
} else {
- rc_emulate_loops(&compiler->Base, R300_VS_MAX_ALU);
+ rc_emulate_loops(&loop_state, R300_VS_MAX_ALU);
}
debug_program_log(compiler, "after emulate loops");
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index e3c2c83c0c..f8bced2532 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -202,32 +202,61 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
inst = inst->Prev) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (opcode->IsFlowControl) {
- if (opcode->Opcode == RC_OPCODE_ENDIF) {
- push_branch(&s);
- } else {
- if (s.BranchStackSize) {
- struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
-
- if (opcode->Opcode == RC_OPCODE_IF) {
- or_updatemasks(&s.R,
- &s.R,
- branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
-
- s.BranchStackSize--;
- } else if (opcode->Opcode == RC_OPCODE_ELSE) {
- if (branch->HaveElse) {
- rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
- } else {
- memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
- memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
- branch->HaveElse = 1;
- }
+ switch(opcode->Opcode){
+ /* Mark all sources in the loop body as used before doing
+ * normal deadcode analysis. This is probably not optimal.
+ */
+ case RC_OPCODE_ENDLOOP:
+ {
+ int endloops = 1;
+ struct rc_instruction *ptr;
+ for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
+ opcode = rc_get_opcode_info(ptr->U.I.Opcode);
+ if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
+ endloops--;
+ continue;
+ }
+ if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
+ endloops++;
+ continue;
+ }
+ if(opcode->HasDstReg){
+ int src = 0;
+ unsigned int srcmasks[3];
+ rc_compute_sources_for_writemask(ptr,
+ ptr->U.I.DstReg.WriteMask, srcmasks);
+ for(src=0; src < opcode->NumSrcRegs; src++){
+ mark_used(&s,
+ ptr->U.I.SrcReg[src].File,
+ ptr->U.I.SrcReg[src].Index,
+ srcmasks[src]);
+ }
+ }
+ }
+ break;
+ }
+ case RC_OPCODE_ENDIF:
+ push_branch(&s);
+ break;
+ default:
+ if (opcode->IsFlowControl && s.BranchStackSize) {
+ struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
+ if (opcode->Opcode == RC_OPCODE_IF) {
+ or_updatemasks(&s.R,
+ &s.R,
+ branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
+
+ s.BranchStackSize--;
+ } else if (opcode->Opcode == RC_OPCODE_ELSE) {
+ if (branch->HaveElse) {
+ rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
} else {
- rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
+ memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
+ memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
+ branch->HaveElse = 1;
}
} else {
- rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__);
+ rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
}
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
index 4c5d29f421..1aaaa6cccd 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -38,22 +38,6 @@
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
-struct emulate_loop_state {
- struct radeon_compiler * C;
- struct loop_info * Loops;
- unsigned int LoopCount;
- unsigned int LoopReserved;
-};
-
-struct loop_info {
- struct rc_instruction * BeginLoop;
- struct rc_instruction * Cond;
- struct rc_instruction * If;
- struct rc_instruction * Brk;
- struct rc_instruction * EndIf;
- struct rc_instruction * EndLoop;
-};
-
struct const_value {
struct radeon_compiler * C;
@@ -214,8 +198,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
}
static int transform_const_loop(struct emulate_loop_state * s,
- struct loop_info * loop,
- struct rc_instruction * cond)
+ struct loop_info * loop)
{
int end_loops = 1;
int iterations;
@@ -228,13 +211,13 @@ static int transform_const_loop(struct emulate_loop_state * s,
/* Find the counter and the upper limit */
- if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){
- limit = &cond->U.I.SrcReg[0];
- counter = &cond->U.I.SrcReg[1];
+ if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){
+ limit = &loop->Cond->U.I.SrcReg[0];
+ counter = &loop->Cond->U.I.SrcReg[1];
}
- else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){
- limit = &cond->U.I.SrcReg[1];
- counter = &cond->U.I.SrcReg[0];
+ else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){
+ limit = &loop->Cond->U.I.SrcReg[1];
+ counter = &loop->Cond->U.I.SrcReg[0];
}
else{
DBG("No constant limit.\n");
@@ -414,7 +397,7 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
}
/* Check if the number of loops is known at compile time. */
- if(transform_const_loop(s, loop, ptr)){
+ if(transform_const_loop(s, loop)){
return loop->BeginLoop->Next;
}
@@ -425,9 +408,14 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
return loop->EndLoop;
}
-static void rc_transform_loops(struct emulate_loop_state * s)
+void rc_transform_unroll_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s)
{
- struct rc_instruction * ptr = s->C->Program.Instructions.Next;
+ struct rc_instruction * ptr;
+
+ memset(s, 0, sizeof(struct emulate_loop_state));
+ s->C = c;
+ ptr = s->C->Program.Instructions.Next;
while(ptr != &s->C->Program.Instructions) {
if(ptr->Type == RC_INSTRUCTION_NORMAL &&
ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
@@ -440,7 +428,7 @@ static void rc_transform_loops(struct emulate_loop_state * s)
}
}
-static void rc_unroll_loops(struct emulate_loop_state *s,
+void rc_emulate_loops(struct emulate_loop_state *s,
unsigned int max_instructions)
{
int i;
@@ -456,19 +444,3 @@ static void rc_unroll_loops(struct emulate_loop_state *s,
loop_unroll(s, &s->Loops[i], iterations);
}
}
-
-void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
-{
- struct emulate_loop_state s;
-
- memset(&s, 0, sizeof(struct emulate_loop_state));
- s.C = c;
-
- /* We may need to move these two operations to r3xx_(vert|frag)prog.c
- * and run the optimization passes between them in order to increase
- * the number of unrolls we can do for each loop.
- */
- rc_transform_loops(&s);
-
- rc_unroll_loops(&s, max_instructions);
-}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
index ddcf1c0fab..7748813c4e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -7,6 +7,26 @@
struct radeon_compiler;
-void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions);
+struct loop_info {
+ struct rc_instruction * BeginLoop;
+ struct rc_instruction * Cond;
+ struct rc_instruction * If;
+ struct rc_instruction * Brk;
+ struct rc_instruction * EndIf;
+ struct rc_instruction * EndLoop;
+};
+
+struct emulate_loop_state {
+ struct radeon_compiler * C;
+ struct loop_info * Loops;
+ unsigned int LoopCount;
+ unsigned int LoopReserved;
+};
+
+void rc_transform_unroll_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s);
+
+void rc_emulate_loops(struct emulate_loop_state *s,
+ unsigned int max_instructions);
#endif /* RADEON_EMULATE_LOOPS_H */