diff options
Diffstat (limited to 'src/mesa')
6 files changed, 300 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 34d22b4559..ff3801dc67 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -9,6 +9,7 @@ C_SOURCES = \
 	radeon_code.c \
 	radeon_compiler.c \
 	radeon_emulate_branches.c \
+	radeon_emulate_loops.c \
 	radeon_program.c \
 	radeon_program_print.c \
 	radeon_opcodes.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 7f3b88ed75..38312658d6 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -26,6 +26,7 @@
 
 #include "radeon_dataflow.h"
 #include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
 #include "radeon_program_alu.h"
 #include "radeon_program_tex.h"
 #include "r300_fragprog.h"
@@ -103,6 +104,15 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
 	/* XXX Ideally this should be done only for r3xx, but since
 	 * we don't have branching support for r5xx, we use the emulation
 	 * on all chipsets. */
+
+	if(c->Base.is_r500){
+		rc_emulate_loops(&c->Base, R500_PFS_MAX_INST);
+	}
+	else{
+		rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
+	}
+	debug_program_log(c, "after emulate loops");
+
 	rc_emulate_branches(&c->Base);
 	debug_program_log(c, "after emulate branches");
 
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
new file mode 100644
index 0000000000..b05ba08e13
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Emulate loops by unrolling them.  Every BGNLOOP/ENDLOOP region is first
+ * rewritten so that its body is guarded by an IF/ENDIF pair; the guarded
+ * body is then replicated as often as the instruction budget allows.
+ */
+
+#include "radeon_emulate_loops.h"
+
+#include "radeon_compiler.h"
+
+/** State shared by the transform and the unroll pass. */
+struct emulate_loop_state {
+	struct radeon_compiler * C;
+	/** Loops in the order in which their BGNLOOP was visited. */
+	struct loop_info * Loops;
+	unsigned int LoopCount;
+	unsigned int LoopReserved;
+};
+
+/** The two instructions that delimit a loop. */
+struct loop_info {
+	struct rc_instruction * BeginLoop;
+	struct rc_instruction * EndLoop;
+};
+
+/** Count the instructions strictly between BGNLOOP and ENDLOOP. */
+static unsigned int loop_count_instructions(struct loop_info * loop)
+{
+	unsigned int count = 0;
+	struct rc_instruction * inst;
+
+	for(inst = loop->BeginLoop->Next; inst != loop->EndLoop; inst = inst->Next){
+		count++;
+	}
+	return count;
+}
+
+/**
+ * Split the instruction budget evenly between all loops and work out how
+ * many copies of this loop's body fit into its share.
+ *
+ * @return The number of iterations to emit.  An empty loop body (or a loop
+ * count of zero) yields 0 instead of dividing by zero.
+ */
+static unsigned int loop_calc_iterations(struct loop_info * loop,
+		unsigned int loop_count, unsigned int max_instructions)
+{
+	unsigned int icount = loop_count_instructions(loop);
+
+	if(icount == 0 || loop_count == 0){
+		return 0;
+	}
+	return max_instructions / (loop_count * icount);
+}
+
+/**
+ * Replace a transformed loop by back-to-back copies of its body.
+ *
+ * BGNLOOP and ENDLOOP are removed.  The existing body serves as the first
+ * iteration, so only iterations - 1 copies are appended behind it.
+ */
+static void loop_unroll(struct emulate_loop_state * s,
+			struct loop_info *loop, unsigned int iterations)
+{
+	unsigned int i;
+	struct rc_instruction * ptr;
+	struct rc_instruction * first = loop->BeginLoop->Next;
+	struct rc_instruction * last = loop->EndLoop->Prev;
+	struct rc_instruction * append_to = last;
+
+	rc_remove_instruction(loop->BeginLoop);
+	rc_remove_instruction(loop->EndLoop);
+	for( i = 1; i < iterations; i++){
+		/* Copies are linked in behind last, so last->Next marks
+		 * the end of the original body. */
+		for(ptr = first; ptr != last->Next; ptr = ptr->Next){
+			struct rc_instruction * copy = rc_alloc_instruction(s->C);
+			memcpy(copy, ptr, sizeof(struct rc_instruction));
+			rc_insert_instruction(append_to, copy);
+			append_to = copy;
+		}
+	}
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement.  Here is an outline of the conversion process:
+ * BGNLOOP;                         -> BGNLOOP;
+ * SGE temp[0], temp[1], temp[2];   -> SLT temp[0], temp[1], temp[2];
+ * IF temp[0];                      -> IF temp[0];
+ * BRK;                             ->
+ * ENDIF;                           -> <Loop Body>
+ * <Loop Body>                      -> ENDIF;
+ * ENDLOOP;                         -> ENDLOOP;
+ *
+ * The instruction that follows BGNLOOP is assumed to be the SGE of this
+ * pattern and is turned into an SLT without being checked.  Nested loops
+ * are transformed recursively and get their own entry in s->Loops.
+ *
+ * @param inst Pointer to a BGNLOOP instruction.
+ * @return The matching ENDLOOP, so that a caller which advances to Next
+ * resumes right behind the loop.  When the program ends before an ENDLOOP
+ * is found, the last instruction is returned and EndLoop stays NULL.
+ */
+static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+						struct rc_instruction * inst)
+{
+	struct rc_instruction * const list_head = &s->C->Program.Instructions;
+	struct rc_instruction * endif = NULL;
+	struct rc_instruction * ptr;
+	unsigned int index;
+
+	memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+			s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+	/* Address the entry by index rather than by pointer: a nested
+	 * transform_loop reserves again, which may hand out a different
+	 * s->Loops array. */
+	index = s->LoopCount++;
+	memset(&s->Loops[index], 0, sizeof(struct loop_info));
+	s->Loops[index].BeginLoop = inst;
+
+	/* Reverse the SGE instruction */
+	ptr = inst->Next;
+	if(ptr != list_head){
+		ptr->U.I.Opcode = RC_OPCODE_SLT;
+	}
+	for(; ptr != list_head; ptr = ptr->Next){
+		struct rc_instruction * before;
+
+		if(ptr->Type != RC_INSTRUCTION_NORMAL){
+			continue;
+		}
+		switch(ptr->U.I.Opcode){
+		case RC_OPCODE_BGNLOOP:
+			/* Nested loop; carry on behind its ENDLOOP. */
+			ptr = transform_loop(s, ptr);
+			break;
+		case RC_OPCODE_BRK:
+			/* The BRK instruction should always be followed by
+			 * an ENDIF.  This ENDIF is unlinked here and put
+			 * back in front of the ENDLOOP instruction. */
+			endif = ptr->Next;
+			before = ptr->Prev;
+			rc_remove_instruction(ptr);
+			rc_remove_instruction(endif);
+			/* Resume from an instruction that is still linked. */
+			ptr = before;
+			break;
+		case RC_OPCODE_ENDLOOP:
+			/* Insert the ENDIF before ENDLOOP. */
+			if(endif){
+				rc_insert_instruction(ptr->Prev, endif);
+			}
+			s->Loops[index].EndLoop = ptr;
+			return ptr;
+		default:
+			break;
+		}
+	}
+	return list_head->Prev;
+}
+
+/**
+ * Transform every top-level loop of the program; nested loops are handled
+ * by transform_loop itself.
+ */
+static void rc_transform_loops(struct emulate_loop_state * s)
+{
+	struct rc_instruction * ptr;
+
+	for(ptr = s->C->Program.Instructions.Next;
+			ptr != &s->C->Program.Instructions; ptr = ptr->Next){
+		if(ptr->Type == RC_INSTRUCTION_NORMAL &&
+				ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
+			ptr = transform_loop(s, ptr);
+		}
+	}
+}
+
+/** Unroll every loop that transform_loop has recorded. */
+static void rc_unroll_loops(struct emulate_loop_state *s,
+						unsigned int max_instructions)
+{
+	unsigned int i;
+
+	/* Iterate backwards over the list of loops so that nested loops
+	 * are unrolled before the loops that contain them.
+	 */
+	for( i = s->LoopCount; i-- > 0; ){
+		struct loop_info * loop = &s->Loops[i];
+		unsigned int iterations;
+
+		if(!loop->EndLoop){
+			/* No matching ENDLOOP was found, leave it alone. */
+			continue;
+		}
+		iterations = loop_calc_iterations(loop, s->LoopCount,
+							max_instructions);
+		loop_unroll(s, loop, iterations);
+	}
+}
+
+/**
+ * Emulate all loops of a program by unrolling them in place.
+ *
+ * @param c Compiler whose program is rewritten.
+ * @param max_instructions Instruction budget that is shared by all loops.
+ */
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
+{
+	struct emulate_loop_state s;
+
+	memset(&s, 0, sizeof(struct emulate_loop_state));
+	s.C = c;
+
+	/* We may need to move these two operations to r3xx_(vert|frag)prog.c
+	 * and run the optimization passes between them in order to increase
+	 * the number of unrolls we can do for each loop.
+	 */
+	rc_transform_loops(&s);
+
+	rc_unroll_loops(&s, max_instructions);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
new file mode 100644
index 0000000000..ddcf1c0fab
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -0,0 +1,12 @@
+
+
+#ifndef RADEON_EMULATE_LOOPS_H
+#define RADEON_EMULATE_LOOPS_H
+
+#define MAX_ITERATIONS 8
+
+struct radeon_compiler;
+
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions);
+
+#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index d593b3e81a..1dc16855dc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -368,6 +368,24 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
 		.NumSrcRegs = 0
 	},
 	{
+		.Opcode = RC_OPCODE_BGNLOOP,
+		.Name = "BGNLOOP",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_BRK,
+		.Name = "BRK",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_ENDLOOP,
+		.Name = "ENDLOOP",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0,
+	},
+	{
 		.Opcode = RC_OPCODE_REPL_ALPHA,
 		.Name = "REPL_ALPHA",
 		.HasDstReg = 1
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 87a2e23084..91c82ac089 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -180,6 +180,12 @@ typedef enum {
 
 	/** branch instruction: has no effect */
 	RC_OPCODE_ENDIF,
+
+	RC_OPCODE_BGNLOOP,
+
+	RC_OPCODE_BRK,
+
+	RC_OPCODE_ENDLOOP,
 
 	/** special instruction, used in R300-R500 fragment program pair instructions
 	 * indicates that the result of the alpha operation shall be replicated