summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300/compiler
diff options
context:
space:
mode:
authorChristoph Bumiller <e0425955@student.tuwien.ac.at>2010-08-18 14:37:47 +0200
committerChristoph Bumiller <e0425955@student.tuwien.ac.at>2010-08-18 14:37:47 +0200
commit3e54d63429fe7ca5db3c75c181abbaf7a7f55724 (patch)
treee129c36aaef712525f0a04fc5b06c445e3cf84df /src/mesa/drivers/dri/r300/compiler
parenteaab76457818fad0926b84c663440e8987e1f19f (diff)
parent85d9bc236d6a8ff8f12cbc2150f8c3740354f573 (diff)
Merge remote branch 'origin/master' into nv50-compiler
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c268
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c26
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c26
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c128
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h22
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c43
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c39
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c260
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h10
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c38
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c23
17 files changed, 699 insertions, 208 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index a326ee4c4f..d2fa816894 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -109,13 +109,13 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "before compilation");
if (c->Base.is_r500){
- r500_transform_unroll_loops(&c->Base, &loop_state);
- debug_program_log(c, "after r500 transform loops");
+ rc_unroll_loops(&c->Base, R500_PFS_MAX_INST);
+ debug_program_log(c, "after unroll loops");
}
else{
- rc_transform_unroll_loops(&c->Base, &loop_state);
+ rc_transform_loops(&c->Base, &loop_state, -1);
debug_program_log(c, "after transform loops");
-
+
rc_emulate_branches(&c->Base);
debug_program_log(c, "after emulate branches");
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index d347b4df9c..666c9c2a7a 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -32,6 +32,11 @@
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
+struct loop {
+ int BgnLoop;
+
+};
+
/*
* Take an already-setup and valid source then swizzle it appropriately to
* obtain a constant ZERO or ONE source.
@@ -332,11 +337,140 @@ static void ei_pow(struct r300_vertex_program_code *vp,
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
+static void mark_write(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ unsigned int * writemasks = userdata;
+
+ if (file != RC_FILE_TEMPORARY)
+ return;
+
+ if (index >= R300_VS_MAX_TEMPS)
+ return;
+
+ writemasks[index] |= mask;
+}
+
+static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
+{
+ return PVS_SRC_OPERAND(compiler->PredicateIndex,
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_W),
+ t_src_class(RC_FILE_TEMPORARY),
+ 0);
+}
+
+static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
+ unsigned int hw_opcode, int is_math)
+{
+ return PVS_OP_DST_OPERAND(hw_opcode,
+ is_math,
+ 0,
+ compiler->PredicateIndex,
+ RC_MASK_W,
+ t_dst_class(RC_FILE_TEMPORARY));
+
+}
+
+static void ei_if(struct r300_vertex_program_compiler * compiler,
+ struct rc_instruction *rci,
+ unsigned int * inst,
+ unsigned int branch_depth)
+{
+ unsigned int predicate_opcode;
+ int is_math = 0;
+
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode IF not supported\n");
+ return;
+ }
+
+ /* Reserve a temporary to use as our predicate stack counter, if we
+ * don't already have one. */
+ if (!compiler->PredicateMask) {
+ unsigned int writemasks[R300_VS_MAX_TEMPS];
+ memset(writemasks, 0, sizeof(writemasks));
+ struct rc_instruction * inst;
+ unsigned int i;
+ for(inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_writes_mask(inst, mark_write, writemasks);
+ }
+ for(i = 0; i < R300_VS_MAX_TEMPS; i++) {
+ unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
+ /* Only the W component can be used fo the predicate
+ * stack counter. */
+ if (mask & RC_MASK_W) {
+ compiler->PredicateMask = RC_MASK_W;
+ compiler->PredicateIndex = i;
+ break;
+ }
+ }
+ if (i == R300_VS_MAX_TEMPS) {
+ rc_error(&compiler->Base, "No free temporary to use for"
+ " predicate stack counter.\n");
+ return;
+ }
+ }
+ predicate_opcode =
+ branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
+
+ rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
+ if (branch_depth == 0) {
+ is_math = 1;
+ predicate_opcode = ME_PRED_SET_NEQ;
+ inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+ inst[2] = 0;
+ } else {
+ predicate_opcode = VE_PRED_SET_NEQ_PUSH;
+ inst[1] = t_pred_src(compiler);
+ inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+ }
+
+ inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
+ inst[3] = 0;
+
+}
+
+static void ei_else(struct r300_vertex_program_compiler * compiler,
+ unsigned int * inst)
+{
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode ELSE not supported\n");
+ return;
+ }
+ inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
+ inst[1] = t_pred_src(compiler);
+ inst[2] = 0;
+ inst[3] = 0;
+}
+
+static void ei_endif(struct r300_vertex_program_compiler *compiler,
+ unsigned int * inst)
+{
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
+ return;
+ }
+ inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
+ inst[1] = t_pred_src(compiler);
+ inst[2] = 0;
+ inst[3] = 0;
+}
static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *rci;
+ struct loop * loops;
+ int current_loop_depth = 0;
+ int loops_reserved = 0;
+
+ unsigned int branch_depth = 0;
+
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
@@ -366,9 +500,12 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
+ case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
@@ -385,11 +522,86 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
+ case RC_OPCODE_BGNLOOP:
+ {
+ struct loop * l;
+
+ if ((!compiler->Base.is_r500
+ && loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
+ || loops_reserved >= R500_VS_MAX_FC_DEPTH) {
+ rc_error(&compiler->Base,
+ "Loops are nested too deep.");
+ return;
+ }
+ memory_pool_array_reserve(&compiler->Base.Pool,
+ struct loop, loops, current_loop_depth,
+ loops_reserved, 1);
+ l = &loops[current_loop_depth++];
+ memset(l , 0, sizeof(struct loop));
+ l->BgnLoop = (compiler->code->length / 4);
+ continue;
+ }
+ case RC_OPCODE_ENDLOOP:
+ {
+ struct loop * l = &loops[current_loop_depth - 1];
+ unsigned int act_addr = l->BgnLoop - 1;
+ unsigned int last_addr = (compiler->code->length / 4) - 1;
+ unsigned int ret_addr = l->BgnLoop;
+
+ if (loops_reserved >= R300_VS_MAX_FC_OPS) {
+ rc_error(&compiler->Base,
+ "Too many flow control instructions.");
+ return;
+ }
+ if (compiler->Base.is_r500) {
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].lw =
+ R500_PVS_FC_ACT_ADRS(act_addr)
+ | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
+ ;
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].uw =
+ R500_PVS_FC_LAST_INST(last_addr)
+ | R500_PVS_FC_RTN_INST(ret_addr)
+ ;
+ } else {
+ compiler->code->fc_op_addrs.r300
+ [compiler->code->num_fc_ops] =
+ R300_PVS_FC_ACT_ADRS(act_addr)
+ | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
+ | R300_PVS_FC_LAST_INST(last_addr)
+ | R300_PVS_FC_RTN_INST(ret_addr)
+ ;
+ }
+ compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
+ R300_PVS_FC_LOOP_INIT_VAL(0x0)
+ | R300_PVS_FC_LOOP_STEP_VAL(0x1)
+ ;
+ compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
+ compiler->code->num_fc_ops);
+ compiler->code->num_fc_ops++;
+ current_loop_depth--;
+ continue;
+ }
+
default:
rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
return;
}
+ /* Non-flow control instructions that are inside an if statement
+ * need to pay attention to the predicate bit. */
+ if (branch_depth
+ && vpi->Opcode != RC_OPCODE_IF
+ && vpi->Opcode != RC_OPCODE_ELSE
+ && vpi->Opcode != RC_OPCODE_ENDIF) {
+
+ inst[0] |= (PVS_DST_PRED_ENABLE_MASK
+ << PVS_DST_PRED_ENABLE_SHIFT);
+ inst[0] |= (PVS_DST_PRED_SENSE_MASK
+ << PVS_DST_PRED_SENSE_SHIFT);
+ }
+
compiler->code->length += 4;
if (compiler->Base.Error)
@@ -406,6 +618,7 @@ struct temporary_allocation {
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *inst;
+ struct rc_instruction *end_loop = NULL;
unsigned int num_orig_temps = 0;
char hwtemps[R300_VS_MAX_TEMPS];
struct temporary_allocation * ta;
@@ -440,10 +653,35 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
/* Pass 2: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ /* Instructions inside of loops need to use the ENDLOOP
+ * instruction as their LastRead. */
+ if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ int endloops = 1;
+ struct rc_instruction * ptr;
+ for(ptr = inst->Next;
+ ptr != &compiler->Base.Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ endloops++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ endloops--;
+ if (endloops <= 0) {
+ end_loop = ptr;
+ break;
+ }
+ }
+ }
+ }
+
+ if (inst == end_loop) {
+ end_loop = NULL;
+ continue;
+ }
for (i = 0; i < opcode->NumSrcRegs; ++i) {
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
- ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
+ ta[inst->U.I.SrcReg[i].Index].LastRead =
+ end_loop ? end_loop : inst;
}
}
@@ -633,30 +871,24 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
struct emulate_loop_state loop_state;
-
+
compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
addArtificialOutputs(compiler);
debug_program_log(compiler, "before compilation");
- /* XXX Ideally this should be done only for r3xx, but since
- * we don't have branching support for r5xx, we use the emulation
- * on all chipsets. */
- rc_transform_unroll_loops(&compiler->Base, &loop_state);
-
- debug_program_log(compiler, "after transform loops");
-
- if (compiler->Base.is_r500){
- rc_emulate_loops(&loop_state, R500_VS_MAX_ALU);
- } else {
- rc_emulate_loops(&loop_state, R300_VS_MAX_ALU);
- }
- debug_program_log(compiler, "after emulate loops");
+ if (compiler->Base.is_r500)
+ rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU);
+ else
+ rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU);
- rc_emulate_branches(&compiler->Base);
+ debug_program_log(compiler, "after emulate loops");
- debug_program_log(compiler, "after emulate branches");
+ if (!compiler->Base.is_r500) {
+ rc_emulate_branches(&compiler->Base);
+ debug_program_log(compiler, "after emulate branches");
+ }
if (compiler->Base.is_r500) {
struct radeon_program_transformation transformations[] = {
@@ -718,6 +950,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
if (compiler->Base.Debug) {
fprintf(stderr, "Final vertex program code:\n");
- r300_vertex_program_dump(compiler->code);
+ r300_vertex_program_dump(compiler);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
index 5800f1a78e..e6009338e2 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
@@ -20,7 +20,9 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "radeon_compiler.h"
#include "radeon_code.h"
+#include "../r300_reg.h"
#include <stdio.h>
@@ -133,6 +135,10 @@ static void r300_vs_op_dump(uint32_t op)
{
fprintf(stderr, " dst: %d%s op: ",
(op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+ if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) {
+ fprintf(stderr, "PRED %u",
+ (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1);
+ }
if (op & 0x80) {
if (op & 0x1) {
fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
@@ -160,8 +166,9 @@ static void r300_vs_src_dump(uint32_t src)
r300_vs_swiz_debug[(src >> 22) & 0x7]);
}
-void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
+void r300_vertex_program_dump(struct r300_vertex_program_compiler * c)
{
+ struct r300_vertex_program_code * vs = c->code;
unsigned instrcount = vs->length / 4;
unsigned i;
@@ -177,4 +184,21 @@ void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
r300_vs_src_dump(vs->body.d[offset+1+src]);
}
}
+
+ fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
+ for(i = 0; i < vs->num_fc_ops; i++) {
+ switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
+ case 0: fprintf(stderr, "NOP"); break;
+ case 1: fprintf(stderr, "JUMP"); break;
+ case 2: fprintf(stderr, "LOOP"); break;
+ case 3: fprintf(stderr, "JSR"); break;
+ }
+ if (c->Base.is_r500) {
+ fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
+ vs->fc_op_addrs.r500[i].uw,
+ vs->fc_op_addrs.r500[i].lw);
+ } else {
+ fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
+ }
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index e6b5522c5b..80a120497e 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -30,7 +30,6 @@
#include <stdio.h>
#include "../r300_reg.h"
-#include "radeon_emulate_loops.h"
/**
* Rewrite IF instructions to use the ALU result special register.
@@ -60,31 +59,6 @@ int r500_transform_IF(
return 1;
}
-/**
- * Rewrite loops to make them easier to emit. This is not a local
- * transformation, because it modifies and reorders an entire block of code.
- */
-void r500_transform_unroll_loops(struct radeon_compiler * c,
- struct emulate_loop_state *s)
-{
- int i;
-
- rc_transform_unroll_loops(c, s);
-
- for( i = s->LoopCount - 1; i >= 0; i-- ){
- struct rc_instruction * inst_continue;
- if(!s->Loops[i].EndLoop){
- continue;
- }
- /* Insert a continue instruction at the end of the loop. This
- * is required in order to emit loops correctly. */
- inst_continue = rc_insert_new_instruction(c,
- s->Loops[i].EndIf->Prev);
- inst_continue->U.I.Opcode = RC_OPCODE_CONTINUE;
- }
-
-}
-
static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
unsigned int relevant;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 0d005a794f..34173351f8 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -49,6 +49,4 @@ extern int r500_transform_IF(
struct rc_instruction * inst,
void* data);
-void r500_transform_unroll_loops(struct radeon_compiler * c,
- struct emulate_loop_state * s);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 0bd8f0a239..9b60e30f58 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -64,7 +64,16 @@ struct branch_info {
};
struct loop_info {
- int LoopStart;
+ int BgnLoop;
+
+ int BranchDepth;
+ int * Brks;
+ int BrkCount;
+ int BrkReserved;
+
+ int * Conts;
+ int ContCount;
+ int ContReserved;
};
struct emit_state {
@@ -368,6 +377,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
unsigned int newip = ++s->Code->inst_end;
+ /* Currently all loops use the same integer constant to intialize
+ * the loop variables. */
+ if(!s->Code->int_constants[0]) {
+ s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
+ s->Code->int_constant_count = 1;
+ }
s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
switch(inst->U.I.Opcode){
@@ -378,32 +393,77 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
loop = &s->Loops[s->CurrentLoopDepth++];
-
- /* We don't emit an instruction for BGNLOOP, so we need to
- * decrement the instruction counter, but first we need to
- * set LoopStart to the current value of inst_end, which
- * will end up being the first real instruction in the loop.*/
- loop->LoopStart = s->Code->inst_end--;
+ memset(loop, 0, sizeof(struct loop_info));
+ loop->BranchDepth = s->CurrentBranchDepth;
+ loop->BgnLoop = newip;
+
+ s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
+ | R500_FC_JUMP_FUNC(0x00)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
-
case RC_OPCODE_BRK:
- /* Don't emit an instruction for BRK */
- s->Code->inst_end--;
+ loop = &s->Loops[s->CurrentLoopDepth - 1];
+ memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
+ loop->BrkCount, loop->BrkReserved, 1);
+
+ loop->Brks[loop->BrkCount++] = newip;
+ s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_B_OP1_DECR
+ | R500_FC_B_POP_CNT(
+ s->CurrentBranchDepth - loop->BranchDepth)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
- case RC_OPCODE_CONTINUE:
+ case RC_OPCODE_CONT:
loop = &s->Loops[s->CurrentLoopDepth - 1];
- s->Code->inst[newip].inst2 = R500_FC_OP_JUMP |
- R500_FC_JUMP_FUNC(0xff);
- s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart);
+ memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
+ loop->ContCount, loop->ContReserved, 1);
+ loop->Conts[loop->ContCount++] = newip;
+ s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_B_OP1_DECR
+ | R500_FC_B_POP_CNT(
+ s->CurrentBranchDepth - loop->BranchDepth)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
case RC_OPCODE_ENDLOOP:
- /* Don't emit an instruction for ENDLOOP */
- s->Code->inst_end--;
+ {
+ loop = &s->Loops[s->CurrentLoopDepth - 1];
+ /* Emit ENDLOOP */
+ s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_JUMP_ANY
+ | R500_FC_IGNORE_UNCOVERED
+ ;
+ /* The constant integer at index 0 is used by all loops. */
+ s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
+ | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
+ ;
+
+ /* Set jump address and int constant for BGNLOOP */
+ s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
+ | R500_FC_JUMP_ADDR(newip)
+ ;
+
+ /* Set jump address for the BRK instructions. */
+ while(loop->BrkCount--) {
+ s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
+ R500_FC_JUMP_ADDR(newip + 1);
+ }
+
+ /* Set jump address for CONT instructions. */
+ while(loop->ContCount--) {
+ s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
+ R500_FC_JUMP_ADDR(newip);
+ }
s->CurrentLoopDepth--;
break;
-
+ }
case RC_OPCODE_IF:
if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
rc_error(s->C, "Branch depth exceeds hardware limit");
@@ -442,24 +502,16 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
}
branch = &s->Branches[s->CurrentBranchDepth - 1];
-
- if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){
- branch->Endif = --s->Code->inst_end;
- s->Code->inst[branch->Endif].inst2 |=
- R500_FC_B_OP0_DECR;
- }
- else{
- branch->Endif = newip;
-
- s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
- | R500_FC_A_OP_NONE /* no address stack */
- | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
- | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
- | R500_FC_B_OP1_NONE /* no branch counter if stay */
- | R500_FC_B_POP_CNT(1)
+ branch->Endif = newip;
+
+ s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+ | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+ | R500_FC_B_OP1_NONE /* no branch counter if stay */
+ | R500_FC_B_POP_CNT(1)
;
- s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
- }
+ s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
| R500_FC_A_OP_NONE /* no address stack */
| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
@@ -544,11 +596,9 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
- /* Use FULL flow control mode if branches are nested deep enough.
- * We don not need to enable FULL flow control mode for loops, becasue
- * we aren't using the hardware loop instructions.
- */
- if (s.MaxBranchDepth >= 4) {
+ /* Enable full flow control mode if we are using loops or have if
+ * statements nested at least four deep. */
+ if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
if (code->max_temp_idx < 1)
code->max_temp_idx = 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index d03689763b..896246d203 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -221,6 +221,9 @@ struct r500_fragment_program_code {
int max_temp_idx;
uint32_t us_fc_ctrl;
+
+ uint32_t int_constants[32];
+ uint32_t int_constant_count;
};
struct rX00_fragment_program_code {
@@ -240,6 +243,12 @@ struct rX00_fragment_program_code {
#define R500_VS_MAX_ALU 1024
#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)
#define R300_VS_MAX_TEMPS 32
+/* This is the max for all chipsets (r300-r500) */
+#define R300_VS_MAX_FC_OPS 16
+/* The r500 maximum depth is not just for loops, but any combination of loops
+ * and subroutine jumps. */
+#define R500_VS_MAX_FC_DEPTH 8
+#define R300_VS_MAX_LOOP_DEPTH 1
#define VSF_MAX_INPUTS 32
#define VSF_MAX_OUTPUTS 32
@@ -260,9 +269,18 @@ struct r300_vertex_program_code {
uint32_t InputsRead;
uint32_t OutputsWritten;
-};
-void r300_vertex_program_dump(struct r300_vertex_program_code * vs);
+ unsigned int num_fc_ops;
+ uint32_t fc_ops;
+ union {
+ uint32_t r300[R300_VS_MAX_FC_OPS];
+ struct {
+ uint32_t lw;
+ uint32_t uw;
+ } r500[R300_VS_MAX_FC_OPS];
+ } fc_op_addrs;
+ int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
+};
#endif /* RADEON_CODE_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 1c8ba864a4..935dc9b0a8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -307,3 +307,46 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig
}
}
+
+/**
+ * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
+ * Gallium and OpenGL define it the other way around.
+ *
+ * So let's just negate FACE at the beginning of the shader and rewrite the rest
+ * of the shader to read from the newly allocated temporary.
+ */
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
+{
+ unsigned tempregi = rc_find_free_temporary(c);
+ struct rc_instruction *inst_add;
+ struct rc_instruction *inst;
+
+ /* perspective divide */
+ inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
+ inst_add->U.I.Opcode = RC_OPCODE_ADD;
+
+ inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_add->U.I.DstReg.Index = tempregi;
+ inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
+
+ inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+
+ inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
+ inst_add->U.I.SrcReg[1].Index = face;
+ inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
+ inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+ for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+ inst->U.I.SrcReg[i].Index == face) {
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = tempregi;
+ }
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index f15905d79d..7c42eb3ae5 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -81,6 +81,7 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
int full_vtransform);
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face);
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
@@ -110,8 +111,12 @@ struct r300_vertex_program_compiler {
void * UserData;
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
+
+ int PredicateIndex;
+ unsigned int PredicateMask;
};
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+void r300_vertex_program_dump(struct r300_vertex_program_compiler * c);
#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index fbb4235c22..faf531b412 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -43,6 +43,12 @@ struct instruction_state {
unsigned char SrcReg[3];
};
+struct loopinfo {
+ struct updatemask_state * Breaks;
+ unsigned int BreakCount;
+ unsigned int BreaksReserved;
+};
+
struct branchinfo {
unsigned int HaveElse:1;
@@ -59,6 +65,10 @@ struct deadcode_state {
struct branchinfo * BranchStack;
unsigned int BranchStackSize;
unsigned int BranchStackReserved;
+
+ struct loopinfo * LoopStack;
+ unsigned int LoopStackSize;
+ unsigned int LoopStackReserved;
};
@@ -78,6 +88,22 @@ static void or_updatemasks(
dst->Address = a->Address | b->Address;
}
+static void push_break(struct deadcode_state *s)
+{
+ struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
+ memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
+ loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
+
+ memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
+}
+
+static void push_loop(struct deadcode_state * s)
+{
+ memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
+ s->LoopStackSize, s->LoopStackReserved, 1);
+ memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
+}
+
static void push_branch(struct deadcode_state * s)
{
memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
@@ -233,11 +259,22 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
}
}
}
+ push_loop(&s);
break;
}
- case RC_OPCODE_CONTINUE:
case RC_OPCODE_BRK:
+ push_break(&s);
+ break;
case RC_OPCODE_BGNLOOP:
+ {
+ unsigned int i;
+ struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
+ for(i = 0; i < loop->BreakCount; i++) {
+ or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
+ }
+ break;
+ }
+ case RC_OPCODE_CONT:
break;
case RC_OPCODE_ENDIF:
push_branch(&s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
index 131e9e7436..32d4b45dd6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -39,7 +39,6 @@
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
struct const_value {
-
struct radeon_compiler * C;
struct rc_src_register * Src;
float Value;
@@ -78,17 +77,17 @@ static int src_reg_is_immediate(struct rc_src_register * src,
c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
}
-static unsigned int loop_calc_iterations(struct emulate_loop_state *s,
- struct loop_info * loop, unsigned int max_instructions)
+static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
+ struct loop_info * loop, unsigned int prog_inst_limit)
{
- unsigned int total_i = rc_recompute_ips(s->C);
+ unsigned int total_i = rc_recompute_ips(c);
unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
/* +1 because the program already has one iteration of the loop. */
- return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i));
+ return 1 + ((prog_inst_limit - total_i) / loop_i);
}
-static void loop_unroll(struct emulate_loop_state * s,
- struct loop_info *loop, unsigned int iterations)
+static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+ unsigned int iterations)
{
unsigned int i;
struct rc_instruction * ptr;
@@ -99,7 +98,7 @@ static void loop_unroll(struct emulate_loop_state * s,
rc_remove_instruction(loop->EndLoop);
for( i = 1; i < iterations; i++){
for(ptr = first; ptr != last->Next; ptr = ptr->Next){
- struct rc_instruction *new = rc_alloc_instruction(s->C);
+ struct rc_instruction *new = rc_alloc_instruction(c);
memcpy(new, ptr, sizeof(struct rc_instruction));
rc_insert_instruction(append_to, new);
append_to = new;
@@ -115,7 +114,7 @@ static void update_const_value(void * data, struct rc_instruction * inst,
if(value->Src->File != file ||
value->Src->Index != index ||
!(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
- return;
+ return;
}
switch(inst->U.I.Opcode){
case RC_OPCODE_MOV:
@@ -140,7 +139,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
if(file != RC_FILE_TEMPORARY ||
count_inst->Index != index ||
(1 << GET_SWZ(count_inst->Swz,0) != mask)){
- return;
+ return;
}
/* Find the index of the counter register. */
opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -185,13 +184,16 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
count_inst->Unknown = 1;
return;
}
-
}
-static int transform_const_loop(struct emulate_loop_state * s,
- struct loop_info * loop)
+/**
+ * If prog_inst_limit is -1, then all eligible loops will be unrolled regardless
+ * of how many iterations they have.
+ */
+static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+ unsigned int prog_inst_limit)
{
- int end_loops = 1;
+ int end_loops;
int iterations;
struct count_inst count_inst;
float limit_value;
@@ -201,12 +203,12 @@ static int transform_const_loop(struct emulate_loop_state * s,
struct rc_instruction * inst;
/* Find the counter and the upper limit */
-
- if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){
+
+ if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){
limit = &loop->Cond->U.I.SrcReg[0];
counter = &loop->Cond->U.I.SrcReg[1];
}
- else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){
+ else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){
limit = &loop->Cond->U.I.SrcReg[1];
counter = &loop->Cond->U.I.SrcReg[0];
}
@@ -214,13 +216,13 @@ static int transform_const_loop(struct emulate_loop_state * s,
DBG("No constant limit.\n");
return 0;
}
-
+
/* Find the initial value of the counter */
counter_value.Src = counter;
counter_value.Value = 0.0f;
counter_value.HasValue = 0;
- counter_value.C = s->C;
- for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
+ counter_value.C = c;
+ for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
inst = inst->Next){
rc_for_all_writes_mask(inst, update_const_value, &counter_value);
}
@@ -230,11 +232,12 @@ static int transform_const_loop(struct emulate_loop_state * s,
}
DBG("Initial counter value is %f\n", counter_value.Value);
/* Determine how the counter is modified each loop */
- count_inst.C = s->C;
+ count_inst.C = c;
count_inst.Index = counter->Index;
count_inst.Swz = counter->Swizzle;
count_inst.Amount = 0.0f;
count_inst.Unknown = 0;
+ end_loops = 1;
for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
switch(inst->U.I.Opcode){
/* XXX In the future we might want to try to unroll nested
@@ -246,6 +249,16 @@ static int transform_const_loop(struct emulate_loop_state * s,
loop->EndLoop = inst;
end_loops--;
break;
+ case RC_OPCODE_BRK:
+ /* Don't unroll loops if it has a BRK instruction
+ * other one used when testing the main conditional
+ * of the loop. */
+
+ /* Make sure we haven't entered a nested loops. */
+ if(inst != loop->Brk && end_loops == 1) {
+ return 0;
+ }
+ break;
/* XXX Check if the counter is modified within an if statement.
*/
case RC_OPCODE_IF:
@@ -266,17 +279,20 @@ static int transform_const_loop(struct emulate_loop_state * s,
/* Calculate the number of iterations of this loop. Keeping this
* simple, since we only support increment and decrement loops.
*/
- limit_value = get_constant_value(s->C, limit, 0);
+ limit_value = get_constant_value(c, limit, 0);
DBG("Limit is %f.\n", limit_value);
+ /* The iteration calculations are opposite of what you would expect.
+ * In a normal loop, if the condition is met, then loop continues, but
+ * with our loops, if the condition is met, the is exited. */
switch(loop->Cond->U.I.Opcode){
- case RC_OPCODE_SGT:
- case RC_OPCODE_SLT:
+ case RC_OPCODE_SGE:
+ case RC_OPCODE_SLE:
iterations = (int) ceilf((limit_value - counter_value.Value) /
count_inst.Amount);
break;
- case RC_OPCODE_SLE:
- case RC_OPCODE_SGE:
+ case RC_OPCODE_SGT:
+ case RC_OPCODE_SLT:
iterations = (int) floorf((limit_value - counter_value.Value) /
count_inst.Amount) + 1;
break;
@@ -284,77 +300,85 @@ static int transform_const_loop(struct emulate_loop_state * s,
return 0;
}
+ if (prog_inst_limit > 0
+ && iterations > loop_max_possible_iterations(c, loop,
+ prog_inst_limit)) {
+ return 0;
+ }
+
DBG("Loop will have %d iterations.\n", iterations);
-
+
/* Prepare loop for unrolling */
rc_remove_instruction(loop->Cond);
rc_remove_instruction(loop->If);
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
-
- loop_unroll(s, loop, iterations);
+
+ unroll_loop(c, loop, iterations);
loop->EndLoop = NULL;
return 1;
}
-/**
- * This function prepares a loop to be unrolled by converting it into an if
- * statement. Here is an outline of the conversion process:
- * BGNLOOP; -> BGNLOOP;
- * <Additional conditional code> -> <Additional conditional code>
- * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
- * IF temp[0]; -> IF temp[0];
- * BRK; ->
- * ENDIF; -> <Loop Body>
- * <Loop Body> -> ENDIF;
- * ENDLOOP; -> ENDLOOP
- *
+/**
+ * @param c
+ * @param loop
* @param inst A pointer to a BGNLOOP instruction.
- * @return If the loop can be unrolled, a pointer to the first instruction of
- * the unrolled loop.
- * Otherwise, A pointer to the ENDLOOP instruction.
- * Null if there is an error.
+ * @return 1 if all of the members of loop where set.
+ * @return 0 if there was an error and some members of loop are still NULL.
*/
-static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
struct rc_instruction * inst)
{
- struct loop_info *loop;
struct rc_instruction * ptr;
- memory_pool_array_reserve(&s->C->Pool, struct loop_info,
- s->Loops, s->LoopCount, s->LoopReserved, 1);
-
- loop = &s->Loops[s->LoopCount++];
- memset(loop, 0, sizeof(struct loop_info));
if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
- rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__);
- return NULL;
+ rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
+ return 0;
}
+
+ memset(loop, 0, sizeof(struct loop_info));
+
loop->BeginLoop = inst;
- for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){
+ for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {
+
+ if (ptr == &c->Program.Instructions) {
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
+ __FUNCTION__);
+ return 0;
+ }
+
switch(ptr->U.I.Opcode){
case RC_OPCODE_BGNLOOP:
- /* Nested loop */
- ptr = transform_loop(s, ptr);
- if(!ptr){
- return NULL;
+ {
+ /* Nested loop, skip ahead to the end. */
+ unsigned int loop_depth = 1;
+ for(ptr = ptr->Next; ptr != &c->Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ loop_depth++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ if (!--loop_depth) {
+ break;
+ }
+ }
+ }
+ if (ptr == &c->Program.Instructions) {
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
+ __FUNCTION__);
+ return 0;
}
break;
+ }
case RC_OPCODE_BRK:
- loop->Brk = ptr;
- if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
- rc_error(s->C,
- "%s: expected ENDIF\n",__FUNCTION__);
- return NULL;
- }
- loop->EndIf = ptr->Next;
- if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){
- rc_error(s->C,
- "%s: expected IF\n", __FUNCTION__);
- return NULL;
+ if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
+ || ptr->Prev->U.I.Opcode != RC_OPCODE_IF
+ || loop->Brk){
+ continue;
}
+ loop->Brk = ptr;
loop->If = ptr->Prev;
+ loop->EndIf = ptr->Next;
switch(loop->If->Prev->U.I.Opcode){
case RC_OPCODE_SLT:
case RC_OPCODE_SGE:
@@ -364,18 +388,58 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
case RC_OPCODE_SNE:
break;
default:
- rc_error(s->C, "%s expected conditional\n",
+ rc_error(c, "%s: expected conditional",
__FUNCTION__);
- return NULL;
+ return 0;
}
loop->Cond = loop->If->Prev;
- ptr = loop->EndIf;
break;
+
case RC_OPCODE_ENDLOOP:
loop->EndLoop = ptr;
break;
}
}
+
+ if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
+ && loop->Cond && loop->EndLoop) {
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement. Here is an outline of the conversion process:
+ * BGNLOOP; -> BGNLOOP;
+ * <Additional conditional code> -> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0]; -> IF temp[0];
+ * BRK; ->
+ * ENDIF; -> <Loop Body>
+ * <Loop Body> -> ENDIF;
+ * ENDLOOP; -> ENDLOOP
+ *
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return 1 for success, 0 for failure
+ */
+static int transform_loop(struct emulate_loop_state * s,
+ struct rc_instruction * inst)
+{
+ struct loop_info * loop;
+
+ memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+ s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+ loop = &s->Loops[s->LoopCount++];
+
+ if (!build_loop_info(s->C, loop, inst))
+ return 0;
+
+ if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){
+ return 1;
+ }
+
/* Reverse the conditional instruction */
switch(loop->Cond->U.I.Opcode){
case RC_OPCODE_SGE:
@@ -398,43 +462,51 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
break;
default:
rc_error(s->C, "loop->Cond is not a conditional.\n");
- return NULL;
- }
-
- /* Check if the number of loops is known at compile time. */
- if(transform_const_loop(s, loop)){
- return loop->BeginLoop->Next;
+ return 0;
}
- /* Prepare the loop to be unrolled */
+ /* Prepare the loop to be emulated */
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
- return loop->EndLoop;
+ return 1;
}
-void rc_transform_unroll_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s)
+void rc_transform_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s, int prog_inst_limit)
{
struct rc_instruction * ptr;
-
+
memset(s, 0, sizeof(struct emulate_loop_state));
s->C = c;
- ptr = s->C->Program.Instructions.Next;
- while(ptr != &s->C->Program.Instructions) {
+ s->prog_inst_limit = prog_inst_limit;
+ for(ptr = s->C->Program.Instructions.Next;
+ ptr != &s->C->Program.Instructions; ptr = ptr->Next) {
if(ptr->Type == RC_INSTRUCTION_NORMAL &&
ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
- ptr = transform_loop(s, ptr);
- if(!ptr){
+ if (!transform_loop(s, ptr))
return;
+ }
+ }
+}
+
+void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit)
+{
+ struct rc_instruction * inst;
+ struct loop_info loop;
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ if (build_loop_info(c, &loop, inst)) {
+ try_unroll_loop(c, &loop, prog_inst_limit);
}
}
- ptr = ptr->Next;
}
}
-void rc_emulate_loops(struct emulate_loop_state *s,
- unsigned int max_instructions)
+void rc_emulate_loops(struct emulate_loop_state *s, int prog_inst_limit)
{
int i;
/* Iterate backwards of the list of loops so that loops that nested
@@ -444,8 +516,8 @@ void rc_emulate_loops(struct emulate_loop_state *s,
if(!s->Loops[i].EndLoop){
continue;
}
- unsigned int iterations = loop_calc_iterations(s, &s->Loops[i],
- max_instructions);
- loop_unroll(s, &s->Loops[i], iterations);
+ unsigned int iterations = loop_max_possible_iterations(
+ s->C, &s->Loops[i], prog_inst_limit);
+ unroll_loop(s->C, &s->Loops[i], iterations);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
index 7748813c4e..bba1f68e30 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -21,12 +21,14 @@ struct emulate_loop_state {
struct loop_info * Loops;
unsigned int LoopCount;
unsigned int LoopReserved;
+ int prog_inst_limit;
};
-void rc_transform_unroll_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s);
+void rc_transform_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s, int prog_inst_limit);
-void rc_emulate_loops(struct emulate_loop_state *s,
- unsigned int max_instructions);
+void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit);
+
+void rc_emulate_loops(struct emulate_loop_state * s, int prog_inst_limit);
#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index 04f234f11d..2ea830be7f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -386,8 +386,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.NumSrcRegs = 0,
},
{
- .Opcode = RC_OPCODE_CONTINUE,
- .Name = "CONTINUE",
+ .Opcode = RC_OPCODE_CONT,
+ .Name = "CONT",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 8b9fa07dde..6e18d6eb3f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -187,7 +187,7 @@ typedef enum {
RC_OPCODE_ENDLOOP,
- RC_OPCODE_CONTINUE,
+ RC_OPCODE_CONT,
/** special instruction, used in R300-R500 fragment program pair instructions
* indicates that the result of the alpha operation shall be replicated
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index eca0651536..7a3f35950a 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -164,7 +164,8 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
inst = inst->Next) {
/* XXX In the future we might be able to make the optimizer
* smart enough to handle loops. */
- if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP){
+ if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
+ || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
return;
}
rc_for_all_reads_mask(inst, peephole_scan_read, &s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index 8a912da461..ce72cd97ab 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -65,6 +65,11 @@ struct regalloc_state {
struct hardware_register * HwTemporary;
unsigned int NumHwTemporaries;
+ /**
+ * If an instruction is inside of a loop, end_loop will be the
+ * IP of the ENDLOOP instruction, otherwise end_loop will be 0
+ */
+ int end_loop;
};
static void print_live_intervals(struct live_intervals * src)
@@ -178,10 +183,10 @@ static void scan_callback(void * data, struct rc_instruction * inst,
else
reg->Live.Start = inst->IP;
reg->Live.End = inst->IP;
- } else {
- if (inst->IP > reg->Live.End)
- reg->Live.End = inst->IP;
- }
+ } else if (s->end_loop)
+ reg->Live.End = s->end_loop;
+ else if (inst->IP > reg->Live.End)
+ reg->Live.End = inst->IP;
}
static void compute_live_intervals(struct regalloc_state * s)
@@ -191,6 +196,31 @@ static void compute_live_intervals(struct regalloc_state * s)
for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
inst != &s->C->Program.Instructions;
inst = inst->Next) {
+
+ /* For all instructions inside of a loop, the ENDLOOP
+ * instruction is used as the end of the live interval. */
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) {
+ int loops = 1;
+ struct rc_instruction * tmp;
+ for(tmp = inst->Next;
+ tmp != &s->C->Program.Instructions;
+ tmp = tmp->Next) {
+ if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ loops++;
+ break;
+ } else if (tmp->U.I.Opcode
+ == RC_OPCODE_ENDLOOP) {
+ if(!--loops) {
+ s->end_loop = tmp->IP;
+ break;
+ }
+ }
+ }
+ }
+
+ if (inst->IP == s->end_loop)
+ s->end_loop = 0;
+
rc_for_all_reads_mask(inst, scan_callback, s);
rc_for_all_writes_mask(inst, scan_callback, s);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 3cc2897293..857aae5514 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -988,17 +988,22 @@ void radeonTransformKILP(struct radeon_compiler * c)
for (inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
- if (inst->U.I.Opcode != RC_OPCODE_KILP
- || inst->Prev->U.I.Opcode != RC_OPCODE_IF
- || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+ if (inst->U.I.Opcode != RC_OPCODE_KILP)
continue;
- }
+
inst->U.I.Opcode = RC_OPCODE_KIL;
- inst->U.I.SrcReg[0] = negate(absolute(inst->Prev->U.I.SrcReg[0]));
- /* Remove IF */
- rc_remove_instruction(inst->Prev);
- /* Remove ENDIF */
- rc_remove_instruction(inst->Next);
+ if (inst->Prev->U.I.Opcode != RC_OPCODE_IF
+ || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+ inst->U.I.SrcReg[0] = negate(builtin_one);
+ } else {
+
+ inst->U.I.SrcReg[0] =
+ negate(absolute(inst->Prev->U.I.SrcReg[0]));
+ /* Remove IF */
+ rc_remove_instruction(inst->Prev);
+ /* Remove ENDIF */
+ rc_remove_instruction(inst->Next);
+ }
}
}