diff options
author | Marek Olšák <maraeo@gmail.com> | 2010-09-03 20:26:43 +0200 |
---|---|---|
committer | Marek Olšák <maraeo@gmail.com> | 2010-09-04 18:56:22 +0200 |
commit | 63943c8fcdc7dae4c059d364528b0a90b8c4041f (patch) | |
tree | 145a55d9004a6c1661916e76b476d0d7c91fb684 /src/mesa/drivers/dri | |
parent | 33360a707e16c3349fde9dd43fee8e38bae9e7f0 (diff) |
r300/compiler: improve register allocation with indexable temporaries for VS
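Register allocation can now reallocate temporaries right after the last
instruction that reads a temporary through relative addressing, instead of being
disabled for the whole shader. Relative addressing of a destination temporary
still disables reallocation entirely.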
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 63 |
1 file changed, 46 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 3e8a8236c0..1628078a32 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -665,12 +665,15 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) struct rc_instruction *inst; struct rc_instruction *end_loop = NULL; unsigned int num_orig_temps = 0; - char hwtemps[R300_VS_MAX_TEMPS]; + char hwtemps[RC_REGISTER_MAX_INDEX]; struct temporary_allocation * ta; unsigned int i, j; + struct rc_instruction *last_inst_src_reladdr = NULL; memset(hwtemps, 0, sizeof(hwtemps)); + rc_recompute_ips(c); + /* Pass 1: Count original temporaries. */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); @@ -690,7 +693,8 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) } } - /* Pass 2: If there is relative addressing of temporaries, we cannot change register indices. Give up. */ + /* Pass 2: If there is relative addressing of dst temporaries, we cannot change register indices. Give up. + * For src temporaries, save the last instruction which uses relative addressing. 
*/ for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); @@ -701,7 +705,7 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && inst->U.I.SrcReg[i].RelAddr) { - return; + last_inst_src_reladdr = inst; } } } @@ -739,9 +743,26 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) } for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) - ta[inst->U.I.SrcReg[i].Index].LastRead = - end_loop ? end_loop : inst; + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + struct rc_instruction *last_read; + + /* From "last_inst_src_reladdr", "end_loop", and "inst", + * select the instruction with the highest instruction index (IP). + * Note that "end_loop", if available, has always a higher index than "inst". */ + if (last_inst_src_reladdr) { + if (end_loop) { + last_read = last_inst_src_reladdr->IP > end_loop->IP ? + last_inst_src_reladdr : end_loop; + } else { + last_read = last_inst_src_reladdr->IP > inst->IP ? + last_inst_src_reladdr : inst; + } + } else { + last_read = end_loop ? 
end_loop : inst; + } + + ta[inst->U.I.SrcReg[i].Index].LastRead = last_read; + } } } @@ -749,13 +770,15 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { - unsigned int orig = inst->U.I.SrcReg[i].Index; - inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; + if (!last_inst_src_reladdr || last_inst_src_reladdr->IP < inst->IP) { + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; - if (ta[orig].Allocated && inst == ta[orig].LastRead) - hwtemps[ta[orig].HwTemp] = 0; + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = 0; + } } } @@ -764,16 +787,22 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) unsigned int orig = inst->U.I.DstReg.Index; if (!ta[orig].Allocated) { - for(j = 0; j < R300_VS_MAX_TEMPS; ++j) { + for(j = 0; j < c->max_temp_regs; ++j) { if (!hwtemps[j]) break; } - if (j >= R300_VS_MAX_TEMPS) { - fprintf(stderr, "Out of hw temporaries\n"); + if (j >= c->max_temp_regs) { + rc_error(c, "Too many temporaries\n"); + return; } else { ta[orig].Allocated = 1; - ta[orig].HwTemp = j; - hwtemps[j] = 1; + if (last_inst_src_reladdr && + last_inst_src_reladdr->IP > inst->IP) { + ta[orig].HwTemp = orig; + } else { + ta[orig].HwTemp = j; + } + hwtemps[ta[orig].HwTemp] = 1; } } |