From bcecb8ff66d2c002ac1273c0a9e9b2b9f9d3d43e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 23 May 2009 13:32:51 +0200 Subject: nv50: record last access to temp and attr regs We now inspect the TGSI instructions in tx_prep to determine where temps and FP attrs are last accessed. This will enable us to reclaim some temporaries early and we also use it to omit pre-loading FP attributes that aren't used. --- src/gallium/drivers/nv50/nv50_program.c | 127 ++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 40d2384c3e..c73ed08a55 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -85,6 +85,8 @@ struct nv50_reg { int hw; int neg; + + int acc; /* instruction where this reg is last read (first insn == 1) */ }; struct nv50_pc { @@ -108,6 +110,10 @@ struct nv50_pc { struct nv50_reg *temp_temp[16]; unsigned temp_temp_nr; + + /* current instruction and total number of insns */ + unsigned insn_cur; + unsigned insn_nr; }; static void @@ -1323,6 +1329,112 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return TRUE; } +/* Adjust a bitmask that indicates what components of a source are used, + * we use this in tx_prep so we only load interpolants that are needed. + */ +static void +insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask) +{ + const struct tgsi_instruction_ext_texture *tex; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_DP3: + *mask = 0x7; + break; + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + *mask = 0xF; + break; + case TGSI_OPCODE_LIT: + *mask = 0xB; + break; + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + *mask = 0x1; + break; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + assert(insn->Instruction.Extended); + tex = &insn->InstructionExtTexture; + + *mask = 0x7; + if (tex->Texture == TGSI_TEXTURE_1D) + *mask = 0x1; + else + if (tex->Texture == TGSI_TEXTURE_2D) + *mask = 0x3; + + if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) + *mask |= 0x8; + break; + default: + break; + } +} + +static void +prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok, + unsigned *r_usage[2]) +{ + const struct tgsi_full_instruction *insn; + const struct tgsi_full_src_register *src; + const struct tgsi_dst_register *dst; + + unsigned i, c, k, n, mask, *acc_p; + + insn = &tok->FullInstruction; + dst = &insn->FullDstRegisters[0].DstRegister; + mask = dst->WriteMask; + + if (!r_usage[0]) + r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned)); + if (!r_usage[1]) + r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned)); + + if (dst->File == TGSI_FILE_TEMPORARY) { + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + r_usage[0][dst->Index * 4 + c] = pc->insn_nr; + } + } + + for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { + src = &insn->FullSrcRegisters[i]; + + switch (src->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + acc_p = r_usage[0]; + break; + case TGSI_FILE_INPUT: + acc_p = r_usage[1]; + break; + default: + continue; + } + + insn_adjust_mask(insn, &mask); + + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + + k = tgsi_util_get_full_src_register_extswizzle(src, c); + switch (k) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + n = src->SrcRegister.Index * 4 + k; + acc_p[n] = pc->insn_nr; + break; + default: + break; + } + } + } +} + static boolean nv50_program_tx_prep(struct nv50_pc *pc) { @@ -1330,6 +1442,11 @@ nv50_program_tx_prep(struct nv50_pc *pc) boolean ret = FALSE; unsigned i, c; + /* track register access for temps and attrs */ + unsigned *r_usage[2]; + r_usage[0] = NULL; + r_usage[1] = NULL; + tgsi_parse_init(&p, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { const union tgsi_full_token *tok = &p.FullToken; @@ -1382,6 +1499,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) } break; case TGSI_TOKEN_TYPE_INSTRUCTION: + pc->insn_nr++; + prep_inspect_insn(pc, tok, r_usage); break; default: break; @@ -1398,6 +1517,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->temp[i*4+c].type = P_TEMP; pc->temp[i*4+c].hw = -1; pc->temp[i*4+c].index = i; + pc->temp[i*4+c].acc = r_usage[0][i*4+c]; } } } @@ -1427,6 +1547,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->attr[i*4+c].type = at->type; pc->attr[i*4+c].hw = at->hw; pc->attr[i*4+c].index = at->index; + pc->attr[i*4+c].acc = r_usage[1][i*4+c]; } else { pc->p->cfg.vp.attr[aid/32] |= (1 << (aid % 32)); @@ -1504,6 +1625,11 @@ nv50_program_tx_prep(struct nv50_pc *pc) ret = TRUE; out_err: + if (r_usage[0]) + FREE(r_usage[0]); + if (r_usage[1]) + FREE(r_usage[1]); + tgsi_parse_free(&p); return ret; } @@ -1558,6 +1684,7 @@ nv50_program_tx(struct nv50_program *p) switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: + ++pc->insn_cur; ret = nv50_program_tx_insn(pc, tok); if (ret == FALSE) goto out_err; -- cgit v1.2.3