diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 23 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 37 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 12 |
4 files changed, 43 insertions, 32 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 01ca95b074..0756288daf 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -53,7 +53,8 @@ /** * BIND forces source operand i into the same register as destination operand i, - * and the operands will be assigned consecutive registers (needed for TEX) + * and the operands will be assigned consecutive registers (needed for TEX). + * Beware conflicts ! * SELECT forces its multiple source operands and its destination operand into * one and the same register. */ diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 57bf4b77f3..a6791529fa 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -1147,13 +1147,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) /* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy * neighbouring registers. CSE might have messed this up. + * Just generate a MOV for each source to avoid conflicts if they're used in + * multiple NV_OP_BIND at different positions. */ static int nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_value *val; struct nv_instruction *bnd, *nvi, *next; - int s, t; + int s; for (bnd = b->entry; bnd; bnd = next) { next = bnd->next; @@ -1161,20 +1163,17 @@ nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) continue; for (s = 0; s < 4 && bnd->src[s]; ++s) { val = bnd->src[s]->value; - for (t = s + 1; t < 4 && bnd->src[t]; ++t) { - if (bnd->src[t]->value != val) - continue; - nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); - nvi->def[0] = new_value_like(ctx->pc, val); - nvi->def[0]->insn = nvi; - nv_reference(ctx->pc, nvi, 0, val); - nvc0_insn_insert_before(bnd, nvi); - nv_reference(ctx->pc, bnd, t, nvi->def[0]); - } + nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); + nvi->def[0] = new_value_like(ctx->pc, val); + nvi->def[0]->insn = nvi; + nv_reference(ctx->pc, nvi, 0, val); + nv_reference(ctx->pc, bnd, s, nvi->def[0]); + + nvc0_insn_insert_before(bnd, nvi); } } - DESCEND_ARBITRARY(t, nv_pass_fix_bind); + DESCEND_ARBITRARY(s, nv_pass_fix_bind); return 0; } diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index d24f09a150..ee28268006 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -477,7 +477,7 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) break; case NV_OP_MOV: if ((iter == 2) && i->src[0]->value->insn && - !nv_is_texture_op(i->src[0]->value->join->insn->opcode)) + !nv_is_vector_op(i->src[0]->value->join->insn->opcode)) try_join_values(ctx, i->def[0], i->src[0]->value); break; case NV_OP_SELECT: @@ -488,18 +488,16 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) do_join_values(ctx, i->def[0], i->src[c]->value); } break; - case NV_OP_TEX: - case NV_OP_TXB: - case NV_OP_TXL: - case NV_OP_TXQ: - /* on nvc0, TEX src and dst can differ */ - break; case NV_OP_BIND: if (iter) break; - for (c = 0; c < 6 && i->src[c]; ++c) + for (c = 0; c < 4 && i->src[c]; ++c) do_join_values(ctx, i->def[c], i->src[c]->value); break; + case NV_OP_TEX: + case NV_OP_TXB: + case NV_OP_TXL: + case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */ default: break; } @@ -730,6 +728,21 @@ nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set) set->pc = pc; } +/* We allocate registers for all defs of a vector instruction at once. + * Since we'll encounter all of them in the allocation loop, do the allocation + * when we're at the one with the live range that starts latest. + */ +static boolean +is_best_representative(struct nv_value *val) +{ + struct nv_instruction *nvi = val->insn; + int i; + for (i = 0; i < 4 && val->insn->def[i]; ++i) + if (nvi->def[i]->livei && nvi->def[i]->livei->bgn > val->livei->bgn) + return FALSE; + return TRUE; +} + static void insert_ordered_tail(struct nv_value *list, struct nv_value *nval) { @@ -821,11 +834,13 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) boolean mem = FALSE; int v = nvi_vector_size(cur->insn); - if (v > 1) - mem = !reg_assign(&f, &cur->insn->def[0], v); - else + if (v > 1) { + if (is_best_representative(cur)) + mem = !reg_assign(&f, &cur->insn->def[0], v); + } else { if (iter) mem = !reg_assign(&f, &cur, 1); + } if (mem) { NOUVEAU_ERR("out of registers\n"); diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 9b5d429078..f53af6c49c 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1333,10 +1333,6 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, if (array) arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]); - /* ensure that all inputs reside in a GPR */ - for (c = 0; c < dim + array + cube + shadow; ++c) - (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1; - /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */ bnd = new_instruction(bld->pc, NV_OP_BIND); @@ -1878,10 +1874,10 @@ bld_instruction(struct bld_context *bld, } for (c = 0; c < 4; ++c) - if ((mask & (1 << c)) && - ((dst0[c]->reg.file == NV_FILE_IMM) || - (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR))) - dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); + if (mask & (1 << c)) + if ((dst0[c]->reg.file == NV_FILE_IMM) || + (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63)) + dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); c = 0; if ((mask & 0x3) == 0x3) { |