summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_pc.h 3
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_pc_optimize.c 23
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_pc_regalloc.c 37
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c 12
4 files changed, 43 insertions, 32 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
index 01ca95b074..0756288daf 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.h
+++ b/src/gallium/drivers/nvc0/nvc0_pc.h
@@ -53,7 +53,8 @@
/**
* BIND forces source operand i into the same register as destination operand i,
- * and the operands will be assigned consecutive registers (needed for TEX)
+ * and the operands will be assigned consecutive registers (needed for TEX).
+ * Beware conflicts !
* SELECT forces its multiple source operands and its destination operand into
* one and the same register.
*/
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
index 57bf4b77f3..a6791529fa 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
@@ -1147,13 +1147,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
* neighbouring registers. CSE might have messed this up.
+ * Just generate a MOV for each source to avoid conflicts if they're used in
+ * multiple NV_OP_BIND at different positions.
*/
static int
nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
{
struct nv_value *val;
struct nv_instruction *bnd, *nvi, *next;
- int s, t;
+ int s;
for (bnd = b->entry; bnd; bnd = next) {
next = bnd->next;
@@ -1161,20 +1163,17 @@ nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
continue;
for (s = 0; s < 4 && bnd->src[s]; ++s) {
val = bnd->src[s]->value;
- for (t = s + 1; t < 4 && bnd->src[t]; ++t) {
- if (bnd->src[t]->value != val)
- continue;
- nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
- nvi->def[0] = new_value_like(ctx->pc, val);
- nvi->def[0]->insn = nvi;
- nv_reference(ctx->pc, nvi, 0, val);
- nvc0_insn_insert_before(bnd, nvi);
- nv_reference(ctx->pc, bnd, t, nvi->def[0]);
- }
+ nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
+ nvi->def[0] = new_value_like(ctx->pc, val);
+ nvi->def[0]->insn = nvi;
+ nv_reference(ctx->pc, nvi, 0, val);
+ nv_reference(ctx->pc, bnd, s, nvi->def[0]);
+
+ nvc0_insn_insert_before(bnd, nvi);
}
}
- DESCEND_ARBITRARY(t, nv_pass_fix_bind);
+ DESCEND_ARBITRARY(s, nv_pass_fix_bind);
return 0;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
index d24f09a150..ee28268006 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
@@ -477,7 +477,7 @@ pass_join_values(struct nv_pc_pass *ctx, int iter)
break;
case NV_OP_MOV:
if ((iter == 2) && i->src[0]->value->insn &&
- !nv_is_texture_op(i->src[0]->value->join->insn->opcode))
+ !nv_is_vector_op(i->src[0]->value->join->insn->opcode))
try_join_values(ctx, i->def[0], i->src[0]->value);
break;
case NV_OP_SELECT:
@@ -488,18 +488,16 @@ pass_join_values(struct nv_pc_pass *ctx, int iter)
do_join_values(ctx, i->def[0], i->src[c]->value);
}
break;
- case NV_OP_TEX:
- case NV_OP_TXB:
- case NV_OP_TXL:
- case NV_OP_TXQ:
- /* on nvc0, TEX src and dst can differ */
- break;
case NV_OP_BIND:
if (iter)
break;
- for (c = 0; c < 6 && i->src[c]; ++c)
+ for (c = 0; c < 4 && i->src[c]; ++c)
do_join_values(ctx, i->def[c], i->src[c]->value);
break;
+ case NV_OP_TEX:
+ case NV_OP_TXB:
+ case NV_OP_TXL:
+ case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */
default:
break;
}
@@ -730,6 +728,21 @@ nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set)
set->pc = pc;
}
+/* We allocate registers for all defs of a vector instruction at once.
+ * Since we'll encounter all of them in the allocation loop, do the allocation
+ * when we're at the one with the live range that starts latest.
+ */
+static boolean
+is_best_representative(struct nv_value *val) /* TRUE iff no def of val->insn has a live interval beginning after val's */
+{
+ struct nv_instruction *nvi = val->insn; /* same instruction the loop condition reads through val->insn */
+ int i;
+ for (i = 0; i < 4 && val->insn->def[i]; ++i) /* at most 4 defs; stops at the first NULL slot */
+ if (nvi->def[i]->livei && nvi->def[i]->livei->bgn > val->livei->bgn) /* defs without a live interval are ignored */
+ return FALSE; /* NOTE(review): the test above dereferences val->livei without a NULL check, unlike def[i]->livei - confirm callers guarantee it is set */
+ return TRUE;
+}
+
static void
insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
{
@@ -821,11 +834,13 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
boolean mem = FALSE;
int v = nvi_vector_size(cur->insn);
- if (v > 1)
- mem = !reg_assign(&f, &cur->insn->def[0], v);
- else
+ if (v > 1) {
+ if (is_best_representative(cur))
+ mem = !reg_assign(&f, &cur->insn->def[0], v);
+ } else {
if (iter)
mem = !reg_assign(&f, &cur, 1);
+ }
if (mem) {
NOUVEAU_ERR("out of registers\n");
diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
index 9b5d429078..f53af6c49c 100644
--- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
+++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
@@ -1333,10 +1333,6 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
if (array)
arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);
- /* ensure that all inputs reside in a GPR */
- for (c = 0; c < dim + array + cube + shadow; ++c)
- (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1;
-
/* bind { layer x y z } and { lod/bias shadow } to adjacent regs */
bnd = new_instruction(bld->pc, NV_OP_BIND);
@@ -1878,10 +1874,10 @@ bld_instruction(struct bld_context *bld,
}
for (c = 0; c < 4; ++c)
- if ((mask & (1 << c)) &&
- ((dst0[c]->reg.file == NV_FILE_IMM) ||
- (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR)))
- dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);
+ if (mask & (1 << c))
+ if ((dst0[c]->reg.file == NV_FILE_IMM) ||
+ (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63))
+ dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);
c = 0;
if ((mask & 0x3) == 0x3) {