summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/nv50/nv50_pc.c39
-rw-r--r--src/gallium/drivers/nv50/nv50_pc.h3
-rw-r--r--src/gallium/drivers/nv50/nv50_pc_optimize.c4
-rw-r--r--src/gallium/drivers/nv50/nv50_pc_regalloc.c140
-rw-r--r--src/gallium/drivers/nv50/nv50_tgsi_to_nc.c127
5 files changed, 166 insertions, 147 deletions
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c
index 614982db2d..e32d28a9ce 100644
--- a/src/gallium/drivers/nv50/nv50_pc.c
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -394,7 +394,7 @@ nv_nvi_delete(struct nv_instruction *nvi)
struct nv_basic_block *b = nvi->bb;
int j;
- debug_printf("REM: "); nv_print_instruction(nvi);
+ /* debug_printf("REM: "); nv_print_instruction(nvi); */
for (j = 0; j < 5; ++j)
nv_reference(NULL, &nvi->src[j], NULL);
@@ -477,5 +477,40 @@ nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
for (j = 0; j < b->num_in; ++j)
n += nvbb_dominated_by(b->in[j], d);
- return n && (n == b->num_in);
+ return (n && (n == b->num_in)) ? 1 : 0;
+}
+
+/* check if bf (future) can be reached from bp (past) */
+boolean
+nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
+ struct nv_basic_block *bt)
+{
+ if (bf == bp)
+ return TRUE;
+ if (bp == bt)
+ return FALSE;
+
+ if (bp->out[0] && bp->out[0] != bp &&
+ nvbb_reachable_by(bf, bp->out[0], bt))
+ return TRUE;
+ if (bp->out[1] && bp->out[1] != bp &&
+ nvbb_reachable_by(bf, bp->out[1], bt))
+ return TRUE;
+ return FALSE;
+}
+
+struct nv_basic_block *
+nvbb_dom_frontier(struct nv_basic_block *b)
+{
+ struct nv_basic_block *df = b->out[0];
+
+ assert(df);
+ while (nvbb_dominated_by(df, b) ||
+ (!nvbb_dominated_by(df->in[0], b) &&
+ (!df->in[1] || !nvbb_dominated_by(df->in[1], b)))) {
+ df = df->out[0];
+ assert(df);
+ }
+ assert(df);
+ return df;
}
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
index 4b191c508a..987043c7a0 100644
--- a/src/gallium/drivers/nv50/nv50_pc.h
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -426,6 +426,9 @@ void nv_nvi_delete(struct nv_instruction *);
void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);
void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *);
int nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *);
+boolean nvbb_reachable_by(struct nv_basic_block *, struct nv_basic_block *,
+ struct nv_basic_block *);
+struct nv_basic_block *nvbb_dom_frontier(struct nv_basic_block *);
int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
struct nv_value *new_val);
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index 324f8bb2da..f2f8d0eaa3 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -771,6 +771,10 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
if (ik->src[4] || ir->src[4])
continue; /* don't mess with address registers */
+ if (ik->flags_src || ir->flags_src ||
+ ik->flags_def || ir->flags_def)
+ continue; /* and also not with flags, for now */
+
for (s = 0; s < 3; ++s) {
struct nv_value *a, *b;
diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
index 941ec9f6f8..172e44f62b 100644
--- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c
+++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
@@ -43,25 +43,6 @@ struct nv_pc_pass {
uint pass_seq;
};
-/* check if bf (future) can be reached from bp (past) */
-static boolean
-bb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
- struct nv_basic_block *bt)
-{
- if (bf == bp)
- return TRUE;
- if (bp == bt)
- return FALSE;
-
- if (bp->out[0] && bp->out[0] != bp &&
- bb_reachable_by(bf, bp->out[0], bt))
- return TRUE;
- if (bp->out[1] && bp->out[1] != bp &&
- bb_reachable_by(bf, bp->out[1], bt))
- return TRUE;
- return FALSE;
-}
-
static void
ranges_coalesce(struct nv_range *range)
{
@@ -377,32 +358,13 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
do_join_values(ctx, a, b);
}
-/* For phi functions with sources from blocks that are not direct predecessors,
- * if such a source is to be used in an earlier predecessor, we need to add an
- * additional phi function. Used when inserting the MOVs below.
- */
-static struct nv_value *
-propagate_phi(struct nv_pc *pc, struct nv_instruction *phi, int s)
-{
- struct nv_basic_block *b = pc->current_block;
- struct nv_value *val = phi->src[s]->value;
- struct nv_instruction *nvi = new_instruction(pc, NV_OP_PHI);
- int i, k;
-
- (nvi->def[0] = new_value(pc, val->reg.file, val->reg.type))->insn = nvi;
-
- for (k = 0, i = 0; i < 4 && phi->src[i]; ++i) {
- if (bb_reachable_by(b, phi->src[i]->value->insn->bb, b))
- nvi->src[k++] = new_ref(pc, phi->src[i]->value);
- }
- return nvi->def[0];
-}
-
-/* For IF blocks without ELSE blocks, insert an empty block for the MOVs.
- * Insert additional PHIs for cases where a direct MOV wouldn't be valid.
+/* For each operand of each PHI in b, generate a new value by inserting a MOV
+ * at the end of the block it is coming from and replace the operand with its
+ * result. This eliminates liveness conflicts and enables us to let values be
+ * copied to the right register if such a conflict exists nonetheless.
*/
static int
-pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *i, *ni;
struct nv_value *val;
@@ -426,31 +388,36 @@ pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)
if (p->exit->target == b) /* target to new else-block */
p->exit->target = pn;
- for (j = 0; j < b->num_in; ++j) {
- if (b->in[j] == p) {
- b->in[j] = pn;
- break;
- }
- }
+ b->in[n] = pn;
+
pn->out[0] = b;
pn->in[0] = p;
pn->num_in = 1;
}
-
ctx->pc->current_block = pn;
for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
for (j = 0; j < 4 && i->src[j]; ++j) {
- if (bb_reachable_by(pn, i->src[j]->value->insn->bb, b))
+ if (nvbb_reachable_by(p, i->src[j]->value->insn->bb, b))
break;
}
if (j >= 4 || !i->src[j])
continue;
val = i->src[j]->value;
- if (!nvbb_dominated_by(pn, val->insn->bb))
- nv_reference(ctx->pc, &i->src[j], propagate_phi(ctx->pc, i, j));
+ ni = new_instruction(ctx->pc, NV_OP_MOV);
+
+ /* TODO: insert instruction at correct position in the first place */
+ if (ni->prev && ni->prev->target)
+ nv_nvi_permute(ni->prev, ni);
+
+ ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
+ ni->def[0]->insn = ni;
+ ni->src[0] = new_ref(ctx->pc, val);
+
+ nv_reference(ctx->pc, &i->src[j], ni->def[0]);
}
+
if (pn != p && pn->exit) {
ctx->pc->current_block = b->in[n ? 0 : 1];
ni = new_instruction(ctx->pc, NV_OP_BRA);
@@ -461,70 +428,11 @@ pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)
for (j = 0; j < 2; ++j)
if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
- pass_generate_phi_movs_1(ctx, b->out[j]);
+ pass_generate_phi_movs(ctx, b->out[j]);
return 0;
}
-/* Now everything should be in order and we can insert the MOVs. */
-static int
-pass_generate_phi_movs_2(struct nv_pc_pass *ctx, struct nv_basic_block *b)
-{
- struct nv_instruction *i, *mov;
- struct nv_value *val;
- struct nv_basic_block *p;
- int n, j;
-
- b->pass_seq = ctx->pc->pass_seq;
-
- for (n = 0; n < b->num_in; ++n) {
- ctx->pc->current_block = p = b->in[n];
-
- for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
- for (j = 0; j < 4 && i->src[j]; ++j) {
- if (bb_reachable_by(p, i->src[j]->value->insn->bb, b))
- break;
- }
- if (j >= 4 || !i->src[j])
- continue;
- val = i->src[j]->value;
-
- mov = new_instruction(ctx->pc, NV_OP_MOV);
-
- /* TODO: insert instruction at correct position in the first place */
- if (mov->prev && mov->prev->target)
- nv_nvi_permute(mov->prev, mov);
-
- mov->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
- mov->def[0]->insn = mov;
- mov->src[0] = new_ref(ctx->pc, val);
-
- nv_reference(ctx->pc, &i->src[j], mov->def[0]);
- }
- }
-
- for (j = 1; j >= 0; --j) /* different order for the sake of diversity */
- if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
- pass_generate_phi_movs_2(ctx, b->out[j]);
-
- return 0;
-}
-
-/* For each operand of each PHI in b, generate a new value by inserting a MOV
- * at the end of the block it is coming from and replace the operand with its
- * result. This eliminates liveness conflicts and enables us to let values be
- * copied to the right register if such a conflict exists nonetheless.
- */
-static INLINE int
-pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
-{
- if (pass_generate_phi_movs_1(ctx, b))
- return 1;
-
- ++ctx->pc->pass_seq;
- return pass_generate_phi_movs_2(ctx, b);
-}
-
static int
pass_join_values(struct nv_pc_pass *ctx, int iter)
{
@@ -688,7 +596,7 @@ pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
break;
assert(i->src[j]->value->insn);
- if (bb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) {
+ if (nvbb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) {
live_set_add(b, i->src[j]->value);
debug_printf("BB:%i liveset + %i\n", b->id, i->src[j]->value->n);
} else {
@@ -774,7 +682,7 @@ pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
if (!i->src[s])
break;
assert(i->src[s]->value->insn);
- if (bb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
+ if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
live_set_add(b, i->src[s]->value);
else
live_set_rem(b, i->src[s]->value);
@@ -978,7 +886,7 @@ nv_pc_exec_pass1(struct nv_pc *pc)
nv_print_program(ctx->pc->root);
- ctx->insns = CALLOC(pc->num_instructions, sizeof(struct nv_instruction *));
+ ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
pc->pass_seq++;
ret = pass_generate_phi_movs(ctx, pc->root);
diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
index 8846ef08b5..6a9259c898 100644
--- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
+++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
@@ -51,16 +51,22 @@ struct bld_value_stack {
};
static INLINE void
-bld_push_value(struct bld_value_stack *stk)
+bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)
{
- assert(!stk->size || (stk->body[stk->size - 1] != stk->top));
+ assert(!stk->size || (stk->body[stk->size - 1] != val));
if (!(stk->size % 8)) {
unsigned old_sz = (stk->size + 0) * sizeof(struct nv_value *);
unsigned new_sz = (stk->size + 8) * sizeof(struct nv_value *);
stk->body = (struct nv_value **)REALLOC(stk->body, old_sz, new_sz);
}
- stk->body[stk->size++] = stk->top;
+ stk->body[stk->size++] = val;
+}
+
+static INLINE void
+bld_vals_push(struct bld_value_stack *stk)
+{
+ bld_vals_push_val(stk, stk->top);
stk->top = NULL;
}
@@ -72,7 +78,7 @@ bld_push_values(struct bld_value_stack *stacks, int n)
for (i = 0; i < n; ++i)
for (c = 0; c < 4; ++c)
if (stacks[i * 4 + c].top)
- bld_push_value(&stacks[i * 4 + c]);
+ bld_vals_push(&stacks[i * 4 + c]);
}
#define FETCH_TEMP(i, c) (bld->tvs[i][c].top)
@@ -121,6 +127,17 @@ struct bld_context {
uint num_immds;
};
+static INLINE void
+bld_warn_uninitialized(struct bld_context *bld, int kind,
+ struct bld_value_stack *stk, struct nv_basic_block *b)
+{
+ long i = (stk - &bld->tvs[0][0]) / 4;
+ long c = (stk - &bld->tvs[0][0]) & 3;
+
+ debug_printf("WARNING: TEMP[%li].%li %s used uninitialized in BB:%i\n",
+ i, c, kind ? "may be" : "is", b->id);
+}
+
static INLINE struct nv_value *
bld_def(struct nv_instruction *i, int c, struct nv_value *value)
{
@@ -168,42 +185,91 @@ fetch_by_bb(struct bld_value_stack *stack,
fetch_by_bb(stack, vals, n, b->in[i]);
}
+static INLINE struct nv_value *
+bld_load_imm_u32(struct bld_context *bld, uint32_t u);
+
static struct nv_value *
-bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
+bld_phi(struct bld_context *bld, struct nv_basic_block *b,
+ struct bld_value_stack *stack)
{
- struct nv_value *vals[16], *phi = NULL;
- int j, i = 0, n = 0;
+ struct nv_basic_block *in;
+ struct nv_value *vals[16], *val;
+ struct nv_instruction *phi;
+ int i, j, n;
+
+ do {
+ i = n = 0;
+ fetch_by_bb(stack, vals, &n, b);
+
+ if (!n) {
+ bld_warn_uninitialized(bld, 0, stack, b);
+ return NULL;
+ }
- fetch_by_bb(stack, vals, &n, bld->pc->current_block);
+ if (n == 1) {
+ if (nvbb_dominated_by(b, vals[0]->insn->bb))
+ break;
- if (n == 0)
- return NULL;
- if (n == 1)
- return vals[0];
+ bld_warn_uninitialized(bld, 1, stack, b);
+
+ /* back-tracking to insert missing value of other path */
+ in = b;
+ while (in->in[0]) {
+ if (in->num_in == 1) {
+ in = in->in[0];
+ } else {
+ if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b)) {
+ in = in->in[0];
+ break;
+ }
+ if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b)) {
+ in = in->in[1];
+ break;
+ }
+ in = in->in[0];
+ }
+ }
+ bld->pc->current_block = in;
+
+ /* should make this a no-op */
+ bld_vals_push_val(stack, bld_load_imm_u32(bld, 0));
+ continue;
+ }
- debug_printf("phi required: %i candidates\n", n);
+ for (i = 0; i < n; ++i) {
+ if (nvbb_dominated_by(b, vals[i]->insn->bb))
+ continue;
- while (i < n) {
- struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_PHI);
+ for (j = 0; j < b->num_in; ++j)
+ if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb))
+ break;
+ if (j == b->num_in) {
+ in = nvbb_dom_frontier(vals[i]->insn->bb);
+ val = bld_phi(bld, in, stack);
+ bld_vals_push_val(stack, val);
+ break;
+ }
+ }
+ } while(i < n);
- j = phi ? 1 : 0;
- if (phi)
- insn->src[0] = new_ref(bld->pc, phi);
+ bld->pc->current_block = b;
- phi = new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type);
+ if (n == 1)
+ return vals[0];
- bld_def(insn, 0, phi);
+ phi = new_instruction(bld->pc, NV_OP_PHI);
- for (; j < 4; ++j) {
- insn->src[j] = new_ref(bld->pc, vals[i++]);
- if (i == n)
- break;
- }
- debug_printf("new phi: %i, %i in\n", phi->n, j);
- }
+ bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type));
+ for (i = 0; i < n; ++i)
+ phi->src[i] = new_ref(bld->pc, vals[i]);
- /* insert_at_head(list, phi) is done at end of block */
- return phi;
+ return phi->def[0];
+}
+
+static INLINE struct nv_value *
+bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
+{
+ return bld_phi(bld, bld->pc->current_block, stack);
}
static INLINE struct nv_value *
@@ -640,6 +706,9 @@ bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
for (i = 0; i < 4; ++i)
bld->saved_addr[i][0] = NULL;
+
+ for (i = 0; i < 128; ++i)
+ bld->saved_inputs[i] = NULL;
}
static struct nv_value *