summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/nv50/nv50_pc_regalloc.c
diff options
context:
space:
mode:
authorChristoph Bumiller <e0425955@student.tuwien.ac.at>2010-07-23 21:21:25 +0200
committerChristoph Bumiller <e0425955@student.tuwien.ac.at>2010-07-23 21:35:00 +0200
commit633f5ac6124b1b57152c09becba92d176e905ae9 (patch)
treec3c48494660dd514a171c1efdd989462c0efff4c /src/gallium/drivers/nv50/nv50_pc_regalloc.c
parentc65f4fd5ae2ba4ac36d9bd86cdc492df0f1da1b3 (diff)
nv50: import new compiler
Diffstat (limited to 'src/gallium/drivers/nv50/nv50_pc_regalloc.c')
-rw-r--r--src/gallium/drivers/nv50/nv50_pc_regalloc.c973
1 files changed, 973 insertions, 0 deletions
diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
new file mode 100644
index 0000000000..eb446d641a
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
@@ -0,0 +1,973 @@
+/*
+ * XXX: phi function live intervals start at first ordinary instruction,
+ * add_range should be taking care of that already ...
+ *
+ * XXX: TEX must choose TEX's def as representative
+ *
+ * XXX: Aieee! Must materialize MOVs if source is in other basic block!
+ * -- absolutely, or we cannot execute the MOV conditionally at all
+ * XXX: Aieee! Must include PHIs in LVA so we pull through liveness if
+ * PHI source is e.g. in dominator block.
+ * -- seems we lose liveness somehow, track that
+ */
+
+#include "nv50_context.h"
+#include "nv50_pc.h"
+
+#include "util/u_simple_list.h"
+
+#define NUM_REGISTER_FILES 4
+
+/* Tracks register occupancy during allocation.
+ * last[f] is the highest usable register index in file f;
+ * bits[f] is a bitmask over that file's registers (8 * 32 = 256 max).
+ */
+struct register_set {
+   struct nv_pc *pc;
+
+   uint32_t last[NUM_REGISTER_FILES];
+   uint32_t bits[NUM_REGISTER_FILES][8];
+};
+
+/* Context for a single register allocation pass over a program. */
+struct nv_pc_pass {
+   struct nv_pc *pc;
+
+   /* all instructions in topological order; filled by pass_order_instructions */
+   struct nv_instruction **insns;
+   int num_insns;
+
+   uint pass_seq; /* NOTE(review): seems unused; blocks use pc->pass_seq instead */
+};
+
+/* Merge range with any following ranges that it now touches or overlaps. */
+static void
+ranges_coalesce(struct nv_range *range)
+{
+   struct nv_range *next = range->next;
+
+   while (next && range->end >= next->bgn) {
+      struct nv_range *after = next->next;
+      assert(range->bgn <= next->bgn);
+      if (next->end > range->end)
+         range->end = next->end;
+      FREE(next);
+      range->next = next = after;
+   }
+}
+
+/* Insert the interval [bgn, end) into val's sorted live range list,
+ * merging it with any existing range it overlaps.
+ * If new_range is non-NULL it is used for the insertion instead of
+ * allocating a fresh one.  Returns TRUE if the interval was merged into
+ * an existing range (so a caller-supplied new_range was not consumed
+ * and may be freed), FALSE if a new range node was linked in.
+ */
+static boolean
+add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
+{
+   struct nv_range *range, **nextp = &val->livei;
+
+   for (range = val->livei; range; range = range->next) {
+      if (end < range->bgn)
+         break; /* insert before */
+
+      if (bgn > range->end) {
+         nextp = &range->next;
+         continue; /* insert after */
+      }
+
+      /* overlap */
+      if (bgn < range->bgn) {
+         range->bgn = bgn;
+         if (end > range->end)
+            range->end = end;
+         ranges_coalesce(range);
+         return TRUE;
+      }
+      if (end > range->end) {
+         range->end = end;
+         ranges_coalesce(range);
+         return TRUE;
+      }
+      /* [bgn, end) lies entirely inside this range, nothing to do */
+      assert(bgn >= range->bgn);
+      assert(end <= range->end);
+      return TRUE;
+   }
+
+   if (!new_range)
+      new_range = CALLOC_STRUCT(nv_range);
+
+   new_range->bgn = bgn;
+   new_range->end = end;
+   new_range->next = range;
+   *(nextp) = new_range;
+   return FALSE;
+}
+
+/* Extend val's live interval inside block b up to (exclusive) position end.
+ * The interval starts at the value's defining instruction if that lies in
+ * b, otherwise at b's entry (the value is live-in).
+ */
+static void
+add_range(struct nv_value *val, struct nv_basic_block *b, int end)
+{
+   int bgn;
+
+   if (!val->insn) /* ignore non-def values */
+      return;
+   assert(b->entry->serial <= b->exit->serial);
+   assert(b->phi->serial <= end);
+   assert(b->exit->serial + 1 >= end);
+
+   bgn = val->insn->serial;
+   if (bgn < b->entry->serial || bgn > b->exit->serial)
+      bgn = b->entry->serial; /* def is outside b: live from block start */
+   // debug_printf("add_range(value %i): [%i, %i)\n", val->n, bgn, end);
+
+   if (bgn > end) {
+      debug_printf("Aieee! BLOCK [%i, %i], RANGE [%i, %i)\n",
+                   b->entry->serial, b->exit->serial, bgn, end);
+   }
+   assert(bgn <= end);
+
+   if (bgn < val->insn->serial)
+      debug_printf("WARNING: leaking value %i ?\n", val->n);
+
+   add_range_ex(val, bgn, end, NULL);
+}
+
+#ifdef NV50_RA_DEBUG_JOIN
+/* Dump a value's live interval list for debugging. */
+static void
+livei_print(struct nv_value *a)
+{
+   struct nv_range *range;
+
+   debug_printf("livei %i: ", a->n);
+   for (range = a->livei; range; range = range->next)
+      debug_printf("[%i, %i) ", range->bgn, range->end);
+   debug_printf("\n");
+}
+#endif
+
+/* Merge src's live interval into dst's, reusing src's range nodes where
+ * possible (add_range_ex returns TRUE when a node was not consumed and
+ * must be freed here).  src is left with an empty interval.
+ */
+static void
+livei_unify(struct nv_value *dst, struct nv_value *src)
+{
+   struct nv_range *range, *next;
+
+   for (range = src->livei; range; range = next) {
+      next = range->next;
+      if (add_range_ex(dst, range->bgn, range->end, range))
+         FREE(range);
+   }
+   src->livei = NULL;
+}
+
+/* Free all range nodes of val's live interval.
+ * Also clear val->livei so the value is not left holding a dangling
+ * pointer to freed memory (mirrors livei_unify, which NULLs src->livei).
+ */
+static void
+livei_release(struct nv_value *val)
+{
+   struct nv_range *range, *next;
+
+   for (range = val->livei; range; range = next) {
+      next = range->next;
+      FREE(range);
+   }
+   val->livei = NULL;
+}
+
+/* Return TRUE iff the live intervals of a and b intersect anywhere. */
+static boolean
+livei_have_overlap(struct nv_value *a, struct nv_value *b)
+{
+   struct nv_range *ra, *rb;
+
+   for (ra = a->livei; ra; ra = ra->next)
+      for (rb = b->livei; rb; rb = rb->next)
+         if (rb->bgn < ra->end && rb->end > ra->bgn)
+            return TRUE;
+
+   return FALSE;
+}
+
+/* Return the end position of the last range of a's live interval.
+ * The interval must be non-empty.
+ */
+static int
+livei_end(struct nv_value *a)
+{
+   struct nv_range *last;
+
+   assert(a->livei);
+   for (last = a->livei; last->next; last = last->next);
+   return last->end;
+}
+
+/* Return TRUE iff position pos lies within a's live interval. */
+static boolean
+livei_contains(struct nv_value *a, int pos)
+{
+   struct nv_range *range = a->livei;
+
+   /* ranges are sorted, so stop once they all begin past pos */
+   while (range && range->bgn <= pos) {
+      if (pos < range->end)
+         return TRUE;
+      range = range->next;
+   }
+   return FALSE;
+}
+
+/* Find a free, aligned run of registers large enough for the n values in
+ * def (n > 1 for vector ops, e.g. the 4 TEX defs) and assign ids to them.
+ * s is the required width in minimal register units (type order scales a
+ * single value to multiple units), m the corresponding bitmask.
+ * Returns FALSE if no suitable run is free.
+ */
+static boolean
+reg_assign(struct register_set *set, struct nv_value **def, int n)
+{
+   int i, id, s;
+   uint m;
+   int f = def[0]->reg.file;
+
+   s = n << (nv_type_order(def[0]->reg.type) - 1);
+   m = (1 << s) - 1;
+
+   id = set->last[f];
+
+   for (i = 0; i * 32 < set->last[f]; ++i) {
+      if (set->bits[f][i] == 0xffffffff)
+         continue;
+
+      /* scan this word in s-aligned steps for a free slot */
+      for (id = 0; id < 32; id += s)
+         if (!(set->bits[f][i] & (m << id)))
+            break;
+      if (id < 32)
+         break;
+   }
+   if (i * 32 + id > set->last[f])
+      return FALSE;
+
+   set->bits[f][i] |= m << id;
+
+   id += i * 32;
+
+   set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
+
+   /* convert from unit index back to a value-sized register id */
+   id >>= nv_type_order(def[0]->reg.type) - 1;
+
+   for (i = 0; i < n; ++i)
+      if (def[i]->livei)
+         def[i]->reg.id = id++;
+
+   return TRUE;
+}
+
+/* Mark the register units backing val as occupied in the register set. */
+static INLINE void
+reg_occupy(struct register_set *set, struct nv_value *val)
+{
+   int s, id = val->reg.id, f = val->reg.file;
+   uint m;
+
+   if (id < 0) /* value has no register assigned */
+      return;
+   s = nv_type_order(val->reg.type) - 1;
+   id <<= s; /* value-sized id -> minimal register unit index */
+   m = (1 << (1 << s)) - 1;
+
+   set->bits[f][id / 32] |= m << (id % 32);
+
+   /* NOTE(review): reg_assign records id + width - 1 as the max, this
+    * records only the first unit of a wide value — confirm intended.
+    */
+   if (set->pc->max_reg[f] < id)
+      set->pc->max_reg[f] = id;
+}
+
+/* Mark the register units backing val as free again. */
+static INLINE void
+reg_release(struct register_set *set, struct nv_value *val)
+{
+   int id = val->reg.id;
+   int f = val->reg.file;
+   int order;
+   uint mask;
+
+   if (id < 0) /* nothing assigned, nothing to release */
+      return;
+
+   order = nv_type_order(val->reg.type) - 1;
+   id <<= order;
+   mask = (1 << (1 << order)) - 1;
+
+   set->bits[f][id / 32] &= ~(mask << (id % 32));
+}
+
+/* Check whether values a and b may be coalesced into one register.
+ *
+ * They must be in the same register file with equal storage size.
+ * If exactly one of their representatives already has a register, the
+ * join is allowed only if no other value coalesced onto that register
+ * overlaps the unassigned one's live interval.
+ */
+static INLINE boolean
+join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   int i;
+   struct nv_value *val;
+
+   if (a->reg.file != b->reg.file ||
+       nv_type_sizeof(a->reg.type) != nv_type_sizeof(b->reg.type))
+      return FALSE;
+
+   if (a->join->reg.id == b->join->reg.id)
+      return TRUE;
+
+   /* either a or b or both have been assigned */
+
+   if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
+      return FALSE;
+
+   if (b->join->reg.id >= 0) {
+      /* swap so that a is the assigned one (the earlier check already
+       * guarantees a is unassigned here)
+       */
+      val = a;
+      a = b;
+      b = val;
+   }
+
+   for (i = 0; i < ctx->pc->num_values; ++i) {
+      val = &ctx->pc->values[i];
+
+      if (val->join->reg.id != a->join->reg.id)
+         continue;
+      if (val->join != a->join && livei_have_overlap(val->join, b->join))
+         return FALSE;
+   }
+   return TRUE;
+}
+
+/* Coalesce b into a: merge live intervals, propagate an already assigned
+ * register id, and repoint every value that had b's representative so
+ * that a->join becomes the representative of the whole equivalence class.
+ */
+static INLINE void
+do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   int j;
+   struct nv_value *bjoin = b->join;
+
+   if (b->join->reg.id >= 0)
+      a->join->reg.id = b->join->reg.id;
+
+   livei_unify(a->join, b->join);
+
+#ifdef NV50_RA_DEBUG_JOIN
+   debug_printf("joining %i to %i\n", b->n, a->n);
+#endif
+
+   /* make a->join the new representative */
+   for (j = 0; j < ctx->pc->num_values; ++j)
+      if (ctx->pc->values[j].join == bjoin)
+         ctx->pc->values[j].join = a->join;
+
+   assert(b->join == a->join);
+}
+
+/* Coalesce b into a, but only if the join is legal and their live
+ * intervals are disjoint; otherwise leave both values untouched.
+ */
+static INLINE void
+try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   if (!join_allowed(ctx, a, b)) {
+#ifdef NV50_RA_DEBUG_JOIN
+      debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
+#endif
+      return;
+   }
+
+   if (livei_have_overlap(a->join, b->join)) {
+#ifdef NV50_RA_DEBUG_JOIN
+      debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
+      livei_print(a);
+      livei_print(b);
+#endif
+      return;
+   }
+
+   do_join_values(ctx, a, b);
+}
+
+/* For each operand of each phi in b, generate a new value by inserting a MOV
+ * at the end of the block it is coming from and replace the operand with it.
+ * This eliminates liveness conflicts.
+ *
+ * Critical edges (a predecessor with two successors feeding a block with
+ * several predecessors) are split by inserting a fresh block so the MOV
+ * executes only on the intended path.  Recurses over the CFG, using
+ * pass_seq to visit each block once.
+ */
+static int
+pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *i, *i2;
+   struct nv_basic_block *p, *pn;
+   struct nv_value *val;
+   int n, j;
+
+   b->pass_seq = ctx->pc->pass_seq;
+
+   for (n = 0; n < b->num_in; ++n) {
+      p = b->in[n];
+      assert(p);
+
+      if (b->num_in > 1 && p->out[0] && p->out[1]) { /* if without else */
+         /* split the critical edge p -> b with a new block pn */
+         pn = new_basic_block(ctx->pc);
+
+         if (p->out[0] == b)
+            p->out[0] = pn;
+         else
+            p->out[1] = pn;
+
+         if (p->exit->target == b) /* target to new else-block */
+            p->exit->target = pn;
+
+         for (j = 0; j < b->num_in; ++j) {
+            if (b->in[j] == p) {
+               b->in[j] = pn;
+               break;
+            }
+         }
+
+         pn->out[0] = b;
+         pn->in[0] = p;
+         pn->num_in = 1;
+      } else
+         pn = p;
+
+      ctx->pc->current_block = pn;
+
+      /* every block with PHIs will also have other operations */
+      for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+         /* find the phi operand coming in through p */
+         for (j = 0; j < 4; ++j) {
+            if (!i->src[j])
+               j = 3;
+            else
+            if (i->src[j]->value->insn->bb == p)
+               break;
+         }
+         if (j >= 4)
+            continue;
+         assert(i->src[j]);
+         val = i->src[j]->value;
+
+         /* XXX: should probably not insert this after terminator */
+         i2 = new_instruction(ctx->pc, NV_OP_MOV);
+
+         i2->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
+         i2->src[0] = new_ref (ctx->pc, val);
+         i2->def[0]->insn = i2;
+
+         nv_reference(ctx->pc, &i->src[j], i2->def[0]);
+      }
+      if (pn != p && pn->exit) {
+         /* XXX: this branch should probably be eliminated */
+         ctx->pc->current_block = b->in[n ? 0 : 1];
+         i2 = new_instruction(ctx->pc, NV_OP_BRA);
+         i2->target = b;
+         i2->is_terminator = 1;
+      }
+   }
+
+   if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq) {
+      pass_generate_phi_movs(ctx, b->out[0]);
+   }
+
+   if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq) {
+      pass_generate_phi_movs(ctx, b->out[1]);
+   }
+
+   return 0;
+}
+
+/* Coalesce values that should share a register.
+ * iter == 0: mandatory joins only (vector ops like TEX, whose defs must
+ * occupy consecutive registers).  iter == 1: optional joins (PHI and MOV
+ * sources, SELECT) that merely eliminate copies.
+ */
+static int
+pass_join_values(struct nv_pc_pass *ctx, int iter)
+{
+   int c, n;
+
+   for (n = 0; n < ctx->num_insns; ++n) {
+      struct nv_instruction *i = ctx->insns[n];
+
+      switch (i->opcode) {
+      case NV_OP_PHI:
+         if (!iter)
+            continue;
+         try_join_values(ctx, i->src[0]->value, i->src[1]->value);
+         try_join_values(ctx, i->def[0], i->src[0]->value);
+         break;
+      case NV_OP_MOV:
+         /* don't pull a vector op's def away from its siblings;
+          * NOTE(review): src[0]->value->join->insn assumed non-NULL here
+          */
+         if (iter && i->src[0]->value->insn &&
+             !nv_is_vector_op(i->src[0]->value->join->insn->opcode))
+            try_join_values(ctx, i->def[0], i->src[0]->value);
+         break;
+      case NV_OP_SELECT:
+         if (!iter)
+            break;
+         /* both sources must share the def's register unconditionally */
+         assert(join_allowed(ctx, i->def[0], i->src[0]->value));
+         assert(join_allowed(ctx, i->def[0], i->src[1]->value));
+         do_join_values(ctx, i->def[0], i->src[0]->value);
+         do_join_values(ctx, i->def[0], i->src[1]->value);
+         break;
+      case NV_OP_TEX:
+      case NV_OP_TXB:
+      case NV_OP_TXL:
+      case NV_OP_TXQ:
+         if (iter)
+            break;
+         /* sources and defs of a TEX share consecutive registers */
+         for (c = 0; c < 4; ++c) {
+            if (!i->src[c])
+               break;
+            do_join_values(ctx, i->def[c], i->src[c]->value);
+         }
+         break;
+      default:
+         break;
+      }
+   }
+   return 0;
+}
+
+/* Assign a serial number to every instruction and record them in
+ * ctx->insns, visiting blocks in an order where a single-predecessor
+ * chain is only entered once all its predecessors have been numbered
+ * (tracked via b->priv counting visited incoming edges).
+ */
+static int
+pass_order_instructions(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *i;
+
+   b->priv = 0;
+
+   assert(!b->exit || !b->exit->next);
+   for (i = b->phi; i; i = i->next) {
+      i->serial = ctx->num_insns;
+      ctx->insns[ctx->num_insns++] = i;
+   }
+
+   b->pass_seq = ctx->pc->pass_seq;
+
+   if (!b->out[0])
+      return 0;
+   /* defer a merge block until its last predecessor reaches it */
+   if (!b->out[1] && ++(b->out[0]->priv) != b->out[0]->num_in)
+      return 0;
+
+   if (b->out[0] != b)
+      pass_order_instructions(ctx, b->out[0]);
+   if (b->out[1] && b->out[1] != b)
+      pass_order_instructions(ctx, b->out[1]);
+
+   return 0;
+}
+
+/* Print the values live at the top of block b (debug builds only). */
+static void
+bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
+{
+#ifdef NV50_RA_DEBUG_LIVE_SETS
+   struct nv_value *val;
+   int n;
+
+   debug_printf("live_set of %p: ", b);
+
+   for (n = 0; n < pc->num_values; ++n) {
+      if (!(b->live_set[n / 32] & (1 << (n % 32))))
+         continue;
+      val = &pc->values[n];
+      if (val->insn)
+         debug_printf("%i ", val->n);
+   }
+   debug_printf("\n");
+#endif
+}
+
+/* Add val to block b's live set; values without a defining instruction
+ * are not tracked.
+ */
+static INLINE void
+live_set_add(struct nv_basic_block *b, struct nv_value *val)
+{
+   if (val->insn)
+      b->live_set[val->n / 32] |= 1 << (val->n % 32);
+}
+
+/* Remove val from block b's live set. */
+static INLINE void
+live_set_rem(struct nv_basic_block *b, struct nv_value *val)
+{
+   const uint32_t bit = 1 << (val->n % 32);
+
+   b->live_set[val->n / 32] &= ~bit;
+}
+
+/* Return non-zero iff the value referenced by ref is in b's live set. */
+static INLINE boolean
+live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
+{
+   const int n = ref->value->n;
+
+   return b->live_set[n / 32] & (1 << (n % 32));
+}
+
+/* check if bf (future) can be reached from bp (past), with the search
+ * cut off at block bt.
+ * NOTE(review): plain DFS without visited marks — self-loops are skipped
+ * but longer CFG cycles that avoid bt could recurse without bound;
+ * confirm callers only use this on acyclic regions.
+ */
+static boolean
+bb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
+                struct nv_basic_block *bt)
+{
+   if (bf == bp)
+      return TRUE;
+   if (bp == bt)
+      return FALSE;
+
+   if (bp->out[0] && bp->out[0] != bp &&
+       bb_reachable_by(bf, bp->out[0], bt))
+      return TRUE;
+   if (bp->out[1] && bp->out[1] != bp &&
+       bb_reachable_by(bf, bp->out[1], bt))
+      return TRUE;
+   return FALSE;
+}
+
+/* The live set of a block contains those values that are live immediately
+ * before the beginning of the block.
+ */
+static int
+pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *i;
+ int j, n, ret = 0;
+
+ /* slight hack for undecidedness: set phi = entry if it's undefined */
+ if (!b->phi)
+ b->phi = b->entry;
+
+ for (n = 0; n < 2; ++n) {
+ if (!b->out[n] || b->out[n] == b)
+ continue;
+ ret = pass_build_live_sets(ctx, b->out[n]);
+ if (ret)
+ return ret;
+
+ if (n == 0) {
+ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+ b->live_set[j] = b->out[n]->live_set[j];
+ } else {
+ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+ b->live_set[j] |= b->out[n]->live_set[j];
+ }
+
+ /* Kick values out of our live set that are created in incoming
+ * blocks of our successors that are not us.
+ */
+ for (i = b->out[n]->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+ for (j = 0; j < 4; ++j) {
+ if (!i->src[j])
+ break;
+ assert(i->src[j]->value->insn);
+
+ if (bb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) {
+ live_set_add(b, i->src[j]->value);
+ debug_printf("%p: live set + %i\n", b, i->src[j]->value->n);
+ } else {
+ live_set_rem(b, i->src[j]->value);
+ debug_printf("%p: live set - %i\n", b, i->src[j]->value->n);
+ }
+ }
+ }
+ }
+
+ if (b->pass_seq >= ctx->pc->pass_seq)
+ return 0;
+ b->pass_seq = ctx->pc->pass_seq;
+
+ debug_printf("%s: visiting block %p\n", __FUNCTION__, b);
+
+ if (!b->entry)
+ return 0;
+ bb_live_set_print(ctx->pc, b);
+
+ for (i = b->exit; i; i = i->prev) {
+ for (j = 0; j < 4; j++) {
+ if (!i->def[j])
+ break;
+ live_set_rem(b, i->def[j]);
+ }
+ for (j = 0; j < 4; j++) {
+ if (!i->src[j])
+ break;
+ live_set_add(b, i->src[j]->value);
+ }
+ if (i->src[4])
+ live_set_add(b, i->src[4]->value);
+ if (i->flags_def)
+ live_set_rem(b, i->flags_def);
+ if (i->flags_src)
+ live_set_add(b, i->flags_src->value);
+ }
+ bb_live_set_print(ctx->pc, b);
+
+ return 0;
+}
+
+/* Initialize b's live set to the union of its successors' live sets.
+ * n is the number of 32-bit words per live set.
+ */
+static void collect_live_values(struct nv_basic_block *b, const int n)
+{
+   const size_t bytes = n * sizeof(uint32_t);
+   int i;
+
+   if (b->out[0] && b->out[1]) { /* what to do about back-edges ? */
+      for (i = 0; i < n; ++i)
+         b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
+   } else if (b->out[0]) {
+      memcpy(b->live_set, b->out[0]->live_set, bytes);
+   } else if (b->out[1]) {
+      memcpy(b->live_set, b->out[1]->live_set, bytes);
+   } else {
+      memset(b->live_set, 0, bytes);
+   }
+}
+
+/* NOTE: the live intervals of phi functions start at the first non-phi
+ * instruction.
+ *
+ * Build live intervals per block: live-outs get a range up to the block
+ * end, then a reverse scan over the instructions adds a range for each
+ * source at the point it is last used.  Recurses over successors,
+ * guarded by pass_seq.
+ */
+static int
+pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *i, *i_stop;
+   int j, s;
+   const int n = (ctx->pc->num_values + 31) / 32;
+
+#ifdef NV50_RA_DEBUG_LIVEI
+   debug_printf("building intervals for BB %i\n", b->id);
+#endif
+
+   /* verify that first block does not have live-in values */
+   if (b->num_in == 0)
+      for (j = 0; j < n; ++j)
+         assert(b->live_set[j] == 0);
+
+   collect_live_values(b, n);
+
+   /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
+   for (j = 0; j < 2; ++j) {
+      if (!b->out[j] || !b->out[j]->phi)
+         continue;
+      for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
+         live_set_rem(b, i->def[0]);
+
+         for (s = 0; s < 4; ++s) {
+            if (!i->src[s])
+               break;
+            assert(i->src[s]->value->insn);
+            /* only phi operands that can flow in through us stay live */
+            if (bb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
+               live_set_add(b, i->src[s]->value);
+            else
+               live_set_rem(b, i->src[s]->value);
+         }
+      }
+   }
+
+   /* remaining live-outs are live until the end */
+   for (j = 0; j < ctx->pc->num_values; ++j) {
+      if (!(b->live_set[j / 32] & (1 << (j % 32))))
+         continue;
+#ifdef NV50_RA_DEBUG_LIVEI
+      debug_printf("adding range for live value %i\n", j);
+#endif
+      add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
+   }
+
+   i_stop = b->entry ? b->entry->prev : NULL;
+
+   /* don't have to include phi functions here (will have 0 live range) */
+   for (i = b->exit; i != i_stop; i = i->prev) {
+      assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
+      for (j = 0; j < 4; ++j) {
+         if (i->def[j])
+            live_set_rem(b, i->def[j]);
+      }
+      if (i->flags_def)
+         live_set_rem(b, i->flags_def);
+
+      /* a source not yet live ends its life here (scanning backwards) */
+      for (j = 0; j < 5; ++j) {
+         if (i->src[j] && !live_set_test(b, i->src[j])) {
+            live_set_add(b, i->src[j]->value);
+#ifdef NV50_RA_DEBUG_LIVEI
+            debug_printf("adding range for source that ends living: %i\n",
+                         i->src[j]->value->n);
+#endif
+            add_range(i->src[j]->value, b, i->serial);
+         }
+      }
+      if (i->flags_src && !live_set_test(b, i->flags_src)) {
+         live_set_add(b, i->flags_src->value);
+#ifdef NV50_RA_DEBUG_LIVEI
+         debug_printf("adding range for source that ends living: %i\n",
+                      i->flags_src->value->n);
+#endif
+         add_range(i->flags_src->value, b, i->serial);
+      }
+   }
+
+   b->pass_seq = ctx->pc->pass_seq;
+
+   if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
+      pass_build_intervals(ctx, b->out[0]);
+
+   if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
+      pass_build_intervals(ctx, b->out[1]);
+
+#ifdef NV50_RA_DEBUG_LIVEI
+   debug_printf("built intervals for block %p\n", b);
+#endif
+
+   return 0;
+}
+
+/* Initialize a register set: all registers free, per-file upper limits set. */
+static INLINE void
+nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set)
+{
+   memset(set, 0, sizeof(*set));
+
+   set->pc = pc;
+
+   set->last[NV_FILE_GPR]   = 255;
+   set->last[NV_FILE_OUT]   = 127;
+   set->last[NV_FILE_FLAGS] = 4;
+   set->last[NV_FILE_ADDR]  = 4;
+}
+
+/* Insert nval into the circular list keeping it sorted by ascending live
+ * interval start; scan from the tail, since insertions tend to be near it.
+ */
+static void
+insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
+{
+   struct nv_value *elem;
+
+   for (elem = list->prev;
+        elem != list && elem->livei->bgn > nval->livei->bgn;
+        elem = elem->prev);
+   /* elem now begins no later than nval */
+
+   nval->prev = elem;
+   nval->next = elem->next;
+   elem->next->prev = nval;
+   elem->next = nval;
+}
+
+/* Linear scan register allocation over the live intervals, with
+ * active/inactive lists to handle lifetime holes:
+ *   unhandled - intervals not yet processed, sorted by start
+ *   active    - intervals containing the current position (regs occupied)
+ *   inactive  - intervals started but in a hole at the current position
+ *   handled   - intervals that have ended
+ */
+static int
+pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+{
+   struct nv_instruction *i;
+   struct register_set f, free;
+   int k, n;
+   struct nv_value *cur, *val, *tmp[2];
+   struct nv_value active, inactive, handled, unhandled;
+
+   make_empty_list(&active);
+   make_empty_list(&inactive);
+   make_empty_list(&handled);
+   make_empty_list(&unhandled);
+
+   nv50_ctor_register_set(ctx->pc, &free);
+
+   /* joined values should have range = NULL and thus not be added;
+    * also, fixed memory values won't be added because they're not
+    * def'd, just used
+    */
+   for (n = 0; n < ctx->num_insns; ++n) {
+      i = ctx->insns[n];
+
+      for (k = 0; k < 4; ++k) {
+         if (i->def[k] && i->def[k]->livei)
+            insert_ordered_tail(&unhandled, i->def[k]);
+         else
+         if (0 && i->def[k])
+            debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
+      }
+      if (i->flags_def && i->flags_def->livei)
+         insert_ordered_tail(&unhandled, i->flags_def);
+   }
+
+   /* sanity check: unhandled must be sorted by interval start */
+   for (val = unhandled.next; val != unhandled.prev; val = val->next) {
+      assert(val->join == val);
+      assert(val->livei->bgn <= val->next->livei->bgn);
+   }
+
+   foreach_s(cur, tmp[0], &unhandled) {
+      remove_from_list(cur);
+
+      /* debug_printf("handling value %i\n", cur->n); */
+
+      /* expire or suspend intervals relative to cur's start position */
+      foreach_s(val, tmp[1], &active) {
+         if (livei_end(val) <= cur->livei->bgn) {
+            reg_release(&free, val);
+            move_to_head(&handled, val);
+         } else
+         if (!livei_contains(val, cur->livei->bgn)) {
+            reg_release(&free, val);
+            move_to_head(&inactive, val);
+         }
+      }
+
+      /* resume inactive intervals that contain cur's start position */
+      foreach_s(val, tmp[1], &inactive) {
+         if (livei_end(val) <= cur->livei->bgn)
+            move_to_head(&handled, val);
+         else
+         if (livei_contains(val, cur->livei->bgn)) {
+            reg_occupy(&free, val);
+            move_to_head(&active, val);
+         }
+      }
+
+      /* build the set of registers actually unavailable to cur */
+      f = free;
+
+      foreach(val, &inactive)
+         if (livei_have_overlap(val, cur))
+            reg_occupy(&f, val);
+
+      foreach(val, &unhandled)
+         if (val->reg.id >= 0 && livei_have_overlap(val, cur))
+            reg_occupy(&f, val);
+
+      if (cur->reg.id < 0) {
+         boolean mem = FALSE;
+
+         /* vector ops get all 4 defs assigned at once; scalars only in
+          * the second iteration (iter != 0)
+          */
+         if (nv_is_vector_op(cur->insn->opcode))
+            mem = !reg_assign(&f, &cur->insn->def[0], 4);
+         else
+         if (iter)
+            mem = !reg_assign(&f, &cur, 1);
+
+         if (mem) {
+            /* no spilling implemented yet */
+            NOUVEAU_ERR("out of registers\n");
+            abort();
+         }
+      }
+      insert_at_head(&active, cur);
+      reg_occupy(&free, cur);
+   }
+
+   return 0;
+}
+
+/* Remove MOVs made redundant by coalescing.  Not implemented yet;
+ * currently a no-op placeholder that always reports success.
+ */
+static int
+pass_eliminate_moves(struct nv_pc_pass *ctx)
+{
+   return 0;
+}
+
+/* Register allocation entry point: split critical edges and insert phi
+ * MOVs, build live sets and intervals, coalesce values, then run linear
+ * scan twice (vector ops first, then scalars).
+ * Returns 0 on success, negative on failure.
+ */
+int
+nv_pc_exec_pass1(struct nv_pc *pc)
+{
+   struct nv_pc_pass *ctx;
+   int i, ret;
+
+   debug_printf("REGISTER ALLOCATION - entering\n");
+
+   ctx = CALLOC_STRUCT(nv_pc_pass);
+   if (!ctx)
+      return -1;
+   ctx->pc = pc;
+
+   nv_print_program(ctx->pc->root);
+
+   ctx->insns = CALLOC(pc->num_instructions, sizeof(struct nv_instruction *));
+   if (!ctx->insns) {
+      FREE(ctx);
+      return -1;
+   }
+
+   pc->pass_seq++;
+   ret = pass_generate_phi_movs(ctx, pc->root);
+   assert(!ret);
+
+   nv_print_program(ctx->pc->root);
+
+   /* iterate to propagate liveness through loop back-edges */
+   for (i = 0; i < pc->loop_nesting_bound; ++i) {
+      pc->pass_seq++;
+      ret = pass_build_live_sets(ctx, pc->root);
+      assert(!ret && "live sets");
+      if (ret) {
+         NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
+         goto out;
+      }
+   }
+
+   pc->pass_seq++;
+   ret = pass_order_instructions(ctx, pc->root);
+   assert(!ret && "order instructions");
+   if (ret)
+      goto out;
+
+   pc->pass_seq++;
+   ret = pass_build_intervals(ctx, pc->root);
+   assert(!ret && "build intervals");
+   if (ret) {
+      NOUVEAU_ERR("failed to build live intervals\n");
+      goto out;
+   }
+
+   /* iteration 0: mandatory joins + vector register assignment;
+    * iteration 1: optional joins + scalar register assignment
+    */
+   for (i = 0; i < 2; ++i) {
+      ret = pass_join_values(ctx, i);
+      if (ret)
+         goto out;
+      ret = pass_linear_scan(ctx, i);
+      if (ret)
+         goto out;
+   }
+   assert(!ret && "joining");
+
+   ret = pass_eliminate_moves(ctx);
+
+   for (i = 0; i < pc->num_values; ++i)
+      livei_release(&pc->values[i]);
+
+   debug_printf("REGISTER ALLOCATION - leaving\n");
+   nv_print_program(ctx->pc->root);
+
+out:
+   FREE(ctx->insns); /* was leaked: only ctx itself used to be freed */
+   FREE(ctx);
+   return ret;
+}