diff options
| author | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-07-31 17:52:54 +0200 | 
|---|---|---|
| committer | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-07-31 18:32:35 +0200 | 
| commit | fa67cabe7a9f1343e96c7c8a105e82dc05e3de44 (patch) | |
| tree | 303a429b3a0cf3e65852e4689a337ec28334637f | |
| parent | 5705b45b6a050f908120779e6049853931a8025a (diff) | |
nv50: fixes for nested IFs
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.c | 15 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.h | 1 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_optimize.c | 1 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_regalloc.c | 175 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_tgsi_to_nc.c | 4 | 
5 files changed, 127 insertions, 69 deletions
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index 0e8aadf5a9..614982db2d 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -464,3 +464,18 @@ void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *b)     b->in[b->num_in++] = parent;  } + +int +nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d) +{ +   int j, n; + +   if (b == d) +      return 1; + +   n = 0; +   for (j = 0; j < b->num_in; ++j) +      n += nvbb_dominated_by(b->in[j], d); + +   return n && (n == b->num_in); +} diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h index da3f984783..4b191c508a 100644 --- a/src/gallium/drivers/nv50/nv50_pc.h +++ b/src/gallium/drivers/nv50/nv50_pc.h @@ -425,6 +425,7 @@ int nv_nvi_refcount(struct nv_instruction *);  void nv_nvi_delete(struct nv_instruction *);  void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);  void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *); +int nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *);  int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,                         struct nv_value *new_val); diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 42f3a8634e..1f2f1630f4 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -815,6 +815,7 @@ nv_pc_exec_pass0(struct nv_pc *pc)     struct nv_pass_dce dce;     int ret; +   pass.n = 0;     pass.pc = pc;     pc->pass_seq++; diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c index 568384fd82..941ec9f6f8 100644 --- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c +++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c @@ -20,19 +20,6 @@   * SOFTWARE.   */ -/* - * XXX: phi function live intervals start at first ordinary instruction, - *      add_range should be taking care of that already ... - * - * XXX: TEX must choose TEX's def as representative - * - * XXX: Aieee! Must materialize MOVs if source is in other basic block! - *       -- absolutely, or we cannot execute the MOV conditionally at all - * XXX: Aieee! Must include PHIs in LVA so we pull through liveness if - *      PHI source is e.g. in dominator block. - *       -- seems we lose liveness somehow, track that - */ -  #include "nv50_context.h"  #include "nv50_pc.h" @@ -143,7 +130,6 @@ add_range(struct nv_value *val, struct nv_basic_block *b, int end)     bgn = val->insn->serial;     if (bgn < b->entry->serial || bgn > b->exit->serial)        bgn = b->entry->serial; -   // debug_printf("add_range(value %i): [%i, %i)\n", val->n, bgn, end);     if (bgn > end) {        debug_printf("Aieee! BLOCK [%i, %i], RANGE [%i, %i)\n", @@ -391,25 +377,45 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)     do_join_values(ctx, a, b);  } -/* For each operand of each phi in b, generate a new value by inserting a MOV - * at the end of the block it is coming from and replace the operand with it. - * This eliminates liveness conflicts. +/* For phi functions with sources from blocks that are not direct predecessors, + * if such a source is to be used in an earlier predecessor, we need to add an + * additional phi function. Used when inserting the MOVs below. + */ +static struct nv_value * +propagate_phi(struct nv_pc *pc, struct nv_instruction *phi, int s) +{ +   struct nv_basic_block *b = pc->current_block; +   struct nv_value *val = phi->src[s]->value; +   struct nv_instruction *nvi = new_instruction(pc, NV_OP_PHI); +   int i, k; + +   (nvi->def[0] = new_value(pc, val->reg.file, val->reg.type))->insn = nvi; + +   for (k = 0, i = 0; i < 4 && phi->src[i]; ++i) { +      if (bb_reachable_by(b, phi->src[i]->value->insn->bb, b)) +         nvi->src[k++] = new_ref(pc, phi->src[i]->value); +   } +   return nvi->def[0]; +} + +/* For IF blocks without ELSE blocks, insert an empty block for the MOVs. + * Insert additional PHIs for cases where a direct MOV wouldn't be valid.   */  static int -pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) +pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)  { -   struct nv_instruction *i, *i2; -   struct nv_basic_block *p, *pn; +   struct nv_instruction *i, *ni;     struct nv_value *val; +   struct nv_basic_block *p, *pn;     int n, j;     b->pass_seq = ctx->pc->pass_seq;     for (n = 0; n < b->num_in; ++n) { -      p = b->in[n]; +      p = pn = b->in[n];        assert(p); -      if (b->num_in > 1 && p->out[0] && p->out[1]) { /* if without else */ +      if (b->num_in > 1 && p->out[0] && p->out[1]) {           pn = new_basic_block(ctx->pc);           if (p->out[0] == b) @@ -426,58 +432,99 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)                 break;              }           } -           pn->out[0] = b;           pn->in[0] = p;           pn->num_in = 1; -      } else -         pn = p; +      }        ctx->pc->current_block = pn; -      /* every block with PHIs will also have other operations */        for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { -         for (j = 0; j < 4; ++j) { -            if (!i->src[j]) -               j = 3; -            else +         for (j = 0; j < 4 && i->src[j]; ++j) {              if (bb_reachable_by(pn, i->src[j]->value->insn->bb, b))                 break;           } -         if (j >= 4) +         if (j >= 4 || !i->src[j])              continue; -         assert(i->src[j]);           val = i->src[j]->value; -         /* XXX: should probably not insert this after terminator */ -         i2 = new_instruction(ctx->pc, NV_OP_MOV); - -         i2->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type); -         i2->src[0] = new_ref  (ctx->pc, val); -         i2->def[0]->insn = i2; - -         nv_reference(ctx->pc, &i->src[j], i2->def[0]); +         if (!nvbb_dominated_by(pn, val->insn->bb)) +            nv_reference(ctx->pc, &i->src[j], propagate_phi(ctx->pc, i, j));        }        if (pn != p && pn->exit) { -         /* XXX: this branch should probably be eliminated */           ctx->pc->current_block = b->in[n ? 0 : 1]; -         i2 = new_instruction(ctx->pc, NV_OP_BRA); -         i2->target = b; -         i2->is_terminator = 1; +         ni = new_instruction(ctx->pc, NV_OP_BRA); +         ni->target = b; +         ni->is_terminator = 1;        }     } -   if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq) { -      pass_generate_phi_movs(ctx, b->out[0]); -   } +   for (j = 0; j < 2; ++j) +      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) +         pass_generate_phi_movs_1(ctx, b->out[j]); + +   return 0; +} + +/* Now everything should be in order and we can insert the MOVs. */ +static int +pass_generate_phi_movs_2(struct nv_pc_pass *ctx, struct nv_basic_block *b) +{ +   struct nv_instruction *i, *mov; +   struct nv_value *val; +   struct nv_basic_block *p; +   int n, j; + +   b->pass_seq = ctx->pc->pass_seq; + +   for (n = 0; n < b->num_in; ++n) { +      ctx->pc->current_block = p = b->in[n]; + +      for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { +         for (j = 0; j < 4 && i->src[j]; ++j) { +            if (bb_reachable_by(p, i->src[j]->value->insn->bb, b)) +               break; +         } +         if (j >= 4 || !i->src[j]) +            continue; +         val = i->src[j]->value; + +         mov = new_instruction(ctx->pc, NV_OP_MOV); + +         /* TODO: insert instruction at correct position in the first place */ +         if (mov->prev && mov->prev->target) +            nv_nvi_permute(mov->prev, mov); + +         mov->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type); +         mov->def[0]->insn = mov; +         mov->src[0] = new_ref(ctx->pc, val); -   if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq) { -      pass_generate_phi_movs(ctx, b->out[1]); +         nv_reference(ctx->pc, &i->src[j], mov->def[0]); +      }     } +   for (j = 1; j >= 0; --j) /* different order for the sake of diversity */ +      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) +         pass_generate_phi_movs_2(ctx, b->out[j]); +     return 0;  } +/* For each operand of each PHI in b, generate a new value by inserting a MOV + * at the end of the block it is coming from and replace the operand with its + * result. This eliminates liveness conflicts and enables us to let values be + * copied to the right register if such a conflict exists nonetheless. + */ +static INLINE int +pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) +{ +   if (pass_generate_phi_movs_1(ctx, b)) +      return 1; + +   ++ctx->pc->pass_seq; +   return pass_generate_phi_movs_2(ctx, b); +} +  static int  pass_join_values(struct nv_pc_pass *ctx, int iter)  { @@ -525,6 +572,7 @@ pass_join_values(struct nv_pc_pass *ctx, int iter)     return 0;  } +/* Order the instructions so that live intervals can be expressed in numbers. */  static int  pass_order_instructions(struct nv_pc_pass *ctx, struct nv_basic_block *b)  { @@ -560,7 +608,7 @@ bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)     int j;     struct nv_value *val; -   debug_printf("live_set of %p: ", b); +   debug_printf("LIVE-INs of BB:%i: ", b->id);     for (j = 0; j < pc->num_values; ++j) {        if (!(b->live_set[j / 32] & (1 << (j % 32)))) @@ -579,16 +627,12 @@ live_set_add(struct nv_basic_block *b, struct nv_value *val)  {     if (!val->insn) /* don't add non-def values */        return; -   /* debug_printf("live[%p] <- %i\n", b, val->n); */ -     b->live_set[val->n / 32] |= 1 << (val->n % 32);  }  static INLINE void  live_set_rem(struct nv_basic_block *b, struct nv_value *val)  { -   /* if (val->insn) -      debug_printf("live[%p] -> %i\n", b, val->n); */     b->live_set[val->n / 32] &= ~(1 << (val->n % 32));  } @@ -600,7 +644,7 @@ live_set_test(struct nv_basic_block *b, struct nv_ref *ref)  }  /* The live set of a block contains those values that are live immediately - * before the beginning of the block. + * before the beginning of the block, so do a backwards scan.   */  static int  pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b) @@ -608,6 +652,14 @@ pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)     struct nv_instruction *i;     int j, n, ret = 0; +   debug_printf("pass_build_live_sets BB:%i\n", b->id); + +   if (b->pass_seq >= ctx->pc->pass_seq) { +      debug_printf("already visited\n"); +      return 0; +   } +   b->pass_seq = ctx->pc->pass_seq; +     /* slight hack for undecidedness: set phi = entry if it's undefined */     if (!b->phi)        b->phi = b->entry; @@ -638,23 +690,18 @@ pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)              if (bb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) {                 live_set_add(b, i->src[j]->value); -               debug_printf("%p: live set + %i\n", b, i->src[j]->value->n); +               debug_printf("BB:%i liveset + %i\n", b->id, i->src[j]->value->n);              } else {                 live_set_rem(b, i->src[j]->value); -               debug_printf("%p: live set - %i\n", b, i->src[j]->value->n); +               debug_printf("BB:%i liveset - %i\n", b->id, i->src[j]->value->n);              }           }        }     } -   if (b->pass_seq >= ctx->pc->pass_seq) -      return 0; -   b->pass_seq = ctx->pc->pass_seq; - -   debug_printf("%s: visiting block %p\n", __FUNCTION__, b); -     if (!b->entry)        return 0; +     bb_live_set_print(ctx->pc, b);     for (i = b->exit; i; i = i->prev) { @@ -786,8 +833,6 @@ pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)     if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)        pass_build_intervals(ctx, b->out[1]); -   debug_printf("built intervals for block %p\n", b); -     return 0;  } diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index aafb5e8295..8846ef08b5 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -212,15 +212,11 @@ bld_imm_u32(struct bld_context *bld, uint32_t u)     int i;     unsigned n = bld->num_immds; -   debug_printf("bld_imm_u32: 0x%08x\n", u); -     for (i = 0; i < n; ++i)        if (bld->saved_immd[i]->reg.imm.u32 == u)           return bld->saved_immd[i];     assert(n < BLD_MAX_IMMDS); -   debug_printf("need new one\n"); -     bld->num_immds++;     bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, NV_TYPE_U32);  | 
