diff options
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.c | 39 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.h | 3 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_optimize.c | 4 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_regalloc.c | 140 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_tgsi_to_nc.c | 123 | 
5 files changed, 164 insertions, 145 deletions
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index 614982db2d..e32d28a9ce 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -394,7 +394,7 @@ nv_nvi_delete(struct nv_instruction *nvi)     struct nv_basic_block *b = nvi->bb;     int j; -   debug_printf("REM: "); nv_print_instruction(nvi); +   /* debug_printf("REM: "); nv_print_instruction(nvi); */     for (j = 0; j < 5; ++j)        nv_reference(NULL, &nvi->src[j], NULL); @@ -477,5 +477,40 @@ nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)     for (j = 0; j < b->num_in; ++j)        n += nvbb_dominated_by(b->in[j], d); -   return n && (n == b->num_in); +   return (n && (n == b->num_in)) ? 1 : 0; +} + +/* check if bf (future) can be reached from bp (past) */ +boolean +nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp, +                  struct nv_basic_block *bt) +{ +   if (bf == bp) +      return TRUE; +   if (bp == bt) +      return FALSE; + +   if (bp->out[0] && bp->out[0] != bp && +       nvbb_reachable_by(bf, bp->out[0], bt)) +      return TRUE; +   if (bp->out[1] && bp->out[1] != bp && +       nvbb_reachable_by(bf, bp->out[1], bt)) +      return TRUE; +   return FALSE; +} + +struct nv_basic_block * +nvbb_dom_frontier(struct nv_basic_block *b) +{ +   struct nv_basic_block *df = b->out[0]; + +   assert(df); +   while (nvbb_dominated_by(df, b) || +          (!nvbb_dominated_by(df->in[0], b) && +           (!df->in[1] || !nvbb_dominated_by(df->in[1], b)))) { +      df = df->out[0]; +      assert(df); +   } +   assert(df); +   return df;  } diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h index 4b191c508a..987043c7a0 100644 --- a/src/gallium/drivers/nv50/nv50_pc.h +++ b/src/gallium/drivers/nv50/nv50_pc.h @@ -426,6 +426,9 @@ void nv_nvi_delete(struct nv_instruction *);  void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);  void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *);  int nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *); +boolean nvbb_reachable_by(struct nv_basic_block *, struct nv_basic_block *, +                          struct nv_basic_block *); +struct nv_basic_block *nvbb_dom_frontier(struct nv_basic_block *);  int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,                         struct nv_value *new_val); diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 324f8bb2da..f2f8d0eaa3 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -771,6 +771,10 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)              if (ik->src[4] || ir->src[4])                 continue; /* don't mess with address registers */ +            if (ik->flags_src || ir->flags_src || +                ik->flags_def || ir->flags_def) +               continue; /* and also not with flags, for now */ +              for (s = 0; s < 3; ++s) {                 struct nv_value *a, *b; diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c index 941ec9f6f8..172e44f62b 100644 --- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c +++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c @@ -43,25 +43,6 @@ struct nv_pc_pass {     uint pass_seq;  }; -/* check if bf (future) can be reached from bp (past) */ -static boolean -bb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp, -                struct nv_basic_block *bt) -{ -   if (bf == bp) -      return TRUE; -   if (bp == bt) -      return FALSE; - -   if (bp->out[0] && bp->out[0] != bp && -       bb_reachable_by(bf, bp->out[0], bt)) -      return TRUE; -   if (bp->out[1] && bp->out[1] != bp && -       bb_reachable_by(bf, bp->out[1], bt)) -      return TRUE; -   return FALSE; -} -  static void  ranges_coalesce(struct nv_range *range)  { @@ -377,32 +358,13 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)     do_join_values(ctx, a, b);  } -/* For phi functions with sources from blocks that are not direct predecessors, - * if such a source is to be used in an earlier predecessor, we need to add an - * additional phi function. Used when inserting the MOVs below. - */ -static struct nv_value * -propagate_phi(struct nv_pc *pc, struct nv_instruction *phi, int s) -{ -   struct nv_basic_block *b = pc->current_block; -   struct nv_value *val = phi->src[s]->value; -   struct nv_instruction *nvi = new_instruction(pc, NV_OP_PHI); -   int i, k; - -   (nvi->def[0] = new_value(pc, val->reg.file, val->reg.type))->insn = nvi; - -   for (k = 0, i = 0; i < 4 && phi->src[i]; ++i) { -      if (bb_reachable_by(b, phi->src[i]->value->insn->bb, b)) -         nvi->src[k++] = new_ref(pc, phi->src[i]->value); -   } -   return nvi->def[0]; -} - -/* For IF blocks without ELSE blocks, insert an empty block for the MOVs. - * Insert additional PHIs for cases where a direct MOV wouldn't be valid. +/* For each operand of each PHI in b, generate a new value by inserting a MOV + * at the end of the block it is coming from and replace the operand with its + * result. This eliminates liveness conflicts and enables us to let values be + * copied to the right register if such a conflict exists nonetheless.   */  static int -pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b) +pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)  {     struct nv_instruction *i, *ni;     struct nv_value *val; @@ -426,31 +388,36 @@ pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)           if (p->exit->target == b) /* target to new else-block */              p->exit->target = pn; -         for (j = 0; j < b->num_in; ++j) { -            if (b->in[j] == p) { -               b->in[j] = pn; -               break; -            } -         } +         b->in[n] = pn; +           pn->out[0] = b;           pn->in[0] = p;           pn->num_in = 1;        } -        ctx->pc->current_block = pn;        for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {           for (j = 0; j < 4 && i->src[j]; ++j) { -            if (bb_reachable_by(pn, i->src[j]->value->insn->bb, b)) +            if (nvbb_reachable_by(p, i->src[j]->value->insn->bb, b))                 break;           }           if (j >= 4 || !i->src[j])              continue;           val = i->src[j]->value; -         if (!nvbb_dominated_by(pn, val->insn->bb)) -            nv_reference(ctx->pc, &i->src[j], propagate_phi(ctx->pc, i, j)); +         ni = new_instruction(ctx->pc, NV_OP_MOV); + +         /* TODO: insert instruction at correct position in the first place */ +         if (ni->prev && ni->prev->target) +            nv_nvi_permute(ni->prev, ni); + +         ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type); +         ni->def[0]->insn = ni; +         ni->src[0] = new_ref(ctx->pc, val); + +         nv_reference(ctx->pc, &i->src[j], ni->def[0]);        } +        if (pn != p && pn->exit) {           ctx->pc->current_block = b->in[n ? 0 : 1];           ni = new_instruction(ctx->pc, NV_OP_BRA); @@ -461,70 +428,11 @@ pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)     for (j = 0; j < 2; ++j)        if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) -         pass_generate_phi_movs_1(ctx, b->out[j]); +         pass_generate_phi_movs(ctx, b->out[j]);     return 0;  } -/* Now everything should be in order and we can insert the MOVs. */ -static int -pass_generate_phi_movs_2(struct nv_pc_pass *ctx, struct nv_basic_block *b) -{ -   struct nv_instruction *i, *mov; -   struct nv_value *val; -   struct nv_basic_block *p; -   int n, j; - -   b->pass_seq = ctx->pc->pass_seq; - -   for (n = 0; n < b->num_in; ++n) { -      ctx->pc->current_block = p = b->in[n]; - -      for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { -         for (j = 0; j < 4 && i->src[j]; ++j) { -            if (bb_reachable_by(p, i->src[j]->value->insn->bb, b)) -               break; -         } -         if (j >= 4 || !i->src[j]) -            continue; -         val = i->src[j]->value; - -         mov = new_instruction(ctx->pc, NV_OP_MOV); - -         /* TODO: insert instruction at correct position in the first place */ -         if (mov->prev && mov->prev->target) -            nv_nvi_permute(mov->prev, mov); - -         mov->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type); -         mov->def[0]->insn = mov; -         mov->src[0] = new_ref(ctx->pc, val); - -         nv_reference(ctx->pc, &i->src[j], mov->def[0]); -      } -   } - -   for (j = 1; j >= 0; --j) /* different order for the sake of diversity */ -      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) -         pass_generate_phi_movs_2(ctx, b->out[j]); - -   return 0; -} - -/* For each operand of each PHI in b, generate a new value by inserting a MOV - * at the end of the block it is coming from and replace the operand with its - * result. This eliminates liveness conflicts and enables us to let values be - * copied to the right register if such a conflict exists nonetheless. - */ -static INLINE int -pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) -{ -   if (pass_generate_phi_movs_1(ctx, b)) -      return 1; - -   ++ctx->pc->pass_seq; -   return pass_generate_phi_movs_2(ctx, b); -} -  static int  pass_join_values(struct nv_pc_pass *ctx, int iter)  { @@ -688,7 +596,7 @@ pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)                 break;              assert(i->src[j]->value->insn); -            if (bb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) { +            if (nvbb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) {                 live_set_add(b, i->src[j]->value);                 debug_printf("BB:%i liveset + %i\n", b->id, i->src[j]->value->n);              } else { @@ -774,7 +682,7 @@ pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)              if (!i->src[s])                 break;              assert(i->src[s]->value->insn); -            if (bb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j])) +            if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))                 live_set_add(b, i->src[s]->value);              else                 live_set_rem(b, i->src[s]->value); @@ -978,7 +886,7 @@ nv_pc_exec_pass1(struct nv_pc *pc)     nv_print_program(ctx->pc->root); -   ctx->insns = CALLOC(pc->num_instructions, sizeof(struct nv_instruction *)); +   ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));     pc->pass_seq++;     ret = pass_generate_phi_movs(ctx, pc->root); diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index 8846ef08b5..6a9259c898 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -51,16 +51,22 @@ struct bld_value_stack {  };  static INLINE void -bld_push_value(struct bld_value_stack *stk) +bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)  { -   assert(!stk->size || (stk->body[stk->size - 1] != stk->top)); +   assert(!stk->size || (stk->body[stk->size - 1] != val));     if (!(stk->size % 8)) {        unsigned old_sz = (stk->size + 0) * sizeof(struct nv_value *);        unsigned new_sz = (stk->size + 8) * sizeof(struct nv_value *);        stk->body = (struct nv_value **)REALLOC(stk->body, old_sz, new_sz);     } -   stk->body[stk->size++] = stk->top; +   stk->body[stk->size++] = val; +} + +static INLINE void +bld_vals_push(struct bld_value_stack *stk) +{ +   bld_vals_push_val(stk, stk->top);     stk->top = NULL;  } @@ -72,7 +78,7 @@ bld_push_values(struct bld_value_stack *stacks, int n)     for (i = 0; i < n; ++i)        for (c = 0; c < 4; ++c)           if (stacks[i * 4 + c].top) -            bld_push_value(&stacks[i * 4 + c]); +            bld_vals_push(&stacks[i * 4 + c]);  }  #define FETCH_TEMP(i, c)    (bld->tvs[i][c].top) @@ -121,6 +127,17 @@ struct bld_context {     uint num_immds;  }; +static INLINE void +bld_warn_uninitialized(struct bld_context *bld, int kind, +                       struct bld_value_stack *stk, struct nv_basic_block *b) +{ +   long i = (stk - &bld->tvs[0][0]) / 4; +   long c = (stk - &bld->tvs[0][0]) & 3; + +   debug_printf("WARNING: TEMP[%li].%li %s used uninitialized in BB:%i\n", +                i, c, kind ? "may be" : "is", b->id); +} +  static INLINE struct nv_value *  bld_def(struct nv_instruction *i, int c, struct nv_value *value)  { @@ -168,42 +185,91 @@ fetch_by_bb(struct bld_value_stack *stack,        fetch_by_bb(stack, vals, n, b->in[i]);  } +static INLINE struct nv_value * +bld_load_imm_u32(struct bld_context *bld, uint32_t u); +  static struct nv_value * -bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack) +bld_phi(struct bld_context *bld, struct nv_basic_block *b, +        struct bld_value_stack *stack)  { -   struct nv_value *vals[16], *phi = NULL; -   int j, i = 0, n = 0; +   struct nv_basic_block *in; +   struct nv_value *vals[16], *val; +   struct nv_instruction *phi; +   int i, j, n; -   fetch_by_bb(stack, vals, &n, bld->pc->current_block); +   do { +      i = n = 0; +      fetch_by_bb(stack, vals, &n, b); -   if (n == 0) -      return NULL; -   if (n == 1) -      return vals[0]; +      if (!n) { +         bld_warn_uninitialized(bld, 0, stack, b); +         return NULL; +      } -   debug_printf("phi required: %i candidates\n", n); +      if (n == 1) { +         if (nvbb_dominated_by(b, vals[0]->insn->bb)) +            break; -   while (i < n) { -      struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_PHI); +         bld_warn_uninitialized(bld, 1, stack, b); -      j = phi ? 1 : 0; -      if (phi) -         insn->src[0] = new_ref(bld->pc, phi); +         /* back-tracking to insert missing value of other path */ +         in = b; +         while (in->in[0]) { +            if (in->num_in == 1) { +               in = in->in[0]; +            } else { +               if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b)) { +                  in = in->in[0]; +                  break; +               } +               if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b)) { +                  in = in->in[1]; +                  break; +               } +               in = in->in[0]; +            } +         } +         bld->pc->current_block = in; -      phi = new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type); +         /* should make this a no-op */ +         bld_vals_push_val(stack, bld_load_imm_u32(bld, 0)); +         continue; +      } -      bld_def(insn, 0, phi); +      for (i = 0; i < n; ++i) { +         if (nvbb_dominated_by(b, vals[i]->insn->bb)) +            continue; -      for (; j < 4; ++j) { -         insn->src[j] = new_ref(bld->pc, vals[i++]); -         if (i == n) +         for (j = 0; j < b->num_in; ++j) +            if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb)) +               break; +         if (j == b->num_in) { +            in = nvbb_dom_frontier(vals[i]->insn->bb); +            val = bld_phi(bld, in, stack); +            bld_vals_push_val(stack, val);              break; +         }        } -      debug_printf("new phi: %i, %i in\n", phi->n, j); -   } +   } while(i < n); + +   bld->pc->current_block = b; + +   if (n == 1) +      return vals[0]; -   /* insert_at_head(list, phi) is done at end of block */ -   return phi; +   phi = new_instruction(bld->pc, NV_OP_PHI); + +   bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type)); +   for (i = 0; i < n; ++i) +      phi->src[i] = new_ref(bld->pc, vals[i]); + +   return phi->def[0]; +} + +static INLINE struct nv_value * +bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack) +{ +   return bld_phi(bld, bld->pc->current_block, stack);  }  static INLINE struct nv_value * @@ -640,6 +706,9 @@ bld_new_block(struct bld_context *bld, struct nv_basic_block *b)     for (i = 0; i < 4; ++i)        bld->saved_addr[i][0] = NULL; + +   for (i = 0; i < 128; ++i) +      bld->saved_inputs[i] = NULL;  }  static struct nv_value *  | 
