diff options
| field | value | date |
|---|---|---|
| author | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-09-01 17:54:56 +0200 |
| committer | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-09-01 18:02:50 +0200 |
| commit | 0a8292e096bc37eeb225bf7d3854b6b6edc4bceb (patch) | |
| tree | 80bbb0365721665a8ff932e7b6ccb7a7c44a3e5c /src | |
| parent | d90502b2b468732e2a42985580bbbe9d9fdfd14e (diff) | |
nv50: attempt at making more complicated loops work

This covers nested loops and loops with multiple exits (BREAK, CONT).
Diffstat (limited to 'src')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.c | 20 |
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.h | 6 |
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_optimize.c | 14 |
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_regalloc.c | 85 |
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_tgsi_to_nc.c | 70 |

5 files changed, 138 insertions, 57 deletions
| diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index 1c12fe1b9e..b03f5b27f6 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -220,6 +220,7 @@ edge_name(ubyte type)     case CFG_EDGE_BACK: return "back";     case CFG_EDGE_LOOP_ENTER: return "loop";     case CFG_EDGE_LOOP_LEAVE: return "break"; +   case CFG_EDGE_FAKE: return "fake";     default:        return "?";     } @@ -247,6 +248,7 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)           case CFG_EDGE_BACK:              continue;           case CFG_EDGE_FORWARD: +         case CFG_EDGE_FAKE:              if (++b->out[j]->priv == b->out[j]->num_in)                 bb[p++] = b->out[j];              break; @@ -264,9 +266,11 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)        f(priv, b); -      if (!p) -         while (pp > 0) -            bb[p++] = bbb[--pp]; +      if (!p) { +         p = pp; +         for (; pp > 0; --pp) +            bb[pp - 1] = bbb[pp - 1]; +      }     }  } @@ -366,11 +370,17 @@ nv50_generate_code(struct nv50_translation_info *ti)     ret = nv_pc_exec_pass0(pc);     if (ret)        goto out; +#ifdef NV50PC_DEBUG +   nv_print_program(pc->root); +#endif     /* register allocation */     ret = nv_pc_exec_pass1(pc);     if (ret)        goto out; +#ifdef NV50PC_DEBUG +   nv_print_program(pc->root); +#endif     /* prepare for emission */     ret = nv_pc_exec_pass2(pc); @@ -580,10 +590,10 @@ nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,     if (bp == bt)        return FALSE; -   if (bp->out[0] && bp->out_kind[0] != CFG_EDGE_BACK && +   if (bp->out[0] && !IS_WALL_EDGE(bp->out_kind[0]) &&         nvbb_reachable_by(bf, bp->out[0], bt))        return TRUE; -   if (bp->out[1] && bp->out_kind[1] != CFG_EDGE_BACK && +   if (bp->out[1] && !IS_WALL_EDGE(bp->out_kind[1]) &&         nvbb_reachable_by(bf, bp->out[1], bt))        
return TRUE;     return FALSE; diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h index 48918f46d5..2bb3ea4374 100644 --- a/src/gallium/drivers/nv50/nv50_pc.h +++ b/src/gallium/drivers/nv50/nv50_pc.h @@ -257,6 +257,12 @@ struct nv_instruction {  #define CFG_EDGE_BACK        1  #define CFG_EDGE_LOOP_ENTER  2  #define CFG_EDGE_LOOP_LEAVE  4 +#define CFG_EDGE_FAKE        8 + +/* 'WALL' edge means where reachability check doesn't follow */ +/* 'LOOP' edge means just having to do with loops */ +#define IS_LOOP_EDGE(k) ((k) & 7) +#define IS_WALL_EDGE(k) ((k) & 9)  struct nv_basic_block {     struct nv_instruction *entry; /* first non-phi instruction */ diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 4b1cd56fc1..1d2710a8ac 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -362,6 +362,9 @@ nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)           nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);           if (ld->src[4])              nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value); + +         if (!nv_nvi_refcount(ld)) +            nv_nvi_delete(ld);        }     }     DESCEND_ARBITRARY(j, nv_pass_fold_loads); @@ -504,7 +507,7 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,     u1.u32 = src1->reg.imm.u32;     modifiers_apply(&u0.u32, type, nvi->src[0]->mod); -   modifiers_apply(&u0.u32, type, nvi->src[1]->mod); +   modifiers_apply(&u1.u32, type, nvi->src[1]->mod);     switch (nvi->opcode) {     case NV_OP_MAD: @@ -951,7 +954,9 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)           if (b->exit && b->exit->opcode == NV_OP_JOINAT)              nv_nvi_delete(b->exit); -         if ((nvi = b->out[0]->out[0]->entry)) { +         i = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 
1 : 0; + +         if ((nvi = b->out[0]->out[i]->entry)) {              nvi->is_join = 0;              if (nvi->opcode == NV_OP_JOIN)                 nv_nvi_delete(nvi); @@ -980,7 +985,8 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)              if (ir->opcode != ik->opcode)                 continue; -            if (ik->opcode == NV_OP_LDA || +            if (!ir->def[0] || !ik->def[0] || +                ik->opcode == NV_OP_LDA ||                  ik->opcode == NV_OP_STA ||                  ik->opcode == NV_OP_MOV ||                  nv_is_vector_op(ik->opcode)) @@ -993,8 +999,6 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)                  ik->flags_def || ir->flags_def)                 continue; /* and also not with flags, for now */ -            assert(ik->def[0] && ir->def[0]); -              if (ik->def[0]->reg.file == NV_FILE_OUT ||                  ir->def[0]->reg.file == NV_FILE_OUT ||                  !values_equal(ik->def[0], ir->def[0])) diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c index 59462cc11e..81decf8d4a 100644 --- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c +++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c @@ -22,6 +22,10 @@  /* #define NV50PC_DEBUG */ +/* #define NV50_RA_DEBUG_LIVEI */ +/* #define NV50_RA_DEBUG_LIVE_SETS */ +/* #define NV50_RA_DEBUG_JOIN */ +  #include "nv50_context.h"  #include "nv50_pc.h" @@ -119,7 +123,7 @@ add_range(struct nv_value *val, struct nv_basic_block *b, int end)     add_range_ex(val, bgn, end, NULL);  } -#ifdef NV50_RA_DEBUG_JOIN +#if defined(NV50_RA_DEBUG_JOIN) || defined(NV50_RA_DEBUG_LIVEI)  static void  livei_print(struct nv_value *a)  { @@ -359,16 +363,37 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)     int i = 0, n = 0;     for (; i < 2; ++i) -      if (p->out[i] && p->out_kind[i] != CFG_EDGE_LOOP_LEAVE) +      if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))           ++n;     return 
(b->num_in > 1) && (n == 2);  } +static int +phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, +                struct nv_basic_block *tb) +{ +   int i, j; + +   for (j = -1, i = 0; i < 4 && phi->src[i]; ++i) { +      if (!nvbb_reachable_by(b, phi->src[i]->value->insn->bb, tb)) +         continue; +      /* NOTE: back-edges are ignored by the reachable-by check */ +      if (j < 0 || !nvbb_reachable_by(phi->src[j]->value->insn->bb, +                                      phi->src[i]->value->insn->bb, tb)) +         j = i; +   } +   return j; +} +  /* For each operand of each PHI in b, generate a new value by inserting a MOV   * at the end of the block it is coming from and replace the operand with its   * result. This eliminates liveness conflicts and enables us to let values be   * copied to the right register if such a conflict exists nonetheless. + * + * These MOVs are also crucial in making sure the live intervals of phi srces + * are extended until the end of the loop, since they are not included in the + * live-in sets.   
*/  static int  pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) @@ -404,14 +429,17 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)        ctx->pc->current_block = pn;        for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { -         for (j = 0; j < 4 && i->src[j]; ++j) { -            if (nvbb_reachable_by(p, i->src[j]->value->insn->bb, b)) -               break; -         } -         if (j >= 4 || !i->src[j]) +         if ((j = phi_opnd_for_bb(i, p, b)) < 0)              continue;           val = i->src[j]->value; +         if (i->src[j]->flags) { +            val = val->insn->src[0]->value; +            while (j < 4 && i->src[j]) +               ++j; +            assert(j < 4); +         } +           ni = new_instruction(ctx->pc, NV_OP_MOV);           /* TODO: insert instruction at correct position in the first place */ @@ -423,6 +451,8 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)           ni->src[0] = new_ref(ctx->pc, val);           nv_reference(ctx->pc, &i->src[j], ni->def[0]); + +         i->src[j]->flags = 1;        }        if (pn != p && pn->exit) { @@ -452,8 +482,8 @@ pass_join_values(struct nv_pc_pass *ctx, int iter)        case NV_OP_PHI:           if (!iter)              continue; -         try_join_values(ctx, i->src[0]->value, i->src[1]->value); -         try_join_values(ctx, i->def[0], i->src[0]->value); +         for (c = 0; c < 4 && i->src[c]; ++c) +            try_join_values(ctx, i->def[0], i->src[c]->value);           break;        case NV_OP_MOV:           if (iter && i->src[0]->value->insn && @@ -576,22 +606,6 @@ pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)           for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)              b->live_set[j] |= b->out[n]->live_set[j];        } - -      /* Kick values out of our live set that are created in incoming -       * blocks of our successors that are not us. 
-       */ -      for (i = b->out[n]->phi; i && i->opcode == NV_OP_PHI; i = i->next) { -         for (j = 0; j < 4; ++j) { -            if (!i->src[j]) -               break; -            assert(i->src[j]->value->insn); - -            if (nvbb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) -               live_set_add(b, i->src[j]->value); -            else -               live_set_rem(b, i->src[j]->value); -         } -      }     }     if (!b->entry) @@ -599,7 +613,7 @@ pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)     bb_live_set_print(ctx->pc, b); -   for (i = b->exit; i; i = i->prev) { +   for (i = b->exit; i != b->entry->prev; i = i->prev) {        for (j = 0; j < 4; j++) {           if (!i->def[j])              break; @@ -617,6 +631,9 @@ pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)        if (i->flags_src)           live_set_add(b, i->flags_src->value);     } +   for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) +      live_set_rem(b, i->def[0]); +     bb_live_set_print(ctx->pc, b);     return 0; @@ -680,10 +697,12 @@ pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)        for (j = 0; j < ctx->pc->num_values; ++j) {           if (!(b->live_set[j / 32] & (1 << (j % 32))))              continue; +         add_range(&ctx->pc->values[j], b, b->exit->serial + 1);  #ifdef NV50_RA_DEBUG_LIVEI -         debug_printf("adding range for live value %i\n", j); +         debug_printf("adding range for live value %i: ", j); +         livei_print(&ctx->pc->values[j]);  #endif -         add_range(&ctx->pc->values[j], b, b->exit->serial + 1); +        }     } @@ -702,20 +721,22 @@ pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)        for (j = 0; j < 5; ++j) {           if (i->src[j] && !live_set_test(b, i->src[j])) {              live_set_add(b, i->src[j]->value); +            add_range(i->src[j]->value, b, i->serial);  #ifdef NV50_RA_DEBUG_LIVEI -            
debug_printf("adding range for source that ends living: %i\n", +            debug_printf("adding range for source %i (ends living): ",                           i->src[j]->value->n); +            livei_print(i->src[j]->value);  #endif -            add_range(i->src[j]->value, b, i->serial);           }        }        if (i->flags_src && !live_set_test(b, i->flags_src)) {           live_set_add(b, i->flags_src->value); +         add_range(i->flags_src->value, b, i->serial);  #ifdef NV50_RA_DEBUG_LIVEI -         debug_printf("adding range for source that ends living: %i\n", +         debug_printf("adding range for source %i (ends living): ",                        i->flags_src->value->n); +         livei_print(i->flags_src->value);  #endif -         add_range(i->flags_src->value, b, i->serial);        }     } diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index 115b5df939..8b18a9c025 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -120,6 +120,8 @@ struct bld_context {     struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];     int loop_lvl; +   ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */ +     struct bld_value_stack tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */     struct bld_value_stack avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */     struct bld_value_stack pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */ @@ -268,7 +270,7 @@ fetch_by_bb(struct bld_value_stack *stack,        return;     }     for (i = 0; i < b->num_in; ++i) -      if (b->in_kind[i] != CFG_EDGE_BACK) +      if (!IS_WALL_EDGE(b->in_kind[i]))           fetch_by_bb(stack, vals, n, b->in[i]);  } @@ -362,18 +364,31 @@ bld_phi(struct bld_context *bld, struct nv_basic_block *b,     return phi->def[0];  } +/* Insert a phi function in the loop header. + * For nested loops, we need to insert phi functions in all the outer + * loop headers if they don't have one yet. 
+ * + * @def: redefinition from inside loop, or NULL if to be replaced later + */  static struct nv_value *  bld_loop_phi(struct bld_context *bld, struct bld_value_stack *stack,               struct nv_value *def)  { -   struct nv_basic_block *bb = bld->pc->current_block;     struct nv_instruction *phi; -   struct nv_value *val; +   struct nv_basic_block *bb = bld->pc->current_block; +   struct nv_value *val = NULL; -   val = bld_phi(bld, bld->pc->current_block, stack); +   if (bld->loop_lvl > 1) { +      --bld->loop_lvl; +      if (!((stack->loop_def | stack->loop_use) & (1 << bld->loop_lvl))) +         val = bld_loop_phi(bld, stack, NULL); +      ++bld->loop_lvl; +   } + +   if (!val) +      val = bld_phi(bld, bld->pc->current_block, stack); /* old definition */     if (!val) {        bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0]; -        val = bld_undef(bld, bld_stack_file(bld, stack));     } @@ -449,10 +464,11 @@ bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,  static void  bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)  { +   struct nv_basic_block *save = bld->pc->current_block;     struct nv_instruction *phi, *next;     struct nv_value *val;     struct bld_value_stack *stk; -   int s; +   int i, s, n;     for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {        next = phi->next; @@ -460,19 +476,33 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)        stk = (struct bld_value_stack *)phi->target;        phi->target = NULL; -      val = bld_fetch_global(bld, stk); +      for (s = 1, n = 0; n < bb->num_in; ++n) { +         if (bb->in_kind[n] != CFG_EDGE_BACK) +            continue; -      nv_reference(bld->pc, &phi->src[1], val); +         assert(s < 4); +         bld->pc->current_block = bb->in[n]; +         val = bld_fetch_global(bld, stk); + +         for (i = 0; i < 4; ++i) +            if (phi->src[i] && phi->src[i]->value == val) +               break; + 
        if (i == 4) +            nv_reference(bld->pc, &phi->src[s++], val); +      } +      bld->pc->current_block = save; -      s = -1;        if (phi->src[0]->value == phi->def[0] ||            phi->src[0]->value == phi->src[1]->value)           s = 1;        else        if (phi->src[1]->value == phi->def[0])           s = 0; +      else +         continue;        if (s >= 0) { +         /* eliminate the phi */           bld_vals_del_val(stk, phi->def[0]);           ++bld->pc->pass_seq; @@ -915,6 +945,8 @@ bld_new_block(struct bld_context *bld, struct nv_basic_block *b)     for (i = 0; i < 128; ++i)        bld->saved_inputs[i] = NULL; + +   bld->out_kind = CFG_EDGE_FORWARD;  }  static struct nv_value * @@ -1366,7 +1398,7 @@ bld_instruction(struct bld_context *bld,        struct nv_basic_block *b = new_basic_block(bld->pc);        --bld->cond_lvl; -      nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD); +      nvbb_attach_block(bld->pc->current_block, b, bld->out_kind);        nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);        bld->cond_bb[bld->cond_lvl]->exit->target = b; @@ -1407,8 +1439,10 @@ bld_instruction(struct bld_context *bld,        bld_flow(bld, NV_OP_BREAK, NV_CC_TR, NULL, bb, FALSE); -      /* XXX: don't do this for redundant BRKs */ -      nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); +      if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */ +         nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); + +      bld->out_kind = CFG_EDGE_FAKE;     }        break;     case TGSI_OPCODE_CONT: @@ -1418,11 +1452,17 @@ bld_instruction(struct bld_context *bld,        bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);        nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK); + +      if ((bb = bld->join_bb[bld->cond_lvl - 1])) { +         bld->join_bb[bld->cond_lvl - 1] = NULL; +         nv_nvi_delete(bb->exit->prev); +      } +      bld->out_kind 
= CFG_EDGE_FAKE;     }        break;     case TGSI_OPCODE_ENDLOOP:     { -      struct nv_basic_block *bb = bld->loop_bb[--bld->loop_lvl]; +      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];        bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE); @@ -1430,7 +1470,7 @@ bld_instruction(struct bld_context *bld,        bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ -      bld_new_block(bld, bld->brkt_bb[bld->loop_lvl]); +      bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);     }        break;     case TGSI_OPCODE_ABS: @@ -1651,7 +1691,7 @@ bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,  {     struct nv_instruction *nvi; -   for (nvi = b->entry; nvi; nvi = nvi->next) { +   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {        int s;        for (s = 0; s < 5; ++s) {           if (!nvi->src[s]) | 
