diff options
Diffstat (limited to 'src/gallium/drivers')
| -rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc.c | 13 | ||||
| -rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc.h | 12 | ||||
| -rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 86 | ||||
| -rw-r--r-- | src/gallium/drivers/nvc0/nvc0_pc_print.c | 28 | ||||
| -rw-r--r-- | src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 30 | 
5 files changed, 123 insertions, 46 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index a200632102..e0cba05b97 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -55,15 +55,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s,  boolean  nvc0_insn_is_predicateable(struct nv_instruction *nvi)  { -   int s; - -   if (!nv_op_predicateable(nvi->opcode)) +   if (nvi->predicate >= 0) /* already predicated */        return FALSE; -   if (nvi->predicate >= 0) +   if (!nvc0_op_info_table[nvi->opcode].predicate && +       !nvc0_op_info_table[nvi->opcode].pseudo)        return FALSE; -   for (s = 0; s < 4 && nvi->src[s]; ++s) -      if (nvi->src[s]->value->reg.file == NV_FILE_IMM) -         return FALSE;     return TRUE;  } @@ -505,6 +501,9 @@ nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)     i->bb = b;     b->num_instructions++; + +   if (i->prev && i->prev->terminator) +      nvc0_insns_permute(i->prev, i);  }  void diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 40d728aefc..efa073a920 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -205,6 +205,10 @@  #define NV_CC_C  0x11  #define NV_CC_A  0x12  #define NV_CC_S  0x13 +#define NV_CC_INVERSE(cc) ((cc) ^ 0x7) +/* for 1 bit predicates: */ +#define NV_CC_P     0 +#define NV_CC_NOT_P 1  #define NV_PC_MAX_INSTRUCTIONS 2048  #define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) @@ -260,12 +264,6 @@ nv_op_supported_src_mods(uint opcode)     return nvc0_op_info_table[opcode].mods;  } -static INLINE boolean -nv_op_predicateable(uint opcode) -{ -   return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE; -} -  static INLINE uint  nv_type_order(ubyte type)  { @@ -488,7 +486,7 @@ nv_alloc_instruction(struct nv_pc *pc, uint opcode)     assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);     insn->opcode = opcode; -   insn->cc = 0; +   insn->cc = NV_CC_P;     insn->indirect = -1;     insn->predicate = -1; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 2e554dbe4e..6cfa03d5b1 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -99,6 +99,7 @@ inst_removable(struct nv_instruction *nvi)               nvc0_insn_refcount(nvi)));  } +/* Check if we do not actually have to emit this instruction. */  static INLINE boolean  inst_is_noop(struct nv_instruction *nvi)  { @@ -1043,7 +1044,6 @@ nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)     return 0;  } -#if 0  /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.   * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with   * BREAK and dummy ELSE block. @@ -1064,24 +1064,92 @@ bb_is_if_else_endif(struct nv_basic_block *bb)     }  } -/* predicate instructions and remove branch at the end */ +/* Predicate instructions and delete any branch at the end if it is + * not a break from a loop. + */  static void  predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b, -                       struct nv_value *p, ubyte cc) +                       struct nv_value *pred, uint8_t cc)  { +   struct nv_instruction *nvi, *prev; +   int s; +   if (!b->entry) +      return; +   for (nvi = b->entry; nvi; nvi = nvi->next) { +      prev = nvi; +      if (inst_is_noop(nvi)) +         continue; +      for (s = 0; nvi->src[s]; ++s); +      assert(s < 6); +      nvi->predicate = s; +      nvi->cc = cc; +      nv_reference(pc, nvi, nvi->predicate, pred); +   } +   if (prev->opcode == NV_OP_BRA && +       b->out_kind[0] != CFG_EDGE_LOOP_LEAVE && +       b->out_kind[1] != CFG_EDGE_LOOP_LEAVE) +      nvc0_insn_delete(prev);  } -#endif -/* NOTE: Run this after register allocation, we can just cut out the cflow - * instructions and hook the predicates to the conditional OPs if they are - * not using immediates; better than inserting SELECT to join definitions. - * - * NOTE: Should adapt prior optimization to make this possible more often. +static INLINE boolean +may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred) +{ +   if (nvi->def[0] && values_equal(nvi->def[0], pred)) +      return FALSE; +   return nvc0_insn_is_predicateable(nvi); +} + +/* Transform IF/ELSE/ENDIF constructs into predicated instructions + * where feasible.   */  static int  nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)  { +   struct nv_instruction *nvi; +   struct nv_value *pred; +   int k; +   int n0, n1; /* instruction counts of outgoing blocks */ + +   if (bb_is_if_else_endif(b)) { +      assert(b->exit && b->exit->opcode == NV_OP_BRA); + +      assert(b->exit->predicate >= 0); +      pred = b->exit->src[b->exit->predicate]->value; + +      n1 = n0 = 0; +      for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) +         if (!may_predicate_insn(nvi, pred)) +            break; +      if (!nvi) { +         /* we're after register allocation, so there always is an ELSE block */ +         for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) +            if (!may_predicate_insn(nvi, pred)) +               break; +      } + +      /* 12 is an arbitrary limit */ +      if (!nvi && n0 < 12 && n1 < 12) { +         predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc); +         predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc); + +         nvc0_insn_delete(b->exit); /* delete the branch */ + +         /* and a potential joinat before it */ +         if (b->exit && b->exit->opcode == NV_OP_JOINAT) +            nvc0_insn_delete(b->exit); + +         /* remove join operations at the end of the conditional */ +         k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0; +         if ((nvi = b->out[0]->out[k]->entry)) { +            nvi->join = 0; +            if (nvi->opcode == NV_OP_JOIN) +               nvc0_insn_delete(nvi); +         } +      } +   } +   DESCEND_ARBITRARY(k, nv_pass_flatten); +     return 0;  } diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 9e0bffacd6..7840078614 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -225,7 +225,7 @@ nvc0_print_instruction(struct nv_instruction *i)     PRINT("%s", gree);     if (NV_BASEOP(i->opcode) == NV_OP_SET) -      PRINT("set %s", nv_cond_name(i->set_cond)); +      PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond));     else     if (i->saturate)        PRINT("sat %s", nvc0_opcode_name(i->opcode)); @@ -278,7 +278,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =     { NV_OP_MERGE,  "merge",  NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },     { NV_OP_PHI,    "phi",    NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },     { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, -   { NV_OP_NOP,    "nop",    NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 }, +   { NV_OP_NOP,    "nop",    NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 },     { NV_OP_LD,  "ld",  NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },     { NV_OP_ST,  "st",  NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, @@ -343,18 +343,18 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =     { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },     { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },     { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, -   { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, -   { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, -   { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, +   { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, +   { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, +   { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 },     { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 }, -   { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, -   { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, -   { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, -   { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, -   { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, -   { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, -   { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, -   { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, +   { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, +   { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, +   { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, +   { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, +   { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, +   { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, +   { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, +   { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },     { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },     { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, @@ -369,7 +369,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =     { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, -   { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, +   { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },     { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 43c27fd890..72bfcd0c95 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -659,7 +659,7 @@ bld_kil(struct bld_context *bld, struct nv_value *src)  static void  bld_flow(struct bld_context *bld, uint opcode, -         struct nv_value *src, struct nv_basic_block *target, +         struct nv_value *pred, uint8_t cc, struct nv_basic_block *target,           boolean reconverge)  {     struct nv_instruction *nvi; @@ -670,8 +670,10 @@ bld_flow(struct bld_context *bld, uint opcode,     nvi = new_instruction(bld->pc, opcode);     nvi->target = target;     nvi->terminator = 1; -   if (src) -      bld_src_predicate(bld, nvi, 0, src); +   if (pred) { +      nvi->cc = cc; +      bld_src_predicate(bld, nvi, 0, pred); +   }  }  static ubyte @@ -1584,6 +1586,7 @@ bld_instruction(struct bld_context *bld,     case TGSI_OPCODE_IF:     {        struct nv_basic_block *b = new_basic_block(bld->pc); +      struct nv_value *pred = emit_fetch(bld, insn, 0, 0);        assert(bld->cond_lvl < BLD_MAX_COND_NESTING); @@ -1592,10 +1595,19 @@ bld_instruction(struct bld_context *bld,        bld->join_bb[bld->cond_lvl] = bld->pc->current_block;        bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; -      src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ, -                      emit_fetch(bld, insn, 0, 0), bld->zero); +      if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) { +         pred = bld_clone(bld, pred->insn); +         pred->reg.size = 1; +         pred->reg.file = NV_FILE_PRED; +         if (pred->insn->opcode == NV_OP_FSET_F32) +            pred->insn->opcode = NV_OP_SET_F32; +      } else { +         pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U, +                         pred, bld->zero); +      } +      assert(!mask); -      bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0)); +      bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0));        ++bld->cond_lvl;        bld_new_block(bld, b); @@ -1661,7 +1673,7 @@ bld_instruction(struct bld_context *bld,     {        struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; -      bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); +      bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);        if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */           nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); @@ -1673,7 +1685,7 @@ bld_instruction(struct bld_context *bld,     {        struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; -      bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); +      bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);        nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); @@ -1689,7 +1701,7 @@ bld_instruction(struct bld_context *bld,        struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];        if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */ -         bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); +         bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);           nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);        }  | 
