diff options
| author | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-08-15 21:37:50 +0200 | 
|---|---|---|
| committer | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-08-15 21:40:00 +0200 | 
| commit | 4de293bb9acd1ecda683f735af32f7485a0f213e (patch) | |
| tree | efa03988c937976f5c712c6688b86ac89eeadbd0 /src | |
| parent | 34e0db4c509fd669a7713c63848a98d89463ce1a (diff) | |
nv50: loops part 2
At least the mesa demo glsl/mandelbrot should work now.
Diffstat (limited to 'src')
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.h | 8 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_emit.c | 1 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_optimize.c | 4 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_print.c | 2 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_screen.c | 27 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_screen.h | 4 | ||||
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_tgsi_to_nc.c | 233 | 
7 files changed, 189 insertions, 90 deletions
| diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h index 8b1c9b3a72..b24a3067b8 100644 --- a/src/gallium/drivers/nv50/nv50_pc.h +++ b/src/gallium/drivers/nv50/nv50_pc.h @@ -47,7 +47,7 @@  #define NV_OP_SHL       17  #define NV_OP_SHR       18  #define NV_OP_RCP       19 -/* gap */ +#define NV_OP_UNDEF     20  #define NV_OP_RSQ       21  #define NV_OP_LG2       22  #define NV_OP_SIN       23 @@ -360,6 +360,12 @@ new_value(struct nv_pc *pc, ubyte file, ubyte type)     return value;  } +static INLINE struct nv_value * +new_value_like(struct nv_pc *pc, struct nv_value *like) +{ +   return new_value(pc, like->reg.file, like->reg.type); +} +  static INLINE struct nv_ref *  new_ref(struct nv_pc *pc, struct nv_value *val)  { diff --git a/src/gallium/drivers/nv50/nv50_pc_emit.c b/src/gallium/drivers/nv50/nv50_pc_emit.c index 35bd5ff10f..fe44b327ab 100644 --- a/src/gallium/drivers/nv50/nv50_pc_emit.c +++ b/src/gallium/drivers/nv50/nv50_pc_emit.c @@ -1130,6 +1130,7 @@ nv50_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)        pc->emit[1] = 0xe0000000;        break;     case NV_OP_PHI: +   case NV_OP_UNDEF:     case NV_OP_SUB:        NOUVEAU_ERR("operation \"%s\" should have been eliminated\n",  		  nv_opcode_name(i->opcode)); diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 64ffeaf430..daf63a3d20 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -89,7 +89,7 @@ inst_cullable(struct nv_instruction *nvi)  static INLINE boolean  nvi_isnop(struct nv_instruction *nvi)  { -   if (nvi->opcode == NV_OP_EXPORT) +   if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF)        return TRUE;     if (nvi->fixed || @@ -849,7 +849,7 @@ nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)     int j;     struct nv_instruction *nvi, *next; -   for (nvi = b->entry; nvi; nvi = next) { +   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {        next = nvi->next;        if (inst_cullable(nvi)) { diff --git a/src/gallium/drivers/nv50/nv50_pc_print.c b/src/gallium/drivers/nv50/nv50_pc_print.c index c812dbd066..a4f567bde4 100644 --- a/src/gallium/drivers/nv50/nv50_pc_print.c +++ b/src/gallium/drivers/nv50/nv50_pc_print.c @@ -59,7 +59,7 @@ static const char *nv_opcode_names[NV_OP_COUNT + 1] = {     "shl",     "shr",     "rcp", -   "(undefined)", +   "undef",     "rsqrt",     "lg2",     "sin", diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index e0c06c29ba..78137d6940 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -253,14 +253,23 @@ nv50_screen_relocs(struct nv50_screen *screen)  	}  } +#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS +# define NOUVEAU_GETPARAM_GRAPH_UNITS 13 +#endif + +extern int nouveau_device_get_param(struct nouveau_device *dev, +                                    uint64_t param, uint64_t *value); +  struct pipe_screen *  nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)  {  	struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);  	struct nouveau_channel *chan;  	struct pipe_screen *pscreen; +	uint64_t value;  	unsigned chipset = dev->chipset;  	unsigned tesla_class = 0; +	unsigned stack_size;  	int ret, i;  	const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; @@ -478,6 +487,24 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)  	OUT_RING  (chan, 0x121 | (NV50_CB_PGP << 12));  	OUT_RING  (chan, 0x131 | (NV50_CB_PFP << 12)); +	/* shader stack */ +	nouveau_device_get_param(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); + +	stack_size  = util_bitcount(value & 0xffff); +	stack_size *= util_bitcount((value >> 24) & 0xf); +	stack_size *= 32 * 64 * 8; + +	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, +			     stack_size, &screen->stack_bo); +	if (ret) { +		nv50_screen_destroy(pscreen); +		return NULL; +	} +	BEGIN_RING(chan, screen->tesla, NV50TCL_STACK_ADDRESS_HIGH, 3); +	OUT_RELOCh(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	OUT_RELOCl(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	OUT_RING  (chan, 4); +  	/* Vertex array limits - max them out */  	for (i = 0; i < 16; i++) {  		BEGIN_RING(chan, screen->tesla, diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index a491ba31b2..1517f5608f 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -22,11 +22,11 @@ struct nv50_screen {  	struct nouveau_resource *immd_heap; -	struct pipe_resource *strm_vbuf[16]; -  	struct nouveau_bo *tic;  	struct nouveau_bo *tsc; +	struct nouveau_bo *stack_bo; +  	boolean force_push;  }; diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index da33adcaa4..7e77ed6ef6 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -22,19 +22,6 @@  /* XXX: need to clean this up so we get the typecasting right more naturally */ -/* LOOP FIXME 1 - * In bld_store_loop_var, only replace values that belong to the TGSI register - * written. - * For TGSI MOV, we only associate the source value with the value tracker of - * the destination, instead of generating an actual MOV. - * - * Possible solution: generate PHI functions in loop headers in advance. - */ -/* LOOP FIXME 2: - * In fetch_by_bb, when going back through a break-block, we miss all of the - * definitions from inside the loop. - */ -  #include <unistd.h>  #include "nv50_context.h" @@ -78,6 +65,24 @@ bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)     stk->body[stk->size++] = val;  } +static INLINE boolean +bld_vals_del_val(struct bld_value_stack *stk, struct nv_value *val) +{ +   unsigned i; + +   for (i = stk->size - 1; i >= 0; --i) +      if (stk->body[i] == val) +         break; +   if (i < 0) +      return FALSE; + +   if (i != stk->size - 1) +      stk->body[i] = stk->body[stk->size - 1]; + +   --stk->size; /* XXX: old size in REALLOC */ +   return TRUE; +} +  static INLINE void  bld_vals_push(struct bld_value_stack *stk)  { @@ -118,7 +123,7 @@ struct bld_context {     struct bld_value_stack pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */     struct bld_value_stack ovs[PIPE_MAX_SHADER_OUTPUTS][4]; -   uint32_t outputs_written[PIPE_MAX_SHADER_OUTPUTS / 32]; +   uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 31) / 32];     struct nv_value *frgcrd[4];     struct nv_value *sysval[4]; @@ -130,6 +135,21 @@ struct bld_context {     uint num_immds;  }; +static INLINE ubyte +bld_stack_file(struct bld_context *bld, struct bld_value_stack *stk) +{ +   if (stk < &bld->avs[0][0]) +      return NV_FILE_GPR; +   else +   if (stk < &bld->pvs[0][0]) +      return NV_FILE_ADDR; +   else +   if (stk < &bld->ovs[0][0]) +      return NV_FILE_FLAGS; +   else +      return NV_FILE_OUT; +} +  static INLINE struct nv_value *  bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)  { @@ -138,16 +158,29 @@ bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)     return stk[i * 4 + c].top;  } -static void -bld_store_loop_var(struct bld_context *, struct bld_value_stack *); +static struct nv_value * +bld_loop_phi(struct bld_context *, struct bld_value_stack *, struct nv_value *); +/* If a variable is defined in a loop without prior use, we don't need + * a phi in the loop header to account for backwards flow. + * + * However, if this variable is then also used outside the loop, we do + * need a phi after all. But we must not use this phi's def inside the + * loop, so we can eliminate the phi if it is unused later. + */  static INLINE void  bld_store(struct bld_context *bld, struct bld_value_stack *stk, int i, int c,            struct nv_value *val)  { -   bld_store_loop_var(bld, &stk[i * 4 + c]); +   const uint16_t m = 1 << bld->loop_lvl; + +   stk = &stk[i * 4 + c]; -   stk[i * 4 + c].top = val; +   if (bld->loop_lvl && !(m & (stk->loop_def | stk->loop_use))) +      bld_loop_phi(bld, stk, val); + +   stk->top = val; +   stk->loop_def |= 1 << bld->loop_lvl;  }  static INLINE void @@ -182,6 +215,9 @@ bld_warn_uninitialized(struct bld_context *bld, int kind,     long i = (stk - &bld->tvs[0][0]) / 4;     long c = (stk - &bld->tvs[0][0]) & 3; +   if (c == 3) +      c = -1; +     debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",                  i, (int)('x' + c), kind ? "may be" : "is", b->id);  } @@ -237,6 +273,14 @@ fetch_by_bb(struct bld_value_stack *stack,  static INLINE struct nv_value *  bld_load_imm_u32(struct bld_context *bld, uint32_t u); +static INLINE struct nv_value * +bld_undef(struct bld_context *bld, ubyte file) +{ +   struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF); + +   return bld_def(nvi, 0, new_value(bld->pc, file, NV_TYPE_U32)); +} +  static struct nv_value *  bld_phi(struct bld_context *bld, struct nv_basic_block *b,          struct bld_value_stack *stack) @@ -267,21 +311,19 @@ bld_phi(struct bld_context *bld, struct nv_basic_block *b,              if (in->num_in == 1) {                 in = in->in[0];              } else { -               if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b)) { +               if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b))                    in = in->in[0]; -                  break; -               } -               if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b)) { +               else +               if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b))                    in = in->in[1]; -                  break; -               } -               in = in->in[0]; +               else +                  in = in->in[0];              }           }           bld->pc->current_block = in;           /* should make this a no-op */ -         bld_vals_push_val(stack, bld_load_imm_u32(bld, 0)); +         bld_vals_push_val(stack, bld_undef(bld, vals[0]->reg.file));           continue;        } @@ -318,10 +360,55 @@ bld_phi(struct bld_context *bld, struct nv_basic_block *b,     return phi->def[0];  } +static struct nv_value * +bld_loop_phi(struct bld_context *bld, struct bld_value_stack *stack, +             struct nv_value *def) +{ +   struct nv_basic_block *bb = bld->pc->current_block; +   struct nv_instruction *phi; +   struct nv_value *val; + +   val = bld_phi(bld, bld->pc->current_block, stack); +   if (!val) { +      bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0]; + +      val = bld_undef(bld, bld_stack_file(bld, stack)); +   } + +   bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]; + +   phi = new_instruction(bld->pc, NV_OP_PHI); + +   bld_def(phi, 0, new_value_like(bld->pc, val)); +   if (!def) +      def = phi->def[0]; + +   bld_vals_push_val(stack, phi->def[0]); + +   phi->target = (struct nv_basic_block *)stack; /* cheat */ + +   nv_reference(bld->pc, &phi->src[0], val); +   nv_reference(bld->pc, &phi->src[1], def); + +   bld->pc->current_block = bb; + +   return phi->def[0]; +} +  static INLINE struct nv_value *  bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)  { -   stack->loop_use |= 1 << bld->loop_lvl; +   const uint16_t m = 1 << bld->loop_lvl; +   const uint16_t use = stack->loop_use; + +   stack->loop_use |= m; + +   /* If neither used nor def'd inside the loop, build a phi in foresight, +    * so we don't have to replace stuff later on, which requires tracking. +    */ +   if (bld->loop_lvl && !((use | stack->loop_def) & m)) +      return bld_loop_phi(bld, stack, NULL); +     return bld_phi(bld, bld->pc->current_block, stack);  } @@ -347,72 +434,50 @@ static void  bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,                    struct nv_value *); -/* When setting a variable inside a loop, and we have used it before in the - * loop, we need to insert a phi function in the loop header. +/* Replace the source of the phi in the loop header by the last assignment, + * or eliminate the phi function if there is no assignment inside the loop. + * + * Redundancy situation 1 - (used) but (not redefined) value: + *  %3 = phi %0, %3 = %3 is used + *  %3 = phi %0, %4 = is new definition + * + * Redundancy situation 2 - (not used) but (redefined) value: + *  %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE   */  static void -bld_store_loop_var(struct bld_context *bld, struct bld_value_stack *stk) +bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)  { -   struct nv_basic_block *bb; -   struct nv_instruction *phi; +   struct nv_instruction *phi, *next;     struct nv_value *val; -   int ll; -   uint16_t loop_def = stk->loop_def; - -   if (!(ll = bld->loop_lvl)) -      return; -   stk->loop_def |= 1 << ll; - -   if ((~stk->loop_use | loop_def) & (1 << ll)) -      return; - -#if 0 -   debug_printf("TEMP[%li].%c used before loop redef (def=%x/use=%x)\n", -                (stk - &bld->tvs[0][0]) / 4, -                (int)('x' + ((stk - &bld->tvs[0][0]) & 3)), -                loop_def, stk->loop_use); -#endif +   struct bld_value_stack *stk; +   int s; -   stk->loop_def |= 1 << ll; +   for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) { +      next = phi->next; -   assert(bld->loop_bb[ll - 1]->num_in == 1); - -   /* get last assignment from outside this loop, could be from bld_phi */ -   val = stk->body[stk->size - 1]; - -   /* create the phi in the loop entry block */ - -   bb = bld->pc->current_block; -   bld->pc->current_block = bld->loop_bb[ll - 1]; - -   phi = new_instruction(bld->pc, NV_OP_PHI); +      stk = (struct bld_value_stack *)phi->target; +      phi->target = NULL; -   bld_def(phi, 0, new_value(bld->pc, val->reg.file, val->reg.type)); +      val = bld_fetch_global(bld, stk); -   bld->pc->pass_seq++; -   bld_replace_value(bld->pc, bld->loop_bb[ll - 1], val, phi->def[0]); +      nv_reference(bld->pc, &phi->src[1], val); -   assert(!stk->top); -   bld_vals_push_val(stk, phi->def[0]); +      s = -1; +      if (phi->src[0]->value == phi->def[0] || +          phi->src[0]->value == phi->src[1]->value) +         s = 1; +      else +      if (phi->src[1]->value == phi->def[0]) +         s = 0; -   phi->target = (struct nv_basic_block *)stk; /* cheat */ +      if (s >= 0) { +         bld_vals_del_val(stk, phi->def[0]); -   nv_reference(bld->pc, &phi->src[0], val); -   nv_reference(bld->pc, &phi->src[1], phi->def[0]); +         ++bld->pc->pass_seq; +         bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value); -   bld->pc->current_block = bb; -} - -static void -bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) -{ -   struct nv_instruction *phi; -   struct nv_value *val; - -   for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = phi->next) { -      val = bld_fetch_global(bld, (struct bld_value_stack *)phi->target); -      nv_reference(bld->pc, &phi->src[1], val); -      phi->target = NULL; +         nv_nvi_delete(phi); +      }     }  } @@ -437,7 +502,7 @@ bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)  static struct nv_value *  bld_insn_2(struct bld_context *bld, uint opcode, -	      struct nv_value *src0, struct nv_value *src1) +           struct nv_value *src0, struct nv_value *src1)  {     struct nv_instruction *insn = new_instruction(bld->pc, opcode); @@ -449,8 +514,8 @@ bld_insn_2(struct bld_context *bld, uint opcode,  static struct nv_value *  bld_insn_3(struct bld_context *bld, uint opcode, -              struct nv_value *src0, struct nv_value *src1, -              struct nv_value *src2) +           struct nv_value *src0, struct nv_value *src1, +           struct nv_value *src2)  {     struct nv_instruction *insn = new_instruction(bld->pc, opcode); | 
