diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_curbe.c | 22 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs.c | 4 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_emit.c | 133 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/gen6_vs_state.c | 24 | 
4 files changed, 93 insertions, 90 deletions
| diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 7b823eb201..877b22fec1 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -242,21 +242,13 @@ static void prepare_constant_buffer(struct brw_context *brw)        GLuint offset = brw->curbe.vs_start * 16;        GLuint nr = brw->vs.prog_data->nr_params / 4; -      if (vp->use_const_buffer) { -	 /* Load the subset of push constants that will get used when -	  * we also have a pull constant buffer. -	  */ -	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { -	    if (brw->vs.constant_map[i] != -1) { -	       assert(brw->vs.constant_map[i] <= nr); -	       memcpy(buf + offset + brw->vs.constant_map[i] * 4, -		      vp->program.Base.Parameters->ParameterValues[i], -		      4 * sizeof(float)); -	    } -	 } -      } else { -	 for (i = 0; i < nr; i++) { -	    memcpy(buf + offset + i * 4, +      /* Load the subset of push constants that will get used when +       * we also have a pull constant buffer. 
+       */ +      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { +	 if (brw->vs.constant_map[i] != -1) { +	    assert(brw->vs.constant_map[i] <= nr); +	    memcpy(buf + offset + brw->vs.constant_map[i] * 4,  		   vp->program.Base.Parameters->ParameterValues[i],  		   4 * sizeof(float));  	 } diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 4a41c7a517..59f270d675 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -99,8 +99,8 @@ static void do_vs_prog( struct brw_context *brw,     (void) ctx;     aux_size = sizeof(c.prog_data); -   if (c.vp->use_const_buffer) -      aux_size += c.vp->program.Base.Parameters->NumParameters; +   /* constant_map */ +   aux_size += c.vp->program.Base.Parameters->NumParameters;     drm_intel_bo_unreference(brw->vs.prog_bo);     brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 184452666a..e1a3f33393 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -143,6 +143,8 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )     GLuint i, reg = 0, mrf;     int attributes_in_vue;     int first_reladdr_output; +   int max_constant; +   int constant = 0;     /* Determine whether to use a real constant buffer or use a block      * of GRF registers for constants.  The later is faster but only @@ -181,62 +183,81 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )     } -   /* Vertex program parameters from curbe: +   /* Assign some (probably all) of the vertex program constants to +    * the push constant buffer/CURBE. 
+    * +    * There's an obvious limit to the number of push constants equal to +    * the number of registers available, and that number is smaller +    * than the minimum maximum number of vertex program parameters, so +    * support for pull constants is required if we overflow. +    * Additionally, on gen6 the number of push constants is even +    * lower. +    * +    * When there's relative addressing, we don't know what range of +    * Mesa IR registers can be accessed.  And generally, when relative +    * addressing is used we also have too many constants to load them +    * all as push constants.  So, we'll just support relative +    * addressing out of the pull constant buffers, and try to load as +    * many statically-accessed constants into the push constant buffer +    * as we can.      */ -   if (c->vp->use_const_buffer) { -      int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; -      int constant = 0; +   if (intel->gen >= 6) { +      /* We can only load 32 regs of push constants. */ +      max_constant = 32 * 2 - c->key.nr_userclip; +   } else { +      max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; +   } -      /* We've got more constants than we can load with the push -       * mechanism.  This is often correlated with reladdr loads where -       * we should probably be using a pull mechanism anyway to avoid -       * excessive reading.  However, the pull mechanism is slow in -       * general.  So, we try to allocate as many non-reladdr-loaded -       * constants through the push buffer as we can before giving up. 
-       */ -      memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); -      for (i = 0; -	   i < c->vp->program.Base.NumInstructions && constant < max_constant; -	   i++) { -	 struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; -	 int arg; +   /* constant_map maps from ParameterValues[] index to index in the +    * push constant buffer, or -1 if it's only in the pull constant +    * buffer. +    */ +   memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); +   for (i = 0; +	i < c->vp->program.Base.NumInstructions && constant < max_constant; +	i++) { +      struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; +      int arg; -	 for (arg = 0; arg < 3 && constant < max_constant; arg++) { -	    if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR && -		 inst->SrcReg[arg].File != PROGRAM_CONSTANT && -		 inst->SrcReg[arg].File != PROGRAM_UNIFORM && -		 inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && -		 inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) || -		inst->SrcReg[arg].RelAddr) -	       continue; +      for (arg = 0; arg < 3 && constant < max_constant; arg++) { +	 if (inst->SrcReg[arg].File != PROGRAM_STATE_VAR && +	     inst->SrcReg[arg].File != PROGRAM_CONSTANT && +	     inst->SrcReg[arg].File != PROGRAM_UNIFORM && +	     inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && +	     inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) { +	    continue; +	 } -	    if (c->constant_map[inst->SrcReg[arg].Index] == -1) { -	       c->constant_map[inst->SrcReg[arg].Index] = constant++; -	    } +	 if (inst->SrcReg[arg].RelAddr) { +	    c->vp->use_const_buffer = GL_TRUE; +	    continue;  	 } -      } -      for (i = 0; i < constant; i++) { -         c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, -							      (i%2) * 4), -						 0, 4, 1); +	 if (c->constant_map[inst->SrcReg[arg].Index] == -1) { +	    c->constant_map[inst->SrcReg[arg].Index] = constant++; +	 }        } -      reg += (constant + 1) / 
2; -      c->prog_data.curb_read_length = reg - 1; -      /* XXX 0 causes a bug elsewhere... */ -      c->prog_data.nr_params = MAX2(constant * 4, 4);     } -   else { -      /* use a section of the GRF for constants */ -      GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; -      for (i = 0; i < nr_params; i++) { -         c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); -      } -      reg += (nr_params + 1) / 2; -      c->prog_data.curb_read_length = reg - 1; -      c->prog_data.nr_params = nr_params * 4; +   /* If we ran out of push constant space, then we'll also upload all +    * constants through the pull constant buffer so that they can be +    * accessed no matter what.  For relative addressing (the common +    * case) we need them all in place anyway. +    */ +   if (constant == max_constant) +      c->vp->use_const_buffer = GL_TRUE; + +   for (i = 0; i < constant; i++) { +      c->regs[PROGRAM_STATE_VAR][i] = stride(brw_vec4_grf(reg + i / 2, +							  (i % 2) * 4), +					     0, 4, 1);     } +   reg += (constant + 1) / 2; +   c->prog_data.curb_read_length = reg - 1; +   c->prog_data.nr_params = constant; +   /* XXX 0 causes a bug elsewhere... */ +   if (intel->gen < 6 && c->prog_data.nr_params == 0) +      c->prog_data.nr_params = 4;     /* Allocate input regs:        */ @@ -1302,22 +1323,18 @@ get_src_reg( struct brw_vs_compile *c,     case PROGRAM_UNIFORM:     case PROGRAM_ENV_PARAM:     case PROGRAM_LOCAL_PARAM: -      if (c->vp->use_const_buffer) { -	 if (!relAddr && c->constant_map[index] != -1) { -	    assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); -	    return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; -	 } else if (relAddr) +      if (!relAddr && c->constant_map[index] != -1) { +	 /* Take from the push constant buffer if possible. 
*/ +	 assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); +	 return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; +      } else { +	 /* Must be in the pull constant buffer then .*/ +	 assert(c->vp->use_const_buffer); +	 if (relAddr)  	    return get_reladdr_constant(c, inst, argIndex);  	 else  	    return get_constant(c, inst, argIndex);        } -      else if (relAddr) { -         return deref(c, c->regs[PROGRAM_STATE_VAR][0], index, 16); -      } -      else { -         assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); -         return c->regs[PROGRAM_STATE_VAR][index]; -      }     case PROGRAM_ADDRESS:        assert(index == 0);        return c->regs[file][index]; diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index e94d0c0ddb..4ef9e2e607 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -54,7 +54,7 @@ upload_vs_state(struct brw_context *brw)        OUT_BATCH(0);        ADVANCE_BATCH();     } else { -      int params_uploaded = 0; +      int params_uploaded = 0, param_regs;        float *param;        if (brw->vertex_program->IsNVProgram) @@ -88,20 +88,11 @@ upload_vs_state(struct brw_context *brw)  	 params_uploaded++;        } -      if (vp->use_const_buffer) { -	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { -	    if (brw->vs.constant_map[i] != -1) { -	       memcpy(param + brw->vs.constant_map[i] * 4, -		      vp->program.Base.Parameters->ParameterValues[i], -		      4 * sizeof(float)); -	       params_uploaded++; -	    } -	 } -      } else { -	 for (i = 0; i < nr_params; i++) { -	    memcpy(param, vp->program.Base.Parameters->ParameterValues[i], +      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { +	 if (brw->vs.constant_map[i] != -1) { +	    memcpy(param + brw->vs.constant_map[i] * 4, +		   vp->program.Base.Parameters->ParameterValues[i],  		   4 * sizeof(float)); -	    param += 4;  	
    params_uploaded++;  	 }        } @@ -117,13 +108,16 @@ upload_vs_state(struct brw_context *brw)        drm_intel_gem_bo_unmap_gtt(constant_bo); +      param_regs = (params_uploaded + 1) / 2; +      assert(param_regs <= 32); +        BEGIN_BATCH(5);        OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |  		GEN6_CONSTANT_BUFFER_0_ENABLE |  		(5 - 2));        OUT_RELOC(constant_bo,  		I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ -		ALIGN(params_uploaded, 2) / 2 - 1); +		param_regs - 1);        OUT_BATCH(0);        OUT_BATCH(0);        OUT_BATCH(0); | 
