summary | refs | log | tree | commit | diff
path: root/src/mesa/drivers/dri/i965/brw_vs_emit.c
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2010-01-18 15:12:40 -0800
committer: Eric Anholt <eric@anholt.net> 2010-01-19 11:31:23 -0800
commit: fb4901593c9495714d3f54920a28c271852e2112 (patch)
tree: 697c6c525f9340f136459c93f06c823034fadc22 /src/mesa/drivers/dri/i965/brw_vs_emit.c
parent: 62a96f74c9a1fd07301d349e4181a7212fc7d45c (diff)
i965: Upload as many VS constants as possible through the push constants.
The pull constants require sending out to an overworked shared unit and waiting for a response, while push constants are nicely loaded in for us at thread dispatch time. By putting things we access in every VS invocation there, ETQW performance improved by 2.5% +/- 1.6% (n=6).
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vs_emit.c')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_emit.c | 49
1 files changed, 45 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 7a252dde6c..52cc04fee8 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -104,9 +104,47 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* Vertex program parameters from curbe:
*/
if (c->vp->use_const_buffer) {
- /* get constants from a real constant buffer */
- c->prog_data.curb_read_length = 0;
- c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+ int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
+ int constant = 0;
+
+ /* We've got more constants than we can load with the push
+ * mechanism. This is often correlated with reladdr loads where
+ * we should probably be using a pull mechanism anyway to avoid
+ * excessive reading. However, the pull mechanism is slow in
+ * general. So, we try to allocate as many non-reladdr-loaded
+ * constants through the push buffer as we can before giving up.
+ */
+ memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters);
+ for (i = 0;
+ i < c->vp->program.Base.NumInstructions && constant < max_constant;
+ i++) {
+ struct prog_instruction *inst = &c->vp->program.Base.Instructions[i];
+ int arg;
+
+ for (arg = 0; arg < 3 && constant < max_constant; arg++) {
+ if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR &&
+ inst->SrcReg[arg].File != PROGRAM_CONSTANT &&
+ inst->SrcReg[arg].File != PROGRAM_UNIFORM &&
+ inst->SrcReg[arg].File != PROGRAM_ENV_PARAM &&
+ inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) ||
+ inst->SrcReg[arg].RelAddr)
+ continue;
+
+ if (c->constant_map[inst->SrcReg[arg].Index] == -1) {
+ c->constant_map[inst->SrcReg[arg].Index] = constant++;
+ }
+ }
+ }
+
+ for (i = 0; i < constant; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2,
+ (i%2) * 4),
+ 0, 4, 1);
+ }
+ reg += (constant + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+ /* XXX 0 causes a bug elsewhere... */
+ c->prog_data.nr_params = MAX2(constant * 4, 4);
}
else {
/* use a section of the GRF for constants */
@@ -955,7 +993,10 @@ get_src_reg( struct brw_vs_compile *c,
case PROGRAM_ENV_PARAM:
case PROGRAM_LOCAL_PARAM:
if (c->vp->use_const_buffer) {
- if (relAddr)
+ if (!relAddr && c->constant_map[index] != -1) {
+ assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]];
+ } else if (relAddr)
return get_reladdr_constant(c, inst, argIndex);
else
return get_constant(c, inst, argIndex);