summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_vs_emit.c
diff options
context:
space:
mode:
authorBrian Paul <brianp@vmware.com>2009-04-14 11:08:42 -0600
committerBrian Paul <brianp@vmware.com>2009-04-14 11:08:42 -0600
commitcafea7528052624c8d3e4cd1c5b26a61bf04d1d0 (patch)
tree114d80c8261fc80eb615c70095d76f8f84d53baa /src/mesa/drivers/dri/i965/brw_vs_emit.c
parent43c7ffaea635f949fd4803c4f594cf53e4b98f24 (diff)
i965: checkpoint commit: VS constant buffers
Hook up a constant buffer, binding table, etc for the VS unit. This will allow using large constant buffers with vertex shaders. The new code is disabled at this time (use_const_buffer=FALSE).
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vs_emit.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c229
1 files changed, 187 insertions, 42 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 0d6c6ab9a8..d21f2792af 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -38,8 +38,31 @@
#include "brw_vs.h"
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+ struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
-/* Do things as simply as possible. Allocate and populate all regs
+ return tmp;
+}
+
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+static void release_tmps( struct brw_vs_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+/**
+ * Preallocate GRF register before code emit.
+ * Do things as simply as possible. Allocate and populate all regs
* ahead of time.
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
@@ -47,6 +70,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
GLuint i, reg = 0, mrf;
GLuint nr_params;
+#if 0
+ if (c->vp->program.Base.Parameters->NumParameters >= 6)
+ c->use_const_buffer = 1;
+ else
+#endif
+ c->use_const_buffer = GL_FALSE;
+ /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/
+
/* r0 -- reserved as usual
*/
c->r0 = brw_vec8_grf(reg, 0);
@@ -66,13 +97,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* Vertex program parameters from curbe:
*/
- nr_params = c->vp->program.Base.Parameters->NumParameters;
- for (i = 0; i < nr_params; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
- }
- reg += (nr_params + 1) / 2;
-
- c->prog_data.curb_read_length = reg - 1;
+ if (c->use_const_buffer) {
+ /* get constants from a real constant buffer */
+ c->prog_data.curb_read_length = 0;
+ }
+ else {
+ /* use a section of the GRF for constants */
+ nr_params = c->vp->program.Base.Parameters->NumParameters;
+ for (i = 0; i < nr_params; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+ }
+ reg += (nr_params + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+ }
/* Allocate input regs:
*/
@@ -157,6 +194,13 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4;
c->prog_data.total_grf = reg;
+ if (c->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = get_tmp(c);
+ }
+ }
+
if (INTEL_DEBUG & DEBUG_VS) {
_mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
_mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
@@ -165,28 +209,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
-static struct brw_reg get_tmp( struct brw_vs_compile *c )
-{
- struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
-
- if (++c->last_tmp > c->prog_data.total_grf)
- c->prog_data.total_grf = c->last_tmp;
-
- return tmp;
-}
-
-static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
-{
- if (tmp.nr == c->last_tmp-1)
- c->last_tmp--;
-}
-
-static void release_tmps( struct brw_vs_compile *c )
-{
- c->last_tmp = c->first_tmp;
-}
-
-
/**
* If an instruction uses a temp reg both as a src and the dest, we
* sometimes need to allocate an intermediate temporary.
@@ -673,13 +695,59 @@ static void emit_nrm( struct brw_vs_compile *c,
}
+static struct brw_reg
+get_constant(struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex)
+{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
+ struct brw_compile *p = &c->func;
+ struct brw_reg const_reg;
+
+ if (c->current_const[argIndex].index != src->Index) {
+ struct brw_reg src_reg = get_tmp(c);
+ struct brw_reg t = get_tmp(c);
+
+ c->current_const[argIndex].index = src->Index;
+
+ brw_MOV(p, t, brw_vec8_grf(0, 0));/*SAVE*/
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+
+ /* need to fetch the constant now */
+ brw_dp_READ_4_vs(p,
+ c->current_const[argIndex].reg, /* writeback dest */
+ src_reg, /* src reg */
+ 1, /* msg_reg */
+ src->RelAddr, /* relative indexing? */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+ );
+
+ brw_MOV(p, brw_vec8_grf(0, 0), t);/*RESTORE*/
+ release_tmp(c, src_reg);
+ release_tmp(c, t);
+ }
+
+ /* replicate lower four floats into upper four floats (to get XYZWXYZW) */
+ const_reg = c->current_const[argIndex].reg;
+ const_reg = stride(const_reg, 0, 4, 0);
+ const_reg.subnr = 0;
+
+ return const_reg;
+}
+
+
+
/* TODO: relative addressing!
*/
static struct brw_reg get_reg( struct brw_vs_compile *c,
gl_register_file file,
GLuint index )
{
-
switch (file) {
case PROGRAM_TEMPORARY:
case PROGRAM_INPUT:
@@ -708,13 +776,63 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
}
+/**
+ * Get brw reg corresponding to the instruction's [argIndex] src reg.
+ * TODO: relative addressing!
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex )
+{
+ const GLuint file = inst->SrcReg[argIndex].File;
+ const GLint index = inst->SrcReg[argIndex].Index;
+
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_UNIFORM:
+ if (c->use_const_buffer) {
+ return get_constant(c, inst, argIndex);
+ }
+ else {
+ assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][index];
+ }
+ case PROGRAM_ADDRESS:
+ assert(index == 0);
+ return c->regs[file][index];
+
+ case PROGRAM_UNDEFINED:
+ /* this is a normal case since we loop over all three src args */
+ return brw_null_reg();
+
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_WRITE_ONLY:
+ default:
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
+/**
+ * Indirect addressing: get reg[[arg] + offset].
+ */
static struct brw_reg deref( struct brw_vs_compile *c,
struct brw_reg arg,
GLint offset)
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = vec4(get_tmp(c));
- struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
struct brw_reg indirect = brw_vec4_indirect(0,0);
@@ -758,22 +876,29 @@ static void emit_arl( struct brw_vs_compile *c,
}
-/* Will return mangled results for SWZ op. The emit_swz() function
+/**
+ * Return the brw reg for the given instruction's src argument.
+ * Will return mangled results for SWZ op. The emit_swz() function
* ignores this result and recalculates taking extended swizzles into
* account.
*/
static struct brw_reg get_arg( struct brw_vs_compile *c,
- struct prog_src_register *src )
+ const struct prog_instruction *inst,
+ GLuint argIndex )
{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_reg reg;
if (src->File == PROGRAM_UNDEFINED)
return brw_null_reg();
- if (src->RelAddr)
+ if (src->RelAddr) {
+ /* XXX fix */
reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
- else
- reg = get_reg(c, src->File, src->Index);
+ }
+ else {
+ reg = get_src_reg(c, inst, argIndex);
+ }
/* Convert 3-bit swizzle to 2-bit.
*/
@@ -790,10 +915,28 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
}
+/**
+ * Get brw register for the given program dest register.
+ */
static struct brw_reg get_dst( struct brw_vs_compile *c,
struct prog_dst_register dst )
{
- struct brw_reg reg = get_reg(c, dst.File, dst.Index);
+ struct brw_reg reg;
+
+ switch (dst.File) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_OUTPUT:
+ assert(c->regs[dst.File][dst.Index].nr != 0);
+ reg = c->regs[dst.File][dst.Index];
+ break;
+ case PROGRAM_UNDEFINED:
+ /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
+ reg = brw_null_reg();
+ break;
+ default:
+ assert(0);
+ reg = brw_null_reg();
+ }
reg.dw1.bits.writemask = dst.WriteMask;
@@ -803,8 +946,10 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
static void emit_swz( struct brw_vs_compile *c,
struct brw_reg dst,
- struct prog_src_register src )
+ const struct prog_instruction *inst)
{
+ const GLuint argIndex = 0;
+ const struct prog_src_register src = inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
GLuint zeros_mask = 0;
GLuint ones_mask = 0;
@@ -847,7 +992,7 @@ static void emit_swz( struct brw_vs_compile *c,
if (src.RelAddr)
arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
else
- arg0 = get_reg(c, src.File, src.Index);
+ arg0 = get_src_reg(c, inst, argIndex);
arg0 = brw_swizzle(arg0,
src_swz[0], src_swz[1],
@@ -1053,7 +1198,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
args[i] = c->output_regs[index].reg;
else
- args[i] = get_arg(c, src);
+ args[i] = get_arg(c, inst, i);
}
/* Get dest regs. Note that it is possible for a reg to be both
@@ -1181,7 +1326,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
/* The args[0] value can't be used here as it won't have
* correctly encoded the full swizzle:
*/
- emit_swz(c, dst, inst->SrcReg[0] );
+ emit_swz(c, dst, inst);
break;
case OPCODE_TRUNC:
/* round toward zero */