summary | refs | log | tree | commit | diff
path: root/src/mesa/drivers/dri/i965/brw_vs_emit.c
diff options
context:
space:
mode:
author: Brian Paul <brianp@vmware.com>, 2009-04-15 16:49:18 -0600
committer: Brian Paul <brianp@vmware.com>, 2009-04-16 11:08:23 -0600
commit: ee32e9b4753eca62e360f96ce61ef7ff683e6bb7 (patch)
tree: dd44f98c9d30cd94f512f8b99391b18b288e7a15 /src/mesa/drivers/dri/i965/brw_vs_emit.c
parent: 19ac3e2729abd85346f88fd69c6bc72938d26101 (diff)
i965: implement relative addressing for VS constant buffer reads
A scatter-read should be possible, but we're just using two READs for the time being.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vs_emit.c')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_emit.c | 114
1 files changed, 59 insertions, 55 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 19ead73d8c..98fbdf5064 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -709,7 +709,7 @@ get_constant(struct brw_vs_compile *c,
assert(argIndex < 3);
- if (c->current_const[argIndex].index != src->Index) {
+ if (c->current_const[argIndex].index != src->Index || src->RelAddr) {
c->current_const[argIndex].index = src->Index;
@@ -722,15 +722,18 @@ get_constant(struct brw_vs_compile *c,
brw_dp_READ_4_vs(p,
c->current_const[argIndex].reg, /* writeback dest */
src->RelAddr, /* relative indexing? */
+ c->regs[PROGRAM_ADDRESS][0], /* address register */
16 * src->Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
);
}
- /* replicate lower four floats into upper four floats (to get XYZWXYZW) */
const_reg = c->current_const[argIndex].reg;
- const_reg = stride(const_reg, 0, 4, 0);
- const_reg.subnr = 0;
+ if (!src->RelAddr) {
+ /* replicate lower four floats into upper half (to get XYZWXYZW) */
+ const_reg = stride(const_reg, 0, 4, 0);
+ const_reg.subnr = 0;
+ }
return const_reg;
}
@@ -772,6 +775,42 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
/**
+ * Indirect addressing: get reg[[arg] + offset].
+ */
+static struct brw_reg deref( struct brw_vs_compile *c,
+ struct brw_reg arg,
+ GLint offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = vec4(get_tmp(c));
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
+ GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+ struct brw_reg indirect = brw_vec4_indirect(0,0);
+
+ {
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ /* This is pretty clunky - load the address register twice and
+ * fetch each 4-dword value in turn. There must be a way to do
+ * this in a single pass, but I couldn't get it to work.
+ */
+ brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+ brw_MOV(p, tmp, indirect);
+
+ brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
+ brw_MOV(p, suboffset(tmp, 4), indirect);
+
+ brw_pop_insn_state(p);
+ }
+
+ /* NOTE: tmp not released */
+ return vec8(tmp);
+}
+
+
+/**
* Get brw reg corresponding to the instruction's [argIndex] src reg.
* TODO: relative addressing!
*/
@@ -782,19 +821,29 @@ get_src_reg( struct brw_vs_compile *c,
{
const GLuint file = inst->SrcReg[argIndex].File;
const GLint index = inst->SrcReg[argIndex].Index;
+ const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
switch (file) {
case PROGRAM_TEMPORARY:
case PROGRAM_INPUT:
case PROGRAM_OUTPUT:
- assert(c->regs[file][index].nr != 0);
- return c->regs[file][index];
+ if (relAddr) {
+ return deref(c, c->regs[file][0], index);
+ }
+ else {
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ }
+
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT:
case PROGRAM_UNIFORM:
if (c->use_const_buffer) {
return get_constant(c, inst, argIndex);
}
+ else if (relAddr) {
+ return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+ }
else {
assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
return c->regs[PROGRAM_STATE_VAR][index];
@@ -817,42 +866,6 @@ get_src_reg( struct brw_vs_compile *c,
}
-/**
- * Indirect addressing: get reg[[arg] + offset].
- */
-static struct brw_reg deref( struct brw_vs_compile *c,
- struct brw_reg arg,
- GLint offset)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp = vec4(get_tmp(c));
- struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
- struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
- GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
- struct brw_reg indirect = brw_vec4_indirect(0,0);
-
- {
- brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
-
- /* This is pretty clunky - load the address register twice and
- * fetch each 4-dword value in turn. There must be a way to do
- * this in a single pass, but I couldn't get it to work.
- */
- brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
- brw_MOV(p, tmp, indirect);
-
- brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
- brw_MOV(p, suboffset(tmp, 4), indirect);
-
- brw_pop_insn_state(p);
- }
-
- /* NOTE: tmp not released */
- return vec8(tmp);
-}
-
-
static void emit_arl( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0 )
@@ -864,8 +877,8 @@ static void emit_arl( struct brw_vs_compile *c,
if (need_tmp)
tmp = get_tmp(c);
- brw_RNDD(p, tmp, arg0);
- brw_MUL(p, dst, tmp, brw_imm_d(16));
+ brw_RNDD(p, tmp, arg0); /* tmp = floor(arg0) (RNDD rounds down) */
+ brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */
if (need_tmp)
release_tmp(c, tmp);
@@ -888,13 +901,7 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
if (src->File == PROGRAM_UNDEFINED)
return brw_null_reg();
- if (src->RelAddr) {
- /* XXX fix */
- reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
- }
- else {
- reg = get_src_reg(c, inst, argIndex);
- }
+ reg = get_src_reg(c, inst, argIndex);
/* Convert 3-bit swizzle to 2-bit.
*/
@@ -989,10 +996,7 @@ static void emit_swz( struct brw_vs_compile *c,
if (src_mask) {
struct brw_reg arg0;
- if (src.RelAddr)
- arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
- else
- arg0 = get_src_reg(c, inst, argIndex);
+ arg0 = get_src_reg(c, inst, argIndex);
arg0 = brw_swizzle(arg0,
src_swz[0], src_swz[1],