summary refs log tree commit diff
diff options
context:
space:
mode:
author Brian Paul <brianp@vmware.com> 2009-04-15 16:49:18 -0600
committer Brian Paul <brianp@vmware.com> 2009-04-16 11:08:23 -0600
commit ee32e9b4753eca62e360f96ce61ef7ff683e6bb7 (patch)
tree dd44f98c9d30cd94f512f8b99391b18b288e7a15
parent 19ac3e2729abd85346f88fd69c6bc72938d26101 (diff)
i965: implement relative addressing for VS constant buffer reads
A scatter-read should be possible, but we're just using two READs for the time being.
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c 59
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_emit.c 114
3 files changed, 115 insertions, 59 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 66f8eb840c..896e67dbfe 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -868,6 +868,7 @@ void brw_dp_READ_4( struct brw_compile *p,
void brw_dp_READ_4_vs( struct brw_compile *p,
struct brw_reg dest,
GLboolean relAddr,
+ struct brw_reg addrReg,
GLuint location,
GLuint bind_table_index );
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index c731a93a8d..df2141660c 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1003,15 +1003,18 @@ void brw_dp_READ_4( struct brw_compile *p,
/**
- * Read float[4] constant from VS constant buffer.
+ * Read float[4] constant(s) from VS constant buffer.
+ * For relative addressing, two float[4] constants will be read into 'dest'.
+ * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
*/
void brw_dp_READ_4_vs(struct brw_compile *p,
struct brw_reg dest,
GLboolean relAddr,
+ struct brw_reg addrReg,
GLuint location,
GLuint bind_table_index)
{
- const GLuint msg_reg_nr = 1;
+ GLuint msg_reg_nr = 1;
/*
printf("vs const read msg, location %u, msg_reg_nr %d\n",
@@ -1034,7 +1037,12 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
b = brw_message_reg(msg_reg_nr);
b = retype(b, BRW_REGISTER_TYPE_UD);
/*b = get_element_ud(b, 2);*/
- brw_MOV(p, b, brw_imm_ud(location));
+ if (relAddr) {
+ brw_ADD(p, b, addrReg, brw_imm_ud(location));
+ }
+ else {
+ brw_MOV(p, b, brw_imm_ud(location));
+ }
brw_pop_insn_state(p);
}
@@ -1053,13 +1061,56 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
brw_set_dp_read_message(insn,
bind_table_index,
- 0, /* msg_control (0 means 1 Oword) */
+ 0, /* msg_control (0 means 1 Oword, lower half) */
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
0, /* source cache = data cache */
1, /* msg_length */
1, /* response_length (1 Oword) */
0); /* eot */
}
+
+ if (relAddr) {
+ /* second read to get second constant */
+ msg_reg_nr++;
+ {
+ /* Setup MRF[msg_reg_nr] (now MRF[2]) with location/offset into const buffer */
+ struct brw_reg b;
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ addrReg = suboffset(addrReg, 1); /* upper half of addrReg */
+ brw_ADD(p, b, addrReg, brw_imm_ud(location));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ /*insn->header.access_mode = BRW_ALIGN_16;*/
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(insn,
+ bind_table_index,
+ 1, /* msg_control (1 means 1 Oword, upper half) */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+ }
+ }
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 19ead73d8c..98fbdf5064 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -709,7 +709,7 @@ get_constant(struct brw_vs_compile *c,
assert(argIndex < 3);
- if (c->current_const[argIndex].index != src->Index) {
+ if (c->current_const[argIndex].index != src->Index || src->RelAddr) {
c->current_const[argIndex].index = src->Index;
@@ -722,15 +722,18 @@ get_constant(struct brw_vs_compile *c,
brw_dp_READ_4_vs(p,
c->current_const[argIndex].reg, /* writeback dest */
src->RelAddr, /* relative indexing? */
+ c->regs[PROGRAM_ADDRESS][0], /* address register */
16 * src->Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
);
}
- /* replicate lower four floats into upper four floats (to get XYZWXYZW) */
const_reg = c->current_const[argIndex].reg;
- const_reg = stride(const_reg, 0, 4, 0);
- const_reg.subnr = 0;
+ if (!src->RelAddr) {
+ /* replicate lower four floats into upper half (to get XYZWXYZW) */
+ const_reg = stride(const_reg, 0, 4, 0);
+ const_reg.subnr = 0;
+ }
return const_reg;
}
@@ -772,6 +775,42 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
/**
+ * Indirect addressing: get reg[[arg] + offset].
+ */
+static struct brw_reg deref( struct brw_vs_compile *c,
+ struct brw_reg arg,
+ GLint offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = vec4(get_tmp(c));
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
+ GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+ struct brw_reg indirect = brw_vec4_indirect(0,0);
+
+ {
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ /* This is pretty clunky - load the address register twice and
+ * fetch each 4-dword value in turn. There must be a way to do
+ * this in a single pass, but I couldn't get it to work.
+ */
+ brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+ brw_MOV(p, tmp, indirect);
+
+ brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
+ brw_MOV(p, suboffset(tmp, 4), indirect);
+
+ brw_pop_insn_state(p);
+ }
+
+ /* NOTE: tmp not released */
+ return vec8(tmp);
+}
+
+
+/**
* Get brw reg corresponding to the instruction's [argIndex] src reg.
* TODO: relative addressing!
*/
@@ -782,19 +821,29 @@ get_src_reg( struct brw_vs_compile *c,
{
const GLuint file = inst->SrcReg[argIndex].File;
const GLint index = inst->SrcReg[argIndex].Index;
+ const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
switch (file) {
case PROGRAM_TEMPORARY:
case PROGRAM_INPUT:
case PROGRAM_OUTPUT:
- assert(c->regs[file][index].nr != 0);
- return c->regs[file][index];
+ if (relAddr) {
+ return deref(c, c->regs[file][0], index);
+ }
+ else {
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ }
+
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT:
case PROGRAM_UNIFORM:
if (c->use_const_buffer) {
return get_constant(c, inst, argIndex);
}
+ else if (relAddr) {
+ return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+ }
else {
assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
return c->regs[PROGRAM_STATE_VAR][index];
@@ -817,42 +866,6 @@ get_src_reg( struct brw_vs_compile *c,
}
-/**
- * Indirect addressing: get reg[[arg] + offset].
- */
-static struct brw_reg deref( struct brw_vs_compile *c,
- struct brw_reg arg,
- GLint offset)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp = vec4(get_tmp(c));
- struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
- struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
- GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
- struct brw_reg indirect = brw_vec4_indirect(0,0);
-
- {
- brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
-
- /* This is pretty clunky - load the address register twice and
- * fetch each 4-dword value in turn. There must be a way to do
- * this in a single pass, but I couldn't get it to work.
- */
- brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
- brw_MOV(p, tmp, indirect);
-
- brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
- brw_MOV(p, suboffset(tmp, 4), indirect);
-
- brw_pop_insn_state(p);
- }
-
- /* NOTE: tmp not released */
- return vec8(tmp);
-}
-
-
static void emit_arl( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0 )
@@ -864,8 +877,8 @@ static void emit_arl( struct brw_vs_compile *c,
if (need_tmp)
tmp = get_tmp(c);
- brw_RNDD(p, tmp, arg0);
- brw_MUL(p, dst, tmp, brw_imm_d(16));
+ brw_RNDD(p, tmp, arg0); /* tmp = floor(arg0) (RNDD rounds down) */
+ brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */
if (need_tmp)
release_tmp(c, tmp);
@@ -888,13 +901,7 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
if (src->File == PROGRAM_UNDEFINED)
return brw_null_reg();
- if (src->RelAddr) {
- /* XXX fix */
- reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
- }
- else {
- reg = get_src_reg(c, inst, argIndex);
- }
+ reg = get_src_reg(c, inst, argIndex);
/* Convert 3-bit swizzle to 2-bit.
*/
@@ -989,10 +996,7 @@ static void emit_swz( struct brw_vs_compile *c,
if (src_mask) {
struct brw_reg arg0;
- if (src.RelAddr)
- arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
- else
- arg0 = get_src_reg(c, inst, argIndex);
+ arg0 = get_src_reg(c, inst, argIndex);
arg0 = brw_swizzle(arg0,
src_swz[0], src_swz[1],