From ee32e9b4753eca62e360f96ce61ef7ff683e6bb7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Apr 2009 16:49:18 -0600 Subject: i965: implement relative addressing for VS constant buffer reads A scatter-read should be possible, but we're just using two READs for the time being. --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 114 +++++++++++++++++--------------- 1 file changed, 59 insertions(+), 55 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_vs_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 19ead73d8c..98fbdf5064 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -709,7 +709,7 @@ get_constant(struct brw_vs_compile *c, assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index) { + if (c->current_const[argIndex].index != src->Index || src->RelAddr) { c->current_const[argIndex].index = src->Index; @@ -722,15 +722,18 @@ get_constant(struct brw_vs_compile *c, brw_dp_READ_4_vs(p, c->current_const[argIndex].reg, /* writeback dest */ src->RelAddr, /* relative indexing? */ + c->regs[PROGRAM_ADDRESS][0], /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); } - /* replicate lower four floats into upper four floats (to get XYZWXYZW) */ const_reg = c->current_const[argIndex].reg; - const_reg = stride(const_reg, 0, 4, 0); - const_reg.subnr = 0; + if (!src->RelAddr) { + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; + } return const_reg; } @@ -771,6 +774,42 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, } +/** + * Indirect addressing: get reg[[arg] + offset]. + */ +static struct brw_reg deref( struct brw_vs_compile *c, + struct brw_reg arg, + GLint offset) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = vec4(get_tmp(c)); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); + GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; + struct brw_reg indirect = brw_vec4_indirect(0,0); + + { + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + + /* This is pretty clunky - load the address register twice and + * fetch each 4-dword value in turn. There must be a way to do + * this in a single pass, but I couldn't get it to work. + */ + brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); + brw_MOV(p, tmp, indirect); + + brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); + brw_MOV(p, suboffset(tmp, 4), indirect); + + brw_pop_insn_state(p); + } + + /* NOTE: tmp not released */ + return vec8(tmp); +} + + /** * Get brw reg corresponding to the instruction's [argIndex] src reg. * TODO: relative addressing! @@ -782,19 +821,29 @@ get_src_reg( struct brw_vs_compile *c, { const GLuint file = inst->SrcReg[argIndex].File; const GLint index = inst->SrcReg[argIndex].Index; + const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr; switch (file) { case PROGRAM_TEMPORARY: case PROGRAM_INPUT: case PROGRAM_OUTPUT: - assert(c->regs[file][index].nr != 0); - return c->regs[file][index]; + if (relAddr) { + return deref(c, c->regs[file][0], index); + } + else { + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + } + case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: if (c->use_const_buffer) { return get_constant(c, inst, argIndex); } + else if (relAddr) { + return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); + } else { assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); return c->regs[PROGRAM_STATE_VAR][index]; @@ -817,42 +866,6 @@ get_src_reg( struct brw_vs_compile *c, } -/** - * Indirect addressing: get reg[[arg] + offset]. - */ -static struct brw_reg deref( struct brw_vs_compile *c, - struct brw_reg arg, - GLint offset) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; - struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); - GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; - struct brw_reg indirect = brw_vec4_indirect(0,0); - - { - brw_push_insn_state(p); - brw_set_access_mode(p, BRW_ALIGN_1); - - /* This is pretty clunky - load the address register twice and - * fetch each 4-dword value in turn. There must be a way to do - * this in a single pass, but I couldn't get it to work. - */ - brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); - brw_MOV(p, tmp, indirect); - - brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); - brw_MOV(p, suboffset(tmp, 4), indirect); - - brw_pop_insn_state(p); - } - - /* NOTE: tmp not released */ - return vec8(tmp); -} - - static void emit_arl( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ) @@ -864,8 +877,8 @@ static void emit_arl( struct brw_vs_compile *c, if (need_tmp) tmp = get_tmp(c); - brw_RNDD(p, tmp, arg0); - brw_MUL(p, dst, tmp, brw_imm_d(16)); + brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */ + brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */ if (need_tmp) release_tmp(c, tmp); @@ -888,13 +901,7 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src->RelAddr) { - /* XXX fix */ - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - } - else { - reg = get_src_reg(c, inst, argIndex); - } + reg = get_src_reg(c, inst, argIndex); /* Convert 3-bit swizzle to 2-bit. */ @@ -989,10 +996,7 @@ static void emit_swz( struct brw_vs_compile *c, if (src_mask) { struct brw_reg arg0; - if (src.RelAddr) - arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); - else - arg0 = get_src_reg(c, inst, argIndex); + arg0 = get_src_reg(c, inst, argIndex); arg0 = brw_swizzle(arg0, src_swz[0], src_swz[1], -- cgit v1.2.3