From fe2d4a5ea02df38c9940a726aa04bcf550fab1da Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 22 Aug 2010 01:33:57 -0700 Subject: i965: Add support for POW in gen6 FS. Fixes glsl-algebraic-pow-2 in brw_wm_glsl.c mode. --- src/mesa/drivers/dri/i965/brw_eu.h | 6 ++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 23 +++++++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 116 +++++++++++++++++++++++++------- 3 files changed, 120 insertions(+), 25 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index c63db16460..c0deb238c2 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -885,6 +885,12 @@ void brw_math( struct brw_compile *p, GLuint data_type, GLuint precision ); +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + GLuint function, + struct brw_reg src0, + struct brw_reg src1); + void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, GLuint scratch_offset ); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index ddd3a94eb0..9c320c613f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1131,6 +1131,29 @@ void brw_math( struct brw_compile *p, } } +/** Extended math function, float[8]. + */ +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + GLuint function, + struct brw_reg src0, + struct brw_reg src1) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); + + assert(intel->gen >= 6); + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); +} + /** * Extended math function, float[16]. * Use 2 send instructions. diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 86ec96368c..9be3bfbbfe 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -918,10 +918,8 @@ void emit_math2(struct brw_wm_compile *c, const struct brw_reg *arg1) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - GLuint saturate = ((mask & SATURATE) ? - BRW_MATH_SATURATE_SATURATE : - BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ @@ -930,35 +928,103 @@ void emit_math2(struct brw_wm_compile *c, brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(3), arg1[0]); - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); - } + /* math can only operate on up to a vec8 at a time, so in + * dispatch_width==16 we have to do the second half manually. + */ + if (intel->gen >= 6) { + struct brw_reg src0 = arg0[0]; + struct brw_reg src1 = arg1[0]; + struct brw_reg temp_dst = dst[dst_chan]; + + if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + /* Both scalar arguments. Do scalar calc. */ + src0.hstride = BRW_HORIZONTAL_STRIDE_1; + src1.hstride = BRW_HORIZONTAL_STRIDE_1; + temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1; + temp_dst.width = BRW_WIDTH_1; + + if (arg0[0].subnr != 0) { + brw_MOV(p, temp_dst, src0); + src0 = temp_dst; + + /* Ouch. We've used the temp as a dst, and we still + * need a temp to store arg1 in, because src and dst + * offsets have to be equal. Leaving this up to + * glsl2-965 to handle correctly. + */ + assert(arg1[0].subnr == 0); + } else if (arg1[0].subnr != 0) { + brw_MOV(p, temp_dst, src1); + src1 = temp_dst; + } + } else { + brw_MOV(p, temp_dst, src0); + src0 = temp_dst; + } + } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + brw_MOV(p, temp_dst, src1); + src1 = temp_dst; + } - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_math(p, - dst[dst_chan], - function, - saturate, - 2, - arg0[0], - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math2(p, + temp_dst, + function, + src0, + src1); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math2(p, + sechalf(temp_dst), + function, + sechalf(src0), + sechalf(src1)); + } - /* Send two messages to perform all 16 operations: - */ - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + /* Splat a scalar result into all the channels. */ + if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 && + arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0; + temp_dst.vstride = BRW_VERTICAL_STRIDE_0; + brw_MOV(p, dst[dst_chan], temp_dst); + } + } else { + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, brw_message_reg(3), arg1[0]); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + } + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, - offset(dst[dst_chan],1), + dst[dst_chan], function, saturate, - 4, - sechalf(arg0[0]), + 2, + arg0[0], BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); + + /* Send two messages to perform all 16 operations: + */ + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), + function, + saturate, + 4, + sechalf(arg0[0]), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } } brw_pop_insn_state(p); } -- cgit v1.2.3