From 4e7d5d0e74c26cac182cea1be0f6b79bb664ad8c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 2 Jul 2010 16:17:50 -0700 Subject: i965: Add support for the DP2 opcode, which we use for dot(vec2, vec2). The original glsl compiler would generate a.x * b.x + a.y * b.y, which we would do mul+mul+add for instead of this mul+mac. Fixes glsl-fs-dot-vec2. --- src/mesa/drivers/dri/i965/brw_wm.h | 5 +++++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 25 +++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_wm_glsl.c | 3 +++ src/mesa/drivers/dri/i965/brw_wm_pass1.c | 5 +++++ 4 files changed, 38 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 938557ff36..197b875434 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -343,6 +343,11 @@ void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0); +void emit_dp2(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); void emit_dp3(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 11f482bddf..a90a2d3cf2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -731,6 +731,27 @@ void emit_min(struct brw_compile *p, } +void emit_dp2(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) +{ + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code */ + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); + + brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); + brw_MAC(p, dst[dst_chan], arg0[1], arg1[1]); + brw_set_saturate(p, 0); +} + + void emit_dp3(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -1584,6 +1605,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); break; + case OPCODE_DP2: + emit_dp2(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index e23ce7ad7a..57be08a8d1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1903,6 +1903,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_SWZ: emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; + case OPCODE_DP2: + emit_dp2(p, dst, dst_flags, args[0], args[1]); + break; case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index 8ee1f153b1..962515a99e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -255,6 +255,11 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read2 = WRITEMASK_W; /* pixel w */ break; + case OPCODE_DP2: + read0 = WRITEMASK_XY; + read1 = WRITEMASK_XY; + break; + case OPCODE_DP3: read0 = WRITEMASK_XYZ; read1 = WRITEMASK_XYZ; -- cgit v1.2.3