From 56ff30a9f97a1a7094432333906544d6138d6bf2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Mar 2010 14:46:27 -0800 Subject: i965: Use the PLN instruction when possible in interpolation. Saves an instruction in PINTERP, LINTERP, and PIXEL_W from brw_wm_glsl.c For non-GLSL it isn't used yet because the deltas have to be laid out differently. --- src/mesa/drivers/dri/i965/brw_context.c | 1 + src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_disasm.c | 1 + src/mesa/drivers/dri/i965/brw_eu.h | 1 + src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_emit.c | 44 ++++++++++++++++++++++++++++----- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 38 ++++++++++++++++++++++++++++ 8 files changed, 82 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index a512896f31..241193c357 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -156,6 +156,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, brw->has_surface_tile_offset = GL_TRUE; brw->has_compr4 = GL_TRUE; brw->has_aa_line_parameters = GL_TRUE; + brw->has_pln = GL_TRUE; } else { brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d6fc37e4d8..2855c93ea6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -446,6 +446,7 @@ struct brw_context GLboolean has_compr4; GLboolean has_negative_rhw_bug; GLboolean has_aa_line_parameters; + GLboolean has_pln; ; struct { struct brw_state_flags dirty; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index bb1b5f5ef0..984e56d00c 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -550,6 +550,7 @@ #define BRW_OPCODE_DP2 87 #define BRW_OPCODE_DPA2 88 #define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_PLN 90 #define BRW_OPCODE_NOP 126 #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 4d25152efc..ad61770212 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -50,6 +50,7 @@ struct { [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 39eb88d7c2..8e9117b644 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -795,6 +795,7 @@ ALU2(DPH) ALU2(DP3) ALU2(DP2) ALU2(LINE) +ALU2(PLN) #undef ALU1 #undef ALU2 diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 82f2fdab2f..d2395dec28 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -573,7 +573,7 @@ ALU2(DPH) ALU2(DP3) ALU2(DP2) ALU2(LINE) - +ALU2(PLN) diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index c7d87b9d94..412e09b76a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -34,6 +34,23 @@ #include "brw_context.h" #include "brw_wm.h" +static GLboolean can_do_pln(struct intel_context *intel, + const struct brw_reg *deltas) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + if (!brw->has_pln) + return GL_FALSE; + + if (deltas[1].nr != deltas[0].nr + 1) + return GL_FALSE; + + if (intel->gen < 6 && ((deltas[0].nr & 1) != 0)) + return GL_FALSE; + + return GL_TRUE; +} + /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ @@ -186,6 +203,7 @@ void emit_pixel_w(struct brw_wm_compile *c, const struct brw_reg *deltas) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; /* Don't need this if all you are doing is interpolating color, for * instance. @@ -196,8 +214,12 @@ void emit_pixel_w(struct brw_wm_compile *c, /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ - brw_LINE(p, brw_null_reg(), interp3, deltas[0]); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, brw_message_reg(2), interp3, deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp3, deltas[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + } /* Calc w */ if (c->dispatch_width == 16) { @@ -224,6 +246,7 @@ void emit_linterp(struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *deltas) { + struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; @@ -235,8 +258,12 @@ void emit_linterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; @@ -260,8 +288,12 @@ void emit_pinterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<func.brw->intel; int i, j; struct brw_reg reg; int urb_read_length = 0; @@ -413,6 +414,43 @@ static void prealloc_reg(struct brw_wm_compile *c) } } + for (i = 0; i < c->nr_fp_insns; i++) { + const struct prog_instruction *inst = &c->prog_instructions[i]; + + switch (inst->Opcode) { + case WM_DELTAXY: + /* Allocate WM_DELTAXY destination on G45/GM45 to an + * even-numbered GRF if possible so that we can use the PLN + * instruction. + */ + if (inst->DstReg.WriteMask == WRITEMASK_XY && + !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited && + !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited && + (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) { + int grf; + + for (grf = c->first_free_grf & ~1; + grf < BRW_WM_MAX_GRF; + grf += 2) + { + if (!c->used_grf[grf] && !c->used_grf[grf + 1]) { + c->used_grf[grf] = GL_TRUE; + c->used_grf[grf + 1] = GL_TRUE; + c->first_free_grf = grf + 2; /* a guess */ + + set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0, + brw_vec8_grf(grf, 0)); + set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1, + brw_vec8_grf(grf + 1, 0)); + break; + } + } + } + default: + break; + } + } + /* An instruction may reference up to three constants. * They'll be found in these registers. * XXX alloc these on demand! -- cgit v1.2.3