From 2f914053bc8bba3e6d20334ec44feacc803f5d84 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 20 Aug 2010 18:15:14 -0700 Subject: i965: Set up inputs to the fragment shader according to FP InputsRead. Sending down data that doesn't get read doesn't make any sense, and would make handling things like gl_FrontFacing and gl_PointCoord harder. --- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 50 ++++++++++++++++++++----------- src/mesa/drivers/dri/i965/gen6_sf_state.c | 29 ++++++++++-------- src/mesa/drivers/dri/i965/gen6_wm_state.c | 3 +- 3 files changed, 51 insertions(+), 31 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index a0f299195c..f60245377e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -347,26 +347,42 @@ static void prealloc_reg(struct brw_wm_compile *c) } } - /* fragment shader inputs */ - for (i = 0; i < VERT_RESULT_MAX; i++) { - int fp_input; - - if (i >= VERT_RESULT_VAR0) - fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; - else if (i <= VERT_RESULT_TEX7) - fp_input = i; - else - fp_input = -1; - - if (fp_input >= 0 && inputs & (1 << fp_input)) { - urb_read_length = reg_index; + /* fragment shader inputs: One 2-reg pair of interpolation + * coefficients for each vec4 to be set up. + */ + if (intel->gen >= 6) { + for (i = 0; i < FRAG_ATTRIB_MAX; i++) { + if (!(c->fp->program.Base.InputsRead & BITFIELD64_BIT(i))) + continue; + reg = brw_vec8_grf(reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); - } - if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { + for (j = 0; j < 4; j++) { + set_reg(c, PROGRAM_PAYLOAD, i, j, reg); + } reg_index += 2; } + urb_read_length = reg_index; + } else { + for (i = 0; i < VERT_RESULT_MAX; i++) { + int fp_input; + + if (i >= VERT_RESULT_VAR0) + fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; + else if (i <= VERT_RESULT_TEX7) + fp_input = i; + else + fp_input = -1; + + if (fp_input >= 0 && inputs & (1 << fp_input)) { + urb_read_length = reg_index; + reg = brw_vec8_grf(reg_index, 0); + for (j = 0; j < 4; j++) + set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); + } + if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { + reg_index += 2; + } + } } c->prog_data.first_curbe_grf = c->key.nr_payload_regs; diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 6820ca3abf..b5984255af 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -33,20 +33,29 @@ #include "intel_batchbuffer.h" static uint32_t -get_attr_override(struct brw_context *brw, int attr) +get_attr_override(struct brw_context *brw, int fs_attr) { - uint32_t attr_override; - int attr_index = 0, i; + int attr_index = 0, i, vs_attr; + + if (fs_attr <= FRAG_ATTRIB_TEX7) + vs_attr = fs_attr; + else if (fs_attr == FRAG_ATTRIB_FACE) + vs_attr = 0; /* XXX */ + else if (fs_attr == FRAG_ATTRIB_PNTC) + vs_attr = 0; /* XXX */ + else { + assert(fs_attr >= FRAG_ATTRIB_VAR0); + vs_attr = fs_attr - FRAG_ATTRIB_VAR0 + VERT_RESULT_VAR0; + } /* Find the source index (0 = first attribute after the 4D position) * for this output attribute. attr is currently a VERT_RESULT_* but should * be FRAG_ATTRIB_*. */ - for (i = 0; i < attr; i++) { + for (i = 0; i < vs_attr; i++) { if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(i)) attr_index++; } - attr_override = attr_index; return attr_index; } @@ -58,8 +67,7 @@ upload_sf_state(struct brw_context *brw) GLcontext *ctx = &intel->ctx; /* CACHE_NEW_VS_PROG */ uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written); - /* This should probably be FS inputs read */ - uint32_t num_outputs = brw_count_bits(brw->vs.prog_data->outputs_written); + uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); uint32_t dw1, dw2, dw3, dw4; int i; /* _NEW_BUFFER */ @@ -144,11 +152,8 @@ upload_sf_state(struct brw_context *brw) for (i = 0; i < 8; i++) { uint32_t attr_overrides = 0; - /* These should be generating FS inputs read instead of VS - * outputs written - */ for (; attr < 64; attr++) { - if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) { + if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { attr_overrides |= get_attr_override(brw, attr); attr++; break; @@ -156,7 +161,7 @@ upload_sf_state(struct brw_context *brw) } for (; attr < 64; attr++) { - if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) { + if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { attr_overrides |= get_attr_override(brw, attr) << 16; attr++; break; diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index afce1da666..d88c86a1b2 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -153,8 +153,7 @@ upload_wm_state(struct brw_context *brw) dw6 |= GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; - /* This should probably be FS inputs read */ - dw6 |= brw_count_bits(brw->vs.prog_data->outputs_written) << + dw6 |= brw_count_bits(brw->fragment_program->Base.InputsRead) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; BEGIN_BATCH(9); -- cgit v1.2.3