From ead58101f91e82279b25676dfe822120be611995 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 2 Jun 2010 18:43:35 +0100 Subject: llvmpipe: Per quad interpolation. First interpolate the 4 quads upper left corners, then sub-interpolate each quad pixel. Do the perspective divide once per quad. Saves some muls and reciprocates. But doesn't seem to make a noticeable improvement. It make the code simpler and more compact, so commiting anyway. --- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 317 +++++++++++---------------- src/gallium/drivers/llvmpipe/lp_bld_interp.h | 9 +- 2 files changed, 136 insertions(+), 190 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index f0a0a0b6cf..e73733e51c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -101,117 +101,141 @@ coeffs_init(struct lp_build_interp_soa_context *bld, { struct lp_build_context *coeff_bld = &bld->coeff_bld; LLVMBuilderRef builder = coeff_bld->builder; + LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type); + LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0); + LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { + /* TODO: Use more vector operations */ + + for (attrib = 0; attrib < bld->num_attribs; ++attrib) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { + for (chan = 0; chan < NUM_CHANNELS; ++chan) { + if (mask & (1 << chan)) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0); - LLVMValueRef a0 = coeff_bld->undef; - LLVMValueRef dadx = coeff_bld->undef; - LLVMValueRef dady = coeff_bld->undef; - - switch( interp ) { + LLVMValueRef a0 = zero; + LLVMValueRef dadx = zero; + LLVMValueRef dady = zero; + LLVMValueRef dadxy = zero; + LLVMValueRef dadq; + LLVMValueRef dadq2; + LLVMValueRef a; + + switch (interp) { case LP_INTERP_PERSPECTIVE: /* fall-through */ case LP_INTERP_LINEAR: - dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), ""); - dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), ""); - dadx = lp_build_broadcast_scalar(coeff_bld, dadx); - dady = lp_build_broadcast_scalar(coeff_bld, dady); - attrib_name(dadx, attrib, chan, ".dadx"); - attrib_name(dady, attrib, chan, ".dady"); + if (attrib == 0 && chan == 0) { + dadxy = dadx = one; + } + else if (attrib == 0 && chan == 1) { + dadxy = dady = one; + } + else { + dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), ""); + dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), ""); + dadxy = LLVMBuildAdd(builder, dadx, dady, ""); + attrib_name(dadx, attrib, chan, ".dadx"); + attrib_name(dady, attrib, chan, ".dady"); + attrib_name(dadxy, attrib, chan, ".dadxy"); + } /* fall-through */ case LP_INTERP_CONSTANT: case LP_INTERP_FACING: a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), ""); - a0 = lp_build_broadcast_scalar(coeff_bld, a0); attrib_name(a0, attrib, chan, ".a0"); break; case LP_INTERP_POSITION: /* Nothing to do as the position coeffs are already setup in slot 0 */ - break; + continue; default: assert(0); break; } - bld->a0 [attrib][chan] = a0; - bld->dadx[attrib][chan] = dadx; - bld->dady[attrib][chan] = dady; - } - } - } -} + /* + * dadq = {0, dadx, dady, dadx + dady} + */ + dadq = coeff_bld->undef; + dadq = LLVMBuildInsertElement(builder, dadq, zero, i0, ""); + dadq = LLVMBuildInsertElement(builder, dadq, dadx, i1, ""); + dadq = LLVMBuildInsertElement(builder, dadq, dady, i2, ""); + dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, ""); -/** - * Emit LLVM code to compute the fragment shader input attribute values. - * For example, for a color input, we'll compute red, green, blue and alpha - * values for the four pixels in a quad. - * Recall that we're operating on 4-element vectors so each arithmetic - * operation is operating on the four pixels in a quad. - */ -static void -attribs_init(struct lp_build_interp_soa_context *bld) -{ - struct lp_build_context *coeff_bld = &bld->coeff_bld; - LLVMValueRef x = bld->pos[0]; - LLVMValueRef y = bld->pos[1]; - LLVMValueRef oow = NULL; - unsigned attrib; - unsigned chan; + /* + * dadq2 = 2 * dq + */ - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - const unsigned mask = bld->mask[attrib]; - const unsigned interp = bld->interp[attrib]; - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - if (interp == LP_INTERP_POSITION) { - assert(attrib > 0); - bld->attribs[attrib][chan] = bld->attribs[0][chan]; - } - else { - LLVMValueRef a0 = bld->a0 [attrib][chan]; - LLVMValueRef dadx = bld->dadx[attrib][chan]; - LLVMValueRef dady = bld->dady[attrib][chan]; - LLVMValueRef res; + dadq2 = LLVMBuildAdd(builder, dadq, dadq, ""); - res = a0; + /* + * a = a0 + x * dadx + y * dady + */ + if (attrib == 0 && chan == 0) { + a = bld->x; + } + else if (attrib == 0 && chan == 1) { + a = bld->y; + } + else { + a = a0; if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { - /* res = res + x * dadx */ - res = lp_build_add(coeff_bld, res, lp_build_mul(coeff_bld, x, dadx)); - /* res = res + y * dady */ - res = lp_build_add(coeff_bld, res, lp_build_mul(coeff_bld, y, dady)); + a = LLVMBuildAdd(builder, a, + LLVMBuildMul(builder, bld->x, dadx, ""), + ""); + a = LLVMBuildAdd(builder, a, + LLVMBuildMul(builder, bld->y, dady, ""), + ""); } + } - /* Keep the value of the attribute before perspective divide - * for faster updates. - */ - bld->attribs_pre[attrib][chan] = res; - - if (interp == LP_INTERP_PERSPECTIVE) { - LLVMValueRef w = bld->pos[3]; - assert(attrib != 0); - assert(bld->mask[0] & TGSI_WRITEMASK_W); - if(!oow) - oow = lp_build_rcp(coeff_bld, w); - res = lp_build_mul(coeff_bld, res, oow); - } + /* + * a = {a, a, a, a} + */ + + a = lp_build_broadcast(builder, coeff_bld->vec_type, a); + + /* + * Compute the attrib values on the upper-left corner of each quad. + */ - attrib_name(res, attrib, chan, ""); + a = LLVMBuildAdd(builder, a, dadq2, ""); - bld->attribs[attrib][chan] = res; + /* + * a *= 1 / w + * dadq *= 1 / w + */ + + if (interp == LP_INTERP_PERSPECTIVE) { + LLVMValueRef w = bld->a[0][3]; + assert(attrib != 0); + assert(bld->mask[0] & TGSI_WRITEMASK_W); + if (!oow) { + oow = lp_build_rcp(coeff_bld, w); + lp_build_name(oow, "oow"); + } + a = lp_build_mul(coeff_bld, a, oow); + dadq = lp_build_mul(coeff_bld, dadq, oow); } + + attrib_name(a, attrib, chan, ".a"); + attrib_name(dadq, attrib, chan, ".dadq"); + + bld->a [attrib][chan] = a; + bld->dadq[attrib][chan] = dadq; } } } @@ -226,7 +250,7 @@ static void attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) { struct lp_build_context *coeff_bld = &bld->coeff_bld; - LLVMValueRef oow = NULL; + LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index); unsigned attrib; unsigned chan; @@ -235,50 +259,36 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) for(attrib = 0; attrib < bld->num_attribs; ++attrib) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(mask & (1 << chan)) { + LLVMValueRef a = coeff_bld->undef; + if (interp == LP_INTERP_CONSTANT && + interp == LP_INTERP_FACING) { + a = bld->a[attrib][chan]; + } + else if (interp == LP_INTERP_POSITION) { + assert(attrib > 0); + a = bld->attribs[0][chan]; + } + else { + a = bld->a[attrib][chan]; - if (interp != LP_INTERP_CONSTANT && - interp != LP_INTERP_FACING) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - if (interp == LP_INTERP_POSITION) { - assert(attrib > 0); - bld->attribs[attrib][chan] = bld->attribs[0][chan]; - } - else { - LLVMValueRef dadx = bld->dadx[attrib][chan]; - LLVMValueRef dady = bld->dady[attrib][chan]; - LLVMValueRef res; - - res = bld->attribs_pre[attrib][chan]; - - if (quad_index == 1 || quad_index == 3) { - /* top-right or bottom-right quad */ - /* build res = res + dadx + dadx */ - res = lp_build_add(coeff_bld, res, dadx); - res = lp_build_add(coeff_bld, res, dadx); - } - - if (quad_index == 2 || quad_index == 3) { - /* bottom-left or bottom-right quad */ - /* build res = res + dady + dady */ - res = lp_build_add(coeff_bld, res, dady); - res = lp_build_add(coeff_bld, res, dady); - } - - if (interp == LP_INTERP_PERSPECTIVE) { - LLVMValueRef w = bld->pos[3]; - assert(attrib != 0); - assert(bld->mask[0] & TGSI_WRITEMASK_W); - if(!oow) - oow = lp_build_rcp(coeff_bld, w); - res = lp_build_mul(coeff_bld, res, oow); - } - - attrib_name(res, attrib, chan, ""); - - bld->attribs[attrib][chan] = res; - } + /* + * Broadcast the attribute value for this quad into all elements + */ + + a = LLVMBuildShuffleVector(coeff_bld->builder, + a, coeff_bld->undef, shuffle, ""); + + /* + * Add the derivatives + */ + + a = lp_build_add(coeff_bld, a, bld->dadq[attrib][chan]); + + attrib_name(a, attrib, chan, ""); } + bld->attribs[attrib][chan] = a; } } } @@ -296,70 +306,9 @@ pos_init(struct lp_build_interp_soa_context *bld, LLVMValueRef y0) { struct lp_build_context *coeff_bld = &bld->coeff_bld; - LLVMValueRef x_offsets[QUAD_SIZE]; - LLVMValueRef y_offsets[QUAD_SIZE]; - unsigned i; - - /* - * Derive from the quad's upper left scalar coordinates the coordinates for - * all other quad pixels - */ - - x0 = lp_build_broadcast(coeff_bld->builder, coeff_bld->int_vec_type, x0); - y0 = lp_build_broadcast(coeff_bld->builder, coeff_bld->int_vec_type, y0); - - for(i = 0; i < QUAD_SIZE; ++i) { - x_offsets[i] = LLVMConstInt(coeff_bld->int_elem_type, quad_offset_x[i], 0); - y_offsets[i] = LLVMConstInt(coeff_bld->int_elem_type, quad_offset_y[i], 0); - } - x0 = LLVMBuildAdd(coeff_bld->builder, x0, LLVMConstVector(x_offsets, QUAD_SIZE), ""); - y0 = LLVMBuildAdd(coeff_bld->builder, y0, LLVMConstVector(y_offsets, QUAD_SIZE), ""); - - x0 = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->vec_type, ""); - y0 = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->vec_type, ""); - - lp_build_name(x0, "pos.x"); - lp_build_name(y0, "pos.y"); - - bld->attribs[0][0] = x0; - bld->attribs[0][1] = y0; -} - - -/** - * Update quad position values when moving to the next quad. - */ -static void -pos_update(struct lp_build_interp_soa_context *bld, int quad_index) -{ - struct lp_build_context *coeff_bld = &bld->coeff_bld; - LLVMValueRef x = bld->attribs[0][0]; - LLVMValueRef y = bld->attribs[0][1]; - const int xstep = 2, ystep = 2; - - if (quad_index == 1 || quad_index == 3) { - /* top-right or bottom-right quad in block */ - /* build x += xstep */ - x = lp_build_add(coeff_bld, x, - lp_build_const_vec(coeff_bld->type, xstep)); - } - - if (quad_index == 2) { - /* bottom-left quad in block */ - /* build y += ystep */ - y = lp_build_add(coeff_bld, y, - lp_build_const_vec(coeff_bld->type, ystep)); - /* build x -= xstep */ - x = lp_build_sub(coeff_bld, x, - lp_build_const_vec(coeff_bld->type, xstep)); - } - - lp_build_name(x, "pos.x"); - lp_build_name(y, "pos.y"); - - bld->attribs[0][0] = x; - bld->attribs[0][1] = y; + bld->x = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->elem_type, ""); + bld->y = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->elem_type, ""); } @@ -401,7 +350,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, /* Position */ bld->num_attribs = 1; - bld->mask[0] = TGSI_WRITEMASK_ZW; + bld->mask[0] = TGSI_WRITEMASK_XYZW; bld->interp[0] = LP_INTERP_LINEAR; /* Inputs */ @@ -418,11 +367,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, } } - coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); - pos_init(bld, x0, y0); - attribs_init(bld); + coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); + + attribs_update(bld, 0); } @@ -435,7 +384,5 @@ lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, { assert(quad_index < 4); - pos_update(bld, quad_index); - attribs_update(bld, quad_index); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 45a430701f..2905513301 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -58,12 +58,11 @@ struct lp_build_interp_soa_context unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */ enum lp_interp interp[1 + PIPE_MAX_SHADER_INPUTS]; - LLVMValueRef a0 [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef x; + LLVMValueRef y; - /* Attribute values before perspective divide */ - LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef a [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; -- cgit v1.2.3