From 866e6856d39efe9b1ec739587f420a640ad8618e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 2 Dec 2009 15:13:45 -0700 Subject: llvmpipe: execute shaders on 4x4 blocks instead of 8x2 This matches the convention used by the recursive rasterizer. Also fixed assorted typos, comments, etc. Now tri-z.c, gears.c, etc look basically right but there's still some cracks in triangle rasterization. --- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 135 ++++++++++++++++++--------- src/gallium/drivers/llvmpipe/lp_bld_interp.h | 10 +- src/gallium/drivers/llvmpipe/lp_rast.c | 22 +++-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 12 ++- 4 files changed, 116 insertions(+), 63 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 338dbca6d1..affeeca6ff 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -45,6 +45,36 @@ #include "lp_bld_interp.h" +/* + * The shader JIT function operates on blocks of quads. + * Each block has 2x2 quads and each quad has 2x2 pixels. + * + * We iterate over the quads in order 0, 1, 2, 3: + * + * ################# + * # | # | # + * #---0---#---1---# + * # | # | # + * ################# + * # | # | # + * #---2---#---3---# + * # | # | # + * ################# + * + * Within each quad, we have four pixels which are represented in SOA + * order: + * + * ######### + * # 0 | 1 # + * #---+---# + * # 2 | 3 # + * ######### + * + * So the green channel (for example) of the four pixels is stored in + * a single vector register: {g0, g1, g2, g3}. + */ + + static void attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix) { @@ -55,6 +85,10 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix } +/** + * Initialize the bld->a0, dadx, dady fields. 
This involves fetching + * those values from the arrays which are passed into the JIT function. + */ static void coeffs_init(struct lp_build_interp_soa_context *bld, LLVMValueRef a0_ptr, @@ -91,7 +125,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, case TGSI_INTERPOLATE_CONSTANT: a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), ""); a0 = lp_build_broadcast_scalar(&bld->base, a0); - attrib_name(a0, attrib, chan, ".dady"); + attrib_name(a0, attrib, chan, ".a0"); break; default: @@ -135,29 +169,12 @@ coeff_multiply(struct lp_build_interp_soa_context *bld, /** - * Multiply the dadx and dady with the xstep and ystep respectively. + * Emit LLVM code to compute the fragment shader input attribute values. + * For example, for a color input, we'll compute red, green, blue and alpha + * values for the four pixels in a quad. + * Recall that we're operating on 4-element vectors so each arithmetic + * operation is operating on the four pixels in a quad. */ -static void -coeffs_update(struct lp_build_interp_soa_context *bld) -{ - unsigned attrib; - unsigned chan; - - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - unsigned mask = bld->mask[attrib]; - unsigned mode = bld->mode[attrib]; - if (mode != TGSI_INTERPOLATE_CONSTANT) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep); - bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep); - } - } - } - } -} - - static void attribs_init(struct lp_build_interp_soa_context *bld) { @@ -180,7 +197,9 @@ attribs_init(struct lp_build_interp_soa_context *bld) res = a0; if (mode != TGSI_INTERPOLATE_CONSTANT) { + /* res = res + x * dadx */ res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx)); + /* res = res + y * dady */ res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady)); } @@ -204,13 +223,19 @@ attribs_init(struct 
lp_build_interp_soa_context *bld) } +/** + * Increment the shader input attribute values. + * This is called when we move from one quad to the next. + */ static void -attribs_update(struct lp_build_interp_soa_context *bld) +attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) { LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; + assert(quad_index < 4); + for(attrib = 0; attrib < bld->num_attribs; ++attrib) { unsigned mask = bld->mask[attrib]; unsigned mode = bld->mode[attrib]; @@ -224,13 +249,21 @@ attribs_update(struct lp_build_interp_soa_context *bld) res = bld->attribs_pre[attrib][chan]; - if(bld->xstep) + if (quad_index == 1 || quad_index == 3) { + /* top-right or bottom-right quad */ + /* build res = res + dadx + dadx */ res = lp_build_add(&bld->base, res, dadx); + res = lp_build_add(&bld->base, res, dadx); + } - if(bld->ystep) + if (quad_index == 2 || quad_index == 3) { + /* bottom-left or bottom-right quad */ + /* build res = res + dady + dady */ res = lp_build_add(&bld->base, res, dady); + res = lp_build_add(&bld->base, res, dady); + } - bld->attribs_pre[attrib][chan] = res; + //XXX bld->attribs_pre[attrib][chan] = res; if (mode == TGSI_INTERPOLATE_PERSPECTIVE) { LLVMValueRef w = bld->pos[3]; @@ -268,17 +301,32 @@ pos_init(struct lp_build_interp_soa_context *bld, } +/** + * Update quad position values when moving to the next quad. 
+ */ static void -pos_update(struct lp_build_interp_soa_context *bld) +pos_update(struct lp_build_interp_soa_context *bld, int quad_index) { LLVMValueRef x = bld->attribs[0][0]; LLVMValueRef y = bld->attribs[0][1]; + const int xstep = 2, ystep = 2; - if(bld->xstep) - x = lp_build_add(&bld->base, x, lp_build_const_scalar(bld->base.type, bld->xstep)); + if (quad_index == 1 || quad_index == 3) { + /* top-right or bottom-right quad in block */ + /* build x += xstep */ + x = lp_build_add(&bld->base, x, + lp_build_const_scalar(bld->base.type, xstep)); + } - if(bld->ystep) - y = lp_build_add(&bld->base, y, lp_build_const_scalar(bld->base.type, bld->ystep)); + if (quad_index == 2) { + /* bottom-left quad in block */ + /* build y += ystep */ + y = lp_build_add(&bld->base, y, + lp_build_const_scalar(bld->base.type, ystep)); + /* build x -= xstep */ + x = lp_build_sub(&bld->base, x, + lp_build_const_scalar(bld->base.type, xstep)); + } lp_build_name(x, "pos.x"); lp_build_name(y, "pos.y"); @@ -288,6 +336,9 @@ pos_update(struct lp_build_interp_soa_context *bld) } +/** + * Initialize fragment shader input attribute info. + */ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, @@ -297,9 +348,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, LLVMValueRef x0, - LLVMValueRef y0, - int xstep, - int ystep) + LLVMValueRef y0) { struct tgsi_parse_context parse; struct tgsi_full_declaration *decl; @@ -357,21 +406,19 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, pos_init(bld, x0, y0); attribs_init(bld); - - bld->xstep = xstep; - bld->ystep = ystep; - - coeffs_update(bld); } /** - * Advance the position and inputs with the xstep and ystep. + * Advance the position and inputs to the given quad within the block. 
*/ void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld) +lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, + int quad_index) { - pos_update(bld); + assert(quad_index < 4); + + pos_update(bld, quad_index); - attribs_update(bld); + attribs_update(bld, quad_index); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 9c57a10879..e2b3bc1bf0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -63,9 +63,6 @@ struct lp_build_interp_soa_context LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - int xstep; - int ystep; - /* Attribute values before perspective divide */ LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; @@ -88,12 +85,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, LLVMValueRef x0, - LLVMValueRef y0, - int xstep, - int ystep); + LLVMValueRef y0); void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld); +lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, + int quad_index); #endif /* LP_BLD_INTERP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 09495f6288..f88dd4ae68 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -126,8 +126,6 @@ void lp_rast_end( struct lp_rasterizer *rast ) } - - /** * Begining rasterization of a tile. 
* \param x window X position of the tile, in pixels @@ -152,7 +150,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, { const uint8_t *clear_color = arg.clear_color; - RAST_DEBUG("%s %x,%x,%x,%x\n", __FUNCTION__, + RAST_DEBUG("%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, clear_color[0], clear_color[1], clear_color[2], @@ -181,7 +179,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, { unsigned i, j; - RAST_DEBUG("%s\n", __FUNCTION__); + RAST_DEBUG("%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) @@ -225,6 +223,9 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, } +/** + * Compute shading for a 4x4 block of pixels. + */ void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, @@ -237,6 +238,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, void *depth; uint32_t ALIGN16_ATTRIB masks[2][2][2][2]; unsigned ix, iy; + int block_offset; /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); @@ -275,16 +277,20 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? 
~0 : 0; #endif + assert((x % 2) == 0); + assert((y % 2) == 0); + ix = x % TILE_SIZE; iy = y % TILE_SIZE; + /* offset of the 4x4 pixel block within the tile */ + block_offset = ((iy/4)*(16*16) + (ix/4)*16); + /* color buffer */ - color = &TILE_PIXEL(tile->color, ix, iy, 0); + color = tile->color + 4 * block_offset; /* depth buffer */ - assert((x % 2) == 0); - assert((y % 2) == 0); - depth = tile->depth + (iy/4)*(16*16) + (ix/4)*16; + depth = tile->depth + block_offset; /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ assert(lp_check_alignment(masks, 16)); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 0541d36580..aa9c006633 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -358,6 +358,9 @@ generate_blend(const struct pipe_blend_state *blend, /** * Generate the runtime callable function for the whole fragment pipeline. + * Note that the function which we generate operates on a block of 16 + * pixels at a time. The block contains 2x2 quads. Each quad contains + * 2x2 pixels.
*/ static struct lp_fragment_shader_variant * generate_fragment(struct llvmpipe_context *lp, @@ -437,8 +440,8 @@ generate_fragment(struct llvmpipe_context *lp, fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ - fs_type.length = 4; /* 4 element per vector */ - num_fs = 4; + fs_type.length = 4; /* 4 elements per vector */ + num_fs = 4; /* number of quads per block */ memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ @@ -509,18 +512,19 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, - x0, y0, 2, 0); + x0, y0); /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); + /* loop over quads in the block */ for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef out_color[NUM_CHANNELS]; LLVMValueRef depth_ptr_i; if(i != 0) - lp_build_interp_soa_update(&interp); + lp_build_interp_soa_update(&interp, i); fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), ""); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); -- cgit v1.2.3