From 7670628061c2a6ce0a1a787556b0e33a38fd3049 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 Oct 2009 02:46:00 +0100 Subject: llvmpipe: precalculate some offsets --- src/gallium/drivers/llvmpipe/lp_rast.c | 20 ++++---- src/gallium/drivers/llvmpipe/lp_rast.h | 2 + src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 80 +++++------------------------ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 26 ++++++++++ 5 files changed, 51 insertions(+), 79 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 85b756e453..39fb8cdb6b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -193,12 +193,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, const union lp_rast_cmd_arg arg ) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - static const uint32_t ALIGN16_ATTRIB masks[4][4] = - { {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0} }; - + const unsigned mask = ~0; unsigned x, y; RAST_DEBUG("%s\n", __FUNCTION__); @@ -207,26 +202,31 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, */ for (y = 0; y < TILE_SIZE; y += 4) for (x = 0; x < TILE_SIZE; x += 4) - lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, &masks[0][0]); + lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, mask); } void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - const uint32_t *masks) + unsigned mask) { #if 1 const struct lp_rast_state *state = inputs->state; struct lp_rast_tile *tile = &rast->tile; void *color; void *depth; - unsigned ix, iy; + uint32_t ALIGN16_ATTRIB masks[16]; + unsigned ix, iy, i; /* Sanity checks */ assert(x % TILE_VECTOR_WIDTH == 0); assert(y % TILE_VECTOR_HEIGHT == 0); + /* mask */ + for (i = 0; i < 16; ++i) + masks[i] = mask & (1 << i) ? ~0 : 0; + ix = x % TILE_SIZE; iy = y % TILE_SIZE; @@ -251,7 +251,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, inputs->a0, inputs->dadx, inputs->dady, - masks, + &masks[0], color, depth); #else diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 9725007119..318bf73b15 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -111,6 +111,8 @@ struct lp_rast_triangle { int c2; int c3; + int step[3][16]; + /* XXX: this is only used inside lp_setup_tri.c, don't really * need it here: */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index f438faaf36..2333729807 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -80,6 +80,6 @@ struct lp_rasterizer { void lp_rast_shade_quads( struct lp_rasterizer *rast, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, - const uint32_t *masks); + unsigned masks); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 5f22aca668..b5a3753a88 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,100 +37,44 @@ #define BLOCKSIZE 4 + /* Render a 4x4 unmasked block: */ static void block_full( struct lp_rasterizer *rast, const struct lp_rast_triangle *tri, int x, int y ) { - static const uint32_t ALIGN16_ATTRIB masks[4][4] = - { {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0}, - {~0, ~0, ~0, ~0} }; + unsigned mask = ~0; - lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]); + lp_rast_shade_quads(rast, &tri->inputs, x, y, mask); } -static INLINE void -do_quad( const struct lp_rast_triangle *tri, - int c1, int c2, int c3, - int32_t *mask ) -{ - const int xstep1 = -tri->dy12 ; - const int xstep2 = -tri->dy23 ; - const int xstep3 = -tri->dy31 ; - - const int ystep1 = tri->dx12 ; - const int ystep2 = tri->dx23 ; - const int ystep3 = tri->dx31 ; - - mask[0] = ~(((c1) | - (c2) | - (c3)) >> 31); - - mask[1] = ~(((c1 + xstep1) | - (c2 + xstep2) | - (c3 + xstep3)) >> 31); - - mask[2] = ~(((c1 + ystep1) | - (c2 + ystep2) | - (c3 + ystep3)) >> 31); - - mask[3] = ~(((c1 + ystep1 + xstep1) | - (c2 + ystep2 + xstep2) | - (c3 + ystep3 + xstep3)) >> 31); -} /* Evaluate each pixel in a block, generate a mask and possibly render * the quad: */ static void do_block( struct lp_rasterizer *rast, - const struct lp_rast_triangle *tri, + const struct lp_rast_triangle *tri, int x, int y, int c1, int c2, int c3 ) { - const int step = 2 ; - - const int xstep1 = -step * tri->dy12; - const int xstep2 = -step * tri->dy23; - const int xstep3 = -step * tri->dy31; - - const int ystep1 = step * tri->dx12; - const int ystep2 = step * tri->dx23; - const int ystep3 = step * tri->dx31; + int i; + unsigned mask = 0; - int ix, iy; - uint32_t ALIGN16_ATTRIB mask[4][4]; - - - for (iy = 0; iy < 4; iy += 2) { - int cx1 = c1; - int cx2 = c2; - int cx3 = c3; - - for (ix = 0; ix < 2; ix ++) { - - do_quad(tri, cx1, cx2, cx3, (int32_t *)mask[iy+ix]); - - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; - } - - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; - } + for (i = 0; i < 16; i++) + mask |= (~(((c1 + tri->step[0][i]) | + (c2 + tri->step[1][i]) | + (c3 + tri->step[2][i])) >> 31)) & (1 << i); + /* As we do trivial reject already, masks should rarely be all * zero: */ - lp_rast_shade_quads(rast, &tri->inputs, x, y, &mask[0][0] ); + lp_rast_shade_quads(rast, &tri->inputs, x, y, mask ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 6c9f75e90c..a5a0407a57 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -359,6 +359,32 @@ do_triangle_ccw(struct setup_context *setup, maxx = tri->maxx / TILESIZE; maxy = tri->maxy / TILESIZE; + { + int xstep1 = -tri->dy12; + int xstep2 = -tri->dy23; + int xstep3 = -tri->dy31; + + int ystep1 = tri->dx12; + int ystep2 = tri->dx23; + int ystep3 = tri->dx31; + + int ix, iy; + int qx, qy; + int i = 0; + + for (qy = 0; qy < 4; qy += 2) { + for (qx = 0; qx < 4; qx += 2) { + for (iy = 0; iy < 2; iy++) { + for (ix = 0; ix < 2; ix++, i++) { + tri->step[0][i] = (xstep1 * (qx+ix)) + (ystep1 * (qy+iy)); + tri->step[1][i] = (xstep2 * (qx+ix)) + (ystep2 * (qy+iy)); + tri->step[2][i] = (xstep3 * (qx+ix)) + (ystep3 * (qy+iy)); + } + } + } + } + } + /* Convert to tile coordinates: */ if (miny == maxy && minx == maxx) -- cgit v1.2.3