summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Keith Whitwell <keithw@vmware.com> 2009-10-20 02:46:00 +0100
committer: Keith Whitwell <keithw@vmware.com> 2009-10-20 02:46:00 +0100
commit: 7670628061c2a6ce0a1a787556b0e33a38fd3049 (patch)
tree: 7aed343a2d002847cb33e0b6ec61454fcdb34156 /src
parent: 5b07d4de38b732f99237161d940f40e3ce6e29c3 (diff)
llvmpipe: precalculate some offsets
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast.c 20
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast.h 2
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast_priv.h 2
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast_tri.c 80
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup_tri.c 26
5 files changed, 51 insertions, 79 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 85b756e453..39fb8cdb6b 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -193,12 +193,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
const union lp_rast_cmd_arg arg )
{
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
- static const uint32_t ALIGN16_ATTRIB masks[4][4] =
- { {~0, ~0, ~0, ~0},
- {~0, ~0, ~0, ~0},
- {~0, ~0, ~0, ~0},
- {~0, ~0, ~0, ~0} };
-
+ const unsigned mask = ~0;
unsigned x, y;
RAST_DEBUG("%s\n", __FUNCTION__);
@@ -207,26 +202,31 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
*/
for (y = 0; y < TILE_SIZE; y += 4)
for (x = 0; x < TILE_SIZE; x += 4)
- lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, &masks[0][0]);
+ lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, mask);
}
void lp_rast_shade_quads( struct lp_rasterizer *rast,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
- const uint32_t *masks)
+ unsigned mask)
{
#if 1
const struct lp_rast_state *state = inputs->state;
struct lp_rast_tile *tile = &rast->tile;
void *color;
void *depth;
- unsigned ix, iy;
+ uint32_t ALIGN16_ATTRIB masks[16];
+ unsigned ix, iy, i;
/* Sanity checks */
assert(x % TILE_VECTOR_WIDTH == 0);
assert(y % TILE_VECTOR_HEIGHT == 0);
+ /* mask */
+ for (i = 0; i < 16; ++i)
+ masks[i] = mask & (1 << i) ? ~0 : 0;
+
ix = x % TILE_SIZE;
iy = y % TILE_SIZE;
@@ -251,7 +251,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
inputs->a0,
inputs->dadx,
inputs->dady,
- masks,
+ &masks[0],
color,
depth);
#else
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 9725007119..318bf73b15 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -111,6 +111,8 @@ struct lp_rast_triangle {
int c2;
int c3;
+ int step[3][16];
+
/* XXX: this is only used inside lp_setup_tri.c, don't really
* need it here:
*/
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index f438faaf36..2333729807 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -80,6 +80,6 @@ struct lp_rasterizer {
void lp_rast_shade_quads( struct lp_rasterizer *rast,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
- const uint32_t *masks);
+ unsigned masks);
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 5f22aca668..b5a3753a88 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -37,100 +37,44 @@
#define BLOCKSIZE 4
+
/* Render a 4x4 unmasked block:
*/
static void block_full( struct lp_rasterizer *rast,
const struct lp_rast_triangle *tri,
int x, int y )
{
- static const uint32_t ALIGN16_ATTRIB masks[4][4] =
- { {~0, ~0, ~0, ~0},
- {~0, ~0, ~0, ~0},
- {~0, ~0, ~0, ~0},
- {~0, ~0, ~0, ~0} };
+ unsigned mask = ~0;
- lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]);
+ lp_rast_shade_quads(rast, &tri->inputs, x, y, mask);
}
-static INLINE void
-do_quad( const struct lp_rast_triangle *tri,
- int c1, int c2, int c3,
- int32_t *mask )
-{
- const int xstep1 = -tri->dy12 ;
- const int xstep2 = -tri->dy23 ;
- const int xstep3 = -tri->dy31 ;
-
- const int ystep1 = tri->dx12 ;
- const int ystep2 = tri->dx23 ;
- const int ystep3 = tri->dx31 ;
-
- mask[0] = ~(((c1) |
- (c2) |
- (c3)) >> 31);
-
- mask[1] = ~(((c1 + xstep1) |
- (c2 + xstep2) |
- (c3 + xstep3)) >> 31);
-
- mask[2] = ~(((c1 + ystep1) |
- (c2 + ystep2) |
- (c3 + ystep3)) >> 31);
-
- mask[3] = ~(((c1 + ystep1 + xstep1) |
- (c2 + ystep2 + xstep2) |
- (c3 + ystep3 + xstep3)) >> 31);
-}
/* Evaluate each pixel in a block, generate a mask and possibly render
* the quad:
*/
static void
do_block( struct lp_rasterizer *rast,
- const struct lp_rast_triangle *tri,
+ const struct lp_rast_triangle *tri,
int x, int y,
int c1,
int c2,
int c3 )
{
- const int step = 2 ;
-
- const int xstep1 = -step * tri->dy12;
- const int xstep2 = -step * tri->dy23;
- const int xstep3 = -step * tri->dy31;
-
- const int ystep1 = step * tri->dx12;
- const int ystep2 = step * tri->dx23;
- const int ystep3 = step * tri->dx31;
+ int i;
+ unsigned mask = 0;
- int ix, iy;
- uint32_t ALIGN16_ATTRIB mask[4][4];
-
-
- for (iy = 0; iy < 4; iy += 2) {
- int cx1 = c1;
- int cx2 = c2;
- int cx3 = c3;
-
- for (ix = 0; ix < 2; ix ++) {
-
- do_quad(tri, cx1, cx2, cx3, (int32_t *)mask[iy+ix]);
-
- cx1 += xstep1;
- cx2 += xstep2;
- cx3 += xstep3;
- }
-
- c1 += ystep1;
- c2 += ystep2;
- c3 += ystep3;
- }
+ for (i = 0; i < 16; i++)
+ mask |= (~(((c1 + tri->step[0][i]) |
+ (c2 + tri->step[1][i]) |
+ (c3 + tri->step[2][i])) >> 31)) & (1 << i);
+
/* As we do trivial reject already, masks should rarely be all
* zero:
*/
- lp_rast_shade_quads(rast, &tri->inputs, x, y, &mask[0][0] );
+ lp_rast_shade_quads(rast, &tri->inputs, x, y, mask );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 6c9f75e90c..a5a0407a57 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -359,6 +359,32 @@ do_triangle_ccw(struct setup_context *setup,
maxx = tri->maxx / TILESIZE;
maxy = tri->maxy / TILESIZE;
+ {
+ int xstep1 = -tri->dy12;
+ int xstep2 = -tri->dy23;
+ int xstep3 = -tri->dy31;
+
+ int ystep1 = tri->dx12;
+ int ystep2 = tri->dx23;
+ int ystep3 = tri->dx31;
+
+ int ix, iy;
+ int qx, qy;
+ int i = 0;
+
+ for (qy = 0; qy < 4; qy += 2) {
+ for (qx = 0; qx < 4; qx += 2) {
+ for (iy = 0; iy < 2; iy++) {
+ for (ix = 0; ix < 2; ix++, i++) {
+ tri->step[0][i] = (xstep1 * (qx+ix)) + (ystep1 * (qy+iy));
+ tri->step[1][i] = (xstep2 * (qx+ix)) + (ystep2 * (qy+iy));
+ tri->step[2][i] = (xstep3 * (qx+ix)) + (ystep3 * (qy+iy));
+ }
+ }
+ }
+ }
+ }
+
/* Convert to tile coordinates:
*/
if (miny == maxy && minx == maxx)