From 510b03539413552a543e25de6b896eb10baf60ae Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sun, 15 Aug 2010 16:21:46 +0100
Subject: llvmpipe: reorganize block4 loop, nice speedup

isosurf 95->115 fps just by exchanging the two inner loops in this
function...
---
 src/gallium/drivers/llvmpipe/lp_rast_tri.c     | 15 +++++++++++++++
 src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 16 ++++------------
 2 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index ebe9a8e92b..c1f2680ddc 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -113,6 +113,21 @@ block_full_16(struct lp_rasterizer_task *task,
          block_full_4(task, tri, x + ix, y + iy);
 }
 
+
+static INLINE unsigned
+build_mask(int c, const int *step)
+{
+   int mask = 0;
+   int i;
+
+   for (i = 0; i < 16; i++) {
+      mask |= ((c + step[i]) >> 31) & (1 << i);
+   }
+
+   return mask;
+}
+
+
 #define TAG(x) x##_1
 #define NR_PLANES 1
 #include "lp_rast_tri_tmp.h"
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index a410c611a3..fcb8e2b05d 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -46,19 +46,11 @@ TAG(do_block_4)(struct lp_rasterizer_task *task,
                 int x, int y,
                 const int *c)
 {
-   unsigned mask = 0;
-   int i;
+   unsigned mask = 0xffff;
+   int j;
 
-   for (i = 0; i < 16; i++) {
-      int any_negative = 0;
-      int j;
-
-      for (j = 0; j < NR_PLANES; j++)
-         any_negative |= (c[j] - 1 + plane[j].step[i]);
-
-      any_negative >>= 31;
-
-      mask |= (~any_negative) & (1 << i);
+   for (j = 0; j < NR_PLANES; j++) {
+      mask &= ~build_mask(c[j] - 1, plane[j].step);
    }
 
    /* Now pass to the shader:
-- 
cgit v1.2.3