diff options
| author | Keith Whitwell <keithw@vmware.com> | 2010-08-15 16:21:46 +0100 | 
|---|---|---|
| committer | Keith Whitwell <keithw@vmware.com> | 2010-08-15 16:25:06 +0100 | 
| commit | 510b03539413552a543e25de6b896eb10baf60ae (patch) | |
| tree | b5e38181ef9c7133775a29c17bc41a09c6ac1b3d | |
| parent | 2d53dc873ea1d9e0e3e4c1cf08a63621661e422f (diff) | |
llvmpipe: reorganize block4 loop, nice speedup
isosurf 95->115 fps just by exchanging the two inner loops in this
function...
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_tri.c | 15 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 16 | 
2 files changed, 19 insertions, 12 deletions
```diff
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index ebe9a8e92b..c1f2680ddc 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -113,6 +113,21 @@ block_full_16(struct lp_rasterizer_task *task,
 	 block_full_4(task, tri, x + ix, y + iy);
 }
 
+
+static INLINE unsigned
+build_mask(int c, const int *step)
+{
+   int mask = 0;
+   int i;
+
+   for (i = 0; i < 16; i++) {
+      mask |= ((c + step[i]) >> 31) & (1 << i);
+   }
+   
+   return mask;
+}
+
+
 #define TAG(x) x##_1
 #define NR_PLANES 1
 #include "lp_rast_tri_tmp.h"
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index a410c611a3..fcb8e2b05d 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -46,19 +46,11 @@ TAG(do_block_4)(struct lp_rasterizer_task *task,
                 int x, int y,
                 const int *c)
 {
-   unsigned mask = 0;
-   int i;
+   unsigned mask = 0xffff;
+   int j;
 
-   for (i = 0; i < 16; i++) {
-      int any_negative = 0;
-      int j;
-
-      for (j = 0; j < NR_PLANES; j++) 
-         any_negative |= (c[j] - 1 + plane[j].step[i]);
-         
-      any_negative >>= 31;
-
-      mask |= (~any_negative) & (1 << i);
+   for (j = 0; j < NR_PLANES; j++) {
+      mask &= ~build_mask(c[j] - 1, plane[j].step);
    }
 
    /* Now pass to the shader:
```
