diff options
| author | Keith Whitwell <keithw@vmware.com> | 2009-10-20 02:46:00 +0100 | 
|---|---|---|
| committer | Keith Whitwell <keithw@vmware.com> | 2009-10-20 02:46:00 +0100 | 
| commit | 7670628061c2a6ce0a1a787556b0e33a38fd3049 (patch) | |
| tree | 7aed343a2d002847cb33e0b6ec61454fcdb34156 | |
| parent | 5b07d4de38b732f99237161d940f40e3ce6e29c3 (diff) | |
llvmpipe: precalculate some offsets
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.c | 20 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.h | 2 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_tri.c | 80 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup_tri.c | 26 | 
5 files changed, 51 insertions, 79 deletions
| diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 85b756e453..39fb8cdb6b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -193,12 +193,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,                           const union lp_rast_cmd_arg arg )  {     const struct lp_rast_shader_inputs *inputs = arg.shade_tile; -   static const uint32_t ALIGN16_ATTRIB masks[4][4] =  -      { {~0, ~0, ~0, ~0}, -        {~0, ~0, ~0, ~0}, -        {~0, ~0, ~0, ~0}, -        {~0, ~0, ~0, ~0} }; - +   const unsigned mask = ~0;     unsigned x, y;     RAST_DEBUG("%s\n", __FUNCTION__); @@ -207,26 +202,31 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,      */     for (y = 0; y < TILE_SIZE; y += 4)        for (x = 0; x < TILE_SIZE; x += 4) -         lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, &masks[0][0]); +         lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, mask);  }  void lp_rast_shade_quads( struct lp_rasterizer *rast,                            const struct lp_rast_shader_inputs *inputs,                            unsigned x, unsigned y, -                          const uint32_t *masks) +                          unsigned mask)  {  #if 1     const struct lp_rast_state *state = inputs->state;     struct lp_rast_tile *tile = &rast->tile;     void *color;     void *depth; -   unsigned ix, iy; +   uint32_t ALIGN16_ATTRIB masks[16]; +   unsigned ix, iy, i;     /* Sanity checks */     assert(x % TILE_VECTOR_WIDTH == 0);     assert(y % TILE_VECTOR_HEIGHT == 0); +   /* mask */ +   for (i = 0; i < 16; ++i) +      masks[i] = mask & (1 << i) ? ~0 : 0; +     ix = x % TILE_SIZE;     iy = y % TILE_SIZE; @@ -251,7 +251,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,                          inputs->a0,                          inputs->dadx,                          inputs->dady, -                        masks, +                        &masks[0],                          color,                          depth);  #else diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 9725007119..318bf73b15 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -111,6 +111,8 @@ struct lp_rast_triangle {     int c2;     int c3; +   int step[3][16]; +     /* XXX: this is only used inside lp_setup_tri.c, don't really      * need it here:      */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index f438faaf36..2333729807 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -80,6 +80,6 @@ struct lp_rasterizer {  void lp_rast_shade_quads( struct lp_rasterizer *rast,                            const struct lp_rast_shader_inputs *inputs,                            unsigned x, unsigned y, -                          const uint32_t *masks); +                          unsigned masks);  #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 5f22aca668..b5a3753a88 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,100 +37,44 @@  #define BLOCKSIZE 4 +  /* Render a 4x4 unmasked block:   */  static void block_full( struct lp_rasterizer *rast,                          const struct lp_rast_triangle *tri,                          int x, int y )  { -   static const uint32_t ALIGN16_ATTRIB masks[4][4] =  -      { {~0, ~0, ~0, ~0}, -        {~0, ~0, ~0, ~0}, -        {~0, ~0, ~0, ~0}, -        {~0, ~0, ~0, ~0} }; +   unsigned mask = ~0; -   lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]); +   lp_rast_shade_quads(rast, &tri->inputs, x, y, mask);  } -static INLINE void -do_quad( const struct lp_rast_triangle *tri, -	 int c1, int c2, int c3, -         int32_t *mask ) -{ -   const int xstep1 = -tri->dy12 ; -   const int xstep2 = -tri->dy23 ; -   const int xstep3 = -tri->dy31 ; - -   const int ystep1 = tri->dx12 ; -   const int ystep2 = tri->dx23 ; -   const int ystep3 = tri->dx31 ; -    -   mask[0] = ~(((c1) | -                (c2) | -                (c3)) >> 31); - -   mask[1] = ~(((c1 + xstep1) |  -                (c2 + xstep2) |  -                (c3 + xstep3)) >> 31); - -   mask[2] = ~(((c1 + ystep1) |  -                (c2 + ystep2) |  -                (c3 + ystep3)) >> 31); - -   mask[3] = ~(((c1 + ystep1 + xstep1) |  -                (c2 + ystep2 + xstep2) |  -                (c3 + ystep3 + xstep3)) >> 31); -}  /* Evaluate each pixel in a block, generate a mask and possibly render   * the quad:   */  static void  do_block( struct lp_rasterizer *rast, -          const struct lp_rast_triangle *tri, +	  const struct lp_rast_triangle *tri,            int x, int y,            int c1,            int c2,            int c3 )  { -   const int step = 2 ; - -   const int xstep1 = -step * tri->dy12; -   const int xstep2 = -step * tri->dy23; -   const int xstep3 = -step * tri->dy31; - -   const int ystep1 = step * tri->dx12; -   const int ystep2 = step * tri->dx23; -   const int ystep3 = step * tri->dx31; +   int i; +   unsigned mask = 0; -   int ix, iy; -   uint32_t ALIGN16_ATTRIB mask[4][4]; - - -   for (iy = 0; iy < 4; iy += 2) { -      int cx1 = c1; -      int cx2 = c2; -      int cx3 = c3; - -      for (ix = 0; ix < 2; ix ++) { - -	 do_quad(tri, cx1, cx2, cx3, (int32_t *)mask[iy+ix]); - -	 cx1 += xstep1; -	 cx2 += xstep2; -	 cx3 += xstep3; -      } - -      c1 += ystep1; -      c2 += ystep2; -      c3 += ystep3; -   } +   for (i = 0; i < 16; i++) +      mask |= (~(((c1 + tri->step[0][i]) |  +		  (c2 + tri->step[1][i]) |  +		  (c3 + tri->step[2][i])) >> 31)) & (1 << i); +        /* As we do trivial reject already, masks should rarely be all      * zero:      */ -   lp_rast_shade_quads(rast, &tri->inputs, x, y, &mask[0][0] ); +   lp_rast_shade_quads(rast, &tri->inputs, x, y, mask );  } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 6c9f75e90c..a5a0407a57 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -359,6 +359,32 @@ do_triangle_ccw(struct setup_context *setup,     maxx = tri->maxx / TILESIZE;     maxy = tri->maxy / TILESIZE; +   { +      int xstep1 = -tri->dy12; +      int xstep2 = -tri->dy23; +      int xstep3 = -tri->dy31; + +      int ystep1 = tri->dx12; +      int ystep2 = tri->dx23; +      int ystep3 = tri->dx31; +       +      int ix, iy; +      int qx, qy; +      int i = 0; +       +      for (qy = 0; qy < 4; qy += 2) { +	 for (qx = 0; qx < 4; qx += 2) { +	    for (iy = 0; iy < 2; iy++) { +	       for (ix = 0; ix < 2; ix++, i++) { +		  tri->step[0][i] = (xstep1 * (qx+ix)) + (ystep1 * (qy+iy)); +		  tri->step[1][i] = (xstep2 * (qx+ix)) + (ystep2 * (qy+iy)); +		  tri->step[2][i] = (xstep3 * (qx+ix)) + (ystep3 * (qy+iy)); +	       } +	    } +	 } +      } +   } +     /* Convert to tile coordinates:      */     if (miny == maxy && minx == maxx) | 
