diff options
Diffstat (limited to 'src/gallium')
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.h | 3 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_tri.c | 272 | 
2 files changed, 88 insertions, 187 deletions
| diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 0aa111b472..41a7f5ebbf 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -92,6 +92,9 @@ struct lp_rast_triangle {     float dx12;     float dx23;     float dx31; + +   /* inputs for the shader */ +   struct lp_rast_shader_inputs *inputs;  };  struct clear_tile { diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 896ac253a0..138d6f55e0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -29,80 +29,46 @@   * Rasterization for binned triangles within a tile   */ +#include "util/u_math.h"  #include "lp_quad.h" -#include "lp_quad_pipe.h"  #include "lp_rast_priv.h" +#include "lp_tile_soa.h" -#define BLOCKSIZE 4 +#define BLOCKSIZE 8  /* Convert 8x8 block into four runs of quads and render each in turn.   */  #if (BLOCKSIZE == 8) -static void block_full( struct lp_rast_triangle *tri, int x, int y ) +static void block_full( struct lp_rasterizer *rast, +                        const struct lp_rast_triangle *tri, +                        int x, int y )  { -   struct quad_header *ptrs[4]; -   int i; - -   tri->quad[0].input.x0 = x + 0; -   tri->quad[1].input.x0 = x + 2; -   tri->quad[2].input.x0 = x + 4; -   tri->quad[3].input.x0 = x + 6; - -   for (i = 0; i < 4; i++, y += 2) { -      tri->quad[0].inout.mask = 0xf; -      tri->quad[1].inout.mask = 0xf; -      tri->quad[2].inout.mask = 0xf; -      tri->quad[3].inout.mask = 0xf; - -      tri->quad[0].input.y0 = y; -      tri->quad[1].input.y0 = y; -      tri->quad[2].input.y0 = y; -      tri->quad[3].input.y0 = y; - -      /* XXX: don't bother with this ptrs business */ -      ptrs[0] = &tri->quad[0]; -      ptrs[1] = &tri->quad[1]; -      ptrs[2] = &tri->quad[2]; -      ptrs[3] = &tri->quad[3]; +   const unsigned masks[4] = {~0, ~0, ~0, ~0}; +   int iy; -      tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 ); -   } +   for (iy = 0; iy < 8; iy += 2) +      lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks);  }  #else -static void block_full( struct lp_rast_triangle *tri, int x, int y ) +static void block_full( struct lp_rasterizer *rast, +                        const struct lp_rast_triangle *tri, +                        int x, int y )  { -   struct quad_header *ptrs[4]; +   const unsigned masks[4] = {~0, ~0, 0, 0}; /* FIXME: Wasting quads!!! */     int iy; -   tri->quad[0].input.x0 = x + 0; -   tri->quad[1].input.x0 = x + 2; - -   for (iy = 0; iy < 4; iy += 2) { -      tri->quad[0].inout.mask = 0xf; -      tri->quad[1].inout.mask = 0xf; - -      tri->quad[0].input.y0 = y + iy; -      tri->quad[1].input.y0 = y + iy; - -      /* XXX: don't bother with this ptrs business */ -      ptrs[0] = &tri->quad[0]; -      ptrs[1] = &tri->quad[1]; - -      tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 ); -   } +   for (iy = 0; iy < 4; iy += 2) +      lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks);  }  #endif -static void -do_quad( struct lp_rasterizer *rast, +static INLINE unsigned +do_quad( const struct lp_rast_triangle *tri,  	 int x, int y,  	 float c1, float c2, float c3 )  { -   struct lp_rast_triangle *tri = rast->tri; -   struct quad_header *quad = &rast->quad[0]; -     float xstep1 = -tri->dy12;     float xstep2 = -tri->dy23;     float xstep3 = -tri->dy31; @@ -111,43 +77,41 @@ do_quad( struct lp_rasterizer *rast,     float ystep2 = tri->dx23;     float ystep3 = tri->dx31; -   quad->input.x0 = x; -   quad->input.y0 = y; -   quad->inout.mask = 0; +   unsigned mask = 0;     if (c1 > 0 &&         c2 > 0 &&         c3 > 0) -      quad->inout.mask |= 1; +      mask |= 1;     if (c1 + xstep1 > 0 &&          c2 + xstep2 > 0 &&          c3 + xstep3 > 0) -      quad->inout.mask |= 2; +      mask |= 2;     if (c1 + ystep1 > 0 &&          c2 + ystep2 > 0 &&          c3 + ystep3 > 0) -      quad->inout.mask |= 4; +      mask |= 4;     if (c1 + ystep1 + xstep1 > 0 &&          c2 + ystep2 + xstep2 > 0 &&          c3 + ystep3 + xstep3 > 0) -      quad->inout.mask |= 8; +      mask |= 8; -   if (quad->inout.mask) -      rast->state->run( rast->state->state, &quad, 1 ); +   return mask;  }  /* Evaluate each pixel in a block, generate a mask and possibly render   * the quad:   */  static void -do_block( struct lp_rast_triangle *tri, -	 int x, int y, -	 float c1, -	 float c2, -	 float c3 ) +do_block( struct lp_rasterizer *rast, +          const struct lp_rast_triangle *tri, +          int x, int y, +          float c1, +          float c2, +          float c3 )  {     const int step = 2; @@ -166,19 +130,24 @@ do_block( struct lp_rast_triangle *tri,        float cx2 = c2;        float cx3 = c3; +      unsigned masks[4] = {0, 0, 0, 0}; +        for (ix = 0; ix < BLOCKSIZE; ix += 2) { -	 do_quad(tri, x+ix, y+iy, cx1, cx2, cx3); +	 masks[ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3);  	 cx1 += xstep1;  	 cx2 += xstep2;  	 cx3 += xstep3;        } +      lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks); +        c1 += ystep1;        c2 += ystep2;        c3 += ystep3;     } +  } @@ -187,8 +156,9 @@ do_block( struct lp_rast_triangle *tri,   * for this triangle:   */  void lp_rast_triangle( struct lp_rasterizer *rast, -		       const struct lp_rast_triangle *tri ) +                       const union lp_rast_cmd_arg *arg )  { +   const struct lp_rast_triangle *tri = arg->triangle;     int minx, maxx, miny, maxy;     /* Clamp to tile dimensions: @@ -205,136 +175,64 @@ void lp_rast_triangle( struct lp_rasterizer *rast,        return;     } -   /* Bind parameter interpolants: -    */ -   for (i = 0; i < Elements(rast->quad); i++) { -      rast->quad[i].coef = tri->coef; -      rast->quad[i].posCoef = &tri->position_coef; -   } - -   /* Small area? -    */ -   if (miny + 16 > maxy && -       minx + 16 > maxx) -   { -      const int step = 2; - -      float xstep1 = -step * tri->dy12; -      float xstep2 = -step * tri->dy23; -      float xstep3 = -step * tri->dy31; - -      float ystep1 = step * tri->dx12; -      float ystep2 = step * tri->dx23; -      float ystep3 = step * tri->dx31; +   const int step = BLOCKSIZE; -      float eo1 = tri->eo1 * step; -      float eo2 = tri->eo2 * step; -      float eo3 = tri->eo3 * step; +   float ei1 = tri->ei1 * step; +   float ei2 = tri->ei2 * step; +   float ei3 = tri->ei3 * step; -      int x, y; +   float eo1 = tri->eo1 * step; +   float eo2 = tri->eo2 * step; +   float eo3 = tri->eo3 * step; -      minx &= ~(step-1); -      maxx &= ~(step-1); +   float xstep1 = -step * tri->dy12; +   float xstep2 = -step * tri->dy23; +   float xstep3 = -step * tri->dy31; -      /* Subdivide space into NxM blocks, where each block is square and -       * power-of-four in dimension. -       * -       * Trivially accept or reject blocks, else jump to per-pixel -       * examination above. -       */ -      for (y = miny; y < maxy; y += step) -      { -	 float cx1 = c1; -	 float cx2 = c2; -	 float cx3 = c3; +   float ystep1 = step * tri->dx12; +   float ystep2 = step * tri->dx23; +   float ystep3 = step * tri->dx31; +   int x, y; -	 for (x = minx; x < maxx; x += step) -	 { -	    if (cx1 + eo1 < 0 ||  -		cx2 + eo2 < 0 || -		cx3 + eo3 < 0)  -	    { -	    } -	    else  -	    { -	       do_quad(&tri, x, y, cx1, cx2, cx3); -	    } +   minx &= ~(step-1); +   miny &= ~(step-1); -	    /* Iterate cx values across the region: -	     */ -	    cx1 += xstep1; -	    cx2 += xstep2; -	    cx3 += xstep3; -	 } -       -	 /* Iterate c values down the region: -	  */ -	 c1 += ystep1; -	 c2 += ystep2; -	 c3 += ystep3;     -      } -   } -   else  +   for (y = miny; y < maxy; y += step)     { -      const int step = BLOCKSIZE; - -      float ei1 = tri->ei1 * step; -      float ei2 = tri->ei2 * step; -      float ei3 = tri->ei3 * step; - -      float eo1 = tri->eo1 * step; -      float eo2 = tri->eo2 * step; -      float eo3 = tri->eo3 * step; - -      float xstep1 = -step * tri->dy12; -      float xstep2 = -step * tri->dy23; -      float xstep3 = -step * tri->dy31; - -      float ystep1 = step * tri->dx12; -      float ystep2 = step * tri->dx23; -      float ystep3 = step * tri->dx31; -      int x, y; - -      minx &= ~(step-1); -      miny &= ~(step-1); +      float cx1 = c1; +      float cx2 = c2; +      float cx3 = c3; -      for (y = miny; y < maxy; y += step) +      for (x = minx; x < maxx; x += step)        { -	 float cx1 = c1; -	 float cx2 = c2; -	 float cx3 = c3; +         if (cx1 + eo1 < 0 || +             cx2 + eo2 < 0 || +             cx3 + eo3 < 0) +         { +         } +         else if (cx1 + ei1 > 0 && +                  cx2 + ei2 > 0 && +                  cx3 + ei3 > 0) +         { +            block_full(rast, tri, x, y); /* trivial accept */ +         } +         else +         { +            do_block(rast, tri, x, y, cx1, cx2, cx3); +         } -	 for (x = minx; x < maxx; x += step) -	 { -	    if (cx1 + eo1 < 0 ||  -		cx2 + eo2 < 0 || -		cx3 + eo3 < 0)  -	    { -	    } -	    else if (cx1 + ei1 > 0 && -		     cx2 + ei2 > 0 && -		     cx3 + ei3 > 0)  -	    { -	       block_full(&tri, x, y); /* trivial accept */ -	    } -	    else  -	    { -	       do_block(&tri, x, y, cx1, cx2, cx3); -	    } - -	    /* Iterate cx values across the region: -	     */ -	    cx1 += xstep1; -	    cx2 += xstep2; -	    cx3 += xstep3; -	 } -       -	 /* Iterate c values down the region: -	  */ -	 c1 += ystep1; -	 c2 += ystep2; -	 c3 += ystep3;     +         /* Iterate cx values across the region: +          */ +         cx1 += xstep1; +         cx2 += xstep2; +         cx3 += xstep3;        } + +      /* Iterate c values down the region: +       */ +      c1 += ystep1; +      c2 += ystep2; +      c3 += ystep3;     }  } | 
