diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_jit.h | 9 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.c | 76 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.h | 11 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_priv.h | 11 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_tri.c | 222 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup_tri.c | 49 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 144 | 
7 files changed, 302 insertions, 220 deletions
| diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 7eccb5da85..e8fb7d990f 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -114,9 +114,14 @@ typedef void                      const void *a0,                      const void *dadx,                      const void *dady, -                    const uint32_t *mask,                      void *color, -                    void *depth); +                    void *depth, +                    const int32_t c1, +                    const int32_t c2, +                    const int32_t c3, +                    const int32_t *step1, +                    const int32_t *step2, +                    const int32_t *step3); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index ec87d907b8..b1bd27d340 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -25,6 +25,7 @@   *   **************************************************************************/ +#include <limits.h>  #include "util/u_memory.h"  #include "util/u_math.h"  #include "util/u_cpu_detect.h" @@ -279,6 +280,8 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,                           unsigned thread_index,                           const union lp_rast_cmd_arg arg )  { +   /* Set c1,c2,c3 to large values so the in/out test always passes */ +   const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2;     const struct lp_rast_shader_inputs *inputs = arg.shade_tile;     const unsigned tile_x = rast->tasks[thread_index].x;     const unsigned tile_y = rast->tasks[thread_index].y; @@ -296,7 +299,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,                                inputs,                                tile_x + x,                                tile_y + y, -                              mask); +                              c1, c2, c3);  } @@ -308,58 +311,25 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,                            unsigned thread_index,                            const struct lp_rast_shader_inputs *inputs,                            unsigned x, unsigned y, -                          unsigned mask) +                          int32_t c1, int32_t c2, int32_t c3)  { -#if 1     const struct lp_rast_state *state = rast->tasks[thread_index].current_state;     struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;     void *color;     void *depth; -   uint32_t ALIGN16_ATTRIB masks[2][2][2][2];     unsigned ix, iy;     int block_offset; +#ifdef DEBUG     assert(state);     /* Sanity checks */     assert(x % TILE_VECTOR_WIDTH == 0);     assert(y % TILE_VECTOR_HEIGHT == 0); -   /* mask: the rasterizer wants to treat pixels in 4x4 blocks, but -    * the pixel shader wants to swizzle them into 4 2x2 quads. -    *  -    * Additionally, the pixel shader wants masks as full dword ~0, -    * while the rasterizer wants to pack per-pixel bits tightly. -    */ -#if 0 -   unsigned qx, qy; -   for (qy = 0; qy < 2; ++qy) -      for (qx = 0; qx < 2; ++qx) -	 for (iy = 0; iy < 2; ++iy) -	    for (ix = 0; ix < 2; ++ix) -	       masks[qy][qx][iy][ix] = mask & (1 << (qy*8+iy*4+qx*2+ix)) ? ~0 : 0; -#else -   masks[0][0][0][0] = mask & (1 << (0*8+0*4+0*2+0)) ? ~0 : 0; -   masks[0][0][0][1] = mask & (1 << (0*8+0*4+0*2+1)) ? ~0 : 0; -   masks[0][0][1][0] = mask & (1 << (0*8+1*4+0*2+0)) ? ~0 : 0; -   masks[0][0][1][1] = mask & (1 << (0*8+1*4+0*2+1)) ? ~0 : 0; -   masks[0][1][0][0] = mask & (1 << (0*8+0*4+1*2+0)) ? ~0 : 0; -   masks[0][1][0][1] = mask & (1 << (0*8+0*4+1*2+1)) ? ~0 : 0; -   masks[0][1][1][0] = mask & (1 << (0*8+1*4+1*2+0)) ? ~0 : 0; -   masks[0][1][1][1] = mask & (1 << (0*8+1*4+1*2+1)) ? ~0 : 0; - -   masks[1][0][0][0] = mask & (1 << (1*8+0*4+0*2+0)) ? ~0 : 0; -   masks[1][0][0][1] = mask & (1 << (1*8+0*4+0*2+1)) ? ~0 : 0; -   masks[1][0][1][0] = mask & (1 << (1*8+1*4+0*2+0)) ? ~0 : 0; -   masks[1][0][1][1] = mask & (1 << (1*8+1*4+0*2+1)) ? ~0 : 0; -   masks[1][1][0][0] = mask & (1 << (1*8+0*4+1*2+0)) ? ~0 : 0; -   masks[1][1][0][1] = mask & (1 << (1*8+0*4+1*2+1)) ? ~0 : 0; -   masks[1][1][1][0] = mask & (1 << (1*8+1*4+1*2+0)) ? ~0 : 0; -   masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? ~0 : 0; -#endif -     assert((x % 4) == 0);     assert((y % 4) == 0); +#endif     ix = x % TILE_SIZE;     iy = y % TILE_SIZE; @@ -373,39 +343,27 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,     /* depth buffer */     depth = tile->depth + block_offset; -   /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ -   assert(lp_check_alignment(masks, 16)); - +#ifdef DEBUG     assert(lp_check_alignment(depth, 16));     assert(lp_check_alignment(color, 16));     assert(lp_check_alignment(state->jit_context.blend_color, 16)); +   assert(lp_check_alignment(inputs->step[0], 16)); +   assert(lp_check_alignment(inputs->step[1], 16)); +   assert(lp_check_alignment(inputs->step[2], 16)); +#endif +     /* run shader */     state->jit_function( &state->jit_context,                          x, y,                          inputs->a0,                          inputs->dadx,                          inputs->dady, -                        &masks[0][0][0][0],                          color, -                        depth); -#else -   struct lp_rast_tile *tile = &rast->tile; -   unsigned chan_index; -   unsigned q, ix, iy; - -   x %= TILE_SIZE; -   y %= TILE_SIZE; - -   /* mask */ -   for (q = 0; q < 4; ++q) -      for(iy = 0; iy < 2; ++iy) -         for(ix = 0; ix < 2; ++ix) -            if(masks[q] & (1 << (iy*2 + ix))) -               for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) -                  TILE_PIXEL(tile->color, x + q*2 + ix, y + iy, chan_index) = 0xff; - -#endif +                        depth, +                        c1, c2, c3, +                        inputs->step[0], inputs->step[1], inputs->step[2] +                        );  } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 2dd0193d8d..46e22f69a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -80,6 +80,9 @@ struct lp_rast_shader_inputs {     float (*a0)[4];     float (*dadx)[4];     float (*dady)[4]; + +   /* edge/step info for 3 edges and 4x4 block of pixels */ +   int ALIGN16_ATTRIB step[3][16];  }; @@ -117,14 +120,10 @@ struct lp_rast_triangle {     int dx31;     /* edge function values at minx,miny ?? */ -   int c1; -   int c2; -   int c3; - -   int step[3][16]; +   int c1, c2, c3;     /* inputs for the shader */ -   struct lp_rast_shader_inputs inputs; +   struct lp_rast_shader_inputs ALIGN16_ATTRIB inputs;  }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 79a90f6610..cd72d7e69d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -61,15 +61,6 @@ struct lp_rasterizer_task     unsigned x, y;          /**< Pos of this tile in framebuffer, in pixels */ -   /* Pixel blocks produced during rasterization -    */ -   unsigned nr_blocks; -   struct { -      unsigned x; -      unsigned y; -      unsigned mask; -   } blocks[256]; -     const struct lp_rast_state *current_state;     /** "back" pointer */ @@ -133,6 +124,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,                            unsigned thread_index,                            const struct lp_rast_shader_inputs *inputs,                            unsigned x, unsigned y, -                          unsigned masks); +                          int32_t c1, int32_t c2, int32_t c3);  #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 6c96010c52..9b1861223a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -29,6 +29,7 @@   * Rasterization for binned triangles within a tile   */ +#include <limits.h>  #include "util/u_math.h"  #include "lp_debug.h"  #include "lp_rast_priv.h" @@ -36,42 +37,89 @@  /** - * Add a 4x4 block of pixels to the block list. - * All pixels are known to be inside the triangle's bounds. + * Map an index in [0,15] to an x,y position, multiplied by 4. + * This is used to get the position of each subtile in a 4x4 + * grid of edge step values. + */ +static const int pos_table4[16][2] = { +   { 0, 0 }, +   { 4, 0 }, +   { 0, 4 }, +   { 4, 4 }, +   { 8, 0 }, +   { 12, 0 }, +   { 8, 4 }, +   { 12, 4 }, +   { 0, 8 }, +   { 4, 8 }, +   { 0, 12 }, +   { 4, 12 }, +   { 8, 8 }, +   { 12, 8 }, +   { 8, 12 }, +   { 12, 12 } +}; + + +static const int pos_table16[16][2] = { +   { 0, 0 }, +   { 16, 0 }, +   { 0, 16 }, +   { 16, 16 }, +   { 32, 0 }, +   { 48, 0 }, +   { 32, 16 }, +   { 48, 16 }, +   { 0, 32 }, +   { 16, 32 }, +   { 0, 48 }, +   { 16, 48 }, +   { 32, 32 }, +   { 48, 32 }, +   { 32, 48 }, +   { 48, 48 } +}; + + +/** + * Shade all pixels in a 4x4 block.   */  static void -block_full_4( struct lp_rasterizer_task *rast_task, int x, int y ) +block_full_4( struct lp_rasterizer_task *rast_task, +              const struct lp_rast_triangle *tri, +              int x, int y )  { -   const unsigned i = rast_task->nr_blocks; -   assert(x % 4 == 0); -   assert(y % 4 == 0); -   rast_task->blocks[i].x = x; -   rast_task->blocks[i].y = y; -   rast_task->blocks[i].mask = ~0; -   rast_task->nr_blocks++; +   /* Set c1,c2,c3 to large values so the in/out test always passes */ +   const int32_t c1 = INT_MAX/2, c2 = INT_MAX/2, c3 = INT_MAX/2; +   lp_rast_shade_quads(rast_task->rast, +                       rast_task->thread_index, +                       &tri->inputs,  +                       x, y, +                       c1, c2, c3);  }  /** - * Add a 16x16 block of pixels to the block list. - * All pixels are known to be inside the triangle's bounds. + * Shade all pixels in a 16x16 block.   */  static void -block_full_16( struct lp_rasterizer_task *rast_task, int x, int y ) +block_full_16( struct lp_rasterizer_task *rast_task, +               const struct lp_rast_triangle *tri, +               int x, int y )  {     unsigned ix, iy;     assert(x % 16 == 0);     assert(y % 16 == 0);     for (iy = 0; iy < 16; iy += 4)        for (ix = 0; ix < 16; ix += 4) -	 block_full_4(rast_task, x + ix, y + iy); +	 block_full_4(rast_task, tri, x + ix, y + iy);  }  /** - * Evaluate each pixel in a 4x4 block to determine if it lies within - * the triangle's bounds. - * Generate a mask of in/out flags and add the block to the blocks list. + * Pass the 4x4 pixel block to the shader function. + * Determination of which of the 16 pixels lies inside the triangle + * will be done as part of the fragment shader.   */  static void  do_block_4( struct lp_rasterizer_task *rast_task, @@ -81,28 +129,11 @@ do_block_4( struct lp_rasterizer_task *rast_task,  	    int c2,  	    int c3 )  { -   int i; -   unsigned mask = 0; - -   assert(x % 4 == 0); -   assert(y % 4 == 0); - -   for (i = 0; i < 16; i++) { -      int any_negative = ((c1 + tri->step[0][i]) |  -                          (c2 + tri->step[1][i]) |  -                          (c3 + tri->step[2][i])) >> 31; -      mask |= (~any_negative) & (1 << i); -   } -    -   /* As we do trivial reject already, masks should rarely be all zero: -    */ -   if (mask) { -      const unsigned i = rast_task->nr_blocks; -      rast_task->blocks[i].x = x; -      rast_task->blocks[i].y = y; -      rast_task->blocks[i].mask = mask; -      rast_task->nr_blocks++; -   } +   lp_rast_shade_quads(rast_task->rast, +                       rast_task->thread_index, +                       &tri->inputs,  +                       x, y, +                       c1, c2, c3);  } @@ -118,40 +149,42 @@ do_block_16( struct lp_rasterizer_task *rast_task,               int c2,               int c3 )  { -   int ix, iy, i = 0; +   const int ei1 = tri->ei1 * 4; +   const int ei2 = tri->ei2 * 4; +   const int ei3 = tri->ei3 * 4; -   int ei1 = tri->ei1 * 4; -   int ei2 = tri->ei2 * 4; -   int ei3 = tri->ei3 * 4; +   const int eo1 = tri->eo1 * 4; +   const int eo2 = tri->eo2 * 4; +   const int eo3 = tri->eo3 * 4; -   int eo1 = tri->eo1 * 4; -   int eo2 = tri->eo2 * 4; -   int eo3 = tri->eo3 * 4; +   int i;     assert(x % 16 == 0);     assert(y % 16 == 0); -   for (iy = 0; iy < 16; iy+=4) { -      for (ix = 0; ix < 16; ix+=4, i++) { -	 int cx1 = c1 + (tri->step[0][i] * 4); -	 int cx2 = c2 + (tri->step[1][i] * 4); -	 int cx3 = c3 + (tri->step[2][i] * 4); -	  -	 if (cx1 + eo1 < 0 || -	     cx2 + eo2 < 0 || -	     cx3 + eo3 < 0) { -            /* the block is completely outside the triangle - nop */ -	 } -	 else if (cx1 + ei1 > 0 && -		  cx2 + ei2 > 0 && -		  cx3 + ei3 > 0) { +   for (i = 0; i < 16; i++) { +      int cx1 = c1 + (tri->inputs.step[0][i] * 4); +      int cx2 = c2 + (tri->inputs.step[1][i] * 4); +      int cx3 = c3 + (tri->inputs.step[2][i] * 4); + +      if (cx1 + eo1 < 0 || +          cx2 + eo2 < 0 || +          cx3 + eo3 < 0) { +         /* the block is completely outside the triangle - nop */ +      } +      else { +         int px = x + pos_table4[i][0]; +         int py = y + pos_table4[i][1]; +         if (cx1 + ei1 > 0 && +             cx2 + ei2 > 0 && +             cx3 + ei3 > 0) {              /* the block is completely inside the triangle */ -	    block_full_4(rast_task, x+ix, y+iy); -	 } -	 else { +            block_full_4(rast_task, tri, px, py); +         } +         else {              /* the block is partially in/out of the triangle */ -	    do_block_4(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); -	 } +            do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); +         }        }     }  } @@ -171,8 +204,7 @@ lp_rast_triangle( struct lp_rasterizer *rast,     int x = rast_task->x;     int y = rast_task->y; -   int ix, iy; -   unsigned i = 0; +   unsigned i;     int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x;     int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; @@ -186,48 +218,36 @@ lp_rast_triangle( struct lp_rasterizer *rast,     int eo2 = tri->eo2 * 16;     int eo3 = tri->eo3 * 16; -   assert(Elements(rast_task->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); -     LP_DBG(DEBUG_RAST, "lp_rast_triangle\n"); -   rast_task->nr_blocks = 0; -     /* Walk over the tile to build a list of 4x4 pixel blocks which will      * be filled/shaded.  We do this at two granularities: 16x16 blocks      * and then 4x4 blocks.      */ -   for (iy = 0; iy < TILE_SIZE; iy += 16) { -      for (ix = 0; ix < TILE_SIZE; ix += 16, i++) { -	 int cx1 = c1 + (tri->step[0][i] * 16); -	 int cx2 = c2 + (tri->step[1][i] * 16); -	 int cx3 = c3 + (tri->step[2][i] * 16); -	  -	 if (cx1 + eo1 < 0 || -	     cx2 + eo2 < 0 || -	     cx3 + eo3 < 0) { -            /* the block is completely outside the triangle - nop */ -	 } -	 else if (cx1 + ei1 > 0 && -		  cx2 + ei2 > 0 && -		  cx3 + ei3 > 0) { +   for (i = 0; i < 16; i++) { +      int cx1 = c1 + (tri->inputs.step[0][i] * 16); +      int cx2 = c2 + (tri->inputs.step[1][i] * 16); +      int cx3 = c3 + (tri->inputs.step[2][i] * 16); + +      if (cx1 + eo1 < 0 || +          cx2 + eo2 < 0 || +          cx3 + eo3 < 0) { +         /* the block is completely outside the triangle - nop */ +      } +      else { +         int px = x + pos_table16[i][0]; +         int py = y + pos_table16[i][1]; + +         if (cx1 + ei1 > 0 && +             cx2 + ei2 > 0 && +             cx3 + ei3 > 0) {              /* the block is completely inside the triangle */ -	    block_full_16(rast_task, x+ix, y+iy); -	 } -	 else { +            block_full_16(rast_task, tri, px, py); +         } +         else {              /* the block is partially in/out of the triangle */ -	    do_block_16(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); -	 } +            do_block_16(rast_task, tri, px, py, cx1, cx2, cx3); +         }        }     } - -   assert(rast_task->nr_blocks <= Elements(rast_task->blocks)); - -   /* Shade the 4x4 pixel blocks */ -   for (i = 0; i < rast_task->nr_blocks; i++)  -      lp_rast_shade_quads(rast, -                          thread_index, -                          &tri->inputs,  -			  rast_task->blocks[i].x, -			  rast_task->blocks[i].y, -			  rast_task->blocks[i].mask);  } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index aeaf260af2..e15b987767 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -265,7 +265,7 @@ do_triangle_ccw(struct setup_context *setup,     const int y3 = subpixel_snap(v3[0][1]);     struct lp_scene *scene = lp_setup_get_current_scene(setup); -   struct lp_rast_triangle *tri = lp_scene_alloc( scene, sizeof *tri ); +   struct lp_rast_triangle *tri = lp_scene_alloc_aligned( scene, sizeof *tri, 16 );     float area, oneoverarea;     int minx, maxx, miny, maxy; @@ -354,38 +354,29 @@ do_triangle_ccw(struct setup_context *setup,     tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;     { -      int xstep1 = -tri->dy12; -      int xstep2 = -tri->dy23; -      int xstep3 = -tri->dy31; +      const int xstep1 = -tri->dy12; +      const int xstep2 = -tri->dy23; +      const int xstep3 = -tri->dy31; -      int ystep1 = tri->dx12; -      int ystep2 = tri->dx23; -      int ystep3 = tri->dx31; +      const int ystep1 = tri->dx12; +      const int ystep2 = tri->dx23; +      const int ystep3 = tri->dx31; -      int ix, iy; +      int qx, qy, ix, iy;        int i = 0; -      int c1 = 0; -      int c2 = 0; -      int c3 = 0; -       -      for (iy = 0; iy < 4; iy++) { -	 int cx1 = c1; -	 int cx2 = c2; -	 int cx3 = c3; - -	 for (ix = 0; ix < 4; ix++, i++) { -	    tri->step[0][i] = cx1; -	    tri->step[1][i] = cx2; -	    tri->step[2][i] = cx3; -	    cx1 += xstep1; -	    cx2 += xstep2; -	    cx3 += xstep3; -	 } - -	 c1 += ystep1; -	 c2 += ystep2; -	 c3 += ystep3; +      for (qy = 0; qy < 2; qy++) { +         for (qx = 0; qx < 2; qx++) { +            for (iy = 0; iy < 2; iy++) { +               for (ix = 0; ix < 2; ix++, i++) { +                  int x = qx * 2 + ix; +                  int y = qy * 2 + iy; +                  tri->inputs.step[0][i] = x * xstep1 + y * ystep1; +                  tri->inputs.step[1][i] = x * xstep2 + y * ystep2; +                  tri->inputs.step[2][i] = x * xstep3 + y * ystep3; +               } +            } +         }        }     } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c0d5a70a55..4af37e365e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -176,7 +176,92 @@ generate_depth(LLVMBuilderRef builder,  /** + * Generate the code to do inside/outside triangle testing for the + * four pixels in a 2x2 quad.  This will set the four elements of the + * quad mask vector to 0 or ~0. + * \param i  which quad of the quad group to test, in [0,3] + */ +static void +generate_tri_edge_mask(LLVMBuilderRef builder, +                       unsigned i, +                       LLVMValueRef *mask,      /* ivec4, out */ +                       LLVMValueRef c0,         /* int32 */ +                       LLVMValueRef c1,         /* int32 */ +                       LLVMValueRef c2,         /* int32 */ +                       LLVMValueRef step0_ptr,  /* ivec4 */ +                       LLVMValueRef step1_ptr,  /* ivec4 */ +                       LLVMValueRef step2_ptr)  /* ivec4 */ +{ +   /* +     c0_vec = splat(c0) +     c1_vec = splat(c1) +     c2_vec = splat(c2) +     s0_vec = c0_vec + step0_ptr[i] +     s1_vec = c1_vec + step1_ptr[i] +     s2_vec = c2_vec + step2_ptr[i] +     m0_vec = s0_vec > {0,0,0,0} +     m1_vec = s1_vec > {0,0,0,0} +     m2_vec = s2_vec > {0,0,0,0} +     mask = m0_vec & m1_vec & m2_vec +    */ +   struct lp_type i32_type; +   LLVMTypeRef i32vec4_type; + +   LLVMValueRef index; +   LLVMValueRef c0_vec, c1_vec, c2_vec; +   LLVMValueRef step0_vec, step1_vec, step2_vec; +   LLVMValueRef m0_vec, m1_vec, m2_vec; +   LLVMValueRef s0_vec, s1_vec, s2_vec; +   LLVMValueRef m; + +   LLVMValueRef zeros; + +   assert(i < 4); +    +   /* int32 vector type */ +   memset(&i32_type, 0, sizeof i32_type); +   i32_type.floating = FALSE; /* values are integers */ +   i32_type.sign = TRUE;      /* values are signed */ +   i32_type.norm = FALSE;     /* values are not normalized */ +   i32_type.width = 32;       /* 32-bit int values */ +   i32_type.length = 4;       /* 4 elements per vector */ + +   i32vec4_type = lp_build_int32_vec4_type(); + +   /* int32_vec4 zero = {0,0,0,0} */ +   zeros = LLVMConstNull(i32vec4_type); + +   c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); +   c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); +   c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); + +   index = LLVMConstInt(LLVMInt32Type(), i, 0); +   step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); +   step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); +   step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); + +   /** XXX with a little work, we could remove the add here and just +    * compare c0_vec > step0_vec. +    */ +   s0_vec = LLVMBuildAdd(builder, c0_vec, step0_vec, ""); +   s1_vec = LLVMBuildAdd(builder, c1_vec, step1_vec, ""); +   s2_vec = LLVMBuildAdd(builder, c2_vec, step2_vec, ""); +   m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s0_vec, zeros); +   m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s1_vec, zeros); +   m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, s2_vec, zeros); + +   m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); +   m = LLVMBuildAnd(builder, m, m2_vec, ""); + +   lp_build_name(m, "m"); + +   *mask = m; +} + + +/**   * Generate the fragment shader, depth/stencil test, and alpha tests. + * \param i  which quad in the tile, in range [0,3]   */  static void  generate_fs(struct llvmpipe_context *lp, @@ -190,7 +275,13 @@ generate_fs(struct llvmpipe_context *lp,              struct lp_build_sampler_soa *sampler,              LLVMValueRef *pmask,              LLVMValueRef *color, -            LLVMValueRef depth_ptr) +            LLVMValueRef depth_ptr, +            LLVMValueRef c0, +            LLVMValueRef c1, +            LLVMValueRef c2, +            LLVMValueRef step0_ptr, +            LLVMValueRef step1_ptr, +            LLVMValueRef step2_ptr)  {     const struct tgsi_token *tokens = shader->base.tokens;     LLVMTypeRef elem_type; @@ -205,6 +296,8 @@ generate_fs(struct llvmpipe_context *lp,     unsigned attrib;     unsigned chan; +   assert(i < 4); +     elem_type = lp_build_elem_type(type);     vec_type = lp_build_vec_type(type);     int_vec_type = lp_build_int_vec_type(type); @@ -224,8 +317,13 @@ generate_fs(struct llvmpipe_context *lp,     }     lp_build_flow_scope_declare(flow, &z); +   /* do triangle edge testing */ +   generate_tri_edge_mask(builder, i, pmask, +                          c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); +     lp_build_mask_begin(&mask, flow, type, *pmask); +     early_depth_test =        key->depth.enabled &&        !key->alpha.enabled && @@ -376,17 +474,18 @@ generate_fragment(struct llvmpipe_context *lp,     LLVMTypeRef fs_int_vec_type;     LLVMTypeRef blend_vec_type;     LLVMTypeRef blend_int_vec_type; -   LLVMTypeRef arg_types[9]; +   LLVMTypeRef arg_types[14];     LLVMTypeRef func_type; +   LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type();     LLVMValueRef context_ptr;     LLVMValueRef x;     LLVMValueRef y;     LLVMValueRef a0_ptr;     LLVMValueRef dadx_ptr;     LLVMValueRef dady_ptr; -   LLVMValueRef mask_ptr;     LLVMValueRef color_ptr;     LLVMValueRef depth_ptr; +   LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr;     LLVMBasicBlockRef block;     LLVMBuilderRef builder;     LLVMValueRef x0; @@ -468,9 +567,17 @@ generate_fragment(struct llvmpipe_context *lp,     arg_types[3] = LLVMPointerType(fs_elem_type, 0);    /* a0 */     arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* dadx */     arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* dady */ -   arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */ -   arg_types[7] = LLVMPointerType(blend_vec_type, 0);  /* color */ -   arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ +   arg_types[6] = LLVMPointerType(blend_vec_type, 0);  /* color */ +   arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ +   arg_types[8] = LLVMInt32Type();                     /* c0 */ +   arg_types[9] = LLVMInt32Type();                    /* c1 */ +   arg_types[10] = LLVMInt32Type();                    /* c2 */ +   /* Note: the step arrays are built as int32[16] but we interpret +    * them here as int32_vec4[4]. +    */ +   arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ +   arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ +   arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */     func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); @@ -486,9 +593,14 @@ generate_fragment(struct llvmpipe_context *lp,     a0_ptr       = LLVMGetParam(variant->function, 3);     dadx_ptr     = LLVMGetParam(variant->function, 4);     dady_ptr     = LLVMGetParam(variant->function, 5); -   mask_ptr     = LLVMGetParam(variant->function, 6); -   color_ptr    = LLVMGetParam(variant->function, 7); -   depth_ptr    = LLVMGetParam(variant->function, 8); +   color_ptr    = LLVMGetParam(variant->function, 6); +   depth_ptr    = LLVMGetParam(variant->function, 7); +   c0           = LLVMGetParam(variant->function, 8); +   c1           = LLVMGetParam(variant->function, 9); +   c2           = LLVMGetParam(variant->function, 10); +   step0_ptr    = LLVMGetParam(variant->function, 11); +   step1_ptr    = LLVMGetParam(variant->function, 12); +   step2_ptr    = LLVMGetParam(variant->function, 13);     lp_build_name(context_ptr, "context");     lp_build_name(x, "x"); @@ -496,9 +608,14 @@ generate_fragment(struct llvmpipe_context *lp,     lp_build_name(a0_ptr, "a0");     lp_build_name(dadx_ptr, "dadx");     lp_build_name(dady_ptr, "dady"); -   lp_build_name(mask_ptr, "mask");     lp_build_name(color_ptr, "color");     lp_build_name(depth_ptr, "depth"); +   lp_build_name(c0, "c0"); +   lp_build_name(c1, "c1"); +   lp_build_name(c2, "c2"); +   lp_build_name(step0_ptr, "step0"); +   lp_build_name(step1_ptr, "step1"); +   lp_build_name(step2_ptr, "step2");     /*      * Function body @@ -526,7 +643,6 @@ generate_fragment(struct llvmpipe_context *lp,        if(i != 0)           lp_build_interp_soa_update(&interp, i); -      fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");        depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");        generate_fs(lp, shader, key, @@ -536,9 +652,11 @@ generate_fragment(struct llvmpipe_context *lp,                    i,                    &interp,                    sampler, -                  &fs_mask[i], +                  &fs_mask[i], /* output */                    out_color, -                  depth_ptr_i); +                  depth_ptr_i, +                  c0, c1, c2, +                  step0_ptr, step1_ptr, step2_ptr);        for(chan = 0; chan < NUM_CHANNELS; ++chan)           fs_out_color[chan][i] = out_color[chan]; | 
