author    Keith Whitwell <keithw@vmware.com>    2009-10-07 22:36:43 +0100
committer Keith Whitwell <keithw@vmware.com>    2009-10-08 08:32:43 +0100
commit    89498d01531cd515c769e570bf799c39fbafc8fb (patch)
tree      8f69ed86cfe3eb4446ab5466a936d5a6dd3977d0 /src/gallium/drivers/llvmpipe/lp_setup_tri.c
parent    0083d2e40a8b0aa9ea36f98d4b6b7981d5dca0e3 (diff)
llvmpipe: import experimental softpipe rasterizer code, wip binning code
WIP: doesn't build or run yet. The rasterizer code is based on Nick Capens's DevMaster posts and the Larrabee articles, but currently matches neither the performance nor the correctness of either source...
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_setup_tri.c')
-rw-r--r--   src/gallium/drivers/llvmpipe/lp_setup_tri.c   755
1 file changed, 755 insertions, 0 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
new file mode 100644
index 0000000000..a09e0fa643
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -0,0 +1,755 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Recursive rasterization for triangles
+ */
+
+#include "lp_context.h"
+#include "lp_quad.h"
+#include "lp_quad_pipe.h"
+#include "lp_setup.h"
+#include "lp_state.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vertex.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_thread.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#define BLOCKSIZE 4
+
+struct triangle {
+ /* one-pixel sized trivial accept offsets for each plane */
+ float ei1;
+ float ei2;
+ float ei3;
+
+ /* one-pixel sized trivial reject offsets for each plane */
+ float eo1;
+ float eo2;
+ float eo3;
+
+ /* y deltas for vertex pairs */
+ float dy12;
+ float dy23;
+ float dy31;
+
+ /* x deltas for vertex pairs */
+ float dx12;
+ float dx23;
+ float dx31;
+
+ /* Attribute interpolation:
+ */
+ float oneoverarea;
+ float x1;
+ float y1;
+ struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
+ struct tgsi_interp_coef position_coef;
+
+ /* A run of pre-initialized quads:
+ */
+ struct llvmpipe_context *llvmpipe;
+ struct quad_header quad[4];
+};
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ */
+static void constant_coef( struct tgsi_interp_coef *coef,
+ const float (*v3)[4],
+ unsigned vert_attr,
+ unsigned i )
+{
+ coef->a0[i] = v3[vert_attr][i];
+ coef->dadx[i] = 0;
+ coef->dady[i] = 0;
+}
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated coefficient,
+ * for a triangle.
+ */
+static void linear_coef( struct triangle *tri,
+ struct tgsi_interp_coef *coef,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4],
+ unsigned vert_attr,
+ unsigned i)
+{
+ float a1 = v1[vert_attr][i];
+ float a2 = v2[vert_attr][i];
+ float a3 = v3[vert_attr][i];
+
+ float da12 = a1 - a2;
+ float da31 = a3 - a1;
+ float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
+ float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
+
+ coef->dadx[i] = dadx;
+ coef->dady[i] = dady;
+
+ /* calculate a0 as the value which would be sampled for the
+ * fragment at (0,0), taking into account that we want to sample at
+ * pixel centers, in other words (0.5, 0.5).
+ *
+ * this is neat but unfortunately not a good way to do things for
+ * triangles with very large values of dadx or dady as it will
+ * result in the subtraction and re-addition from a0 of a very
+ * large number, which means we'll end up losing a lot of the
+ * fractional bits and precision from a0. the way to fix this is
+ * to define a0 as the sample at a pixel center somewhere near vmin
+ * instead - i'll switch to this later.
+ */
+ coef->a0[i] = (v1[vert_attr][i] -
+ (dadx * (v1[0][0] - 0.5f) +
+ dady * (v1[0][1] - 0.5f)));
+}
+
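+/* For reference, the plane set up above is evaluated per fragment as
+ *
+ *    value(x, y) = a0 + dadx * x + dady * y
+ *
+ * A minimal sketch of that evaluation (not used in this file -- the
+ * actual evaluation happens in the fragment shader machinery):
+ */
+static inline float
+eval_linear_coef( const struct tgsi_interp_coef *coef,
+                  unsigned i, float x, float y )
+{
+   return coef->a0[i] + coef->dadx[i] * x + coef->dady[i] * y;
+}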
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void perspective_coef( struct triangle *tri,
+ struct tgsi_interp_coef *coef,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4],
+ unsigned vert_attr,
+ unsigned i)
+{
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ float a1 = v1[vert_attr][i] * v1[0][3];
+ float a2 = v2[vert_attr][i] * v2[0][3];
+ float a3 = v3[vert_attr][i] * v3[0][3];
+ float da12 = a1 - a2;
+ float da31 = a3 - a1;
+ float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
+ float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
+
+
+ coef->dadx[i] = dadx;
+ coef->dady[i] = dady;
+ coef->a0[i] = (a1 -
+ (dadx * (v1[0][0] - 0.5f) +
+ dady * (v1[0][1] - 0.5f)));
+}
+
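+/* At a fragment (x, y) the perspective-correct value is then
+ * recovered by dividing by the similarly-interpolated 1/w:
+ *
+ *    attr(x, y) = (a0 + dadx * x + dady * y) / wrecip(x, y)
+ *
+ * where the wrecip plane is set up by linear_coef() from v[0][3].
+ */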
+
+/**
+ * Special coefficient setup for gl_FragCoord.
+ * X and Y are trivial, though Y has to be inverted for OpenGL.
+ * Z and W are copied from position_coef which should have already been computed.
+ * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
+ */
+static void
+setup_fragcoord_coef(struct triangle *tri, unsigned slot)
+{
+ /*X*/
+ tri->coef[slot].a0[0] = 0.0;
+ tri->coef[slot].dadx[0] = 1.0;
+ tri->coef[slot].dady[0] = 0.0;
+ /*Y*/
+ tri->coef[slot].a0[1] = 0.0;
+ tri->coef[slot].dadx[1] = 0.0;
+ tri->coef[slot].dady[1] = 1.0;
+ /*Z*/
+ tri->coef[slot].a0[2] = tri->position_coef.a0[2];
+ tri->coef[slot].dadx[2] = tri->position_coef.dadx[2];
+ tri->coef[slot].dady[2] = tri->position_coef.dady[2];
+ /*W*/
+ tri->coef[slot].a0[3] = tri->position_coef.a0[3];
+ tri->coef[slot].dadx[3] = tri->position_coef.dadx[3];
+ tri->coef[slot].dady[3] = tri->position_coef.dady[3];
+}
+
+
+
+/**
+ * Compute the tri->coef[] array dadx, dady, a0 values.
+ */
+static void setup_tri_coefficients( struct llvmpipe_context *llvmpipe,
+ struct triangle *tri,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4],
+ boolean frontface )
+{
+ const struct lp_fragment_shader *fs = llvmpipe->fs;
+ const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
+ unsigned input;
+
+ /* z and w are done by linear interpolation:
+ */
+ linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 2);
+ linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 3);
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (input = 0; input < fs->info.num_inputs; input++) {
+ unsigned vert_attr = vinfo->attrib[input].src_index;
+ unsigned i;
+
+ switch (vinfo->attrib[input].interp_mode) {
+ case INTERP_CONSTANT:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ constant_coef(&tri->coef[input], v3, vert_attr, i);
+ break;
+
+ case INTERP_LINEAR:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ linear_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i);
+ break;
+
+ case INTERP_PERSPECTIVE:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ perspective_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i);
+ break;
+
+ case INTERP_POS:
+ setup_fragcoord_coef(tri, input);
+ break;
+
+ default:
+ assert(0);
+ }
+
+ if (fs->info.input_semantic_name[input] == TGSI_SEMANTIC_FACE) {
+ tri->coef[input].a0[0] = 1.0f - frontface;
+ tri->coef[input].dadx[0] = 0.0;
+ tri->coef[input].dady[0] = 0.0;
+ }
+ }
+}
+
+
+
+/* XXX: do this by adding and subtracting a large floating point
+ * number instead:
+ */
+static inline float subpixel_snap( float a )
+{
+ int i = a * 16;
+   return (float)i * (1.0f / 16.0f);
+}
+
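+/* A possible implementation of the XXX note above: assuming IEEE-754
+ * single precision and round-to-nearest mode, adding 1.5 * 2^23
+ * pushes the fraction bits of the scaled value off the end of the
+ * mantissa, so subtracting the constant again leaves the value
+ * rounded to an integer.  Untested sketch, not wired up anywhere;
+ * note it rounds to nearest where the cast above truncates:
+ */
+static inline float subpixel_snap_rte( float a )
+{
+   const float snapper = 3.0f * (float)(1 << 22);  /* 1.5 * 2^23 */
+   volatile float f = a * 16.0f + snapper;  /* volatile: keep the
+                                             * rounding from being
+                                             * optimized away */
+   return (f - snapper) * (1.0f / 16.0f);
+}
+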
+
+/* Render a block that is known to be fully covered, as runs of 2x2
+ * quads.  One variant per BLOCKSIZE:
+ */
+#if (BLOCKSIZE == 8)
+static void block_full( struct triangle *tri, int x, int y )
+{
+ struct quad_header *ptrs[4];
+ int i;
+
+ tri->quad[0].input.x0 = x + 0;
+ tri->quad[1].input.x0 = x + 2;
+ tri->quad[2].input.x0 = x + 4;
+ tri->quad[3].input.x0 = x + 6;
+
+ for (i = 0; i < 4; i++, y += 2) {
+ tri->quad[0].inout.mask = 0xf;
+ tri->quad[1].inout.mask = 0xf;
+ tri->quad[2].inout.mask = 0xf;
+ tri->quad[3].inout.mask = 0xf;
+
+ tri->quad[0].input.y0 = y;
+ tri->quad[1].input.y0 = y;
+ tri->quad[2].input.y0 = y;
+ tri->quad[3].input.y0 = y;
+
+ /* XXX: don't bother with this ptrs business */
+ ptrs[0] = &tri->quad[0];
+ ptrs[1] = &tri->quad[1];
+ ptrs[2] = &tri->quad[2];
+ ptrs[3] = &tri->quad[3];
+
+ tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 );
+ }
+}
+#elif (BLOCKSIZE == 4)
+static void block_full( struct triangle *tri, int x, int y )
+{
+ struct quad_header *ptrs[4];
+ int iy;
+
+ tri->quad[0].input.x0 = x + 0;
+ tri->quad[1].input.x0 = x + 2;
+
+ for (iy = 0; iy < 4; iy += 2) {
+ tri->quad[0].inout.mask = 0xf;
+ tri->quad[1].inout.mask = 0xf;
+
+ tri->quad[0].input.y0 = y + iy;
+ tri->quad[1].input.y0 = y + iy;
+
+ /* XXX: don't bother with this ptrs business */
+ ptrs[0] = &tri->quad[0];
+ ptrs[1] = &tri->quad[1];
+
+ tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 );
+ }
+}
+#else
+static void block_full( struct triangle *tri, int x, int y )
+{
+ struct quad_header *ptrs[4];
+ int iy;
+
+ tri->quad[0].input.x0 = x;
+ tri->quad[0].input.y0 = y;
+ tri->quad[0].inout.mask = 0xf;
+
+ ptrs[0] = &tri->quad[0];
+ tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 1 );
+}
+#endif
+
+
+static void
+do_quad( struct triangle *tri,
+ int x, int y,
+ float c1, float c2, float c3 )
+{
+ struct quad_header *quad = &tri->quad[0];
+
+ float xstep1 = -tri->dy12;
+ float xstep2 = -tri->dy23;
+ float xstep3 = -tri->dy31;
+
+ float ystep1 = tri->dx12;
+ float ystep2 = tri->dx23;
+ float ystep3 = tri->dx31;
+
+ quad->input.x0 = x;
+ quad->input.y0 = y;
+ quad->inout.mask = 0;
+
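+   /* Build the 2x2 coverage mask pixel by pixel: c1/c2/c3 are the
+    * edge values at (x, y), adding xstepN moves one pixel in +x and
+    * ystepN one pixel in +y.  So bit 0 covers (x, y), bit 1 (x+1, y),
+    * bit 2 (x, y+1) and bit 3 (x+1, y+1).
+    */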
+ if (c1 > 0 &&
+ c2 > 0 &&
+ c3 > 0)
+ quad->inout.mask |= 1;
+
+ if (c1 + xstep1 > 0 &&
+ c2 + xstep2 > 0 &&
+ c3 + xstep3 > 0)
+ quad->inout.mask |= 2;
+
+ if (c1 + ystep1 > 0 &&
+ c2 + ystep2 > 0 &&
+ c3 + ystep3 > 0)
+ quad->inout.mask |= 4;
+
+ if (c1 + ystep1 + xstep1 > 0 &&
+ c2 + ystep2 + xstep2 > 0 &&
+ c3 + ystep3 + xstep3 > 0)
+ quad->inout.mask |= 8;
+
+ if (quad->inout.mask)
+ tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, &quad, 1 );
+}
+
+/* Evaluate each pixel in a block, generate a mask and possibly render
+ * the quad:
+ */
+static void
+do_block( struct triangle *tri,
+ int x, int y,
+ float c1,
+ float c2,
+ float c3 )
+{
+ const int step = 2;
+
+ float xstep1 = -step * tri->dy12;
+ float xstep2 = -step * tri->dy23;
+ float xstep3 = -step * tri->dy31;
+
+ float ystep1 = step * tri->dx12;
+ float ystep2 = step * tri->dx23;
+ float ystep3 = step * tri->dx31;
+
+ int ix, iy;
+
+ for (iy = 0; iy < BLOCKSIZE; iy += 2) {
+ float cx1 = c1;
+ float cx2 = c2;
+ float cx3 = c3;
+
+ for (ix = 0; ix < BLOCKSIZE; ix += 2) {
+
+ do_quad(tri, x+ix, y+iy, cx1, cx2, cx3);
+
+ cx1 += xstep1;
+ cx2 += xstep2;
+ cx3 += xstep3;
+ }
+
+ c1 += ystep1;
+ c2 += ystep2;
+ c3 += ystep3;
+ }
+}
+
+
+
+
+/* To avoid having to allocate power-of-four, square render targets,
+ * we end up with a specialized version of the above that runs only
+ * at the topmost level.
+ *
+ * At the topmost level there may be an arbitrary number of steps on
+ * either dimension, so this loop needs to be either separately
+ * code-generated and unrolled for each render target size, or kept
+ * as generic looping code:
+ */
+
+#define MIN3(a,b,c) MIN2(MIN2(a,b),c)
+#define MAX3(a,b,c) MAX2(MAX2(a,b),c)
+
+static void
+do_triangle_ccw(struct llvmpipe_context *llvmpipe,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4],
+ boolean frontfacing )
+{
+ const int rt_width = llvmpipe->framebuffer.cbufs[0]->width;
+ const int rt_height = llvmpipe->framebuffer.cbufs[0]->height;
+
+ const float y1 = subpixel_snap(v1[0][1]);
+ const float y2 = subpixel_snap(v2[0][1]);
+ const float y3 = subpixel_snap(v3[0][1]);
+
+ const float x1 = subpixel_snap(v1[0][0]);
+ const float x2 = subpixel_snap(v2[0][0]);
+ const float x3 = subpixel_snap(v3[0][0]);
+
+ struct triangle tri;
+ float area;
+ float c1, c2, c3;
+ int i;
+ int minx, maxx, miny, maxy;
+
+ tri.llvmpipe = llvmpipe;
+
+
+ tri.dx12 = x1 - x2;
+ tri.dx23 = x2 - x3;
+ tri.dx31 = x3 - x1;
+
+ tri.dy12 = y1 - y2;
+ tri.dy23 = y2 - y3;
+ tri.dy31 = y3 - y1;
+
+ area = (tri.dx12 * tri.dy31 -
+ tri.dx31 * tri.dy12);
+
+ /* Cull non-ccw and zero-sized triangles.
+ */
+ if (area <= 0 || util_is_inf_or_nan(area))
+ return;
+
+   /* Bounding rectangle */
+ minx = util_iround(MIN3(x1, x2, x3) - .5);
+ maxx = util_iround(MAX3(x1, x2, x3) + .5);
+ miny = util_iround(MIN3(y1, y2, y3) - .5);
+ maxy = util_iround(MAX3(y1, y2, y3) + .5);
+
+ /* Clamp to framebuffer (or tile) dimensions:
+ */
+ miny = MAX2(0, miny);
+ minx = MAX2(0, minx);
+ maxy = MIN2(rt_height, maxy);
+ maxx = MIN2(rt_width, maxx);
+
+ if (miny == maxy || minx == maxx)
+ return;
+
+ /* The only divide in this code. Is it really needed?
+ */
+ tri.oneoverarea = 1.0f / area;
+
+ /* Setup parameter interpolants:
+ */
+ setup_tri_coefficients( llvmpipe, &tri, v1, v2, v3, frontfacing );
+
+ for (i = 0; i < Elements(tri.quad); i++) {
+ tri.quad[i].coef = tri.coef;
+ tri.quad[i].posCoef = &tri.position_coef;
+ }
+
+   /* half-edge constants, will be iterated over the whole
+    * render target.
+ */
+ c1 = tri.dy12 * x1 - tri.dx12 * y1;
+ c2 = tri.dy23 * x2 - tri.dx23 * y2;
+ c3 = tri.dy31 * x3 - tri.dx31 * y3;
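+
+   /* Each cN is the edge function of one triangle edge, evaluated at
+    * the origin; e.g. for the first edge:
+    *
+    *    E12(x, y) = c1 - dy12 * x + dx12 * y
+    *              = (x1 - x) * dy12 - (y1 - y) * dx12
+    *
+    * zero on the edge and positive on the interior side for the ccw
+    * triangles that reach this point.  The rasterization loops below
+    * step these values incrementally instead of re-evaluating them.
+    */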
+
+ /* correct for top-left fill convention:
+ */
+ if (tri.dy12 < 0 || (tri.dy12 == 0 && tri.dx12 > 0)) c1++;
+ if (tri.dy23 < 0 || (tri.dy23 == 0 && tri.dx23 > 0)) c2++;
+ if (tri.dy31 < 0 || (tri.dy31 == 0 && tri.dx31 > 0)) c3++;
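+
+   /* (The bias turns the strict 'c > 0' coverage tests into an
+    * effective 'c >= 0' on left and horizontal-top edges, so a pixel
+    * centre lying exactly on an edge shared by two triangles gets
+    * drawn exactly once.  In the fixed-point code this is based on,
+    * the bias is a single LSB.)
+    */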
+
+ /* find trivial reject offsets for each edge for a single-pixel
+ * sized block. These will be scaled up at each recursive level to
+ * match the active blocksize. Scaling in this way works best if
+ * the blocks are square.
+ */
+ tri.eo1 = 0;
+ if (tri.dy12 < 0) tri.eo1 -= tri.dy12;
+ if (tri.dx12 > 0) tri.eo1 += tri.dx12;
+
+ tri.eo2 = 0;
+ if (tri.dy23 < 0) tri.eo2 -= tri.dy23;
+ if (tri.dx23 > 0) tri.eo2 += tri.dx23;
+
+ tri.eo3 = 0;
+ if (tri.dy31 < 0) tri.eo3 -= tri.dy31;
+ if (tri.dx31 > 0) tri.eo3 += tri.dx31;
+
+ /* Calculate trivial accept offsets from the above.
+ */
+ tri.ei1 = tri.dx12 - tri.dy12 - tri.eo1;
+ tri.ei2 = tri.dx23 - tri.dy23 - tri.eo2;
+ tri.ei3 = tri.dx31 - tri.dy31 - tri.eo3;
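+
+   /* Why this works: stepping one pixel in +x changes an edge value
+    * by -dy and one pixel in +y changes it by +dx, so each eo above
+    * sums just the positive per-pixel steps -- the offset from a
+    * block's origin to its most-interior corner.  If even that corner
+    * is outside an edge, the whole block is outside it.  Conversely
+    * each ei = (dx - dy) - eo sums the negative steps, giving the
+    * least-interior corner: if that corner is inside all three edges,
+    * the whole block is trivially accepted.
+    */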
+
+ minx &= ~(BLOCKSIZE-1); /* aligned blocks */
+ miny &= ~(BLOCKSIZE-1); /* aligned blocks */
+
+ c1 += tri.dx12 * miny - tri.dy12 * minx;
+ c2 += tri.dx23 * miny - tri.dy23 * minx;
+ c3 += tri.dx31 * miny - tri.dy31 * minx;
+
+ if ((miny & ~15) == (maxy & ~15) &&
+ (minx & ~15) == (maxx & ~15))
+ {
+ const int step = 2;
+
+ float xstep1 = -step * tri.dy12;
+ float xstep2 = -step * tri.dy23;
+ float xstep3 = -step * tri.dy31;
+
+ float ystep1 = step * tri.dx12;
+ float ystep2 = step * tri.dx23;
+ float ystep3 = step * tri.dx31;
+
+ float eo1 = tri.eo1 * step;
+ float eo2 = tri.eo2 * step;
+ float eo3 = tri.eo3 * step;
+
+ int x, y;
+
+      /* The bounding box fits within a single 16x16 region: no
+       * block-level trivial accept/reject is worthwhile, so just
+       * walk the 2x2 quads directly.
+       */
+ for (y = miny; y < maxy; y += step)
+ {
+ float cx1 = c1;
+ float cx2 = c2;
+ float cx3 = c3;
+
+ for (x = minx; x < maxx; x += step)
+ {
+ if (cx1 + eo1 < 0 ||
+ cx2 + eo2 < 0 ||
+ cx3 + eo3 < 0)
+ {
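+               /* this quad is trivially rejected by one of the edges */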
+ }
+ else
+ {
+ do_quad(&tri, x, y, cx1, cx2, cx3);
+ }
+
+ /* Iterate cx values across the region:
+ */
+ cx1 += xstep1;
+ cx2 += xstep2;
+ cx3 += xstep3;
+ }
+
+ /* Iterate c values down the region:
+ */
+ c1 += ystep1;
+ c2 += ystep2;
+ c3 += ystep3;
+ }
+ }
+ else
+ {
+ const int step = BLOCKSIZE;
+
+ float ei1 = tri.ei1 * step;
+ float ei2 = tri.ei2 * step;
+ float ei3 = tri.ei3 * step;
+
+ float eo1 = tri.eo1 * step;
+ float eo2 = tri.eo2 * step;
+ float eo3 = tri.eo3 * step;
+
+ float xstep1 = -step * tri.dy12;
+ float xstep2 = -step * tri.dy23;
+ float xstep3 = -step * tri.dy31;
+
+ float ystep1 = step * tri.dx12;
+ float ystep2 = step * tri.dx23;
+ float ystep3 = step * tri.dx31;
+ int x, y;
+
+
+      /* Subdivide the bounding box into BLOCKSIZE x BLOCKSIZE blocks
+       * (4x4 pixels here).
+       *
+       * Trivially accept or reject each block, else fall back to the
+       * per-quad examination in do_block() above.
+       */
+ for (y = miny; y < maxy; y += step)
+ {
+ float cx1 = c1;
+ float cx2 = c2;
+ float cx3 = c3;
+         boolean in = FALSE;
+
+ for (x = minx; x < maxx; x += step)
+ {
+ if (cx1 + eo1 < 0 ||
+ cx2 + eo2 < 0 ||
+ cx3 + eo3 < 0)
+ {
+               /* Trivially rejected.  The triangle is convex, so once
+                * a row has been inside it and leaves again, the rest
+                * of the row is outside too:
+                */
+ if (in)
+ break;
+ }
+ else if (cx1 + ei1 > 0 &&
+ cx2 + ei2 > 0 &&
+ cx3 + ei3 > 0)
+ {
+ in = TRUE;
+ block_full(&tri, x, y); /* trivial accept */
+ }
+ else
+ {
+ in = TRUE;
+ // block_full(&tri, x, y); /* trivial accept */
+ do_block(&tri, x, y, cx1, cx2, cx3);
+ }
+
+ /* Iterate cx values across the region:
+ */
+ cx1 += xstep1;
+ cx2 += xstep2;
+ cx3 += xstep3;
+ }
+
+ /* Iterate c values down the region:
+ */
+ c1 += ystep1;
+ c2 += ystep2;
+ c3 += ystep3;
+ }
+ }
+}
+
+static void triangle_cw( struct llvmpipe_context *llvmpipe,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+ do_triangle_ccw( llvmpipe, v1, v0, v2, !llvmpipe->ccw_is_frontface );
+}
+
+static void triangle_ccw( struct llvmpipe_context *llvmpipe,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+ do_triangle_ccw( llvmpipe, v0, v1, v2, llvmpipe->ccw_is_frontface );
+}
+
+static void triangle_both( struct llvmpipe_context *llvmpipe,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+ /* edge vectors e = v0 - v2, f = v1 - v2 */
+ const float ex = v0[0][0] - v2[0][0];
+ const float ey = v0[0][1] - v2[0][1];
+ const float fx = v1[0][0] - v2[0][0];
+ const float fy = v1[0][1] - v2[0][1];
+
+ /* det = cross(e,f).z */
+ if (ex * fy - ey * fx < 0)
+ triangle_ccw( llvmpipe, v0, v1, v2 );
+ else
+ triangle_cw( llvmpipe, v0, v1, v2 );
+}
+
+static void triangle_nop( struct llvmpipe_context *llvmpipe,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+}
+
+/**
+ * Prepare for triangle rasterization: select the triangle handling
+ * function according to the current cull mode and front-face winding.
+ */
+void setup_prepare_tri( struct llvmpipe_context *llvmpipe )
+{
+ llvmpipe->ccw_is_frontface = (llvmpipe->rasterizer->front_winding ==
+ PIPE_WINDING_CW);
+
+ switch (llvmpipe->rasterizer->cull_mode) {
+ case PIPE_WINDING_NONE:
+ llvmpipe->triangle = triangle_both;
+ break;
+ case PIPE_WINDING_CCW:
+ llvmpipe->triangle = triangle_cw;
+ break;
+ case PIPE_WINDING_CW:
+ llvmpipe->triangle = triangle_ccw;
+ break;
+ default:
+ llvmpipe->triangle = triangle_nop;
+ break;
+ }
+}
+
+