diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/gallium/drivers/llvmpipe/SConscript | 2 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup_coef.c | 258 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup_coef.h | 61 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c | 208 | ||||
| -rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup_tri.c | 348 | 
5 files changed, 577 insertions, 300 deletions
| diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5583fca38e..8d57db72cf 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -63,6 +63,8 @@ llvmpipe = env.ConvenienceLibrary(  		'lp_setup_line.c',  		'lp_setup_point.c',  		'lp_setup_tri.c', +		'lp_setup_coef.c', +		'lp_setup_coef_intrin.c',  		'lp_setup_vbuf.c',  		'lp_state_blend.c',  		'lp_state_clip.c', diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c new file mode 100644 index 0000000000..95e3e8fffe --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.c @@ -0,0 +1,258 @@ +/************************************************************************** + * + * Copyright 2010, VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* + * Binning code for triangles + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_setup_coef.h" +#include "lp_rast.h" +#include "lp_state_fs.h" + +#if !defined(PIPE_ARCH_SSE) + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + */ +static void constant_coef( struct lp_rast_shader_inputs *inputs, +                           unsigned slot, +			   const float value, +                           unsigned i ) +{ +   inputs->a0[slot][i] = value; +   inputs->dadx[slot][i] = 0.0f; +   inputs->dady[slot][i] = 0.0f; +} + + + +static void linear_coef( struct lp_rast_shader_inputs *inputs, +                         const struct lp_tri_info *info, +                         unsigned slot, +                         unsigned vert_attr, +                         unsigned i) +{ +   float a0 = info->v0[vert_attr][i]; +   float a1 = info->v1[vert_attr][i]; +   float a2 = info->v2[vert_attr][i]; + +   float da01 = a0 - a1; +   float da20 = a2 - a0; +   float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20); +   float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01); + +   inputs->dadx[slot][i] = dadx; +   inputs->dady[slot][i] = dady; + +   /* calculate a0 as the value which would be sampled for the +    * fragment at (0,0), taking into account that we want to sample at +    * pixel centers, in other words (0.5, 0.5). +    * +    * this is neat but unfortunately not a good way to do things for +    * triangles with very large values of dadx or dady as it will +    * result in the subtraction and re-addition from a0 of a very +    * large number, which means we'll end up losing a lot of the +    * fractional bits and precision from a0.  the way to fix this is +    * to define a0 as the sample at a pixel center somewhere near vmin +    * instead - i'll switch to this later. 
+    */ +   inputs->a0[slot][i] = a0 - (dadx * info->x0_center + +				   dady * info->y0_center); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct lp_rast_shader_inputs *inputs, +                              const struct lp_tri_info *info, +                              unsigned slot, +			      unsigned vert_attr, +                              unsigned i) +{ +   /* premultiply by 1/w  (v[0][3] is always 1/w): +    */ +   float a0 = info->v0[vert_attr][i] * info->v0[0][3]; +   float a1 = info->v1[vert_attr][i] * info->v1[0][3]; +   float a2 = info->v2[vert_attr][i] * info->v2[0][3]; +   float da01 = a0 - a1; +   float da20 = a2 - a0; +   float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20; +   float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01; + +   inputs->dadx[slot][i] = dadx; +   inputs->dady[slot][i] = dady; +   inputs->a0[slot][i] = a0 - (dadx * info->x0_center + +				   dady * info->y0_center); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial + * Z and W are copied from position_coef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
+ */ +static void +setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs, +                     const struct lp_tri_info *info, +                     unsigned slot, +                     unsigned usage_mask) +{ +   /*X*/ +   if (usage_mask & TGSI_WRITEMASK_X) { +      inputs->a0[slot][0] = 0.0; +      inputs->dadx[slot][0] = 1.0; +      inputs->dady[slot][0] = 0.0; +   } + +   /*Y*/ +   if (usage_mask & TGSI_WRITEMASK_Y) { +      inputs->a0[slot][1] = 0.0; +      inputs->dadx[slot][1] = 0.0; +      inputs->dady[slot][1] = 1.0; +   } + +   /*Z*/ +   if (usage_mask & TGSI_WRITEMASK_Z) { +      linear_coef(inputs, info, slot, 0, 2); +   } + +   /*W*/ +   if (usage_mask & TGSI_WRITEMASK_W) { +      linear_coef(inputs, info, slot, 0, 3); +   } +} + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface  is the triangle front facing? + */ +static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, +                               unsigned slot, +                               boolean frontface, +                               unsigned usage_mask) +{ +   /* convert TRUE to 1.0 and FALSE to -1.0 */ +   if (usage_mask & TGSI_WRITEMASK_X) +      constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 ); + +   if (usage_mask & TGSI_WRITEMASK_Y) +      constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */ + +   if (usage_mask & TGSI_WRITEMASK_Z) +      constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */ + +   if (usage_mask & TGSI_WRITEMASK_W) +      constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */ +} + + +/** + * Compute the tri->coef[] array dadx, dady, a0 values. 
+ */ +void lp_setup_tri_coef( struct lp_setup_context *setup, +			struct lp_rast_shader_inputs *inputs, +			const struct lp_tri_info *info) +{ +   unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; +   unsigned slot; +   unsigned i; + +   /* setup interpolation for all the remaining attributes: +    */ +   for (slot = 0; slot < setup->fs.nr_inputs; slot++) { +      unsigned vert_attr = setup->fs.input[slot].src_index; +      unsigned usage_mask = setup->fs.input[slot].usage_mask; + +      switch (setup->fs.input[slot].interp) { +      case LP_INTERP_CONSTANT: +         if (setup->flatshade_first) { +            for (i = 0; i < NUM_CHANNELS; i++) +               if (usage_mask & (1 << i)) +                  constant_coef(inputs, slot+1, info->v0[vert_attr][i], i); +         } +         else { +            for (i = 0; i < NUM_CHANNELS; i++) +               if (usage_mask & (1 << i)) +                  constant_coef(inputs, slot+1, info->v2[vert_attr][i], i); +         } +         break; + +      case LP_INTERP_LINEAR: +         for (i = 0; i < NUM_CHANNELS; i++) +            if (usage_mask & (1 << i)) +               linear_coef(inputs, info, slot+1, vert_attr, i); +         break; + +      case LP_INTERP_PERSPECTIVE: +         for (i = 0; i < NUM_CHANNELS; i++) +            if (usage_mask & (1 << i)) +               perspective_coef(inputs, info, slot+1, vert_attr, i); +         fragcoord_usage_mask |= TGSI_WRITEMASK_W; +         break; + +      case LP_INTERP_POSITION: +         /* +          * The generated pixel interpolators will pick up the coeffs from +          * slot 0, so we need to ensure that the usage mask covers all +          * usages. 
+          */ +         fragcoord_usage_mask |= usage_mask; +         break; + +      case LP_INTERP_FACING: +         setup_facing_coef(inputs, slot+1, info->frontfacing, usage_mask); +         break; + +      default: +         assert(0); +      } +   } + +   /* The internal position input is in slot zero: +    */ +   setup_fragcoord_coef(inputs, info, 0, fragcoord_usage_mask); +} + +#else +extern void lp_setup_coef_dummy(void); +void lp_setup_coef_dummy(void) +{ +} + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h new file mode 100644 index 0000000000..d68b39c603 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.h @@ -0,0 +1,61 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +/** + * The setup code is concerned with point/line/triangle setup and + * putting commands/data into the bins. + */ + + +#ifndef LP_SETUP_COEF_H +#define LP_SETUP_COEF_H + + +struct lp_tri_info { + +   float x0_center; +   float y0_center; + +   /* turn these into an aligned float[4] */ +   float dy01_ooa; +   float dy20_ooa; +   float dx01_ooa; +   float dx20_ooa; + +   const float (*v0)[4]; +   const float (*v1)[4]; +   const float (*v2)[4]; + +   boolean frontfacing;		/* remove eventually */ +}; + +void lp_setup_tri_coef( struct lp_setup_context *setup, +			struct lp_rast_shader_inputs *inputs, +			const struct lp_tri_info *info); + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c new file mode 100644 index 0000000000..b477bc2113 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c @@ -0,0 +1,208 @@ +/************************************************************************** + * + * Copyright 2010 VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for triangles + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_setup_coef.h" +#include "lp_rast.h" +#include "lp_state_fs.h" + +#if defined(PIPE_ARCH_SSE) +#include <emmintrin.h> + + +static void constant_coef4( struct lp_rast_shader_inputs *inputs, +			    const struct lp_tri_info *info, +			    unsigned slot, +			    const float *attr) +{ +   *(__m128 *)inputs->a0[slot]   = *(__m128 *)attr; +   *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); +   *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); +} + + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface  is the triangle front facing? + */ +static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, +			       const struct lp_tri_info *info, +			       unsigned slot ) +{ +   /* XXX: just pass frontface directly to the shader, don't bother +    * treating it as an input. +    */ +   __m128 a0 = _mm_setr_ps(info->frontfacing ? 
1.0 : -1.0, +			   0, 0, 0); + +   *(__m128 *)inputs->a0[slot]   = a0; +   *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); +   *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); +} + + + +static void calc_coef4( struct lp_rast_shader_inputs *inputs, +			const struct lp_tri_info *info, +			unsigned slot, +			__m128 a0, +			__m128 a1, +			__m128 a2) +{ +   __m128 da01          = _mm_sub_ps(a0, a1); +   __m128 da20          = _mm_sub_ps(a2, a0); + +   __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa)); +   __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa));    +   __m128 dadx          = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa); + +   __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa)); +   __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa)); +   __m128 dady          = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa); + +   __m128 dadx_x0       = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center)); +   __m128 dady_y0       = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center)); +   __m128 attr_v0       = _mm_add_ps(dadx_x0, dady_y0); +   __m128 attr_0        = _mm_sub_ps(a0, attr_v0); + +   *(__m128 *)inputs->a0[slot]   = attr_0; +   *(__m128 *)inputs->dadx[slot] = dadx; +   *(__m128 *)inputs->dady[slot] = dady; +} + + +static void linear_coef( struct lp_rast_shader_inputs *inputs, +                         const struct lp_tri_info *info, +                         unsigned slot, +                         unsigned vert_attr) +{ +   __m128 a0 = *(const __m128 *)info->v0[vert_attr]; +   __m128 a1 = *(const __m128 *)info->v1[vert_attr]; +   __m128 a2 = *(const __m128 *)info->v2[vert_attr]; + +   calc_coef4(inputs, info, slot, a0, a1, a2); +} + + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). 
+ * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct lp_rast_shader_inputs *inputs, +                              const struct lp_tri_info *info, +                              unsigned slot, +			      unsigned vert_attr) +{ +   /* premultiply by 1/w  (v[0][3] is always 1/w): +    */ +   __m128 a0 = *(const __m128 *)info->v0[vert_attr]; +   __m128 a1 = *(const __m128 *)info->v1[vert_attr]; +   __m128 a2 = *(const __m128 *)info->v2[vert_attr]; + +   __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3])); +   __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3])); +   __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3])); + +   calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow); +} + + + + + +/** + * Compute the inputs-> dadx, dady, a0 values. + */ +void lp_setup_tri_coef( struct lp_setup_context *setup, +			struct lp_rast_shader_inputs *inputs, +			const struct lp_tri_info *info) +{ +   unsigned slot; + +   /* The internal position input is in slot zero: +    */ +   linear_coef(inputs, info, 0, 0); + +   /* setup interpolation for all the remaining attributes: +    */ +   for (slot = 0; slot < setup->fs.nr_inputs; slot++) { +      unsigned vert_attr = setup->fs.input[slot].src_index; + +      switch (setup->fs.input[slot].interp) { +      case LP_INTERP_CONSTANT: +         if (setup->flatshade_first) { +	    constant_coef4(inputs, info, slot+1, info->v0[vert_attr]); +         } +         else { +	    constant_coef4(inputs, info, slot+1, info->v2[vert_attr]); +         } +         break; + +      case LP_INTERP_LINEAR: +	 linear_coef(inputs, info, slot+1, vert_attr); +         break; + +      case LP_INTERP_PERSPECTIVE: +	 perspective_coef(inputs, info, slot+1, vert_attr); +         break; + +      case LP_INTERP_POSITION: +         /* +          * The generated pixel interpolators will pick up the coeffs 
from +          * slot 0. +          */ +         break; + +      case LP_INTERP_FACING: +         setup_facing_coef(inputs, info, slot+1); +         break; + +      default: +         assert(0); +      } +   } +} + +#else +extern void lp_setup_coef_dummy(void); +void lp_setup_coef_dummy(void) +{ +} +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index fe5c9358dd..d86fb8652a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -34,33 +34,12 @@  #include "util/u_rect.h"  #include "lp_perf.h"  #include "lp_setup_context.h" +#include "lp_setup_coef.h"  #include "lp_rast.h"  #include "lp_state_fs.h"  #define NUM_CHANNELS 4 -struct tri_info { - -   float pixel_offset; - -   /* fixed point vertex coordinates */ -   int x[3]; -   int y[3]; - -   /* float x,y deltas - all from the original coordinates -    */ -   float dy01, dy20; -   float dx01, dx20; -   float oneoverarea; - -   const float (*v0)[4]; -   const float (*v1)[4]; -   const float (*v2)[4]; - -   boolean frontfacing; -}; - -  static INLINE int @@ -77,247 +56,6 @@ fixed_to_float(int a) -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 
- */ -static void constant_coef( struct lp_rast_triangle *tri, -                           unsigned slot, -			   const float value, -                           unsigned i ) -{ -   tri->inputs.a0[slot][i] = value; -   tri->inputs.dadx[slot][i] = 0.0f; -   tri->inputs.dady[slot][i] = 0.0f; -} - - - -static void linear_coef( struct lp_rast_triangle *tri, -                         const struct tri_info *info, -                         unsigned slot, -                         unsigned vert_attr, -                         unsigned i) -{ -   float a0 = info->v0[vert_attr][i]; -   float a1 = info->v1[vert_attr][i]; -   float a2 = info->v2[vert_attr][i]; - -   float da01 = a0 - a1; -   float da20 = a2 - a0; -   float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea; -   float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea; - -   tri->inputs.dadx[slot][i] = dadx; -   tri->inputs.dady[slot][i] = dady; - -   /* calculate a0 as the value which would be sampled for the -    * fragment at (0,0), taking into account that we want to sample at -    * pixel centers, in other words (0.5, 0.5). -    * -    * this is neat but unfortunately not a good way to do things for -    * triangles with very large values of dadx or dady as it will -    * result in the subtraction and re-addition from a0 of a very -    * large number, which means we'll end up loosing a lot of the -    * fractional bits and precision from a0.  the way to fix this is -    * to define a0 as the sample at a pixel center somewhere near vmin -    * instead - i'll switch to this later. -    */ -   tri->inputs.a0[slot][i] = (a0 - -                              (dadx * (info->v0[0][0] - info->pixel_offset) + -                               dady * (info->v0[0][1] - info->pixel_offset))); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. 
- * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void perspective_coef( struct lp_rast_triangle *tri, -                              const struct tri_info *info, -                              unsigned slot, -			      unsigned vert_attr, -                              unsigned i) -{ -   /* premultiply by 1/w  (v[0][3] is always 1/w): -    */ -   float a0 = info->v0[vert_attr][i] * info->v0[0][3]; -   float a1 = info->v1[vert_attr][i] * info->v1[0][3]; -   float a2 = info->v2[vert_attr][i] * info->v2[0][3]; -   float da01 = a0 - a1; -   float da20 = a2 - a0; -   float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea; -   float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea; - -   tri->inputs.dadx[slot][i] = dadx; -   tri->inputs.dady[slot][i] = dady; -   tri->inputs.a0[slot][i] = (a0 - -                              (dadx * (info->v0[0][0] - info->pixel_offset) + -                               dady * (info->v0[0][1] - info->pixel_offset))); -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial - * Z and W are copied from position_coef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
- */ -static void -setup_fragcoord_coef(struct lp_rast_triangle *tri, -                     const struct tri_info *info, -                     unsigned slot, -                     unsigned usage_mask) -{ -   /*X*/ -   if (usage_mask & TGSI_WRITEMASK_X) { -      tri->inputs.a0[slot][0] = 0.0; -      tri->inputs.dadx[slot][0] = 1.0; -      tri->inputs.dady[slot][0] = 0.0; -   } - -   /*Y*/ -   if (usage_mask & TGSI_WRITEMASK_Y) { -      tri->inputs.a0[slot][1] = 0.0; -      tri->inputs.dadx[slot][1] = 0.0; -      tri->inputs.dady[slot][1] = 1.0; -   } - -   /*Z*/ -   if (usage_mask & TGSI_WRITEMASK_Z) { -      linear_coef(tri, info, slot, 0, 2); -   } - -   /*W*/ -   if (usage_mask & TGSI_WRITEMASK_W) { -      linear_coef(tri, info, slot, 0, 3); -   } -} - - -/** - * Setup the fragment input attribute with the front-facing value. - * \param frontface  is the triangle front facing? - */ -static void setup_facing_coef( struct lp_rast_triangle *tri, -                               unsigned slot, -                               boolean frontface, -                               unsigned usage_mask) -{ -   /* convert TRUE to 1.0 and FALSE to -1.0 */ -   if (usage_mask & TGSI_WRITEMASK_X) -      constant_coef( tri, slot, 2.0f * frontface - 1.0f, 0 ); - -   if (usage_mask & TGSI_WRITEMASK_Y) -      constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ - -   if (usage_mask & TGSI_WRITEMASK_Z) -      constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ - -   if (usage_mask & TGSI_WRITEMASK_W) -      constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ -} - - -/** - * Compute the tri->coef[] array dadx, dady, a0 values. 
- */ -static void setup_tri_coefficients( struct lp_setup_context *setup, -				    struct lp_rast_triangle *tri, -                                    const struct tri_info *info) -{ -   unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; -   unsigned slot; -   unsigned i; - -   /* setup interpolation for all the remaining attributes: -    */ -   for (slot = 0; slot < setup->fs.nr_inputs; slot++) { -      unsigned vert_attr = setup->fs.input[slot].src_index; -      unsigned usage_mask = setup->fs.input[slot].usage_mask; - -      switch (setup->fs.input[slot].interp) { -      case LP_INTERP_CONSTANT: -         if (setup->flatshade_first) { -            for (i = 0; i < NUM_CHANNELS; i++) -               if (usage_mask & (1 << i)) -                  constant_coef(tri, slot+1, info->v0[vert_attr][i], i); -         } -         else { -            for (i = 0; i < NUM_CHANNELS; i++) -               if (usage_mask & (1 << i)) -                  constant_coef(tri, slot+1, info->v2[vert_attr][i], i); -         } -         break; - -      case LP_INTERP_LINEAR: -         for (i = 0; i < NUM_CHANNELS; i++) -            if (usage_mask & (1 << i)) -               linear_coef(tri, info, slot+1, vert_attr, i); -         break; - -      case LP_INTERP_PERSPECTIVE: -         for (i = 0; i < NUM_CHANNELS; i++) -            if (usage_mask & (1 << i)) -               perspective_coef(tri, info, slot+1, vert_attr, i); -         fragcoord_usage_mask |= TGSI_WRITEMASK_W; -         break; - -      case LP_INTERP_POSITION: -         /* -          * The generated pixel interpolators will pick up the coeffs from -          * slot 0, so all need to ensure that the usage mask is covers all -          * usages. 
-          */ -         fragcoord_usage_mask |= usage_mask; -         break; - -      case LP_INTERP_FACING: -         setup_facing_coef(tri, slot+1, info->frontfacing, usage_mask); -         break; - -      default: -         assert(0); -      } -   } - -   /* The internal position input is in slot zero: -    */ -   setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask); - -   if (0) { -      for (i = 0; i < NUM_CHANNELS; i++) { -         float a0   = tri->inputs.a0  [0][i]; -         float dadx = tri->inputs.dadx[0][i]; -         float dady = tri->inputs.dady[0][i]; - -         debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", -                      "xyzw"[i], -                      a0, dadx, dady); -      } - -      for (slot = 0; slot < setup->fs.nr_inputs; slot++) { -         unsigned usage_mask = setup->fs.input[slot].usage_mask; -         for (i = 0; i < NUM_CHANNELS; i++) { -            if (usage_mask & (1 << i)) { -               float a0   = tri->inputs.a0  [1 + slot][i]; -               float dadx = tri->inputs.dadx[1 + slot][i]; -               float dady = tri->inputs.dady[1 + slot][i]; - -               debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", -                            slot, -                            "xyzw"[i], -                            a0, dadx, dady); -            } -         } -      } -   } -} - - @@ -440,16 +178,21 @@ lp_rast_cmd lp_rast_tri_tab[8] = {   */  static void  do_triangle_ccw(struct lp_setup_context *setup, +		const float (*v0)[4],  		const float (*v1)[4],  		const float (*v2)[4], -		const float (*v3)[4],  		boolean frontfacing )  {     struct lp_scene *scene = lp_setup_get_current_scene(setup);     struct lp_fragment_shader_variant *variant = setup->fs.current.variant;     struct lp_rast_triangle *tri; -   struct tri_info info; +   int x[3]; +   int y[3]; +   float dy01, dy20; +   float dx01, dx20; +   float oneoverarea; +   struct lp_tri_info info;     int area;     struct u_rect bbox;     int ix0, 
ix1, iy0, iy1; @@ -458,7 +201,7 @@ do_triangle_ccw(struct lp_setup_context *setup,     int nr_planes = 3;     if (0) -      lp_setup_print_triangle(setup, v1, v2, v3); +      lp_setup_print_triangle(setup, v0, v1, v2);     if (setup->scissor_test) {        nr_planes = 7; @@ -468,13 +211,12 @@ do_triangle_ccw(struct lp_setup_context *setup,     }     /* x/y positions in fixed point */ -   info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset); -   info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset); -   info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset); -   info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset); -   info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset); -   info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset); - +   x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); +   x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); +   x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); +   y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); +   y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); +   y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);     /* Bounding rectangle (in pixels) */ @@ -486,10 +228,10 @@ do_triangle_ccw(struct lp_setup_context *setup,         */        int adj = (setup->pixel_offset != 0) ? 
1 : 0; -      bbox.x0 = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; -      bbox.x1 = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; -      bbox.y0 = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; -      bbox.y1 = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; +      bbox.x0 = (MIN3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; +      bbox.x1 = (MAX3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; +      bbox.y0 = (MIN3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; +      bbox.y1 = (MAX3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;        /* Inclusive coordinates:         */ @@ -520,21 +262,21 @@ do_triangle_ccw(struct lp_setup_context *setup,        return;  #ifdef DEBUG -   tri->v[0][0] = v1[0][0]; -   tri->v[1][0] = v2[0][0]; -   tri->v[2][0] = v3[0][0]; -   tri->v[0][1] = v1[0][1]; -   tri->v[1][1] = v2[0][1]; -   tri->v[2][1] = v3[0][1]; +   tri->v[0][0] = v0[0][0]; +   tri->v[1][0] = v1[0][0]; +   tri->v[2][0] = v2[0][0]; +   tri->v[0][1] = v0[0][1]; +   tri->v[1][1] = v1[0][1]; +   tri->v[2][1] = v2[0][1];  #endif -   tri->plane[0].dcdy = info.x[0] - info.x[1]; -   tri->plane[1].dcdy = info.x[1] - info.x[2]; -   tri->plane[2].dcdy = info.x[2] - info.x[0]; +   tri->plane[0].dcdy = x[0] - x[1]; +   tri->plane[1].dcdy = x[1] - x[2]; +   tri->plane[2].dcdy = x[2] - x[0]; -   tri->plane[0].dcdx = info.y[0] - info.y[1]; -   tri->plane[1].dcdx = info.y[1] - info.y[2]; -   tri->plane[2].dcdx = info.y[2] - info.y[0]; +   tri->plane[0].dcdx = y[0] - y[1]; +   tri->plane[1].dcdx = y[1] - y[2]; +   tri->plane[2].dcdx = y[2] - y[0];     area = (tri->plane[0].dcdy * tri->plane[2].dcdx -             tri->plane[2].dcdy * tri->plane[0].dcdx); @@ -554,20 +296,26 @@ do_triangle_ccw(struct lp_setup_context *setup,     /*       */ -   info.pixel_offset = setup->pixel_offset; -   info.v0 = v1; -   info.v1 = v2; -   info.v2 = v3; -   
info.dx01 = info.v0[0][0] - info.v1[0][0]; -   info.dx20 = info.v2[0][0] - info.v0[0][0]; -   info.dy01 = info.v0[0][1] - info.v1[0][1]; -   info.dy20 = info.v2[0][1] - info.v0[0][1]; -   info.oneoverarea = 1.0f / (info.dx01 * info.dy20 - info.dx20 * info.dy01); +   dx01 = v0[0][0] - v1[0][0]; +   dy01 = v0[0][1] - v1[0][1]; +   dx20 = v2[0][0] - v0[0][0]; +   dy20 = v2[0][1] - v0[0][1]; +   oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); + +   info.v0 = v0; +   info.v1 = v1; +   info.v2 = v2;     info.frontfacing = frontfacing; +   info.x0_center = v0[0][0] - setup->pixel_offset; +   info.y0_center = v0[0][1] - setup->pixel_offset; +   info.dx01_ooa  = dx01 * oneoverarea; +   info.dx20_ooa  = dx20 * oneoverarea; +   info.dy01_ooa  = dy01 * oneoverarea; +   info.dy20_ooa  = dy20 * oneoverarea;     /* Setup parameter interpolants:      */ -   setup_tri_coefficients( setup, tri, &info ); +   lp_setup_tri_coef( setup, &tri->inputs, &info );     tri->inputs.facing = frontfacing ? 1.0F : -1.0F;     tri->inputs.state = setup->fs.stored; @@ -580,7 +328,7 @@ do_triangle_ccw(struct lp_setup_context *setup,        /* half-edge constants, will be interated over the whole render         * target.         */ -      plane->c = plane->dcdx * info.x[i] - plane->dcdy * info.y[i]; +      plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];        /* correct for top-left vs. bottom-left fill convention.           * | 
