From de902d3275d1861beb0cebdf0807a17e2682c8de Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 Oct 2009 10:23:53 +0100 Subject: llvmpipe: more wip on coefficients --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 206 ++++++++++++++-------------- 1 file changed, 103 insertions(+), 103 deletions(-) (limited to 'src/gallium/drivers/llvmpipe/lp_setup_tri.c') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index efd91124a0..382a52e951 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -29,23 +29,23 @@ * Binning code for triangles */ -#include "lp_setup.h" -#include "lp_state.h" +#include "lp_setup_context.h" #include "util/u_math.h" #include "util/u_memory.h" +#define NUM_CHANNELS 4 /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ static void constant_coef( struct lp_rast_triangle *tri, - const float (*v3)[4], - unsigned vert_attr, - unsigned i ) + unsigned slot, + const float value, + unsigned i ) { - tri->inputs.a0[i] = v3[vert_attr][i]; - tri->inputs.dadx[i] = 0; - tri->inputs.dady[i] = 0; + tri->inputs.a0[slot][i] = value; + tri->inputs.dadx[slot][i] = 0; + tri->inputs.dady[slot][i] = 0; } /** @@ -53,45 +53,40 @@ static void constant_coef( struct lp_rast_triangle *tri, * for a triangle. */ static void linear_coef( struct lp_rast_triangle *tri, - unsigned input, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], - unsigned vert_attr) + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) { - unsigned i; - - input *= 4; - - for (i = 0; i < NUM_CHANNELS; i++) { - float a1 = v1[vert_attr][i]; - float a2 = v2[vert_attr][i]; - float a3 = v3[vert_attr][i]; - - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - tri->inputs.dadx[input+i] = dadx; - tri->inputs.dady[input+i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - tri->inputs.a0[input+i] = (v1[vert_attr][i] - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); - } + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + float a3 = v3[vert_attr][i]; + + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + tri->inputs.a0[slot][i] = (v1[vert_attr][i] - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); } @@ -104,34 +99,29 @@ static void linear_coef( struct lp_rast_triangle *tri, * divide the interpolated value by the interpolated W at that fragment. */ static void perspective_coef( struct lp_rast_triangle *tri, + unsigned slot, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], unsigned vert_attr, - unsigned i) + unsigned i) { - unsigned i; - - input *= 4; - - for (i = 0; i < NUM_CHANNELS; i++) { - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a1 = v1[vert_attr][i] * v1[0][3]; - float a2 = v2[vert_attr][i] * v2[0][3]; - float a3 = v3[vert_attr][i] * v3[0][3]; - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - - tri->inputs.dadx[input+i] = dadx; - tri->inputs.dady[input+i] = dady; - tri->inputs.a0[input+i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); - } + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + float a3 = v3[vert_attr][i] * v3[0][3]; + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + tri->inputs.a0[slot][i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); } @@ -142,29 +132,37 @@ static void perspective_coef( struct lp_rast_triangle *tri, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void -setup_fragcoord_coef(struct lp_rast_triangle *tri, unsigned slot) +setup_fragcoord_coef(struct lp_rast_triangle *tri, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4]) { - slot *= 4; - /*X*/ - tri->inputs.a0[slot+0] = 0.0; - tri->inputs.dadx[slot+0] = 1.0; - tri->inputs.dady[slot+0] = 0.0; + tri->inputs.a0[slot][0] = 0.0; + tri->inputs.dadx[slot][0] = 1.0; + tri->inputs.dady[slot][0] = 0.0; /*Y*/ - tri->inputs.a0[slot+1] = 0.0; - tri->inputs.dadx[slot+1] = 0.0; - tri->inputs.dady[slot+1] = 1.0; + tri->inputs.a0[slot][1] = 0.0; + tri->inputs.dadx[slot][1] = 0.0; + tri->inputs.dady[slot][1] = 1.0; /*Z*/ - tri->inputs.a0[slot+2] = tri->inputs.a0[2]; - tri->inputs.dadx[slot+2] = tri->inputs.dadx[2]; - tri->inputs.dady[slot+2] = tri->inputs.dady[2]; + linear_coef(tri, slot, v1, v2, v3, 0, 2); /*W*/ - tri->inputs.a0[slot+3] = tri->inputs.a0[3]; - tri->inputs.dadx[slot+3] = tri->inputs.dadx[3]; - tri->inputs.dady[slot+3] = tri->inputs.dady[3]; + linear_coef(tri, slot, v1, v2, v3, 0, 3); } +static void setup_facing_coef( struct lp_rast_triangle *tri, + unsigned slot, + boolean frontface ) +{ + constant_coef( tri, slot, 1.0f - frontface, 0 ); + constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ +} + /** * Compute the tri->coef[] array dadx, dady, a0 values. @@ -176,40 +174,42 @@ static void setup_tri_coefficients( struct setup_context *setup, const float (*v3)[4], boolean frontface ) { - unsigned input; + unsigned slot; - /* z and w are done by linear interpolation: + /* The internal position input is in slot zero: */ - setup_fragcoord_coef(tri, 0); - linear_coef(tri, input, v1, v2, v3, vert_attr, i); + setup_fragcoord_coef(tri, 0, v1, v2, v3); /* setup interpolation for all the remaining attrbutes: */ - for (input = 0; input < setup->fs.nr_inputs; input++) { - unsigned vert_attr = setup->fs.input[input].src_index; + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; unsigned i; - switch (setup->fs.input[input].interp_mode) { + switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: - constant_coef(tri, input, v3, vert_attr, i); + for (i = 0; i < NUM_CHANNELS; i++) + constant_coef(tri, slot+1, v3[vert_attr][i], i); break; case LP_INTERP_LINEAR: - linear_coef(tri, input, v1, v2, v3, vert_attr, i); + for (i = 0; i < NUM_CHANNELS; i++) + linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: - perspective_coef(tri, input, v1, v2, v3, vert_attr, i); + for (i = 0; i < NUM_CHANNELS; i++) + perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i); break; - case LP_INTERP_POS: - setup_fragcoord_coef(tri, input); + case LP_INTERP_POSITION: + /* XXX: fix me - duplicates the values in slot zero. + */ + setup_fragcoord_coef(tri, slot+1, v1, v2, v3); break; case LP_INTERP_FACING: - tri->inputs.a0[input*4+0] = 1.0f - frontface; - tri->inputs.dadx[input*4+0] = 0.0; - tri->da[input].dady[0] = 0.0; + setup_facing_coef(tri, slot+1, frontface); break; default: @@ -246,14 +246,14 @@ static inline float subpixel_snap( float a ) #define MAX3(a,b,c) MAX2(MAX2(a,b),c) static void -do_triangle_ccw(struct lp_setup *setup, +do_triangle_ccw(struct setup_context *setup, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontfacing ) { - const int rt_width = setup->framebuffer.cbufs[0]->width; - const int rt_height = setup->framebuffer.cbufs[0]->height; + const int rt_width = setup->fb.width; + const int rt_height = setup->fb.height; const float y1 = subpixel_snap(v1[0][1]); const float y2 = subpixel_snap(v2[0][1]); @@ -263,7 +263,7 @@ do_triangle_ccw(struct lp_setup *setup, const float x2 = subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct lp_setup_triangle *tri = lp_setup_alloc_data( setup, sizeof *tri ); + struct lp_setup_triangle *tri = get_data( setup, sizeof *tri ); float area; float c1, c2, c3; int i; -- cgit v1.2.3