diff options
Diffstat (limited to 'src/gallium/drivers/cell/spu/spu_tri.c')
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_tri.c | 555 |
1 files changed, 202 insertions, 353 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 8b93878192..4caf7d6b61 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -43,11 +43,6 @@ /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ typedef vector unsigned int mask_t; -typedef union -{ - vector float v; - float f[4]; -} float4; /** @@ -91,9 +86,9 @@ struct edge { struct interp_coef { - float4 a0; - float4 dadx; - float4 dady; + vector float a0; + vector float dadx; + vector float dady; }; @@ -116,21 +111,15 @@ struct setup_stage { struct edge etop; struct edge emaj; - float oneoverarea; + float oneOverArea; /* XXX maybe make into vector? */ + + uint facing; - uint tx, ty; + uint tx, ty; /**< position of current tile (x, y) */ int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; -#if 0 - struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; -#else struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; -#endif - -#if 0 - struct quad_header quad; -#endif struct { int left[2]; /**< [0] = row0, [1] = row1 */ @@ -142,118 +131,105 @@ struct setup_stage { }; - static struct setup_stage setup; - - -#if 0 -/** - * Basically a cast wrapper. - */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} -#endif - -#if 0 -/** - * Clip setup.quad against the scissor/surface bounds. - */ -static INLINE void -quad_clip(struct setup_stage *setup) -{ - const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - - if (setup.quad.x0 >= maxx || - setup.quad.y0 >= maxy || - setup.quad.x0 + 1 < minx || - setup.quad.y0 + 1 < miny) { - /* totally clipped */ - setup.quad.mask = 0x0; - return; - } - if (setup.quad.x0 < minx) - setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (setup.quad.y0 < miny) - setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (setup.quad.x0 == maxx - 1) - setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (setup.quad.y0 == maxy - 1) - setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); -} -#endif - -#if 0 -/** - * Emit a quad (pass to next stage) with clipping. - */ -static INLINE void -clip_emit_quad(struct setup_stage *setup) -{ - quad_clip(setup); - if (setup.quad.mask) { - struct softpipe_context *sp = setup.softpipe; - sp->quad.first->run(sp->quad.first, &setup.quad); - } -} -#endif - /** * Evaluate attribute coefficients (plane equations) to compute * attribute values for the four fragments in a quad. * Eg: four colors will be computed (in AoS format). */ static INLINE void -eval_coeff(uint slot, float x, float y, vector float result[4]) +eval_coeff(uint slot, float x, float y, vector float w, vector float result[4]) { - switch (spu.vertex_info.interp_mode[slot]) { + switch (spu.vertex_info.attrib[slot].interp_mode) { case INTERP_CONSTANT: result[QUAD_TOP_LEFT] = result[QUAD_TOP_RIGHT] = result[QUAD_BOTTOM_LEFT] = - result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v; + result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0; break; - case INTERP_LINEAR: - /* fall-through, for now */ - default: { - register vector float dadx = setup.coef[slot].dadx.v; - register vector float dady = setup.coef[slot].dady.v; - register vector float topLeft - = spu_add(setup.coef[slot].a0.v, - spu_add(spu_mul(spu_splats(x), dadx), - spu_mul(spu_splats(y), dady))); + vector float dadx = setup.coef[slot].dadx; + vector float dady = setup.coef[slot].dady; + vector float topLeft = + spu_add(setup.coef[slot].a0, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); result[QUAD_TOP_LEFT] = topLeft; result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); } + break; + case INTERP_PERSPECTIVE: + { + vector float dadx = setup.coef[slot].dadx; + vector float dady = setup.coef[slot].dady; + vector float topLeft = + spu_add(setup.coef[slot].a0, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); + + vector float wInv = spu_re(w); /* 1.0 / w */ + + result[QUAD_TOP_LEFT] = spu_mul(topLeft, wInv); + result[QUAD_TOP_RIGHT] = spu_mul(spu_add(topLeft, dadx), wInv); + result[QUAD_BOTTOM_LEFT] = spu_mul(spu_add(topLeft, dady), wInv); + result[QUAD_BOTTOM_RIGHT] = spu_mul(spu_add(spu_add(topLeft, dadx), dady), wInv); + } + break; + case INTERP_POS: + case INTERP_NONE: + break; + default: + ASSERT(0); } } +/** + * As above, but return 4 vectors in SOA format. + * XXX this will all be re-written someday. + */ +static INLINE void +eval_coeff_soa(uint slot, float x, float y, vector float w, vector float result[4]) +{ + eval_coeff(slot, x, y, w, result); + _transpose_matrix4x4(result, result); +} + + +/** Evalute coefficients to get Z for four pixels in a quad */ static INLINE vector float eval_z(float x, float y) { const uint slot = 0; - const float dzdx = setup.coef[slot].dadx.f[2]; - const float dzdy = setup.coef[slot].dady.f[2]; - const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy; + const float dzdx = spu_extract(setup.coef[slot].dadx, 2); + const float dzdy = spu_extract(setup.coef[slot].dady, 2); + const float topLeft = spu_extract(setup.coef[slot].a0, 2) + x * dzdx + y * dzdy; const vector float topLeftv = spu_splats(topLeft); const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; return spu_add(topLeftv, derivs); } +/** Evalute coefficients to get W for four pixels in a quad */ +static INLINE vector float +eval_w(float x, float y) +{ + const uint slot = 0; + const float dwdx = spu_extract(setup.coef[slot].dadx, 3); + const float dwdy = spu_extract(setup.coef[slot].dady, 3); + const float topLeft = spu_extract(setup.coef[slot].a0, 3) + x * dwdx + y * dwdy; + const vector float topLeftv = spu_splats(topLeft); + const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy }; + return spu_add(topLeftv, derivs); +} + + /** * Emit a quad (pass to next stage). No clipping is done. * Note: about 1/5 to 1/7 of the time, mask is zero and this function @@ -261,120 +237,54 @@ eval_z(float x, float y) * overall. */ static INLINE void -emit_quad( int x, int y, mask_t mask ) +emit_quad( int x, int y, mask_t mask) { /* If any bits in mask are set... */ if (spu_extract(spu_orx(mask), 0)) { const int ix = x - setup.cliprect_minx; const int iy = y - setup.cliprect_miny; - vector float colors[4]; spu.cur_ctile_status = TILE_STATUS_DIRTY; spu.cur_ztile_status = TILE_STATUS_DIRTY; - if (spu.texture[0].start) { - /* texture mapping */ - const uint unit = 0; - vector float texcoords[4]; - eval_coeff(2, (float) x, (float) y, texcoords); - - if (spu_extract(mask, 0)) - colors[0] = spu.sample_texture[unit](unit, texcoords[0]); - if (spu_extract(mask, 1)) - colors[1] = spu.sample_texture[unit](unit, texcoords[1]); - if (spu_extract(mask, 2)) - colors[2] = spu.sample_texture[unit](unit, texcoords[2]); - if (spu_extract(mask, 3)) - colors[3] = spu.sample_texture[unit](unit, texcoords[3]); - - - if (spu.texture[1].start) { - /* multi-texture mapping */ - const uint unit = 1; - vector float colors1[4]; - - eval_coeff(2, (float) x, (float) y, texcoords); - - if (spu_extract(mask, 0)) - colors1[0] = spu.sample_texture[unit](unit, texcoords[0]); - if (spu_extract(mask, 1)) - colors1[1] = spu.sample_texture[unit](unit, texcoords[1]); - if (spu_extract(mask, 2)) - colors1[2] = spu.sample_texture[unit](unit, texcoords[2]); - if (spu_extract(mask, 3)) - colors1[3] = spu.sample_texture[unit](unit, texcoords[3]); - - /* hack: modulate first texture by second */ - colors[0] = spu_mul(colors[0], colors1[0]); - colors[1] = spu_mul(colors[1], colors1[1]); - colors[2] = spu_mul(colors[2], colors1[2]); - colors[3] = spu_mul(colors[3], colors1[3]); - } + { + /* + * Run fragment shader, execute per-fragment ops, update fb/tile. + */ + vector float inputs[4*4], outputs[2*4]; + vector float fragZ = eval_z((float) x, (float) y); + vector float fragW = eval_w((float) x, (float) y); + vector unsigned int kill_mask; - } - else { - /* simple shading */ + /* setup inputs */ #if 0 - eval_coeff(1, (float) x, (float) y, colors); - + eval_coeff_soa(1, (float) x, (float) y, fragW, inputs); #else - /* XXX new fragment program code */ - - if (spu.fragment_program) { - vector float inputs[4*4], outputs[2*4]; - - /* setup inputs */ - eval_coeff(1, (float) x, (float) y, inputs); - - /* Execute the current fragment program */ - spu.fragment_program(inputs, outputs, spu.constants); - - /* Copy outputs */ - colors[0] = outputs[0*4+0]; - colors[1] = outputs[0*4+1]; - colors[2] = outputs[0*4+2]; - colors[3] = outputs[0*4+3]; - - if (0 && spu.init.id==0 && y == 48) { - printf("colors[0] = %f %f %f %f\n", - spu_extract(colors[0], 0), - spu_extract(colors[0], 1), - spu_extract(colors[0], 2), - spu_extract(colors[0], 3)); - printf("colors[1] = %f %f %f %f\n", - spu_extract(colors[1], 0), - spu_extract(colors[1], 1), - spu_extract(colors[1], 2), - spu_extract(colors[1], 3)); - } - + uint i; + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + eval_coeff_soa(i+1, (float) x, (float) y, fragW, inputs + i * 4); } #endif - } + ASSERT(spu.fragment_program); + ASSERT(spu.fragment_ops); + /* Execute the current fragment program */ + kill_mask = spu.fragment_program(inputs, outputs, spu.constants); - { - /* Convert fragment data from AoS to SoA format. - * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) - * This is temporary! - */ - vector float soa_frag[4]; - _transpose_matrix4x4(soa_frag, colors); - - float4 fragZ; + mask = spu_andc(mask, kill_mask); - fragZ.v = eval_z((float) x, (float) y); - - /* Do all per-fragment/quad operations here, including: - * alpha test, z test, stencil test, blend and framebuffer writing. + /* Execute per-fragment/quad operations, including: + * alpha test, z test, stencil test, blend and framebuffer writing. */ spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, - fragZ.v, - soa_frag[0], soa_frag[1], - soa_frag[2], soa_frag[3], - mask); + fragZ, + outputs[0*4+0], + outputs[0*4+1], + outputs[0*4+2], + outputs[0*4+3], + mask, + setup.facing); } - } } @@ -383,7 +293,8 @@ emit_quad( int x, int y, mask_t mask ) * Given an X or Y coordinate, return the block/quad coordinate that it * belongs to. */ -static INLINE int block( int x ) +static INLINE int +block(int x) { return x & ~1; } @@ -394,7 +305,8 @@ static INLINE int block( int x ) * the triangle's bounds. * The mask is a uint4 vector and each element will be 0 or 0xffffffff. */ -static INLINE mask_t calculate_mask( int x ) +static INLINE mask_t +calculate_mask(int x) { /* This is a little tricky. * Use & instead of && to avoid branches. @@ -412,7 +324,8 @@ static INLINE mask_t calculate_mask( int x ) /** * Render a horizontal span of quads */ -static void flush_spans( void ) +static void +flush_spans(void) { int minleft, maxright; int x; @@ -440,7 +353,6 @@ static void flush_spans( void ) return; } - /* OK, we're very likely to need the tile data now. * clear or finish waiting if needed. */ @@ -476,9 +388,7 @@ static void flush_spans( void ) * calculate_mask() could be simplified a bit... */ for (x = block(minleft); x <= block(maxright); x += 2) { -#if 1 - emit_quad( x, setup.span.y, calculate_mask( x ) ); -#endif + emit_quad( x, setup.span.y, calculate_mask( x )); } setup.span.y = 0; @@ -487,33 +397,46 @@ static void flush_spans( void ) setup.span.right[1] = 0; } + #if DEBUG_VERTS -static void print_vertex(const struct vertex_header *v) +static void +print_vertex(const struct vertex_header *v) { - int i; - fprintf(stderr, "Vertex: (%p)\n", v); - for (i = 0; i < setup.quad.nr_attrs; i++) { - fprintf(stderr, " %d: %f %f %f %f\n", i, - v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); + uint i; + fprintf(stderr, " Vertex: (%p)\n", v); + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + fprintf(stderr, " %d: %f %f %f %f\n", i, + spu_extract(v->data[i], 0), + spu_extract(v->data[i], 1), + spu_extract(v->data[i], 2), + spu_extract(v->data[i], 3)); } } #endif -static boolean setup_sort_vertices(const struct vertex_header *v0, - const struct vertex_header *v1, - const struct vertex_header *v2) +/** + * Sort vertices from top to bottom. + * Compute area and determine front vs. back facing. + * Do coarse clip test against tile bounds + * \return FALSE if tri is totally outside tile, TRUE otherwise + */ +static boolean +setup_sort_vertices(const struct vertex_header *v0, + const struct vertex_header *v1, + const struct vertex_header *v2) { + float area, sign; #if DEBUG_VERTS - fprintf(stderr, "Triangle:\n"); - print_vertex(v0); - print_vertex(v1); - print_vertex(v2); + if (spu.init.id==0) { + fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id); + print_vertex(v0); + print_vertex(v1); + print_vertex(v2); + } #endif - setup.vprovoke = v2; - /* determine bottom to top order of vertices */ { float y0 = spu_extract(v0->data[0], 1); @@ -525,18 +448,21 @@ static boolean setup_sort_vertices(const struct vertex_header *v0, setup.vmin = v0; setup.vmid = v1; setup.vmax = v2; + sign = -1.0f; } else if (y2 <= y0) { /* y2<=y0<=y1 */ setup.vmin = v2; setup.vmid = v0; setup.vmax = v1; + sign = -1.0f; } else { /* y0<=y2<=y1 */ setup.vmin = v0; setup.vmid = v2; setup.vmax = v1; + sign = 1.0f; } } else { @@ -545,18 +471,21 @@ static boolean setup_sort_vertices(const struct vertex_header *v0, setup.vmin = v1; setup.vmid = v0; setup.vmax = v2; + sign = 1.0f; } else if (y2 <= y1) { /* y2<=y1<=y0 */ setup.vmin = v2; setup.vmid = v1; setup.vmax = v0; + sign = 1.0f; } else { /* y1<=y2<=y0 */ setup.vmin = v1; setup.vmid = v2; setup.vmax = v0; + sign = -1.0f; } } } @@ -585,31 +514,16 @@ static boolean setup_sort_vertices(const struct vertex_header *v0, /* * Compute triangle's area. Use 1/area to compute partial * derivatives of attributes later. - * - * The area will be the same as prim->det, but the sign may be - * different depending on how the vertices get sorted above. - * - * To determine whether the primitive is front or back facing we - * use the prim->det value because its sign is correct. */ - { - const float area = (setup.emaj.dx * setup.ebot.dy - - setup.ebot.dx * setup.emaj.dy); - - setup.oneoverarea = 1.0f / area; - /* - _mesa_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup.oneoverarea, area, prim->det ); - */ - } + area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy; -#if 0 - /* We need to know if this is a front or back-facing triangle for: - * - the GLSL gl_FrontFacing fragment attribute (bool) - * - two-sided stencil test - */ - setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW); -#endif + setup.oneOverArea = 1.0f / area; + + /* The product of area * sign indicates front/back orientation (0/1) */ + setup.facing = (area * sign > 0.0f) + ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); + + setup.vprovoke = v2; return TRUE; } @@ -622,63 +536,11 @@ static boolean setup_sort_vertices(const struct vertex_header *v0, * \param slot which attribute slot */ static INLINE void -const_coeff(uint slot) +const_coeff4(uint slot) { - setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].a0.v = setup.vprovoke->data[slot]; -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static INLINE void -tri_linear_coeff(uint slot, uint firstComp, uint lastComp) -{ - uint i; - const float *vmin_d = (float *) &setup.vmin->data[slot]; - const float *vmid_d = (float *) &setup.vmid->data[slot]; - const float *vmax_d = (float *) &setup.vmax->data[slot]; - const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f; - const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f; - - for (i = firstComp; i < lastComp; i++) { - float botda = vmid_d[i] - vmin_d[i]; - float majda = vmax_d[i] - vmin_d[i]; - float a = setup.ebot.dy * majda - botda * setup.emaj.dy; - float b = setup.emaj.dx * botda - majda * setup.ebot.dx; - - ASSERT(slot < PIPE_MAX_SHADER_INPUTS); - - setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; - setup.coef[slot].dady.f[i] = b * setup.oneoverarea; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - setup.coef[slot].a0.f[i] = (vmin_d[i] - - (setup.coef[slot].dadx.f[i] * x + - setup.coef[slot].dady.f[i] * y)); - } - - /* - _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup.coef[slot].a0[i], - setup.coef[slot].dadx.f[i], - setup.coef[slot].dady.f[i]); - */ + setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].a0 = setup.vprovoke->data[slot]; } @@ -702,18 +564,16 @@ tri_linear_coeff4(uint slot) vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), spu_mul(majda, spu_splats(setup.ebot.dx))); - setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea)); - setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea)); + setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); - vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); - vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); + vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); - setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy)); + setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); } - -#if 0 /** * Compute a0, dadx and dady for a perspective-corrected interpolant, * for a triangle. @@ -722,82 +582,76 @@ tri_linear_coeff4(uint slot) * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void tri_persp_coeff( unsigned slot, - unsigned i ) +static void +tri_persp_coeff4(uint slot) { - /* premultiply by 1/w: - */ - float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3]; - float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3]; - float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3]; - - float botda = mida - mina; - float majda = maxa - mina; - float a = setup.ebot.dy * majda - botda * setup.emaj.dy; - float b = setup.emaj.dx * botda - majda * setup.ebot.dx; - - /* - printf("tri persp %d,%d: %f %f %f\n", slot, i, - setup.vmin->data[slot][i], - setup.vmid->data[slot][i], - setup.vmax->data[slot][i] - ); - */ + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3)); + const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3)); + const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3)); + + vector float vmin_d = setup.vmin->data[slot]; + vector float vmid_d = setup.vmid->data[slot]; + vector float vmax_d = setup.vmax->data[slot]; - assert(slot < PIPE_MAX_SHADER_INPUTS); - assert(i <= 3); + vmin_d = spu_mul(vmin_d, vmin_w); + vmid_d = spu_mul(vmid_d, vmid_w); + vmax_d = spu_mul(vmax_d, vmax_w); - setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; - setup.coef[slot].dady.f[i] = b * setup.oneoverarea; - setup.coef[slot].a0.f[i] = (mina - - (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + - setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f))); + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; + + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); + + setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); + + setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); } -#endif + /** * Compute the setup.coef[] array dadx, dady, a0 values. * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. */ -static void setup_tri_coefficients(void) +static void +setup_tri_coefficients(void) { -#if 1 uint i; for (i = 0; i < spu.vertex_info.num_attribs; i++) { - switch (spu.vertex_info.interp_mode[i]) { + switch (spu.vertex_info.attrib[i].interp_mode) { case INTERP_NONE: break; - case INTERP_POS: - /*tri_linear_coeff(i, 2, 3);*/ - /* XXX interp W if PERSPECTIVE... */ - tri_linear_coeff4(i); - break; case INTERP_CONSTANT: - const_coeff(i); + const_coeff4(i); break; + case INTERP_POS: + /* fall-through */ case INTERP_LINEAR: tri_linear_coeff4(i); break; case INTERP_PERSPECTIVE: - tri_linear_coeff4(i); /* temporary */ + tri_persp_coeff4(i); break; default: ASSERT(0); } } -#else - ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS); - ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR || - spu.vertex_info.interp_mode[1] == INTERP_CONSTANT); - tri_linear_coeff(0, 2, 3); /* slot 0, z */ - tri_linear_coeff(1, 0, 4); /* slot 1, color */ -#endif } -static void setup_tri_edges(void) +static void +setup_tri_edges(void) { float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; @@ -827,9 +681,8 @@ static void setup_tri_edges(void) * Render the upper or lower half of a triangle. * Scissoring/cliprect is applied here too. */ -static void subtriangle( struct edge *eleft, - struct edge *eright, - unsigned lines ) +static void +subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) { const int minx = setup.cliprect_minx; const int maxx = setup.cliprect_maxx; @@ -902,7 +755,8 @@ static void subtriangle( struct edge *eleft, * The tile data should have already been fetched. */ boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) +tri_draw(const float *v0, const float *v1, const float *v2, + uint tx, uint ty) { setup.tx = tx; setup.ty = ty; @@ -926,19 +780,14 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) setup.span.y_flags = 0; setup.span.right[0] = 0; setup.span.right[1] = 0; - /* setup.span.z_mode = tri_z_mode( setup.ctx ); */ - /* init_constant_attribs( setup ); */ - - if (setup.oneoverarea < 0.0) { - /* emaj on left: - */ + if (setup.oneOverArea < 0.0) { + /* emaj on left */ subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); } else { - /* emaj on right: - */ + /* emaj on right */ subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); } |