diff options
Diffstat (limited to 'src/mesa/pipe/cell/spu/spu_tri.c')
-rw-r--r-- | src/mesa/pipe/cell/spu/spu_tri.c | 222 |
1 files changed, 26 insertions, 196 deletions
diff --git a/src/mesa/pipe/cell/spu/spu_tri.c b/src/mesa/pipe/cell/spu/spu_tri.c index a32878d917..a26a4f098d 100644 --- a/src/mesa/pipe/cell/spu/spu_tri.c +++ b/src/mesa/pipe/cell/spu/spu_tri.c @@ -39,18 +39,11 @@ #include "spu_tile.h" #include "spu_tri.h" +#include "spu_ztest.h" -/* - * If SIMD_Z=1 the Z buffer is floating point and we use vector instructions - * to do Z testing/updating. - */ -#define SIMD_Z 0 -#if SIMD_Z +/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ typedef vector unsigned int mask_t; -#else -typedef uint mask_t; -#endif /** @@ -282,20 +275,11 @@ pack_colors(uint uicolors[4], const float4 fcolors[4]) } - -static unsigned int -do_depth_test(int x, int y, unsigned int mask) +static INLINE mask_t +do_depth_test(int x, int y, mask_t quadmask) { - static const float4 zscale16 - = {.f={65535.0, 65535.0, 65535.0, 65535.0}}; - static const float4 zscale32 - = {.f={(float)0xffffffff, - (float)0xffffffff, - (float)0xffffffff, - (float)0xffffffff}}; - int ix = x - setup.cliprect_minx; - int iy = y - setup.cliprect_miny; float4 zvals; + mask_t mask; zvals.v = eval_z((float) x, (float) y); @@ -305,129 +289,20 @@ do_depth_test(int x, int y, unsigned int mask) cur_tile_status_z = TILE_STATUS_DIRTY; } -#if 0 - if (cur_tile_status_z == TILE_STATUS_CLEAR) { - /* now, _really_ clear the tile */ - clear_z_tile(&ztile); - } - else if (cur_tile_status_z != TILE_STATUS_DIRTY) { - /* make sure we've got the tile from main mem */ - wait_on_mask(1 << TAG_READ_TILE_Z); - } - cur_tile_status_z = TILE_STATUS_DIRTY; -#endif - if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - zvals.v = spu_mul(zvals.v, zscale16.v); - if (mask & MASK_TOP_LEFT) { - uint z = (uint) zvals.f[0]; - if (z < ztile.t16[iy][ix]) - ztile.t16[iy][ix] = z; - else - mask &= ~MASK_TOP_LEFT; - } - - if (mask & MASK_TOP_RIGHT) { - uint z = (uint) zvals.f[1]; - if (z < ztile.t16[iy][ix+1]) - ztile.t16[iy][ix+1] = z; - else - mask &= ~MASK_TOP_RIGHT; - } - - if (mask & MASK_BOTTOM_LEFT) { - uint z = (uint) zvals.f[2]; - if (z < ztile.t16[iy+1][ix]) - ztile.t16[iy+1][ix] = z; - else - mask &= ~MASK_BOTTOM_LEFT; - } - - if (mask & MASK_BOTTOM_RIGHT) { - uint z = (uint) zvals.f[3]; - if (z < ztile.t16[iy+1][ix+1]) - ztile.t16[iy+1][ix+1] = z; - else - mask &= ~MASK_BOTTOM_RIGHT; - } + int ix = (x - setup.cliprect_minx) / 4; + int iy = (y - setup.cliprect_miny) / 2; + mask = spu_z16_test_less(zvals.v, &ztile.us8[iy][ix], x>>1, quadmask); } else { - zvals.v = spu_mul(zvals.v, zscale32.v); - ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM); - if (mask & MASK_TOP_LEFT) { - uint z = (uint) zvals.f[0]; - if (z < ztile.t32[iy][ix]) - ztile.t32[iy][ix] = z; - else - mask &= ~MASK_TOP_LEFT; - } - - if (mask & MASK_TOP_RIGHT) { - uint z = (uint) zvals.f[1]; - if (z < ztile.t32[iy][ix+1]) - ztile.t32[iy][ix+1] = z; - else - mask &= ~MASK_TOP_RIGHT; - } - - if (mask & MASK_BOTTOM_LEFT) { - uint z = (uint) zvals.f[2]; - if (z < ztile.t32[iy+1][ix]) - ztile.t32[iy+1][ix] = z; - else - mask &= ~MASK_BOTTOM_LEFT; - } - - if (mask & MASK_BOTTOM_RIGHT) { - uint z = (uint) zvals.f[3]; - if (z < ztile.t32[iy+1][ix+1]) - ztile.t32[iy+1][ix+1] = z; - else - mask &= ~MASK_BOTTOM_RIGHT; - } + int ix = (x - setup.cliprect_minx) / 2; + int iy = (y - setup.cliprect_miny) / 2; + mask = spu_z32_test_less(zvals.v, &ztile.ui4[iy][ix], quadmask); } - - if (mask) - cur_tile_status_z = TILE_STATUS_DIRTY; - return mask; } - - -static vector unsigned int -do_depth_test_simd(int x, int y, vector unsigned int quadmask) -{ - int ix = (x - setup.cliprect_minx) / 2; - int iy = (y - setup.cliprect_miny) / 2; - float4 zvals; - - vector unsigned int zmask; - - zvals.v = eval_z((float) x, (float) y); - - if (cur_tile_status_z == TILE_STATUS_CLEAR) { - /* now, _really_ clear the tile */ - clear_z_tile(&ztile); - } - else if (cur_tile_status_z != TILE_STATUS_DIRTY) { - /* make sure we've got the tile from main mem */ - wait_on_mask(1 << TAG_READ_TILE_Z); - } - cur_tile_status_z = TILE_STATUS_DIRTY; - - /* XXX fetch Z value sooner to hide latency here */ - zmask = spu_cmpgt(ztile.f4[ix][iy].v, zvals.v); - zmask = spu_and(zmask, quadmask); - - ztile.f4[ix][iy].v = spu_sel(ztile.f4[ix][iy].v, zvals.v, zmask); - //ztile.f4[ix][iy].v = spu_sel(zvals.v, ztile.f4[ix][iy].v, mask4); - - return zmask; -} - - /** * Emit a quad (pass to next stage). No clipping is done. */ @@ -461,36 +336,18 @@ emit_quad( int x, int y, mask_t mask ) } if (spu.depth_stencil.depth.enabled) { -#if SIMD_Z - mask = do_depth_test_simd(x, y, mask); -#else mask = do_depth_test(x, y, mask); -#endif } -#if !SIMD_Z - if (mask) -#endif - { - if (cur_tile_status_c == TILE_STATUS_CLEAR) { - /* now, _really_ clear the tile */ - clear_c_tile(&ctile); - } + /* If any bits in mask are set... */ + if (spu_extract(spu_orx(mask), 0)) { -#if 0 if (cur_tile_status_c == TILE_STATUS_CLEAR) { /* now, _really_ clear the tile */ clear_c_tile(&ctile); - cur_tile_status_c = TILE_STATUS_DIRTY; } - else if (cur_tile_status_c != TILE_STATUS_DIRTY) { - /* make sure we've got the tile from main mem */ - wait_on_mask(1 << TAG_READ_TILE_COLOR); - } -#endif cur_tile_status_c = TILE_STATUS_DIRTY; -#if SIMD_Z if (spu_extract(mask, 0)) ctile.t32[iy][ix] = colors[QUAD_TOP_LEFT]; if (spu_extract(mask, 1)) @@ -499,20 +356,11 @@ emit_quad( int x, int y, mask_t mask ) ctile.t32[iy+1][ix] = colors[QUAD_BOTTOM_LEFT]; if (spu_extract(mask, 3)) ctile.t32[iy+1][ix+1] = colors[QUAD_BOTTOM_RIGHT]; -#elif 0 + +#if 0 /* SIMD_Z with swizzled color buffer (someday) */ vector float icolors = *((vector float *) &colors); ctile.f4[iy/2][ix/2].v = spu_sel(ctile.f4[iy/2][ix/2].v, icolors, mask); - -#else - if (mask & MASK_TOP_LEFT) - ctile.t32[iy][ix] = colors[QUAD_TOP_LEFT]; - if (mask & MASK_TOP_RIGHT) - ctile.t32[iy][ix+1] = colors[QUAD_TOP_RIGHT]; - if (mask & MASK_BOTTOM_LEFT) - ctile.t32[iy+1][ix] = colors[QUAD_BOTTOM_LEFT]; - if (mask & MASK_BOTTOM_RIGHT) - ctile.t32[iy+1][ix+1] = colors[QUAD_BOTTOM_RIGHT]; #endif } @@ -533,38 +381,20 @@ static INLINE int block( int x ) /** * Compute mask which indicates which pixels in the 2x2 quad are actually inside * the triangle's bounds. - * - * this is pretty nasty... may need to rework flush_spans again to - * fix it, if possible. + * The mask is a uint4 vector and each element will be 0 or 0xffffffff. */ -static mask_t calculate_mask( int x ) +static INLINE mask_t calculate_mask( int x ) { -#if SIMD_Z - uint m0, m1, m2, m3; - - m0 = (x >= setup.span.left[0] && x < setup.span.right[0]) * ~0; - m1 = (x+1 >= setup.span.left[0] && x+1 < setup.span.right[0]) * ~0; - m2 = (x >= setup.span.left[1] && x < setup.span.right[1]) * ~0; - m3 = (x+1 >= setup.span.left[1] && x+1 < setup.span.right[1]) * ~0; - - return (vector unsigned int) {m0, m1, m2, m3}; -#else - unsigned mask = 0x0; - - if (x >= setup.span.left[0] && x < setup.span.right[0]) - mask |= MASK_TOP_LEFT; - - if (x >= setup.span.left[1] && x < setup.span.right[1]) - mask |= MASK_BOTTOM_LEFT; - - if (x+1 >= setup.span.left[0] && x+1 < setup.span.right[0]) - mask |= MASK_TOP_RIGHT; - - if (x+1 >= setup.span.left[1] && x+1 < setup.span.right[1]) - mask |= MASK_BOTTOM_RIGHT; - + /* This is a little tricky. + * Use & instead of && to avoid branches. + * Use negation to convert true/false to ~0/0 values. + */ + mask_t mask; + mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0); + mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1); + mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2); + mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3); return mask; -#endif } |