diff options
Diffstat (limited to 'src/gallium/drivers/cell/spu/spu_tri.c')
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_tri.c | 59 |
1 files changed, 40 insertions, 19 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 81823f2463..c4272d6e93 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -29,10 +29,10 @@ * Triangle rendering within a tile. */ +#include <transpose_matrix4x4.h> #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_util.h" -#include "spu_blend.h" #include "spu_colorpack.h" #include "spu_main.h" #include "spu_texture.h" @@ -261,6 +261,9 @@ do_depth_test(int x, int y, mask_t quadmask) float4 zvals; mask_t mask; + if (spu.fb.depth_format == PIPE_FORMAT_NONE) + return quadmask; + zvals.v = eval_z((float) x, (float) y); mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx, @@ -326,27 +329,45 @@ emit_quad( int x, int y, mask_t mask ) eval_coeff(1, (float) x, (float) y, colors); } -#if 1 - if (spu.blend.blend_enable) - blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors); -#endif - if (spu_extract(mask, 0)) - spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle); - if (spu_extract(mask, 1)) - spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle); - if (spu_extract(mask, 2)) - spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle); - if (spu_extract(mask, 3)) - spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle); + /* Read the current framebuffer values. + * + * Ignore read_fb for now. In the future we can use this to avoid + * reading the framebuffer if read_fb is false and the fragment mask is + * all 0xffffffff. This is the common case, so it is probably worth + * the effort. We'll have to profile to determine whether or not the + * extra conditional branches hurt overall performance. + */ + vec_float4 aos_pix[4] = { + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]), + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]), + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]), + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]), + }; -#if 0 - /* SIMD_Z with swizzled color buffer (someday) */ - vector unsigned int uicolors = *((vector unsigned int *) &colors); - spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask); -#endif - } + qword soa_pix[4]; + qword soa_frag[4]; + /* Convert pixel and fragment data from AoS to SoA format. + */ + _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix); + _transpose_matrix4x4((vec_float4 *) soa_frag, colors); + + const struct spu_blend_results result = + (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3], + soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3], + (qword) mask); + + + /* Convert final pixel data from SoA to AoS format. + */ + _transpose_matrix4x4(aos_pix, (const vec_float4 *) &result); + + spu.ctile.ui[iy+0][ix+0] = spu_pack_color_shuffle(aos_pix[0], shuffle); + spu.ctile.ui[iy+0][ix+1] = spu_pack_color_shuffle(aos_pix[1], shuffle); + spu.ctile.ui[iy+1][ix+0] = spu_pack_color_shuffle(aos_pix[2], shuffle); + spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(aos_pix[3], shuffle); + } #endif } |