summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/cell/spu
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/cell/spu')
-rw-r--r--src/gallium/drivers/cell/spu/Makefile1
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c25
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h16
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c191
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.h32
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c4
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c23
-rw-r--r--src/gallium/drivers/cell/spu/spu_ztest.h135
8 files changed, 269 insertions, 158 deletions
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
index c071de1900..115ca8cd90 100644
--- a/src/gallium/drivers/cell/spu/Makefile
+++ b/src/gallium/drivers/cell/spu/Makefile
@@ -19,6 +19,7 @@ SOURCES = \
spu_main.c \
spu_blend.c \
spu_dcache.c \
+ spu_per_fragment_op.c \
spu_render.c \
spu_texture.c \
spu_tile.c \
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 59300028d4..8e46f6e471 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -58,6 +58,9 @@ struct spu_vs_context draw;
static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX]
ALIGN16_ATTRIB;
+static unsigned char depth_stencil_code_buffer[4 * 64]
+ ALIGN16_ATTRIB;
+
/**
* Tell the PPU that this SPU has finished copying a buffer to
* local store and that it may be reused by the PPU.
@@ -248,14 +251,26 @@ cmd_state_blend(const struct pipe_blend_state *state)
static void
-cmd_state_depth_stencil(const struct pipe_depth_stencil_alpha_state *state)
+cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state)
{
if (Debug)
printf("SPU %u: DEPTH_STENCIL: ztest %d\n",
spu.init.id,
- state->depth.enabled);
+ state->read_depth);
+
+ ASSERT_ALIGN16(state->base);
+
+ mfc_get(depth_stencil_code_buffer,
+ (unsigned int) state->base, /* src */
+ ROUNDUP16(state->size),
+ TAG_BATCH_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << TAG_BATCH_BUFFER);
- memcpy(&spu.depth_stencil, state, sizeof(*state));
+ spu.frag_test = (frag_test_func) depth_stencil_code_buffer;
+ spu.read_depth = state->read_depth;
+ spu.read_stencil = state->read_stencil;
}
@@ -415,9 +430,9 @@ cmd_batch(uint opcode)
pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8);
break;
case CELL_CMD_STATE_DEPTH_STENCIL:
- cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *)
+ cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *)
&buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8);
+ pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8);
break;
case CELL_CMD_STATE_SAMPLER:
cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]);
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index a13edd1702..444e218645 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -56,6 +56,17 @@ typedef union {
#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */
+struct spu_frag_test_results {
+ qword mask;
+ qword depth;
+ qword stencil;
+};
+
+typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask,
+ qword pixel_depth, qword pixel_stencil, qword frag_depth,
+ qword frag_alpha, qword facing);
+
+
struct spu_framebuffer {
void *color_start; /**< addr of color surface in main memory */
void *depth_start; /**< addr of depth surface in main memory */
@@ -79,8 +90,9 @@ struct spu_global
struct cell_init_info init;
struct spu_framebuffer fb;
- struct pipe_blend_state blend_stencil;
- struct pipe_depth_stencil_alpha_state depth_stencil;
+ boolean read_depth;
+ boolean read_stencil;
+ frag_test_func frag_test;
struct pipe_blend_state blend;
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
struct cell_command_texture texture;
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
new file mode 100644
index 0000000000..d42b522b41
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -0,0 +1,191 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file spu_per_fragment_op.c
+ * SPU implementation various per-fragment operations.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+#include "pipe/p_format.h"
+#include "spu_main.h"
+#include "spu_per_fragment_op.h"
+
+#define ZERO 0x80
+
+static void
+read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
+ enum pipe_format depth_format, qword *depth,
+ qword *stencil)
+{
+ const int ix = x / 2;
+ const int iy = y / 2;
+
+ switch (depth_format) {
+ case PIPE_FORMAT_Z16_UNORM: {
+ qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
+
+ const qword shuf_vec = (qword) {
+ ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
+ ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7
+ };
+
+
+ /* At even X values we want the first 4 shorts, and at odd X values we
+ * want the second 4 shorts.
+ */
+ qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3));
+ qword bias_mask = si_fsmbi(0x3333);
+ qword sv = si_a(shuf_vec, si_and(bias_mask, bias));
+
+ *depth = si_shufb(*ptr, *ptr, sv);
+ *stencil = si_il(0);
+ break;
+ }
+
+
+ case PIPE_FORMAT_Z32_UNORM: {
+ qword *ptr = (qword *) &buffer->ui4[iy][ix];
+
+ *depth = *ptr;
+ *stencil = si_il(0);
+ break;
+ }
+
+
+ case PIPE_FORMAT_Z24S8_UNORM: {
+ qword *ptr = (qword *) &buffer->ui4[iy][ix];
+ qword mask = si_fsmbi(0x7777);
+
+ *depth = si_and(*ptr, mask);
+ *stencil = si_rotmai(si_andc(*ptr, mask), -24);
+ break;
+ }
+
+
+ default:
+ assert(0);
+ break;
+ }
+}
+
+
+static void
+write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
+ enum pipe_format depth_format,
+ qword depth, qword stencil)
+{
+ const int ix = x / 2;
+ const int iy = y / 2;
+
+ (void) stencil;
+
+ switch (depth_format) {
+ case PIPE_FORMAT_Z16_UNORM: {
+ qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
+
+ qword sv = ((ix & 0x01) == 0)
+ ? (qword) { 2, 3, 6, 7, 10, 11, 14, 15,
+ 24, 25, 26, 27, 28, 29, 30, 31 }
+ : (qword) { 16, 17, 18, 19, 20 , 21, 22, 23,
+ 2, 3, 6, 7, 10, 11, 14, 15 };
+ *ptr = si_shufb(depth, *ptr, sv);
+ break;
+ }
+
+
+ case PIPE_FORMAT_Z32_UNORM: {
+ qword *ptr = (qword *) &buffer->ui4[iy][ix];
+ *ptr = depth;
+ break;
+ }
+
+
+ case PIPE_FORMAT_Z24S8_UNORM: {
+ qword *ptr = (qword *) &buffer->ui4[iy][ix];
+ qword mask = si_fsmbi(0x7777);
+
+ stencil = si_rotmai(stencil, 24);
+ *ptr = si_selb(stencil, depth, mask);
+ break;
+ }
+
+
+ default:
+ assert(0);
+ break;
+ }
+}
+
+
+qword
+spu_do_depth_stencil(int x, int y,
+ qword frag_mask, qword frag_depth, qword frag_alpha,
+ qword facing)
+{
+ struct spu_frag_test_results result;
+ qword pixel_depth;
+ qword pixel_stencil;
+
+ /* All of this preable code (everthing before the call to frag_test) should
+ * be generated on the PPU and upload to the SPU.
+ */
+ if (spu.read_depth || spu.read_stencil) {
+ read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
+ &pixel_depth, &pixel_stencil);
+ }
+
+ switch (spu.fb.depth_format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu)));
+ frag_depth = si_cfltu(frag_depth, 0);
+ break;
+ case PIPE_FORMAT_Z32_UNORM:
+ frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu)));
+ frag_depth = si_cfltu(frag_depth, 0);
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu)));
+ frag_depth = si_cfltu(frag_depth, 0);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil,
+ frag_depth, frag_alpha, facing);
+
+
+ /* This code (everthing after the call to frag_test) should
+ * be generated on the PPU and upload to the SPU.
+ */
+ if (spu.read_depth || spu.read_stencil) {
+ write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
+ result.depth, result.stencil);
+ }
+
+ return result.mask;
+}
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
new file mode 100644
index 0000000000..6571258699
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
@@ -0,0 +1,32 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SPU_PER_FRAGMENT_OP
+#define SPU_PER_FRAGMENT_OP
+
+extern qword
+spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth,
+ qword frag_alpha, qword facing);
+
+#endif /* SPU_PER_FRAGMENT_OP */
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
index 20e77aa2e6..6df59abd36 100644
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -98,7 +98,7 @@ my_tile(uint tx, uint ty)
static INLINE void
get_cz_tiles(uint tx, uint ty)
{
- if (spu.depth_stencil.depth.enabled) {
+ if (spu.read_depth) {
if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
//printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
@@ -153,7 +153,7 @@ static INLINE void
wait_put_cz_tiles(void)
{
wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
- if (spu.depth_stencil.depth.enabled) {
+ if (spu.read_depth) {
wait_on_mask(1 << TAG_WRITE_TILE_Z);
}
}
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index be9624cf7d..81823f2463 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -38,8 +38,7 @@
#include "spu_texture.h"
#include "spu_tile.h"
#include "spu_tri.h"
-
-#include "spu_ztest.h"
+#include "spu_per_fragment_op.h"
/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
@@ -264,16 +263,12 @@ do_depth_test(int x, int y, mask_t quadmask)
zvals.v = eval_z((float) x, (float) y);
- if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
- int ix = (x - setup.cliprect_minx) / 4;
- int iy = (y - setup.cliprect_miny) / 2;
- mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask);
- }
- else {
- int ix = (x - setup.cliprect_minx) / 2;
- int iy = (y - setup.cliprect_miny) / 2;
- mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask);
- }
+ mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx,
+ y - setup.cliprect_miny,
+ (qword) quadmask,
+ (qword) zvals.v,
+ (qword) spu_splats((unsigned char) 0x0ffu),
+ (qword) spu_splats((unsigned int) 0x01u));
if (spu_extract(spu_orx(mask), 0))
spu.cur_ztile_status = TILE_STATUS_DIRTY;
@@ -299,7 +294,7 @@ emit_quad( int x, int y, mask_t mask )
sp->quad.first->run(sp->quad.first, &setup.quad);
#else
- if (spu.depth_stencil.depth.enabled) {
+ if (spu.read_depth) {
mask = do_depth_test(x, y, mask);
}
@@ -434,7 +429,7 @@ static void flush_spans( void )
}
ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
- if (spu.depth_stencil.depth.enabled) {
+ if (spu.read_depth) {
if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
/* wait for mfc_get() to complete */
//printf("SPU: %u: waiting for ztile\n", spu.init.id);
diff --git a/src/gallium/drivers/cell/spu/spu_ztest.h b/src/gallium/drivers/cell/spu/spu_ztest.h
deleted file mode 100644
index ce8ad00339..0000000000
--- a/src/gallium/drivers/cell/spu/spu_ztest.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * Zbuffer/depth test code.
- */
-
-
-#ifndef SPU_ZTEST_H
-#define SPU_ZTEST_H
-
-
-#ifdef __SPU__
-#include <spu_intrinsics.h>
-#endif
-
-
-
-/**
- * Perform Z testing for a 16-bit/value Z buffer.
- *
- * \param zvals vector of four fragment zvalues as floats
- * \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this
- * contains the Z values for 2 quads, 8 pixels.
- * \param x x coordinate of quad (only lsbit is significant)
- * \param inMask indicates which fragments in the quad are alive
- * \return new mask indicating which fragments are alive after ztest
- */
-static INLINE vector unsigned int
-spu_z16_test_less(vector float zvals, vector unsigned short *zbuf,
- uint x, vector unsigned int inMask)
-{
-#define ZERO 0x80
- vector unsigned int zvals_ui4, zbuf_ui4, mask;
-
- /* convert floats to uints in [0, 65535] */
- zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */
- zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */
-
- /* XXX this conditional could be removed with a bit of work */
- if (x & 1) {
- /* convert zbuffer values from ushorts to uints */
- /* gather lower four ushorts */
- zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf,
- (vector unsigned int) *zbuf,
- ((vector unsigned char) {
- ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11,
- ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15}));
- /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */
- mask = spu_cmpgt(zbuf_ui4, zvals_ui4);
- /* mask &= inMask */
- mask = spu_and(mask, inMask);
- /* zbuf = mask ? zval : zbuf */
- zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask);
- /* convert zbuffer values from uints back to ushorts, preserve lower 4 */
- *zbuf = (vector unsigned short)
- spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf,
- ((vector unsigned char) {
- 16, 17, 18, 19, 20, 21, 22, 23,
- 2, 3, 6, 7, 10, 11, 14, 15}));
- }
- else {
- /* convert zbuffer values from ushorts to uints */
- /* gather upper four ushorts */
- zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf,
- (vector unsigned int) *zbuf,
- ((vector unsigned char) {
- ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
- ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7}));
- /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */
- mask = spu_cmpgt(zbuf_ui4, zvals_ui4);
- /* mask &= inMask */
- mask = spu_and(mask, inMask);
- /* zbuf = mask ? zval : zbuf */
- zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask);
- /* convert zbuffer values from uints back to ushorts, preserve upper 4 */
- *zbuf = (vector unsigned short)
- spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf,
- ((vector unsigned char) {
- 2, 3, 6, 7, 10, 11, 14, 15,
- 24, 25, 26, 27, 28, 29, 30, 31}));
- }
- return mask;
-#undef ZERO
-}
-
-
-/**
- * As above, but Zbuffer values as 32-bit uints
- */
-static INLINE vector unsigned int
-spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr,
- vector unsigned int inMask)
-{
- vector unsigned int zvals_ui4, mask, zbuf = *zbuf_ptr;
-
- /* convert floats to uints in [0, 0xffffffff] */
- zvals_ui4 = spu_convtu(zvals, 32);
- /* mask = (zbuf < zvals_ui4) ? ~0 : 0 */
- mask = spu_cmpgt(zbuf, zvals_ui4);
- /* mask &= inMask */
- mask = spu_and(mask, inMask);
- /* zbuf = mask ? zval : zbuf */
- *zbuf_ptr = spu_sel(zbuf, zvals_ui4, mask);
-
- return mask;
-}
-
-
-#endif /* SPU_ZTEST_H */