summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/cell/spu
diff options
context:
space:
mode:
authorBrian Paul <brian.paul@tungstengraphics.com>2008-09-10 18:22:00 -0600
committerBrian Paul <brian.paul@tungstengraphics.com>2008-09-11 10:39:28 -0600
commit284ab5a6127f8b452acaa0e10ac1d9ebc87fac3e (patch)
tree9a443b1d850966178360b17d208e28a1d05481c0 /src/gallium/drivers/cell/spu
parentee582fd3a7a9ddbcb5595249201cf213a6c6f014 (diff)
cell: checkpoint commit of new per-fragment processing
Do code generation for alpha test, z test, stencil, blend, colormask and framebuffer/tile read/write as a single code block. Ian's previous blend/z/stencil test code is still there but mostly disabled and will be removed soon.
Diffstat (limited to 'src/gallium/drivers/cell/spu')
-rw-r--r--src/gallium/drivers/cell/spu/Makefile2
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c53
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h23
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c231
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.h11
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c30
6 files changed, 343 insertions, 7 deletions
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
index d49abb2e82..e285ae9fdb 100644
--- a/src/gallium/drivers/cell/spu/Makefile
+++ b/src/gallium/drivers/cell/spu/Makefile
@@ -43,7 +43,7 @@ INCLUDE_DIRS = \
$(SPU_CC) $(SPU_CFLAGS) -c $<
.c.s:
- $(SPU_CC) $(SPU_CFLAGS) -S $<
+ $(SPU_CC) $(SPU_CFLAGS) -O3 -S $<
# The .a file will be linked into the main/PPU executable
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index c4236817a9..4e0ec15925 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -34,6 +34,7 @@
#include "spu_main.h"
#include "spu_render.h"
+#include "spu_per_fragment_op.h"
#include "spu_texture.h"
#include "spu_tile.h"
//#include "spu_test.h"
@@ -46,7 +47,7 @@
/*
helpful headers:
/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
-/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
+/opt/cell/sdk/usr/include/libmisc.h
*/
boolean Debug = FALSE;
@@ -226,6 +227,24 @@ cmd_release_verts(const struct cell_command_release_verts *release)
}
+/**
+ * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
+ * This involves installing new fragment ops SPU code.
+ * If this function is never called, we'll use a regular C fallback function
+ * for fragment processing.
+ */
+static void
+cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
+{
+ if (Debug)
+ printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);
+ /* Copy SPU code from batch buffer to spu buffer */
+ memcpy(spu.fragment_ops.code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
+ /* Point function pointer at new code */
+ spu.fragment_ops.func = (spu_fragment_ops_func) spu.fragment_ops.code;
+}
+
+
static void
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
{
@@ -257,6 +276,8 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
break;
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
spu.fb.zsize = 4;
spu.fb.zscale = (float) 0x00ffffffu;
break;
@@ -282,6 +303,8 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
}
+#define NEW_FRAGMENT_FUNCTION 01
+
static void
cmd_state_blend(const struct cell_command_blend *state)
{
@@ -302,7 +325,9 @@ cmd_state_blend(const struct cell_command_blend *state)
wait_on_mask(1 << TAG_BATCH_BUFFER);
spu.blend = (blend_func) fb_blend_code_buffer;
spu.read_fb = state->read_fb;
- } else {
+ }
+ else
+ {
spu.read_fb = FALSE;
}
}
@@ -326,7 +351,9 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat
0, /* tid */
0 /* rid */);
wait_on_mask(1 << TAG_BATCH_BUFFER);
- } else {
+ }
+ else
+ {
/* If there is no code, emit a return instruction.
*/
depth_stencil_code_buffer[0] = 0x35;
@@ -338,12 +365,14 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat
spu.frag_test = (frag_test_func) depth_stencil_code_buffer;
spu.read_depth = state->read_depth;
spu.read_stencil = state->read_stencil;
+ spu.depth_stencil_alpha = state->state;
}
static void
cmd_state_logicop(const struct cell_command_logicop * code)
{
+#if !NEW_FRAGMENT_FUNCTION
mfc_get(logicop_code_buffer,
(unsigned int) code->base, /* src */
code->size,
@@ -353,6 +382,7 @@ cmd_state_logicop(const struct cell_command_logicop * code)
wait_on_mask(1 << TAG_BATCH_BUFFER);
spu.logicop = (logicop_func) logicop_code_buffer;
+#endif
}
@@ -455,7 +485,9 @@ cmd_finish(void)
/**
- * Execute a batch of commands
+ * Execute a batch of commands which was sent to us by the PPU.
+ * See the cell_emit_state.c code to see where the commands come from.
+ *
* The opcode param encodes the location of the buffer and its size.
*/
static void
@@ -519,6 +551,14 @@ cmd_batch(uint opcode)
pos += pos_incr;
}
break;
+ case CELL_CMD_STATE_FRAGMENT_OPS:
+ {
+ struct cell_command_fragment_ops *fops
+ = (struct cell_command_fragment_ops *) &buffer[pos];
+ cmd_state_fragment_ops(fops);
+ pos += sizeof(*fops) / 8;
+ }
+ break;
case CELL_CMD_RELEASE_VERTS:
{
struct cell_command_release_verts *release
@@ -680,6 +720,11 @@ one_time_init(void)
memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
invalidate_tex_cache();
+
+ /* Install default/fallback fragment processing function.
+ * This will normally be overriden by a code-gen'd function.
+ */
+ spu.fragment_ops.func = spu_fallback_fragment_ops;
}
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index c2a53c9dcf..7ab34f5222 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -91,6 +91,24 @@ typedef struct spu_blend_results (*logicop_func)(
typedef vector float (*sample_texture_func)(uint unit, vector float texcoord);
+
+typedef void (*spu_fragment_ops_func)(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragRed,
+ vector float fragGreen,
+ vector float fragBlue,
+ vector float fragAlpha,
+ vector unsigned int mask);
+
+struct spu_fragment_ops
+{
+ uint code[SPU_MAX_FRAGMENT_OPS_INSTS];
+ spu_fragment_ops_func func; /**< Current fragment ops function */
+} ALIGN16_ATTRIB;
+
+
struct spu_framebuffer {
void *color_start; /**< addr of color surface in main memory */
void *depth_start; /**< addr of depth surface in main memory */
@@ -127,6 +145,9 @@ struct spu_global
struct cell_init_info init;
struct spu_framebuffer fb;
+
+ struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
+
boolean read_depth;
boolean read_stencil;
frag_test_func frag_test; /**< Current depth/stencil test code */
@@ -142,6 +163,8 @@ struct spu_global
struct vertex_info vertex_info;
+ struct spu_fragment_ops fragment_ops;
+
/* XXX more state to come */
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
index 29dc07a2e8..ffc596aa62 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -29,8 +29,11 @@
* \author Ian Romanick <idr@us.ibm.com>
*/
+
+#include <transpose_matrix4x4.h>
#include "pipe/p_format.h"
#include "spu_main.h"
+#include "spu_colorpack.h"
#include "spu_per_fragment_op.h"
#define ZERO 0x80
@@ -90,7 +93,8 @@ read_ds_quad(tile_t *tile, unsigned x, unsigned y,
break;
}
- case PIPE_FORMAT_S8Z24_UNORM: {
+ case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM: {
qword *ptr = (qword *) &tile->ui4[iy][ix];
*depth = si_and(*ptr, si_fsmbi(0x7777));
@@ -153,7 +157,8 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
break;
}
- case PIPE_FORMAT_S8Z24_UNORM: {
+ case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM: {
qword *ptr = (qword *) &buffer->ui4[iy][ix];
/* form select mask = 0111,0111,0111,0111 */
qword mask = si_fsmbi(0x7777);
@@ -217,3 +222,225 @@ spu_do_depth_stencil(int x, int y,
return result.mask;
}
+
+
+
+
+/**
+ * Called by rasterizer for each quad after the shader has run. This
+ * is a fallback/debug function. In reality we'll use a generated
+ * function produced by the PPU. But this function is useful for
+ * debug/validation.
+ */
+void
+spu_fallback_fragment_ops(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragRed,
+ vector float fragGreen,
+ vector float fragBlue,
+ vector float fragAlpha,
+ vector unsigned int mask)
+{
+ vector float frag_soa[4], frag_aos[4];
+ unsigned int c0, c1, c2, c3;
+
+ /* do alpha test */
+ if (spu.depth_stencil_alpha.alpha.enabled) {
+ vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
+ vector unsigned int amask;
+
+ switch (spu.depth_stencil_alpha.alpha.func) {
+ case PIPE_FUNC_LESS:
+ amask = spu_cmpgt(ref, fragAlpha); /* mask = (fragAlpha < ref) */
+ break;
+ case PIPE_FUNC_GREATER:
+ amask = spu_cmpgt(fragAlpha, ref); /* mask = (fragAlpha > ref) */
+ break;
+ case PIPE_FUNC_GEQUAL:
+ amask = spu_cmpgt(ref, fragAlpha);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_LEQUAL:
+ amask = spu_cmpgt(fragAlpha, ref);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_EQUAL:
+ amask = spu_cmpeq(ref, fragAlpha);
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ amask = spu_cmpeq(ref, fragAlpha);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_ALWAYS:
+ amask = spu_splats(0xffffffffU);
+ break;
+ case PIPE_FUNC_NEVER:
+ amask = spu_splats( 0x0U);
+ break;
+ default:
+ ;
+ }
+
+ mask = spu_and(mask, amask);
+ }
+
+ /* Z and/or stencil testing... */
+ if (spu.depth_stencil_alpha.depth.enabled ||
+ spu.depth_stencil_alpha.stencil[0].enabled) {
+
+ /* get four Z/Stencil values from tile */
+ vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
+ vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
+ vector unsigned int ifbZ = spu_and(ifbZS, mask24);
+ vector unsigned int ifbS = spu_andc(ifbZS, mask24);
+
+ if (spu.depth_stencil_alpha.stencil[0].enabled) {
+ /* do stencil test */
+ ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM);
+
+ }
+ else if (spu.depth_stencil_alpha.depth.enabled) {
+ /* do depth test */
+
+ ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM ||
+ spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM);
+
+ vector unsigned int ifragZ;
+ vector unsigned int zmask;
+
+ /* convert four fragZ from float to uint */
+ fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
+ ifragZ = spu_convtu(fragZ, 0);
+
+ /* do depth comparison, setting zmask with results */
+ switch (spu.depth_stencil_alpha.depth.func) {
+ case PIPE_FUNC_LESS:
+ zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
+ break;
+ case PIPE_FUNC_GREATER:
+ zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */
+ break;
+ case PIPE_FUNC_GEQUAL:
+ zmask = spu_cmpgt(ifbZ, ifragZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_LEQUAL:
+ zmask = spu_cmpgt(ifragZ, ifbZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_EQUAL:
+ zmask = spu_cmpeq(ifbZ, ifragZ);
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ zmask = spu_cmpeq(ifbZ, ifragZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_ALWAYS:
+ zmask = spu_splats(0xffffffffU);
+ break;
+ case PIPE_FUNC_NEVER:
+ zmask = spu_splats( 0x0U);
+ break;
+ default:
+ ;
+ }
+
+ mask = spu_and(mask, zmask);
+
+ /* merge framebuffer Z and fragment Z according to the mask */
+ ifbZ = spu_or(spu_and(ifragZ, mask),
+ spu_andc(ifbZ, mask));
+ }
+
+ if (spu_extract(spu_orx(mask), 0)) {
+ /* put new fragment Z/Stencil values back into Z/Stencil tile */
+ depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
+
+ spu.cur_ztile_status = TILE_STATUS_DIRTY;
+ }
+ }
+
+ /* XXX do blending here */
+
+ /* XXX do colormask test here */
+
+
+ if (spu_extract(spu_orx(mask), 0)) {
+ spu.cur_ctile_status = TILE_STATUS_DIRTY;
+ }
+ else {
+ return;
+ }
+
+ /* convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA */
+#if 0
+ {
+ vector float frag_soa[4];
+ frag_soa[0] = fragRed;
+ frag_soa[1] = fragGreen;
+ frag_soa[2] = fragBlue;
+ frag_soa[3] = fragAlpha;
+ _transpose_matrix4x4(frag_aos, frag_soa);
+ }
+#else
+ /* short-cut relying on function parameter layout: */
+ _transpose_matrix4x4(frag_aos, &fragRed);
+ (void) fragGreen;
+ (void) fragBlue;
+#endif
+
+ switch (spu.fb.color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ c0 = spu_pack_A8R8G8B8(frag_aos[0]);
+ c1 = spu_pack_A8R8G8B8(frag_aos[1]);
+ c2 = spu_pack_A8R8G8B8(frag_aos[2]);
+ c3 = spu_pack_A8R8G8B8(frag_aos[3]);
+ break;
+
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ c0 = spu_pack_B8G8R8A8(frag_aos[0]);
+ c1 = spu_pack_B8G8R8A8(frag_aos[1]);
+ c2 = spu_pack_B8G8R8A8(frag_aos[2]);
+ c3 = spu_pack_B8G8R8A8(frag_aos[3]);
+ break;
+ default:
+ fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
+ ASSERT(0);
+ }
+
+#if 0
+ /*
+ * Quad layout:
+ * +--+--+
+ * |p0|p1|
+ * +--+--+
+ * |p2|p3|
+ * +--+--+
+ */
+ if (spu_extract(mask, 0))
+ colorTile->ui[y+0][x+0] = c0;
+ if (spu_extract(mask, 1))
+ colorTile->ui[y+0][x+1] = c1;
+ if (spu_extract(mask, 2))
+ colorTile->ui[y+1][x+0] = c2;
+ if (spu_extract(mask, 3))
+ colorTile->ui[y+1][x+1] = c3;
+#else
+ /*
+ * Quad layout:
+ * +--+--+--+--+
+ * |p0|p1|p2|p3|
+ * +--+--+--+--+
+ */
+ if (spu_extract(mask, 0))
+ colorTile->ui[y][x*2] = c0;
+ if (spu_extract(mask, 1))
+ colorTile->ui[y][x*2+1] = c1;
+ if (spu_extract(mask, 2))
+ colorTile->ui[y][x*2+2] = c2;
+ if (spu_extract(mask, 3))
+ colorTile->ui[y][x*2+3] = c3;
+#endif
+}
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
index 6571258699..ffadf0661c 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
@@ -29,4 +29,15 @@ extern qword
spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth,
qword frag_alpha, qword facing);
+extern void
+spu_fallback_fragment_ops(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragRed,
+ vector float fragGreen,
+ vector float fragBlue,
+ vector float fragAlpha,
+ vector unsigned int mask);
+
#endif /* SPU_PER_FRAGMENT_OP */
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index a3ea0a3e69..71ef6ca24f 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -297,9 +297,12 @@ emit_quad( int x, int y, mask_t mask )
sp->quad.first->run(sp->quad.first, &setup.quad);
#else
+#define NEW_FRAGMENT_FUNCTION 01
+#if !NEW_FRAGMENT_FUNCTION
if (spu.read_depth) {
mask = do_depth_test(x, y, mask);
}
+#endif
/* If any bits in mask are set... */
if (spu_extract(spu_orx(mask), 0)) {
@@ -308,6 +311,7 @@ emit_quad( int x, int y, mask_t mask )
vector float colors[4];
spu.cur_ctile_status = TILE_STATUS_DIRTY;
+ spu.cur_ztile_status = TILE_STATUS_DIRTY;
if (spu.texture[0].start) {
/* texture mapping */
@@ -355,6 +359,29 @@ emit_quad( int x, int y, mask_t mask )
}
+#if NEW_FRAGMENT_FUNCTION
+ {
+ /* Convert fragment data from AoS to SoA format.
+ * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
+ * This is temporary!
+ */
+ vector float soa_frag[4];
+ _transpose_matrix4x4(soa_frag, colors);
+
+ float4 fragZ;
+
+ fragZ.v = eval_z((float) x, (float) y);
+
+ /* Do all per-fragment/quad operations here, including:
+ * alpha test, z test, stencil test, blend and framebuffer writing.
+ */
+ spu.fragment_ops.func(ix, iy, &spu.ctile, &spu.ztile,
+ fragZ.v,
+ soa_frag[0], soa_frag[1],
+ soa_frag[2], soa_frag[3],
+ mask);
+ }
+#else
/* Convert fragment data from AoS to SoA format.
* I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
*/
@@ -405,6 +432,9 @@ emit_quad( int x, int y, mask_t mask )
spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0);
spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0);
spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0);
+
+#endif /* NEW_FRAGMENT_FUNCTION */
+
}
#endif
}