summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/cell/spu
diff options
context:
space:
mode:
authorBen Skeggs <skeggsb@gmail.com>2008-09-12 20:33:59 +1000
committerBen Skeggs <skeggsb@gmail.com>2008-09-12 20:33:59 +1000
commit81335d0f1760fe172a106f79e81281c1f0d7dedf (patch)
treef081924b0dad0fdfb1fa94580d63a88498469909 /src/gallium/drivers/cell/spu
parentf302fca5eb63e4bca8af5b35c585451486143e6a (diff)
parentaa66f08a21b791f338b519f0c2162cd8f7b3aeb0 (diff)
Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
Diffstat (limited to 'src/gallium/drivers/cell/spu')
-rw-r--r--src/gallium/drivers/cell/spu/Makefile7
-rw-r--r--src/gallium/drivers/cell/spu/spu_colorpack.h6
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c225
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h97
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c602
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.h58
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.c2
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c134
8 files changed, 683 insertions, 448 deletions
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
index d49abb2e82..1ae0dfb8c1 100644
--- a/src/gallium/drivers/cell/spu/Makefile
+++ b/src/gallium/drivers/cell/spu/Makefile
@@ -22,12 +22,15 @@ SOURCES = \
spu_render.c \
spu_texture.c \
spu_tile.c \
- spu_tri.c \
+ spu_tri.c
+
+OLD_SOURCES = \
spu_exec.c \
spu_util.c \
spu_vertex_fetch.c \
spu_vertex_shader.c
+
SPU_OBJECTS = $(SOURCES:.c=.o) \
SPU_ASM_OUT = $(SOURCES:.c=.s) \
@@ -43,7 +46,7 @@ INCLUDE_DIRS = \
$(SPU_CC) $(SPU_CFLAGS) -c $<
.c.s:
- $(SPU_CC) $(SPU_CFLAGS) -S $<
+ $(SPU_CC) $(SPU_CFLAGS) -O3 -S $<
# The .a file will be linked into the main/PPU executable
diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h
index e9fee8a3a6..fd8dc6ded3 100644
--- a/src/gallium/drivers/cell/spu/spu_colorpack.h
+++ b/src/gallium/drivers/cell/spu/spu_colorpack.h
@@ -79,14 +79,14 @@ spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle)
static INLINE vector float
-spu_unpack_color(uint color)
+spu_unpack_B8G8R8A8(uint color)
{
vector unsigned int color_u4 = spu_splats(color);
color_u4 = spu_shuffle(color_u4, color_u4,
((vector unsigned char) {
- 0, 0, 0, 0,
- 5, 5, 5, 5,
10, 10, 10, 10,
+ 5, 5, 5, 5,
+ 0, 0, 0, 0,
15, 15, 15, 15}) );
return spu_convtf(color_u4, 32);
}
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index d223f32d94..78260c4259 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -34,6 +34,7 @@
#include "spu_main.h"
#include "spu_render.h"
+#include "spu_per_fragment_op.h"
#include "spu_texture.h"
#include "spu_tile.h"
//#include "spu_test.h"
@@ -46,7 +47,7 @@
/*
helpful headers:
/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
-/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
+/opt/cell/sdk/usr/include/libmisc.h
*/
boolean Debug = FALSE;
@@ -62,14 +63,6 @@ struct spu_vs_context draw;
static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
ALIGN16_ATTRIB;
-static unsigned char depth_stencil_code_buffer[4 * 64]
- ALIGN16_ATTRIB;
-
-static unsigned char fb_blend_code_buffer[4 * 64]
- ALIGN16_ATTRIB;
-
-static unsigned char logicop_code_buffer[4 * 64]
- ALIGN16_ATTRIB;
/**
@@ -226,6 +219,46 @@ cmd_release_verts(const struct cell_command_release_verts *release)
}
+/**
+ * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
+ * This involves installing new fragment ops SPU code.
+ * If this function is never called, we'll use a regular C fallback function
+ * for fragment processing.
+ */
+static void
+cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
+{
+ if (Debug)
+ printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);
+ /* Copy SPU code from batch buffer to spu buffer */
+ memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
+ /* Copy state info (for fallback case only) */
+ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
+ memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
+
+ /* Point function pointer at new code */
+ spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
+
+ spu.read_depth = spu.depth_stencil_alpha.depth.enabled;
+ spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
+}
+
+
+static void
+cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
+{
+ if (Debug)
+ printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id);
+ /* Copy SPU code from batch buffer to spu buffer */
+ memcpy(spu.fragment_program_code, fp->code,
+ SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
+#if 01
+ /* Point function pointer at new code */
+ spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
+#endif
+}
+
+
static void
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
{
@@ -252,102 +285,24 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
switch (spu.fb.depth_format) {
case PIPE_FORMAT_Z32_UNORM:
+ spu.fb.zsize = 4;
+ spu.fb.zscale = (float) 0xffffffffu;
+ break;
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
spu.fb.zsize = 4;
+ spu.fb.zscale = (float) 0x00ffffffu;
break;
case PIPE_FORMAT_Z16_UNORM:
spu.fb.zsize = 2;
+ spu.fb.zscale = (float) 0xffffu;
break;
default:
spu.fb.zsize = 0;
break;
}
-
- if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM)
- spu.color_shuffle = ((vector unsigned char) {
- 12, 0, 4, 8, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0});
- else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM)
- spu.color_shuffle = ((vector unsigned char) {
- 8, 4, 0, 12, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0});
- else
- ASSERT(0);
-}
-
-
-static void
-cmd_state_blend(const struct cell_command_blend *state)
-{
- if (Debug)
- printf("SPU %u: BLEND: enabled %d\n",
- spu.init.id,
- (state->size != 0));
-
- ASSERT_ALIGN16(state->base);
-
- if (state->size != 0) {
- mfc_get(fb_blend_code_buffer,
- (unsigned int) state->base, /* src */
- ROUNDUP16(state->size),
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
- spu.blend = (blend_func) fb_blend_code_buffer;
- spu.read_fb = state->read_fb;
- } else {
- spu.read_fb = FALSE;
- }
-}
-
-
-static void
-cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state)
-{
- if (Debug)
- printf("SPU %u: DEPTH_STENCIL: ztest %d\n",
- spu.init.id,
- state->read_depth);
-
- ASSERT_ALIGN16(state->base);
-
- if (state->size != 0) {
- mfc_get(depth_stencil_code_buffer,
- (unsigned int) state->base, /* src */
- ROUNDUP16(state->size),
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
- } else {
- /* If there is no code, emit a return instruction.
- */
- depth_stencil_code_buffer[0] = 0x35;
- depth_stencil_code_buffer[1] = 0x00;
- depth_stencil_code_buffer[2] = 0x00;
- depth_stencil_code_buffer[3] = 0x00;
- }
-
- spu.frag_test = (frag_test_func) depth_stencil_code_buffer;
- spu.read_depth = state->read_depth;
- spu.read_stencil = state->read_stencil;
-}
-
-
-static void
-cmd_state_logicop(const struct cell_command_logicop * code)
-{
- mfc_get(logicop_code_buffer,
- (unsigned int) code->base, /* src */
- code->size,
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
-
- spu.logicop = (logicop_func) logicop_code_buffer;
}
@@ -450,7 +405,9 @@ cmd_finish(void)
/**
- * Execute a batch of commands
+ * Execute a batch of commands which was sent to us by the PPU.
+ * See the cell_emit_state.c code to see where the commands come from.
+ *
* The opcode param encodes the location of the buffer and its size.
*/
static void
@@ -487,16 +444,14 @@ cmd_batch(uint opcode)
printf("SPU %u: release batch buf %u\n", spu.init.id, buf);
release_buffer(buf);
+ /*
+ * Loop over commands in the batch buffer
+ */
for (pos = 0; pos < usize; /* no incr */) {
switch (buffer[pos]) {
- case CELL_CMD_STATE_FRAMEBUFFER:
- {
- struct cell_command_framebuffer *fb
- = (struct cell_command_framebuffer *) &buffer[pos];
- cmd_state_framebuffer(fb);
- pos += sizeof(*fb) / 8;
- }
- break;
+ /*
+ * rendering commands
+ */
case CELL_CMD_CLEAR_SURFACE:
{
struct cell_command_clear_surface *clr
@@ -514,26 +469,32 @@ cmd_batch(uint opcode)
pos += pos_incr;
}
break;
- case CELL_CMD_RELEASE_VERTS:
+ /*
+ * state-update commands
+ */
+ case CELL_CMD_STATE_FRAMEBUFFER:
{
- struct cell_command_release_verts *release
- = (struct cell_command_release_verts *) &buffer[pos];
- cmd_release_verts(release);
- pos += sizeof(*release) / 8;
+ struct cell_command_framebuffer *fb
+ = (struct cell_command_framebuffer *) &buffer[pos];
+ cmd_state_framebuffer(fb);
+ pos += sizeof(*fb) / 8;
}
break;
- case CELL_CMD_FINISH:
- cmd_finish();
- pos += 1;
- break;
- case CELL_CMD_STATE_BLEND:
- cmd_state_blend((struct cell_command_blend *) &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct cell_command_blend)) / 8);
+ case CELL_CMD_STATE_FRAGMENT_OPS:
+ {
+ struct cell_command_fragment_ops *fops
+ = (struct cell_command_fragment_ops *) &buffer[pos];
+ cmd_state_fragment_ops(fops);
+ pos += sizeof(*fops) / 8;
+ }
break;
- case CELL_CMD_STATE_DEPTH_STENCIL:
- cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *)
- &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8);
+ case CELL_CMD_STATE_FRAGMENT_PROGRAM:
+ {
+ struct cell_command_fragment_program *fp
+ = (struct cell_command_fragment_program *) &buffer[pos];
+ cmd_state_fragment_program(fp);
+ pos += sizeof(*fp) / 8;
+ }
break;
case CELL_CMD_STATE_SAMPLER:
{
@@ -569,8 +530,10 @@ cmd_batch(uint opcode)
pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
break;
case CELL_CMD_STATE_BIND_VS:
+#if 0
spu_bind_vertex_shader(&draw,
(struct cell_shader_info *) &buffer[pos+1]);
+#endif
pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
break;
case CELL_CMD_STATE_ATTRIB_FETCH:
@@ -578,9 +541,20 @@ cmd_batch(uint opcode)
&buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
break;
- case CELL_CMD_STATE_LOGICOP:
- cmd_state_logicop((struct cell_command_logicop *) &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct cell_command_logicop)) / 8);
+ /*
+ * misc commands
+ */
+ case CELL_CMD_FINISH:
+ cmd_finish();
+ pos += 1;
+ break;
+ case CELL_CMD_RELEASE_VERTS:
+ {
+ struct cell_command_release_verts *release
+ = (struct cell_command_release_verts *) &buffer[pos];
+ cmd_release_verts(release);
+ pos += sizeof(*release) / 8;
+ }
break;
case CELL_CMD_FLUSH_BUFFER_RANGE: {
struct cell_buffer_range *br = (struct cell_buffer_range *)
@@ -650,7 +624,9 @@ main_loop(void)
exitFlag = 1;
break;
case CELL_CMD_VS_EXECUTE:
+#if 0
spu_execute_vertex_shader(&draw, &cmd.vs);
+#endif
break;
case CELL_CMD_BATCH:
cmd_batch(opcode);
@@ -675,6 +651,11 @@ one_time_init(void)
memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
invalidate_tex_cache();
+
+ /* Install default/fallback fragment processing function.
+ * This will normally be overriden by a code-gen'd function.
+ */
+ spu.fragment_ops = spu_fallback_fragment_ops;
}
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 4879f8c9c8..2c7b625840 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -41,6 +41,10 @@
#define MAX_HEIGHT 1024
+/**
+ * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels.
+ * The data may be addressed through several different types.
+ */
typedef union {
ushort us[TILE_SIZE][TILE_SIZE];
uint ui[TILE_SIZE][TILE_SIZE];
@@ -56,38 +60,29 @@ typedef union {
#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */
-struct spu_frag_test_results {
- qword mask;
- qword depth;
- qword stencil;
-};
-
-typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask,
- qword pixel_depth, qword pixel_stencil, qword frag_depth,
- qword frag_alpha, qword facing);
-
-
-struct spu_blend_results {
- qword r;
- qword g;
- qword b;
- qword a;
-};
+/** Function for sampling textures */
+typedef vector float (*spu_sample_texture_func)(uint unit,
+ vector float texcoord);
-typedef struct spu_blend_results (*blend_func)(
- qword frag_r, qword frag_g, qword frag_b, qword frag_a,
- qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
- qword const_r, qword const_g, qword const_b, qword const_a);
+/** Function for performing per-fragment ops */
+typedef void (*spu_fragment_ops_func)(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragRed,
+ vector float fragGreen,
+ vector float fragBlue,
+ vector float fragAlpha,
+ vector unsigned int mask);
-typedef struct spu_blend_results (*logicop_func)(
- qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
- qword frag_r, qword frag_g, qword frag_b, qword frag_a,
- qword frag_mask);
+/** Function for running fragment program */
+typedef void (*spu_fragment_program_func)(vector float *inputs,
+ vector float *outputs,
+ vector float *constants);
-typedef vector float (*sample_texture_func)(uint unit, vector float texcoord);
-
-struct spu_framebuffer {
+struct spu_framebuffer
+{
void *color_start; /**< addr of color surface in main memory */
void *depth_start; /**< addr of depth surface in main memory */
enum pipe_format color_format;
@@ -99,6 +94,7 @@ struct spu_framebuffer {
uint depth_clear_value;
uint zsize; /**< 0, 2 or 4 bytes per Z */
+ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */
} ALIGN16_ATTRIB;
@@ -115,35 +111,31 @@ struct spu_texture
/**
- * All SPU global/context state will be in singleton object of this type:
+ * All SPU global/context state will be in a singleton object of this type:
*/
struct spu_global
{
+ /** One-time init/constant info */
struct cell_init_info init;
+ /*
+ * Current state
+ */
struct spu_framebuffer fb;
- boolean read_depth;
- boolean read_stencil;
- frag_test_func frag_test; /**< Current depth/stencil test code */
-
- boolean read_fb; /**< Does current blend mode require framebuffer read? */
- blend_func blend; /**< Current blend code */
- qword const_blend_color[4] ALIGN16_ATTRIB;
-
- logicop_func logicop; /**< Current logicop code **/
-
+ struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
+ struct pipe_blend_state blend;
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
struct spu_texture texture[PIPE_MAX_SAMPLERS];
-
struct vertex_info vertex_info;
- /* XXX more state to come */
-
-
- /** current color and Z tiles */
+ /** Current color and Z tiles */
tile_t ctile ALIGN16_ATTRIB;
tile_t ztile ALIGN16_ATTRIB;
+ /** Read depth/stencil tiles? */
+ boolean read_depth;
+ boolean read_stencil;
+
/** Current tiles' status */
ubyte cur_ctile_status, cur_ztile_status;
@@ -151,11 +143,22 @@ struct spu_global
ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ /** Current fragment ops machine code */
+ uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS];
+ /** Current fragment ops function */
+ spu_fragment_ops_func fragment_ops;
+
+ /** Current fragment program machine code */
+ uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
+ /** Current fragment ops function */
+ spu_fragment_program_func fragment_program;
- /** for converting RGBA to PIPE_FORMAT_x colors */
- vector unsigned char color_shuffle;
+ /** Current texture sampler function */
+ spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
- sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
+ /** Fragment program constants (XXX preliminary/used) */
+#define MAX_CONSTANTS 32
+ vector float constants[MAX_CONSTANTS];
} ALIGN16_ATTRIB;
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
index c0a729b3d2..03dd547845 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -1,211 +1,475 @@
-/*
- * (C) Copyright IBM Corporation 2008
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
/**
- * \file spu_per_fragment_op.c
- * SPU implementation various per-fragment operations.
- *
- * \author Ian Romanick <idr@us.ibm.com>
+ * \author Brian Paul
*/
+
+#include <transpose_matrix4x4.h>
#include "pipe/p_format.h"
#include "spu_main.h"
+#include "spu_colorpack.h"
#include "spu_per_fragment_op.h"
-#define ZERO 0x80
-static void
-read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
- enum pipe_format depth_format, qword *depth,
- qword *stencil)
-{
- const int ix = x / 2;
- const int iy = y / 2;
+#define LINEAR_QUAD_LAYOUT 1
- switch (depth_format) {
- case PIPE_FORMAT_Z16_UNORM: {
- qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
- const qword shuf_vec = (qword) {
- ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
- ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7
- };
+/**
+ * Called by rasterizer for each quad after the shader has run. Do
+ * all the per-fragment operations including alpha test, z test,
+ * stencil test, blend, colormask and logicops. This is a
+ * fallback/debug function. In reality we'll use a generated function
+ * produced by the PPU. But this function is useful for
+ * debug/validation.
+ */
+void
+spu_fallback_fragment_ops(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragR,
+ vector float fragG,
+ vector float fragB,
+ vector float fragA,
+ vector unsigned int mask)
+{
+ vector float frag_aos[4];
+ unsigned int c0, c1, c2, c3;
+
+ /* do alpha test */
+ if (spu.depth_stencil_alpha.alpha.enabled) {
+ vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
+ vector unsigned int amask;
+
+ switch (spu.depth_stencil_alpha.alpha.func) {
+ case PIPE_FUNC_LESS:
+ amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */
+ break;
+ case PIPE_FUNC_GREATER:
+ amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */
+ break;
+ case PIPE_FUNC_GEQUAL:
+ amask = spu_cmpgt(ref, fragA);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_LEQUAL:
+ amask = spu_cmpgt(fragA, ref);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_EQUAL:
+ amask = spu_cmpeq(ref, fragA);
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ amask = spu_cmpeq(ref, fragA);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_ALWAYS:
+ amask = spu_splats(0xffffffffU);
+ break;
+ case PIPE_FUNC_NEVER:
+ amask = spu_splats( 0x0U);
+ break;
+ default:
+ ;
+ }
+
+ mask = spu_and(mask, amask);
+ }
+ /* Z and/or stencil testing... */
+ if (spu.depth_stencil_alpha.depth.enabled ||
+ spu.depth_stencil_alpha.stencil[0].enabled) {
+
+ /* get four Z/Stencil values from tile */
+ vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
+ vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
+ vector unsigned int ifbZ = spu_and(ifbZS, mask24);
+ vector unsigned int ifbS = spu_andc(ifbZS, mask24);
+
+ if (spu.depth_stencil_alpha.stencil[0].enabled) {
+ /* do stencil test */
+ ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM);
+
+ }
+ else if (spu.depth_stencil_alpha.depth.enabled) {
+ /* do depth test */
+
+ ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM ||
+ spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM);
+
+ vector unsigned int ifragZ;
+ vector unsigned int zmask;
+
+ /* convert four fragZ from float to uint */
+ fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
+ ifragZ = spu_convtu(fragZ, 0);
+
+ /* do depth comparison, setting zmask with results */
+ switch (spu.depth_stencil_alpha.depth.func) {
+ case PIPE_FUNC_LESS:
+ zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
+ break;
+ case PIPE_FUNC_GREATER:
+ zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */
+ break;
+ case PIPE_FUNC_GEQUAL:
+ zmask = spu_cmpgt(ifbZ, ifragZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_LEQUAL:
+ zmask = spu_cmpgt(ifragZ, ifbZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_EQUAL:
+ zmask = spu_cmpeq(ifbZ, ifragZ);
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ zmask = spu_cmpeq(ifbZ, ifragZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_ALWAYS:
+ zmask = spu_splats(0xffffffffU);
+ break;
+ case PIPE_FUNC_NEVER:
+ zmask = spu_splats( 0x0U);
+ break;
+ default:
+ ;
+ }
+
+ mask = spu_and(mask, zmask);
+
+ /* merge framebuffer Z and fragment Z according to the mask */
+ ifbZ = spu_or(spu_and(ifragZ, mask),
+ spu_andc(ifbZ, mask));
+ }
+
+ if (spu_extract(spu_orx(mask), 0)) {
+ /* put new fragment Z/Stencil values back into Z/Stencil tile */
+ depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
+
+ spu.cur_ztile_status = TILE_STATUS_DIRTY;
+ }
+ }
- /* At even X values we want the first 4 shorts, and at odd X values we
- * want the second 4 shorts.
+ if (spu.blend.blend_enable) {
+ /* blending terms, misc regs */
+ vector float term1r, term1g, term1b, term1a;
+ vector float term2r, term2g, term2b, term2a;
+ vector float one, tmp;
+
+ vector float fbRGBA[4]; /* current framebuffer colors */
+
+ /* get colors from framebuffer/tile */
+ {
+ vector float fc[4];
+ uint c0, c1, c2, c3;
+
+#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
+ c0 = colorTile->ui[y][x*2+0];
+ c1 = colorTile->ui[y][x*2+1];
+ c2 = colorTile->ui[y][x*2+2];
+ c3 = colorTile->ui[y][x*2+3];
+#else
+ c0 = colorTile->ui[y+0][x+0];
+ c1 = colorTile->ui[y+0][x+1];
+ c2 = colorTile->ui[y+1][x+0];
+ c3 = colorTile->ui[y+1][x+1];
+#endif
+ switch (spu.fb.color_format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ fc[0] = spu_unpack_B8G8R8A8(c0);
+ fc[1] = spu_unpack_B8G8R8A8(c1);
+ fc[2] = spu_unpack_B8G8R8A8(c2);
+ fc[3] = spu_unpack_B8G8R8A8(c3);
+ break;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ fc[0] = spu_unpack_A8R8G8B8(c0);
+ fc[1] = spu_unpack_A8R8G8B8(c1);
+ fc[2] = spu_unpack_A8R8G8B8(c2);
+ fc[3] = spu_unpack_A8R8G8B8(c3);
+ break;
+ default:
+ ASSERT(0);
+ }
+ _transpose_matrix4x4(fbRGBA, fc);
+ }
+
+ /*
+ * Compute Src RGB terms
*/
- qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3));
- qword bias_mask = si_fsmbi(0x3333);
- qword sv = si_a(shuf_vec, si_and(bias_mask, bias));
-
- *depth = si_shufb(*ptr, *ptr, sv);
- *stencil = si_il(0);
- break;
+ switch (spu.blend.rgb_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term1r = fragR;
+ term1g = fragG;
+ term1b = fragB;
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ term1r =
+ term1g =
+ term1b = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term1r = spu_mul(fragR, fragR);
+ term1g = spu_mul(fragG, fragG);
+ term1b = spu_mul(fragB, fragB);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term1r = spu_mul(fragR, fragA);
+ term1g = spu_mul(fragG, fragA);
+ term1b = spu_mul(fragB, fragA);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Src Alpha term
+ */
+ switch (spu.blend.alpha_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term1a = fragA;
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term1a = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term1a = spu_mul(fragA, fragA);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest RGB terms
+ */
+ switch (spu.blend.rgb_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term2r = fragR;
+ term2g = fragG;
+ term2b = fragB;
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ term2r =
+ term2g =
+ term2b = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term2r = spu_mul(fbRGBA[0], fragR);
+ term2g = spu_mul(fbRGBA[1], fragG);
+ term2b = spu_mul(fbRGBA[2], fragB);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term2r = spu_mul(fbRGBA[0], fragA);
+ term2g = spu_mul(fbRGBA[1], fragA);
+ term2b = spu_mul(fbRGBA[2], fragA);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ one = spu_splats(1.0f);
+ tmp = spu_sub(one, fragA);
+ term2r = spu_mul(fbRGBA[0], tmp);
+ term2g = spu_mul(fbRGBA[1], tmp);
+ term2b = spu_mul(fbRGBA[2], tmp);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest Alpha term
+ */
+ switch (spu.blend.alpha_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term2a = fragA;
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term2a = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term2a = spu_mul(fbRGBA[3], fragA);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ one = spu_splats(1.0f);
+ tmp = spu_sub(one, fragA);
+ term2a = spu_mul(fbRGBA[3], tmp);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest RGB terms
+ */
+ switch (spu.blend.rgb_func) {
+ case PIPE_BLEND_ADD:
+ fragR = spu_add(term1r, term2r);
+ fragG = spu_add(term1g, term2g);
+ fragB = spu_add(term1b, term2b);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ fragR = spu_sub(term1r, term2r);
+ fragG = spu_sub(term1g, term2g);
+ fragB = spu_sub(term1b, term2b);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest A term
+ */
+ switch (spu.blend.alpha_func) {
+ case PIPE_BLEND_ADD:
+ fragA = spu_add(term1a, term2a);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ fragA = spu_sub(term1a, term2a);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
}
- case PIPE_FORMAT_Z32_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
-
- *depth = *ptr;
- *stencil = si_il(0);
- break;
+ /*
+ * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
+ */
+#if 0
+ /* original code */
+ {
+ vector float frag_soa[4];
+ frag_soa[0] = fragR;
+ frag_soa[1] = fragG;
+ frag_soa[2] = fragB;
+ frag_soa[3] = fragA;
+ _transpose_matrix4x4(frag_aos, frag_soa);
}
-
-
- case PIPE_FORMAT_Z24S8_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
- qword mask = si_fsmbi(0xEEEE);
-
- *depth = si_rotmai(si_and(*ptr, mask), -8);
- *stencil = si_andc(*ptr, mask);
+#else
+ /* short-cut relying on function parameter layout: */
+ _transpose_matrix4x4(frag_aos, &fragR);
+ (void) fragG;
+ (void) fragB;
+#endif
+
+ /*
+ * Pack float colors into 32-bit RGBA words.
+ */
+ switch (spu.fb.color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ c0 = spu_pack_A8R8G8B8(frag_aos[0]);
+ c1 = spu_pack_A8R8G8B8(frag_aos[1]);
+ c2 = spu_pack_A8R8G8B8(frag_aos[2]);
+ c3 = spu_pack_A8R8G8B8(frag_aos[3]);
break;
- }
-
-
- case PIPE_FORMAT_S8Z24_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
- *depth = si_and(*ptr, si_fsmbi(0x7777));
- *stencil = si_andi(si_roti(*ptr, 8), 0x0ff);
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ c0 = spu_pack_B8G8R8A8(frag_aos[0]);
+ c1 = spu_pack_B8G8R8A8(frag_aos[1]);
+ c2 = spu_pack_B8G8R8A8(frag_aos[2]);
+ c3 = spu_pack_B8G8R8A8(frag_aos[3]);
break;
- }
-
-
default:
+ fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
ASSERT(0);
- break;
- }
-}
-
-
-static void
-write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
- enum pipe_format depth_format,
- qword depth, qword stencil)
-{
- const int ix = x / 2;
- const int iy = y / 2;
-
- (void) stencil;
-
- switch (depth_format) {
- case PIPE_FORMAT_Z16_UNORM: {
- qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
-
- qword sv = ((ix & 0x01) == 0)
- ? (qword) { 2, 3, 6, 7, 10, 11, 14, 15,
- 24, 25, 26, 27, 28, 29, 30, 31 }
- : (qword) { 16, 17, 18, 19, 20 , 21, 22, 23,
- 2, 3, 6, 7, 10, 11, 14, 15 };
- *ptr = si_shufb(depth, *ptr, sv);
- break;
- }
-
-
- case PIPE_FORMAT_Z32_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
- *ptr = depth;
- break;
}
- case PIPE_FORMAT_Z24S8_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
- qword mask = si_fsmbi(0xEEEE);
-
- depth = si_shli(depth, 8);
- *ptr = si_selb(stencil, depth, mask);
- break;
- }
-
-
- case PIPE_FORMAT_S8Z24_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
- qword mask = si_fsmbi(0x7777);
-
- stencil = si_shli(stencil, 24);
- *ptr = si_selb(stencil, depth, mask);
- break;
+ /*
+ * Color masking
+ */
+ if (spu.blend.colormask != 0xf) {
+ /* XXX to do */
+ /* apply color mask to 32-bit packed colors */
}
- default:
- ASSERT(0);
- break;
+ /*
+ * Logic Ops
+ */
+ if (spu.blend.logicop_enable) {
+ /* XXX to do */
+ /* apply logicop to 32-bit packed colors */
}
-}
-qword
-spu_do_depth_stencil(int x, int y,
- qword frag_mask, qword frag_depth, qword frag_alpha,
- qword facing)
-{
- struct spu_frag_test_results result;
- qword pixel_depth;
- qword pixel_stencil;
-
- /* All of this preable code (everthing before the call to frag_test) should
- * be generated on the PPU and upload to the SPU.
+ /*
+ * If mask is non-zero, mark tile as dirty.
*/
- if (spu.read_depth || spu.read_stencil) {
- read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
- &pixel_depth, &pixel_stencil);
+ if (spu_extract(spu_orx(mask), 0)) {
+ spu.cur_ctile_status = TILE_STATUS_DIRTY;
}
-
- switch (spu.fb.depth_format) {
- case PIPE_FORMAT_Z16_UNORM:
- frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu)));
- frag_depth = si_cfltu(frag_depth, 0);
- break;
- case PIPE_FORMAT_Z32_UNORM:
- frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu)));
- frag_depth = si_cfltu(frag_depth, 0);
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_S8Z24_UNORM:
- frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu)));
- frag_depth = si_cfltu(frag_depth, 0);
- break;
- default:
- ASSERT(0);
- break;
+ else {
+ return;
}
- result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil,
- frag_depth, frag_alpha, facing);
-
- /* This code (everthing after the call to frag_test) should
- * be generated on the PPU and upload to the SPU.
+ /*
+ * Write new quad colors to the framebuffer/tile.
+ * Only write pixels where the corresponding mask word is set.
*/
- if (spu.read_depth || spu.read_stencil) {
- write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
- result.depth, result.stencil);
- }
-
- return result.mask;
+#if LINEAR_QUAD_LAYOUT
+ /*
+ * Quad layout:
+ * +--+--+--+--+
+ * |p0|p1|p2|p3|
+ * +--+--+--+--+
+ */
+ if (spu_extract(mask, 0))
+ colorTile->ui[y][x*2] = c0;
+ if (spu_extract(mask, 1))
+ colorTile->ui[y][x*2+1] = c1;
+ if (spu_extract(mask, 2))
+ colorTile->ui[y][x*2+2] = c2;
+ if (spu_extract(mask, 3))
+ colorTile->ui[y][x*2+3] = c3;
+#else
+ /*
+ * Quad layout:
+ * +--+--+
+ * |p0|p1|
+ * +--+--+
+ * |p2|p3|
+ * +--+--+
+ */
+ if (spu_extract(mask, 0))
+ colorTile->ui[y+0][x+0] = c0;
+ if (spu_extract(mask, 1))
+ colorTile->ui[y+0][x+1] = c1;
+ if (spu_extract(mask, 2))
+ colorTile->ui[y+1][x+0] = c2;
+ if (spu_extract(mask, 3))
+ colorTile->ui[y+1][x+1] = c3;
+#endif
}
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
index 6571258699..f817abf046 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
@@ -1,32 +1,44 @@
-/*
- * (C) Copyright IBM Corporation 2008
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
#ifndef SPU_PER_FRAGMENT_OP
#define SPU_PER_FRAGMENT_OP
-extern qword
-spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth,
- qword frag_alpha, qword facing);
+
+extern void
+spu_fallback_fragment_ops(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragRed,
+ vector float fragGreen,
+ vector float fragBlue,
+ vector float fragAlpha,
+ vector unsigned int mask);
+
#endif /* SPU_PER_FRAGMENT_OP */
diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c
index 5051774f00..117b8a36f8 100644
--- a/src/gallium/drivers/cell/spu/spu_texture.c
+++ b/src/gallium/drivers/cell/spu/spu_texture.c
@@ -97,7 +97,7 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
const qword offset_y = si_andi((qword) y, 0x1f);
const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row);
- const qword tile_size = (qword) spu_splats(sizeof(tile_t));
+ const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
tile_offset = si_mpy((qword) tile_offset, tile_size);
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 2a4e0b423c..8b93878192 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -38,7 +38,6 @@
#include "spu_texture.h"
#include "spu_tile.h"
#include "spu_tri.h"
-#include "spu_per_fragment_op.h"
/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
@@ -209,7 +208,7 @@ clip_emit_quad(struct setup_stage *setup)
/**
* Evaluate attribute coefficients (plane equations) to compute
* attribute values for the four fragments in a quad.
- * Eg: four colors will be compute.
+ * Eg: four colors will be computed (in AoS format).
*/
static INLINE void
eval_coeff(uint slot, float x, float y, vector float result[4])
@@ -255,31 +254,6 @@ eval_z(float x, float y)
}
-static INLINE mask_t
-do_depth_test(int x, int y, mask_t quadmask)
-{
- float4 zvals;
- mask_t mask;
-
- if (spu.fb.depth_format == PIPE_FORMAT_NONE)
- return quadmask;
-
- zvals.v = eval_z((float) x, (float) y);
-
- mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx,
- y - setup.cliprect_miny,
- (qword) quadmask,
- (qword) zvals.v,
- (qword) spu_splats((unsigned char) 0x0ffu),
- (qword) spu_splats((unsigned int) 0x01u));
-
- if (spu_extract(spu_orx(mask), 0))
- spu.cur_ztile_status = TILE_STATUS_DIRTY;
-
- return mask;
-}
-
-
/**
* Emit a quad (pass to next stage). No clipping is done.
* Note: about 1/5 to 1/7 of the time, mask is zero and this function
@@ -289,18 +263,6 @@ do_depth_test(int x, int y, mask_t quadmask)
static INLINE void
emit_quad( int x, int y, mask_t mask )
{
-#if 0
- struct softpipe_context *sp = setup.softpipe;
- setup.quad.x0 = x;
- setup.quad.y0 = y;
- setup.quad.mask = mask;
- sp->quad.first->run(sp->quad.first, &setup.quad);
-#else
-
- if (spu.read_depth) {
- mask = do_depth_test(x, y, mask);
- }
-
/* If any bits in mask are set... */
if (spu_extract(spu_orx(mask), 0)) {
const int ix = x - setup.cliprect_minx;
@@ -308,6 +270,7 @@ emit_quad( int x, int y, mask_t mask )
vector float colors[4];
spu.cur_ctile_status = TILE_STATUS_DIRTY;
+ spu.cur_ztile_status = TILE_STATUS_DIRTY;
if (spu.texture[0].start) {
/* texture mapping */
@@ -351,59 +314,68 @@ emit_quad( int x, int y, mask_t mask )
}
else {
/* simple shading */
+#if 0
eval_coeff(1, (float) x, (float) y, colors);
- }
-
- /* Convert fragment data from AoS to SoA format.
- */
- qword soa_frag[4];
- _transpose_matrix4x4((vec_float4 *) soa_frag, colors);
+#else
+ /* XXX new fragment program code */
+
+ if (spu.fragment_program) {
+ vector float inputs[4*4], outputs[2*4];
+
+ /* setup inputs */
+ eval_coeff(1, (float) x, (float) y, inputs);
+
+ /* Execute the current fragment program */
+ spu.fragment_program(inputs, outputs, spu.constants);
+
+ /* Copy outputs */
+ colors[0] = outputs[0*4+0];
+ colors[1] = outputs[0*4+1];
+ colors[2] = outputs[0*4+2];
+ colors[3] = outputs[0*4+3];
+
+ if (0 && spu.init.id==0 && y == 48) {
+ printf("colors[0] = %f %f %f %f\n",
+ spu_extract(colors[0], 0),
+ spu_extract(colors[0], 1),
+ spu_extract(colors[0], 2),
+ spu_extract(colors[0], 3));
+ printf("colors[1] = %f %f %f %f\n",
+ spu_extract(colors[1], 0),
+ spu_extract(colors[1], 1),
+ spu_extract(colors[1], 2),
+ spu_extract(colors[1], 3));
+ }
- /* Read the current framebuffer values.
- */
- const qword pix[4] = {
- (qword) spu_splats(spu.ctile.ui[iy+0][ix+0]),
- (qword) spu_splats(spu.ctile.ui[iy+0][ix+1]),
- (qword) spu_splats(spu.ctile.ui[iy+1][ix+0]),
- (qword) spu_splats(spu.ctile.ui[iy+1][ix+1]),
- };
+ }
+#endif
+ }
- qword soa_pix[4];
- if (spu.read_fb) {
- /* Convert pixel data from AoS to SoA format.
+ {
+ /* Convert fragment data from AoS to SoA format.
+ * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
+ * This is temporary!
*/
- vec_float4 aos_pix[4] = {
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]),
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]),
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]),
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]),
- };
-
- _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix);
- }
+ vector float soa_frag[4];
+ _transpose_matrix4x4(soa_frag, colors);
+ float4 fragZ;
- struct spu_blend_results result =
- (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3],
- soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3],
- spu.const_blend_color[0], spu.const_blend_color[1],
- spu.const_blend_color[2], spu.const_blend_color[3]);
+ fragZ.v = eval_z((float) x, (float) y);
+ /* Do all per-fragment/quad operations here, including:
+ * alpha test, z test, stencil test, blend and framebuffer writing.
+ */
+ spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile,
+ fragZ.v,
+ soa_frag[0], soa_frag[1],
+ soa_frag[2], soa_frag[3],
+ mask);
+ }
- /* Convert final pixel data from SoA to AoS format.
- */
- result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3],
- result.r, result.g, result.b, result.a,
- (qword) mask);
-
- spu.ctile.ui[iy+0][ix+0] = spu_extract((vec_uint4) result.r, 0);
- spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0);
- spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0);
- spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0);
}
-#endif
}