summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/cell/spu
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/cell/spu')
-rw-r--r-- src/gallium/drivers/cell/spu/spu_main.c 19
-rw-r--r-- src/gallium/drivers/cell/spu/spu_main.h 9
-rw-r--r-- src/gallium/drivers/cell/spu/spu_tri.c 59
3 files changed, 60 insertions, 27 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 0a490ab277..fccff01e10 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -64,6 +64,9 @@ static unsigned char depth_stencil_code_buffer[4 * 64]
static unsigned char fb_blend_code_buffer[4 * 64]
ALIGN16_ATTRIB;
+static unsigned char logicop_code_buffer[4 * 64]
+ ALIGN16_ATTRIB;
+
/**
* Tell the PPU that this SPU has finished copying a buffer to
@@ -513,6 +516,22 @@ cmd_batch(uint opcode)
pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
break;
}
+ case CELL_CMD_STATE_LOGICOP: {
+ struct cell_command_logicop *code =
+ (struct cell_command_logicop *) &buffer[pos+1];
+
+ mfc_get(logicop_code_buffer,
+ (unsigned int) code->base, /* src */
+ code->size,
+ TAG_BATCH_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << TAG_BATCH_BUFFER);
+
+ spu.logicop = (logicop_func) logicop_code_buffer;
+ pos += (1 + ROUNDUP8(sizeof(struct cell_command_logicop)) / 8);
+ break;
+ }
case CELL_CMD_FLUSH_BUFFER_RANGE: {
struct cell_buffer_range *br = (struct cell_buffer_range *)
&buffer[pos+1];
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 49f5d99674..c20452931a 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -77,9 +77,14 @@ struct spu_blend_results {
typedef struct spu_blend_results (*blend_func)(
qword frag_r, qword frag_g, qword frag_b, qword frag_a,
qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
- qword const_r, qword const_g, qword const_b, qword const_a,
+ qword const_r, qword const_g, qword const_b, qword const_a);
+
+typedef struct spu_blend_results (*logicop_func)(
+ qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
+ qword frag_r, qword frag_g, qword frag_b, qword frag_a,
qword frag_mask);
+
struct spu_framebuffer {
void *color_start; /**< addr of color surface in main memory */
void *depth_start; /**< addr of depth surface in main memory */
@@ -111,6 +116,8 @@ struct spu_global
blend_func blend;
qword const_blend_color[4] ALIGN16_ATTRIB;
+ logicop_func logicop;
+
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
struct cell_command_texture texture;
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index e6a1ce01df..95c629a8aa 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -305,7 +305,6 @@ emit_quad( int x, int y, mask_t mask )
if (spu_extract(spu_orx(mask), 0)) {
const int ix = x - setup.cliprect_minx;
const int iy = y - setup.cliprect_miny;
- const vector unsigned char shuffle = spu.color_shuffle;
vector float colors[4];
spu.cur_ctile_status = TILE_STATUS_DIRTY;
@@ -330,45 +329,53 @@ emit_quad( int x, int y, mask_t mask )
}
+ /* Convert fragment data from AoS to SoA format.
+ */
+ qword soa_frag[4];
+ _transpose_matrix4x4((vec_float4 *) soa_frag, colors);
+
/* Read the current framebuffer values.
- *
- * Ignore read_fb for now. In the future we can use this to avoid
- * reading the framebuffer if read_fb is false and the fragment mask is
- * all 0xffffffff. This is the common case, so it is probably worth
- * the effort. We'll have to profile to determine whether or not the
- * extra conditional branches hurt overall performance.
*/
- vec_float4 aos_pix[4] = {
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]),
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]),
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]),
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]),
+ const qword pix[4] = {
+ (qword) spu_splats(spu.ctile.ui[iy+0][ix+0]),
+ (qword) spu_splats(spu.ctile.ui[iy+0][ix+1]),
+ (qword) spu_splats(spu.ctile.ui[iy+1][ix+0]),
+ (qword) spu_splats(spu.ctile.ui[iy+1][ix+1]),
};
qword soa_pix[4];
- qword soa_frag[4];
- /* Convert pixel and fragment data from AoS to SoA format.
- */
- _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix);
- _transpose_matrix4x4((vec_float4 *) soa_frag, colors);
+ if (spu.read_fb) {
+ /* Convert pixel data from AoS to SoA format.
+ */
+ vec_float4 aos_pix[4] = {
+ spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]),
+ spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]),
+ spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]),
+ spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]),
+ };
+
+ _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix);
+ }
- const struct spu_blend_results result =
+
+ struct spu_blend_results result =
(*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3],
soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3],
spu.const_blend_color[0], spu.const_blend_color[1],
- spu.const_blend_color[2], spu.const_blend_color[3],
- (qword) mask);
+ spu.const_blend_color[2], spu.const_blend_color[3]);
/* Convert final pixel data from SoA to AoS format.
*/
- _transpose_matrix4x4(aos_pix, (const vec_float4 *) &result);
-
- spu.ctile.ui[iy+0][ix+0] = spu_pack_color_shuffle(aos_pix[0], shuffle);
- spu.ctile.ui[iy+0][ix+1] = spu_pack_color_shuffle(aos_pix[1], shuffle);
- spu.ctile.ui[iy+1][ix+0] = spu_pack_color_shuffle(aos_pix[2], shuffle);
- spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(aos_pix[3], shuffle);
+ result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3],
+ result.r, result.g, result.b, result.a,
+ (qword) mask);
+
+ spu.ctile.ui[iy+0][ix+0] = spu_extract((vec_uint4) result.r, 0);
+ spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0);
+ spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0);
+ spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0);
}
#endif
}