summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/cell/ppu
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/cell/ppu')
-rw-r--r-- src/gallium/drivers/cell/ppu/Makefile 4
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_batch.c 56
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_batch.h 3
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_clear.c 30
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_context.c 44
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_context.h 29
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_draw_arrays.c 42
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_draw_arrays.h 20
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_flush.c 14
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_flush.h 2
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_gen_fragment.c 862
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_gen_fragment.h 38
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_pipe_state.c 23
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_spu.c 28
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_state.h 16
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_state_derived.c 29
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_state_emit.c 89
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_state_per_fragment.c 30
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_state_shader.c 26
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_state_vertex.c 12
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_surface.c 99
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_texture.c 84
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_vbuf.c 12
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_vertex_fetch.c 5
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_vertex_shader.c 2
25 files changed, 1293 insertions, 306 deletions
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile
index 0389a9554c..8699f3f8ec 100644
--- a/src/gallium/drivers/cell/ppu/Makefile
+++ b/src/gallium/drivers/cell/ppu/Makefile
@@ -5,7 +5,7 @@
TOP = ../../../../..
-include $(TOP)/configs/linux-cell
+include $(TOP)/configs/current
# This is the "top-level" cell PPU driver code, will get pulled into libGL.so
@@ -25,9 +25,9 @@ SOURCES = \
cell_context.c \
cell_draw_arrays.c \
cell_flush.c \
+ cell_gen_fragment.c \
cell_state_derived.c \
cell_state_emit.c \
- cell_state_per_fragment.c \
cell_state_shader.c \
cell_pipe_state.c \
cell_screen.c \
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c
index f45e5f25b6..16882c0129 100644
--- a/src/gallium/drivers/cell/ppu/cell_batch.c
+++ b/src/gallium/drivers/cell/ppu/cell_batch.c
@@ -32,6 +32,13 @@
+/**
+ * Search the buffer pool for an empty/free buffer and return its index.
+ * Buffers are used for storing vertex data, state and commands which
+ * will be sent to the SPUs.
+ * If no empty buffers are available, wait for one.
+ * \return buffer index in [0, CELL_NUM_BUFFERS-1]
+ */
uint
cell_get_empty_buffer(struct cell_context *cell)
{
@@ -74,6 +81,11 @@ cell_get_empty_buffer(struct cell_context *cell)
}
+/**
+ * Flush the current batch buffer to the SPUs.
+ * An empty buffer will be found and set as the new current batch buffer
+ * for subsequent commands/data.
+ */
void
cell_batch_flush(struct cell_context *cell)
{
@@ -93,11 +105,11 @@ cell_batch_flush(struct cell_context *cell)
/*
printf("cell_batch_dispatch: buf %u at %p, size %u\n",
- batch, &cell->batch_buffer[batch][0], size);
+ batch, &cell->buffer[batch][0], size);
*/
/*
- * Build "BATCH" command and sent to all SPUs.
+ * Build "BATCH" command and send to all SPUs.
*/
cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16);
@@ -120,6 +132,9 @@ cell_batch_flush(struct cell_context *cell)
}
+/**
+ * Return the number of bytes free in the current batch buffer.
+ */
uint
cell_batch_free_space(const struct cell_context *cell)
{
@@ -129,7 +144,9 @@ cell_batch_free_space(const struct cell_context *cell)
/**
- * Append data to current batch.
+ * Append data to the current batch buffer.
+ * \param data address of block of bytes to append
+ * \param bytes size of block of bytes
*/
void
cell_batch_append(struct cell_context *cell, const void *data, uint bytes)
@@ -165,6 +182,10 @@ cell_batch_append(struct cell_context *cell, const void *data, uint bytes)
}
+/**
+ * Allocate 'bytes' bytes of space in the current batch buffer.
+ * \return address in batch buffer to put data
+ */
void *
cell_batch_alloc(struct cell_context *cell, uint bytes)
{
@@ -172,6 +193,10 @@ cell_batch_alloc(struct cell_context *cell, uint bytes)
}
+/**
+ * Same as \sa cell_batch_alloc, but return an address at a particular
+ * alignment.
+ */
void *
cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
uint alignment)
@@ -215,3 +240,28 @@ cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
return pos;
}
+
+
+/**
+ * One-time init of batch buffers.
+ */
+void
+cell_init_batch_buffers(struct cell_context *cell)
+{
+ uint spu, buf;
+
+ /* init command, vertex/index buffer info */
+ for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) {
+ cell->buffer_size[buf] = 0;
+
+ /* init batch buffer status values,
+ * mark 0th buffer as used, rest as free.
+ */
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ if (buf == 0)
+ cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
+ else
+ cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE;
+ }
+ }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h
index a6eee0a8b1..f74dd60079 100644
--- a/src/gallium/drivers/cell/ppu/cell_batch.h
+++ b/src/gallium/drivers/cell/ppu/cell_batch.h
@@ -54,5 +54,8 @@ extern void *
cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
uint alignment);
+extern void
+cell_init_batch_buffers(struct cell_context *cell);
+
#endif /* CELL_BATCH_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c
index a421c95c8e..c9c0c721bb 100644
--- a/src/gallium/drivers/cell/ppu/cell_clear.c
+++ b/src/gallium/drivers/cell/ppu/cell_clear.c
@@ -35,6 +35,7 @@
#include <stdint.h>
#include "pipe/p_inlines.h"
#include "util/u_memory.h"
+#include "util/u_pack_color.h"
#include "cell/common.h"
#include "cell_clear.h"
#include "cell_context.h"
@@ -44,6 +45,27 @@
#include "cell_state.h"
+/**
+ * Convert packed pixel from one format to another.
+ */
+static unsigned
+convert_color(enum pipe_format srcFormat, unsigned srcColor,
+ enum pipe_format dstFormat)
+{
+ ubyte r, g, b, a;
+ unsigned dstColor;
+
+ util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a);
+ util_pack_color_ub(r, g, b, a, dstFormat, &dstColor);
+
+ return dstColor;
+}
+
+
+
+/**
+ * Called via pipe->clear()
+ */
void
cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
unsigned clearValue)
@@ -61,13 +83,21 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
PIPE_BUFFER_USAGE_GPU_WRITE);
if (ps == cell->framebuffer.zsbuf) {
+ /* clear z/stencil buffer */
surfIndex = 1;
}
else {
+ /* clear color buffer */
surfIndex = 0;
+
+ if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) {
+ clearValue = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue,
+ ps->format);
+ }
}
+ /* Build a CLEAR command and place it in the current batch buffer */
{
struct cell_command_clear_surface *clr
= (struct cell_command_clear_surface *)
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
index 9ff4e86943..71f1a3049d 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ b/src/gallium/drivers/cell/ppu/cell_context.c
@@ -43,11 +43,11 @@
#include "draw/draw_private.h"
#include "cell/common.h"
+#include "cell_batch.h"
#include "cell_clear.h"
#include "cell_context.h"
#include "cell_draw_arrays.h"
#include "cell_flush.h"
-#include "cell_render.h"
#include "cell_state.h"
#include "cell_surface.h"
#include "cell_spu.h"
@@ -85,12 +85,20 @@ cell_draw_create(struct cell_context *cell)
}
+#ifdef DEBUG
+static const struct debug_named_value cell_debug_flags[] = {
+ {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */
+ {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */
+ {NULL, 0}
+};
+#endif
+
+
struct pipe_context *
cell_create_context(struct pipe_screen *screen,
struct cell_winsys *cws)
{
struct cell_context *cell;
- uint spu, buf;
/* some fields need to be 16-byte aligned, so align the whole object */
cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16);
@@ -104,15 +112,6 @@ cell_create_context(struct pipe_screen *screen,
cell->pipe.screen = screen;
cell->pipe.destroy = cell_destroy_context;
- /* state setters */
- cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
- cell->pipe.set_vertex_elements = cell_set_vertex_elements;
-
- cell->pipe.draw_arrays = cell_draw_arrays;
- cell->pipe.draw_elements = cell_draw_elements;
- cell->pipe.draw_range_elements = cell_draw_range_elements;
- cell->pipe.set_edgeflags = cell_set_edgeflags;
-
cell->pipe.clear = cell_clear_surface;
cell->pipe.flush = cell_flush;
@@ -122,20 +121,28 @@ cell_create_context(struct pipe_screen *screen,
cell->pipe.wait_query = cell_wait_query;
#endif
+ cell_init_draw_functions(cell);
cell_init_state_functions(cell);
cell_init_shader_functions(cell);
cell_init_surface_functions(cell);
cell_init_texture_functions(cell);
+ cell_init_vertex_functions(cell);
cell->draw = cell_draw_create(cell);
cell_init_vbuf(cell);
+
draw_set_rasterize_stage(cell->draw, cell->vbuf);
/* convert all points/lines to tris for the time being */
draw_wide_point_threshold(cell->draw, 0.0);
draw_wide_line_threshold(cell->draw, 0.0);
+ /* get env vars or read config file to get debug flags */
+ cell->debug_flags = debug_get_flags_option("CELL_DEBUG",
+ cell_debug_flags,
+ 0 );
+
/*
* SPU stuff
*/
@@ -146,20 +153,7 @@ cell_create_context(struct pipe_screen *screen,
cell_start_spus(cell);
- /* init command, vertex/index buffer info */
- for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) {
- cell->buffer_size[buf] = 0;
-
- /* init batch buffer status values,
- * mark 0th buffer as used, rest as free.
- */
- for (spu = 0; spu < cell->num_spus; spu++) {
- if (buf == 0)
- cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
- else
- cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE;
- }
- }
+ cell_init_batch_buffers(cell);
return &cell->pipe;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index f1d1ca89a9..8cec9f45b2 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -39,8 +39,13 @@
#include "rtasm/rtasm_ppc_spe.h"
#include "tgsi/tgsi_scan.h"
+
struct cell_vbuf_render;
+
+/**
+ * Cell vertex shader state, subclass of pipe_shader_state.
+ */
struct cell_vertex_shader_state
{
struct pipe_shader_state shader;
@@ -49,6 +54,9 @@ struct cell_vertex_shader_state
};
+/**
+ * Cell fragment shader state, subclass of pipe_shader_state.
+ */
struct cell_fragment_shader_state
{
struct pipe_shader_state shader;
@@ -57,7 +65,11 @@ struct cell_fragment_shader_state
};
-struct cell_blend_state {
+/**
+ * Cell blend state atom, subclass of pipe_blend_state.
+ */
+struct cell_blend_state
+{
struct pipe_blend_state base;
/**
@@ -67,17 +79,24 @@ struct cell_blend_state {
};
-struct cell_depth_stencil_alpha_state {
- struct pipe_depth_stencil_alpha_state base;
+/**
+ * Cell depth/stencil/alpha state atom, subclass of
+ * pipe_depth_stencil_alpha_state.
+ */
+struct cell_depth_stencil_alpha_state
+{
+ struct pipe_depth_stencil_alpha_state base;
/**
* Generated code to perform alpha, stencil, and depth testing on the SPE
*/
struct spe_function code;
-
};
+/**
+ * Per-context state, subclass of pipe_context.
+ */
struct cell_context
{
struct pipe_context pipe;
@@ -144,6 +163,8 @@ struct cell_context
struct spe_function attrib_fetch;
unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS];
+
+ unsigned debug_flags;
};
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index f02dffe124..880d535320 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -34,6 +34,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_context.h"
#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
#include "cell_context.h"
#include "cell_draw_arrays.h"
@@ -76,14 +77,6 @@ cell_unmap_constant_buffers(struct cell_context *sp)
}
-boolean
-cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
- unsigned start, unsigned count)
-{
- return cell_draw_elements(pipe, NULL, 0, mode, start, count);
-}
-
-
/**
* Draw vertex arrays, with optional indexing.
@@ -92,7 +85,7 @@ cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
*
* XXX should the element buffer be specified/bound with a separate function?
*/
-boolean
+static boolean
cell_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -116,7 +109,7 @@ cell_draw_range_elements(struct pipe_context *pipe,
* Map vertex buffers
*/
for (i = 0; i < sp->num_vertex_buffers; i++) {
- void *buf = pipe->winsys->buffer_map(pipe->winsys,
+ void *buf = pipe_buffer_map(pipe->screen,
sp->vertex_buffer[i].buffer,
PIPE_BUFFER_USAGE_CPU_READ);
cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size);
@@ -124,7 +117,7 @@ cell_draw_range_elements(struct pipe_context *pipe,
}
/* Map index buffer, if present */
if (indexBuffer) {
- void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys,
+ void *mapped_indexes = pipe_buffer_map(pipe->screen,
indexBuffer,
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
@@ -143,11 +136,11 @@ cell_draw_range_elements(struct pipe_context *pipe,
*/
for (i = 0; i < sp->num_vertex_buffers; i++) {
draw_set_mapped_vertex_buffer(draw, i, NULL);
- pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer);
+ pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer);
}
if (indexBuffer) {
draw_set_mapped_element_buffer(draw, 0, NULL);
- pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ pipe_buffer_unmap(pipe->screen, indexBuffer);
}
/* Note: leave drawing surfaces mapped */
@@ -157,7 +150,7 @@ cell_draw_range_elements(struct pipe_context *pipe,
}
-boolean
+static boolean
cell_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -170,10 +163,29 @@ cell_draw_elements(struct pipe_context *pipe,
}
+static boolean
+cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count)
+{
+ return cell_draw_elements(pipe, NULL, 0, mode, start, count);
+}
+
-void
+static void
cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
{
struct cell_context *cell = cell_context(pipe);
draw_set_edgeflags(cell->draw, edgeflags);
}
+
+
+
+void
+cell_init_draw_functions(struct cell_context *cell)
+{
+ cell->pipe.draw_arrays = cell_draw_arrays;
+ cell->pipe.draw_elements = cell_draw_elements;
+ cell->pipe.draw_range_elements = cell_draw_range_elements;
+ cell->pipe.set_edgeflags = cell_set_edgeflags;
+}
+
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
index cd35ec17b4..148873aa67 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
@@ -29,26 +29,8 @@
#define CELL_DRAW_ARRAYS_H
-extern boolean
-cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
- unsigned start, unsigned count);
-
-extern boolean
-cell_draw_elements(struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned mode, unsigned start, unsigned count);
-
-extern boolean
-cell_draw_range_elements(struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned min_index,
- unsigned max_index,
- unsigned mode, unsigned start, unsigned count);
-
extern void
-cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);
+cell_init_draw_functions(struct cell_context *cell);
#endif /* CELL_DRAW_ARRAYS_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c
index 3aaf3de668..6596b72010 100644
--- a/src/gallium/drivers/cell/ppu/cell_flush.c
+++ b/src/gallium/drivers/cell/ppu/cell_flush.c
@@ -34,6 +34,9 @@
#include "draw/draw_context.h"
+/**
+ * Called via pipe->flush()
+ */
void
cell_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
@@ -50,16 +53,19 @@ cell_flush(struct pipe_context *pipe, unsigned flags,
flags |= CELL_FLUSH_WAIT;
draw_flush( cell->draw );
- cell_flush_int(pipe, flags);
+ cell_flush_int(cell, flags);
}
-/** internal flush */
+/**
+ * Cell internal flush function. Send the current batch buffer to all SPUs.
+ * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle.
+ * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero
+ */
void
-cell_flush_int(struct pipe_context *pipe, unsigned flags)
+cell_flush_int(struct cell_context *cell, unsigned flags)
{
static boolean flushing = FALSE; /* recursion catcher */
- struct cell_context *cell = cell_context(pipe);
uint i;
ASSERT(!flushing);
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h
index 8f0645c429..509ae6239a 100644
--- a/src/gallium/drivers/cell/ppu/cell_flush.h
+++ b/src/gallium/drivers/cell/ppu/cell_flush.h
@@ -36,7 +36,7 @@ cell_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence);
extern void
-cell_flush_int(struct pipe_context *pipe, unsigned flags);
+cell_flush_int(struct cell_context *cell, unsigned flags);
extern void
cell_flush_buffer_range(struct cell_context *cell, void *ptr,
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
new file mode 100644
index 0000000000..79a82ef72b
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -0,0 +1,862 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+/**
+ * Generate SPU per-fragment code (actually per-quad code).
+ * \author Brian Paul
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "rtasm/rtasm_ppc_spe.h"
+#include "cell_context.h"
+#include "cell_gen_fragment.h"
+
+
+
+/** Do extra optimizations? */
+#define OPTIMIZATIONS 1
+
+
+/**
+ * Generate SPE code to perform Z/depth testing.
+ *
+ * \param dsa Gallium depth/stencil/alpha state to gen code for
+ * \param f SPE function to append instruction onto.
+ * \param mask_reg register containing quad/pixel "alive" mask (in/out)
+ * \param ifragZ_reg register containing integer fragment Z values (in)
+ * \param ifbZ_reg register containing integer frame buffer Z values (in/out)
+ * \param zmask_reg register containing result of Z test/comparison (out)
+ */
+static void
+gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
+ struct spe_function *f,
+ int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
+{
+ ASSERT(dsa->depth.enabled);
+
+ switch (dsa->depth.func) {
+ case PIPE_FUNC_EQUAL:
+ /* zmask = (ifragZ == ifbZ) */
+ spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & zmask) */
+ spe_and(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_NOTEQUAL:
+ /* zmask = (ifragZ == ifbZ), i.e. the complement of the test */
+ spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & ~zmask) */
+ spe_andc(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_GREATER:
+ /* zmask = (ifragZ > ifbZ) */
+ spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & zmask) */
+ spe_and(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_LESS:
+ /* zmask = (ifbZ > ifragZ) */
+ spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
+ /* mask = (mask & zmask) */
+ spe_and(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ /* zmask = (ifragZ > ifbZ), i.e. the complement of the test */
+ spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & ~zmask) */
+ spe_andc(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_GEQUAL:
+ /* zmask = (ifbZ > ifragZ), i.e. the complement of the test */
+ spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
+ /* mask = (mask & ~zmask) */
+ spe_andc(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_NEVER:
+ spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */
+ spe_move(f, zmask_reg, mask_reg); /* zmask = mask */
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ /* mask unchanged */
+ spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */
+ break;
+
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ if (dsa->depth.writemask) {
+ /*
+ * If (pixel alive and ztest passed, i.e. mask bit set) {
+ * framebufferZ = fragmentZ;
+ * }
+ * OR,
+ * framebufferZ = (mask ? fragmentZ : framebufferZ);
+ */
+ spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
+ }
+}
+
+
+/**
+ * Generate SPE code to perform alpha testing.
+ *
+ * \param dsa Gallium depth/stencil/alpha state to gen code for
+ * \param f SPE function to append instruction onto.
+ * \param mask_reg register containing quad/pixel "alive" mask (in/out)
+ * \param fragA_reg register containing four fragment alpha values (in)
+ */
+static void
+gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
+ struct spe_function *f, int mask_reg, int fragA_reg)
+{
+ int ref_reg = spe_allocate_available_register(f);
+ int amask_reg = spe_allocate_available_register(f);
+
+ ASSERT(dsa->alpha.enabled);
+
+ if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
+ (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
+ /* load/splat the alpha reference float value */
+ spe_load_float(f, ref_reg, dsa->alpha.ref);
+ }
+
+ /* emit code to do the alpha comparison, updating 'mask' */
+ switch (dsa->alpha.func) {
+ case PIPE_FUNC_EQUAL:
+ /* amask = (fragA == ref) */
+ spe_fceq(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & amask) */
+ spe_and(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_NOTEQUAL:
+ /* amask = (fragA == ref) */
+ spe_fceq(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & ~amask) */
+ spe_andc(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_GREATER:
+ /* amask = (fragA > ref) */
+ spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & amask) */
+ spe_and(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_LESS:
+ /* amask = (ref > fragA) */
+ spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
+ /* mask = (mask & amask) */
+ spe_and(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ /* amask = (fragA > ref) */
+ spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & ~amask) */
+ spe_andc(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_GEQUAL:
+ /* amask = (ref > fragA) */
+ spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
+ /* mask = (mask & ~amask) */
+ spe_andc(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_NEVER:
+ spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ /* no-op, mask unchanged */
+ break;
+
+ default:
+ ASSERT(0);
+ break;
+ }
+
+#if OPTIMIZATIONS
+ /* if mask == {0,0,0,0} we're all done, return */
+ {
+ /* re-use amask reg here */
+ int tmp_reg = amask_reg;
+ /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
+ spe_orx(f, tmp_reg, mask_reg);
+ /* if tmp[0] == 0 then return from function call */
+ spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0);
+ }
+#endif
+
+ spe_release_register(f, ref_reg);
+ spe_release_register(f, amask_reg);
+}
+
+
+
+/**
+ * Generate SPE code to implement the given blend mode for a quad of pixels.
+ * \param f SPE function to append instruction onto.
+ * \param fragR_reg register with fragment red values (float) (in/out)
+ * \param fragG_reg register with fragment green values (float) (in/out)
+ * \param fragB_reg register with fragment blue values (float) (in/out)
+ * \param fragA_reg register with fragment alpha values (float) (in/out)
+ * \param fbRGBA_reg register with packed framebuffer colors (integer) (in)
+ */
+static void
+gen_blend(const struct pipe_blend_state *blend,
+ struct spe_function *f,
+ enum pipe_format color_format,
+ int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
+ int fbRGBA_reg)
+{
+ int term1R_reg = spe_allocate_available_register(f);
+ int term1G_reg = spe_allocate_available_register(f);
+ int term1B_reg = spe_allocate_available_register(f);
+ int term1A_reg = spe_allocate_available_register(f);
+
+ int term2R_reg = spe_allocate_available_register(f);
+ int term2G_reg = spe_allocate_available_register(f);
+ int term2B_reg = spe_allocate_available_register(f);
+ int term2A_reg = spe_allocate_available_register(f);
+
+ int fbR_reg = spe_allocate_available_register(f);
+ int fbG_reg = spe_allocate_available_register(f);
+ int fbB_reg = spe_allocate_available_register(f);
+ int fbA_reg = spe_allocate_available_register(f);
+
+ int one_reg = spe_allocate_available_register(f);
+ int tmp_reg = spe_allocate_available_register(f);
+
+ ASSERT(blend->blend_enable);
+
+ /* Unpack/convert framebuffer colors from four 32-bit packed colors
+ * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
+ * Each 8-bit color component is expanded into a float in [0.0, 1.0].
+ */
+ {
+ int mask_reg = spe_allocate_available_register(f);
+
+ /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
+ spe_fsmbi(f, mask_reg, 0x1111);
+
+ /* XXX there may be more clever ways to implement the following code */
+ switch (color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ /* fbB = fbRGBA & mask */
+ spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbG = fbRGBA & mask */
+ spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
+ /* fbG = fbG >> 8 */
+ spe_roti(f, fbG_reg, fbG_reg, -8);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbR = fbRGBA & mask */
+ spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
+ /* fbR = fbR >> 16 */
+ spe_roti(f, fbR_reg, fbR_reg, -16);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbA = fbRGBA & mask */
+ spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
+ /* fbA = fbA >> 24 */
+ spe_roti(f, fbA_reg, fbA_reg, -24);
+ break;
+
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /* fbA = fbRGBA & mask */
+ spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbR = fbRGBA & mask */
+ spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
+ /* fbR = fbR >> 8 */
+ spe_roti(f, fbR_reg, fbR_reg, -8);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbG = fbRGBA & mask */
+ spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
+ /* fbG = fbG >> 16 */
+ spe_roti(f, fbG_reg, fbG_reg, -16);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbB = fbRGBA & mask */
+ spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
+ /* fbB = fbB >> 24 */
+ spe_roti(f, fbB_reg, fbB_reg, -24);
+ break;
+
+ default:
+ ASSERT(0);
+ }
+
+ /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
+ spe_cuflt(f, fbR_reg, fbR_reg, 8);
+ spe_cuflt(f, fbG_reg, fbG_reg, 8);
+ spe_cuflt(f, fbB_reg, fbB_reg, 8);
+ spe_cuflt(f, fbA_reg, fbA_reg, 8);
+
+ spe_release_register(f, mask_reg);
+ }
+
+
+ /*
+ * Compute Src RGB terms
+ */
+ switch (blend->rgb_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ spe_move(f, term1R_reg, fragR_reg);
+ spe_move(f, term1G_reg, fragG_reg);
+ spe_move(f, term1B_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ spe_zero(f, term1R_reg);
+ spe_zero(f, term1G_reg);
+ spe_zero(f, term1B_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Src Alpha term
+ */
+ switch (blend->alpha_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ spe_move(f, term1A_reg, fragA_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
+ break;
+ /* XXX more cases (note: PIPE_BLENDFACTOR_ZERO is not handled here) */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest RGB terms
+ */
+ switch (blend->rgb_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ spe_move(f, term2R_reg, fbR_reg);
+ spe_move(f, term2G_reg, fbG_reg);
+ spe_move(f, term2B_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ spe_zero(f, term2R_reg);
+ spe_zero(f, term2G_reg);
+ spe_zero(f, term2B_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ /* one = {1.0, 1.0, 1.0, 1.0} */
+ spe_load_float(f, one_reg, 1.0f);
+ /* tmp = one - fragA */
+ spe_fs(f, tmp_reg, one_reg, fragA_reg);
+ /* term = fb * tmp */
+ spe_fm(f, term2R_reg, fbR_reg, tmp_reg);
+ spe_fm(f, term2G_reg, fbG_reg, tmp_reg);
+ spe_fm(f, term2B_reg, fbB_reg, tmp_reg);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest Alpha term
+ */
+ switch (blend->alpha_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ spe_move(f, term2A_reg, fbA_reg);
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ spe_zero(f, term2A_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ /* one = {1.0, 1.0, 1.0, 1.0} */
+ spe_load_float(f, one_reg, 1.0f);
+ /* tmp = one - fragA */
+ spe_fs(f, tmp_reg, one_reg, fragA_reg);
+ /* termA = fbA * tmp */
+ spe_fm(f, term2A_reg, fbA_reg, tmp_reg);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest RGB terms
+ */
+ switch (blend->rgb_func) {
+ case PIPE_BLEND_ADD:
+ spe_fa(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_fa(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_fa(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ spe_fs(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest A term
+ */
+ switch (blend->alpha_func) {
+ case PIPE_BLEND_ADD:
+ spe_fa(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ spe_release_register(f, term1R_reg);
+ spe_release_register(f, term1G_reg);
+ spe_release_register(f, term1B_reg);
+ spe_release_register(f, term1A_reg);
+
+ spe_release_register(f, term2R_reg);
+ spe_release_register(f, term2G_reg);
+ spe_release_register(f, term2B_reg);
+ spe_release_register(f, term2A_reg);
+
+ spe_release_register(f, fbR_reg);
+ spe_release_register(f, fbG_reg);
+ spe_release_register(f, fbB_reg);
+ spe_release_register(f, fbA_reg);
+
+ spe_release_register(f, one_reg);
+ spe_release_register(f, tmp_reg);
+}
+
+
+/**
+ * Generate code for the logic op stage.
+ * Placeholder: the body is empty, so no instructions are emitted yet.
+ *
+ * \param blend  the blend state (the caller invokes this only when
+ *               blend->logicop_enable is set)
+ * \param f  SPE function to append instructions onto
+ * \param fragRGBA_reg  register with the quad's packed fragment colors
+ * \param fbRGBA_reg  register with the quad's framebuffer colors
+ */
+static void
+gen_logicop(const struct pipe_blend_state *blend,
+ struct spe_function *f,
+ int fragRGBA_reg, int fbRGBA_reg)
+{
+ /* XXX to-do */
+ /* operate on 32-bit packed pixels, not float colors */
+}
+
+
+/**
+ * Generate code to apply the color write mask.
+ * Placeholder: the body is empty, so no instructions are emitted yet.
+ *
+ * \param colormask  the color write mask (the caller invokes this only
+ *                   when the mask is not 0xf)
+ * \param f  SPE function to append instructions onto
+ * \param fragRGBA_reg  register with the quad's packed fragment colors
+ * \param fbRGBA_reg  register with the quad's framebuffer colors
+ */
+static void
+gen_colormask(uint colormask,
+ struct spe_function *f,
+ int fragRGBA_reg, int fbRGBA_reg)
+{
+ /* XXX to-do */
+ /* operate on 32-bit packed pixels, not float colors */
+}
+
+
+
+/**
+ * Emit SPE instructions that pack a quad of float colors into four
+ * 32-bit integer pixels.
+ *
+ * \param f  SPE function to append instructions onto
+ * \param color_format  the dest color packing format
+ * \param r_reg  register containing four red values (in/clobbered)
+ * \param g_reg  register containing four green values (in/clobbered)
+ * \param b_reg  register containing four blue values (in/clobbered)
+ * \param a_reg  register containing four alpha values (in/clobbered)
+ * \param rgba_reg  register to store the packed RGBA colors (out)
+ */
+static void
+gen_pack_colors(struct spe_function *f,
+                enum pipe_format color_format,
+                int r_reg, int g_reg, int b_reg, int a_reg,
+                int rgba_reg)
+{
+   /* Step 1: float[4] in [0.0,1.0] -> int[4] in [0,~0], with clamping */
+   spe_cfltu(f, r_reg, r_reg, 32);
+   spe_cfltu(f, g_reg, g_reg, 32);
+   spe_cfltu(f, b_reg, b_reg, 32);
+   spe_cfltu(f, a_reg, a_reg, 32);
+
+   /* Step 2: move the most significant byte of each channel down to the
+    * least significant position, i.e. reg = reg >> 24.
+    */
+   spe_rotmi(f, r_reg, r_reg, -24);
+   spe_rotmi(f, g_reg, g_reg, -24);
+   spe_rotmi(f, b_reg, b_reg, -24);
+   spe_rotmi(f, a_reg, a_reg, -24);
+
+   /* Step 3: position each channel's byte as the surface format requires.
+    * The channel that lives in the low byte needs no shift.
+    */
+   switch (color_format) {
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      spe_roti(f, g_reg, g_reg, 8);   /* green <<= 8 */
+      spe_roti(f, r_reg, r_reg, 16);  /* red <<= 16 */
+      spe_roti(f, a_reg, a_reg, 24);  /* alpha <<= 24 */
+      break;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      spe_roti(f, r_reg, r_reg, 8);   /* red <<= 8 */
+      spe_roti(f, g_reg, g_reg, 16);  /* green <<= 16 */
+      spe_roti(f, b_reg, b_reg, 24);  /* blue <<= 24 */
+      break;
+   default:
+      /* unsupported color format */
+      ASSERT(0);
+   }
+
+   /* Step 4: OR the four channel registers together to form packed pixels.
+    * Eg: after shifting according to color_format we might have:
+    *   R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
+    *   G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
+    *   B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
+    *   A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
+    * and the merged result is:
+    *   RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
+    */
+   spe_or(f, rgba_reg, r_reg, g_reg);
+   spe_or(f, rgba_reg, rgba_reg, b_reg);
+   spe_or(f, rgba_reg, rgba_reg, a_reg);
+}
+
+
+
+
+/**
+ * Generate SPE code to implement the fragment operations (alpha test,
+ * depth test, stencil test, blending, colormask, and final
+ * framebuffer write) as specified by the current context state.
+ *
+ * Logically, this code will be called after running the fragment
+ * shader.  But under some circumstances we could run some of this
+ * code before the fragment shader to cull fragments/quads that are
+ * totally occluded/discarded.
+ *
+ * XXX only the PIPE_FORMAT_S8Z24_UNORM / PIPE_FORMAT_X8Z24_UNORM
+ * z/stencil layouts are handled right now; the other formats and all
+ * stencil paths still end in ASSERT(0).
+ *
+ * See the spu_default_fragment_ops() function to see how the per-fragment
+ * operations would be done with ordinary C code.
+ * The code we generate here though has no branches, is SIMD, etc and
+ * should be much faster.
+ *
+ * \param cell  the rendering context (in)
+ * \param f     the generated function (out)
+ */
+void
+gen_fragment_function(struct cell_context *cell, struct spe_function *f)
+{
+   const struct pipe_depth_stencil_alpha_state *dsa =
+      &cell->depth_stencil->base;
+   const struct pipe_blend_state *blend = &cell->blend->base;
+   const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
+
+   /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
+   const int x_reg = 3;           /* uint */
+   const int y_reg = 4;           /* uint */
+   const int color_tile_reg = 5;  /* tile_t * */
+   const int depth_tile_reg = 6;  /* tile_t * */
+   const int fragZ_reg = 7;       /* vector float */
+   const int fragR_reg = 8;       /* vector float */
+   const int fragG_reg = 9;       /* vector float */
+   const int fragB_reg = 10;      /* vector float */
+   const int fragA_reg = 11;      /* vector float */
+   const int mask_reg = 12;       /* vector uint */
+
+   /* offset of quad from start of tile
+    * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
+    */
+   int quad_offset_reg;
+
+   int fbRGBA_reg;  /**< framebuffer's RGBA colors for quad */
+   int fbZS_reg;    /**< framebuffer's combined z/stencil values for quad */
+
+   spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+
+   /* Reserve the registers that carry the function's parameters so the
+    * allocator does not hand them out as temporaries.
+    */
+   spe_allocate_register(f, x_reg);
+   spe_allocate_register(f, y_reg);
+   spe_allocate_register(f, color_tile_reg);
+   spe_allocate_register(f, depth_tile_reg);
+   spe_allocate_register(f, fragZ_reg);
+   spe_allocate_register(f, fragR_reg);
+   spe_allocate_register(f, fragG_reg);
+   spe_allocate_register(f, fragB_reg);
+   spe_allocate_register(f, fragA_reg);
+   spe_allocate_register(f, mask_reg);
+
+   quad_offset_reg = spe_allocate_available_register(f);
+   fbRGBA_reg = spe_allocate_available_register(f);
+   fbZS_reg = spe_allocate_available_register(f);
+
+   /* compute offset of quad from start of tile, in bytes:
+    *   offset = ((y / 2) * 16 + (x / 2)) * 16
+    * The first factor of 16 depends on TILE_SIZE == 32 (checked below).
+    */
+   {
+      int x2_reg = spe_allocate_available_register(f);
+      int y2_reg = spe_allocate_available_register(f);
+
+      ASSERT(TILE_SIZE == 32);
+
+      spe_rotmi(f, x2_reg, x_reg, -1);  /* x2 = x / 2 */
+      spe_rotmi(f, y2_reg, y_reg, -1);  /* y2 = y / 2 */
+      spe_shli(f, y2_reg, y2_reg, 4);   /* y2 *= 16 */
+      spe_a(f, quad_offset_reg, y2_reg, x2_reg);  /* offset = y2 + x2 */
+      spe_shli(f, quad_offset_reg, quad_offset_reg, 4);   /* offset *= 16 */
+
+      spe_release_register(f, x2_reg);
+      spe_release_register(f, y2_reg);
+   }
+
+
+   if (dsa->alpha.enabled) {
+      gen_alpha_test(dsa, f, mask_reg, fragA_reg);
+   }
+
+   if (dsa->depth.enabled || dsa->stencil[0].enabled) {
+      const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
+      boolean write_depth_stencil;
+
+      int fbZ_reg = spe_allocate_available_register(f); /* Z values */
+      int fbS_reg = spe_allocate_available_register(f); /* Stencil values */
+
+      /* fetch quad of depth/stencil values from tile at (x,y) */
+      /* Load: fbZS_reg = memory[depth_tile_reg + quad_offset_reg] */
+      spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+
+      if (dsa->depth.enabled) {
+         /* Extract Z bits from fbZS_reg into fbZ_reg */
+         if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
+             zs_format == PIPE_FORMAT_X8Z24_UNORM) {
+            /* Named distinctly so it does not shadow the fragment mask
+             * register (mask_reg) declared at function scope.
+             */
+            int zbits_mask_reg = spe_allocate_available_register(f);
+            spe_fsmbi(f, zbits_mask_reg, 0x7777);  /* mask[0,1,2,3] = 0x00ffffff */
+            spe_and(f, fbZ_reg, fbZS_reg, zbits_mask_reg);  /* fbZ = fbZS & mask */
+            spe_release_register(f, zbits_mask_reg);
+            /* OK, fbZ_reg has four 24-bit Z values now */
+         }
+         else {
+            /* XXX handle other z/stencil formats */
+            ASSERT(0);
+         }
+
+         /* Convert fragZ values from float[4] to uint[4] */
+         if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
+             zs_format == PIPE_FORMAT_X8Z24_UNORM ||
+             zs_format == PIPE_FORMAT_Z24S8_UNORM ||
+             zs_format == PIPE_FORMAT_Z24X8_UNORM) {
+            /* 24-bit Z values */
+            int scale_reg = spe_allocate_available_register(f);
+
+            /* scale_reg[0,1,2,3] = float(2^24-1) */
+            spe_load_float(f, scale_reg, (float) 0xffffff);
+
+            /* XXX these two instructions might be combined */
+            spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */
+            spe_cfltu(f, fragZ_reg, fragZ_reg, 0);  /* fragZ = (int) fragZ */
+
+            spe_release_register(f, scale_reg);
+         }
+         else {
+            /* XXX handle 16-bit Z format */
+            ASSERT(0);
+         }
+      }
+
+      if (dsa->stencil[0].enabled) {
+         /* Extract Stencil bits from fbZS_reg into fbS_reg */
+         if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
+             zs_format == PIPE_FORMAT_X8Z24_UNORM) {
+            /* XXX extract with a shift */
+            ASSERT(0);
+         }
+         else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
+                  zs_format == PIPE_FORMAT_Z24X8_UNORM) {
+            /* XXX extract with a mask */
+            ASSERT(0);
+         }
+      }
+
+
+      if (dsa->stencil[0].enabled) {
+         /* XXX this may involve depth testing too */
+         // gen_stencil_test(dsa, f, ... );
+         ASSERT(0);
+      }
+      else if (dsa->depth.enabled) {
+         int zmask_reg = spe_allocate_available_register(f);
+         gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
+         spe_release_register(f, zmask_reg);
+      }
+
+      /* do we need to write Z and/or Stencil back into framebuffer? */
+      write_depth_stencil = (dsa->depth.writemask |
+                             dsa->stencil[0].write_mask |
+                             dsa->stencil[1].write_mask);
+
+      if (write_depth_stencil) {
+         /* Merge latest Z and Stencil values into fbZS_reg.
+          * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
+          * fbS_reg has four 8-bit stencil values in bits [7..0].
+          *
+          * NOTE(review): when stencil is disabled, nothing in this
+          * function emits code that writes fbS_reg before it is shifted
+          * and OR'ed below -- confirm the stored stencil bits are valid.
+          */
+         if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
+             zs_format == PIPE_FORMAT_X8Z24_UNORM) {
+            spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
+            spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
+         }
+         else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
+                  zs_format == PIPE_FORMAT_Z24X8_UNORM) {
+            /* XXX to do */
+            ASSERT(0);
+         }
+         else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
+            /* XXX to do */
+            ASSERT(0);
+         }
+         else if (zs_format == PIPE_FORMAT_S8_UNORM) {
+            /* XXX to do */
+            ASSERT(0);
+         }
+         else {
+            /* bad zs_format */
+            ASSERT(0);
+         }
+
+         /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
+         spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+      }
+
+      spe_release_register(f, fbZ_reg);
+      spe_release_register(f, fbS_reg);
+   }
+
+
+   /* Get framebuffer quad/colors.  We'll need these for blending,
+    * color masking, and to obey the quad/pixel mask.
+    * Load: fbRGBA_reg = memory[color_tile + quad_offset]
+    * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
+    * we could skip this load.
+    */
+   spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
+
+
+   if (blend->blend_enable) {
+      gen_blend(blend, f, color_format,
+                fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
+   }
+
+   /*
+    * Write fragment colors to framebuffer/tile.
+    * This involves converting the fragment colors from float[4] to the
+    * tile's specific format and obeying the quad/pixel mask.
+    */
+   {
+      int rgba_reg = spe_allocate_available_register(f);
+
+      /* Pack four float colors as four 32-bit int colors */
+      gen_pack_colors(f, color_format,
+                      fragR_reg, fragG_reg, fragB_reg, fragA_reg,
+                      rgba_reg);
+
+      if (blend->logicop_enable) {
+         gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
+      }
+
+      if (blend->colormask != 0xf) {
+         gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
+      }
+
+
+      /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
+       * if (mask[i])
+       *    rgba[i] = rgba[i];
+       * else
+       *    rgba[i] = framebuffer[i];
+       */
+      spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
+
+      /* Store updated quad in tile:
+       * memory[color_tile + quad_offset] = rgba_reg;
+       */
+      spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
+
+      spe_release_register(f, rgba_reg);
+   }
+
+   /* Debug output; printed before the return instruction is appended, so
+    * the count does not include it.
+    */
+   printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
+
+   spe_bi(f, SPE_REG_RA, 0, 0);  /* return from function call */
+
+
+   spe_release_register(f, fbRGBA_reg);
+   spe_release_register(f, fbZS_reg);
+   spe_release_register(f, quad_offset_reg);
+}
+
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
new file mode 100644
index 0000000000..0ea0fc690c
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_GEN_FRAGMENT_H
+#define CELL_GEN_FRAGMENT_H
+
+
+/* Forward declarations so that this header can be included on its own,
+ * independent of which other headers precede it.
+ */
+struct cell_context;
+struct spe_function;
+
+
+/**
+ * Generate the SPE code implementing the per-fragment operations for the
+ * current context state.
+ *
+ * \param cell  the rendering context (in)
+ * \param f     the generated function (out)
+ */
+extern void
+gen_fragment_function(struct cell_context *cell, struct spe_function *f);
+
+
+#endif /* CELL_GEN_FRAGMENT_H */
+
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
index fe5437023b..e04cf5f274 100644
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
@@ -34,6 +34,7 @@
#include "pipe/p_inlines.h"
#include "draw/draw_context.h"
#include "cell_context.h"
+#include "cell_flush.h"
#include "cell_state.h"
#include "cell_texture.h"
#include "cell_state_per_fragment.h"
@@ -130,8 +131,9 @@ cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth)
}
-static void cell_set_clip_state( struct pipe_context *pipe,
- const struct pipe_clip_state *clip )
+static void
+cell_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
{
struct cell_context *cell = cell_context(pipe);
@@ -310,8 +312,21 @@ cell_set_framebuffer_state(struct pipe_context *pipe,
cell->zsbuf_map = NULL;
}
- /* update my state */
- cell->framebuffer = *fb;
+ /* Finish any pending rendering to the current surface before
+ * installing a new surface!
+ */
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
+
+ /* update my state
+ * (this is also where old surfaces will finally get freed)
+ */
+ cell->framebuffer.width = fb->width;
+ cell->framebuffer.height = fb->height;
+ cell->framebuffer.num_cbufs = fb->num_cbufs;
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]);
+ }
+ pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf);
/* map new surfaces */
if (csurf)
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c
index 973c0b1aa1..9508227e29 100644
--- a/src/gallium/drivers/cell/ppu/cell_spu.c
+++ b/src/gallium/drivers/cell/ppu/cell_spu.c
@@ -26,6 +26,11 @@
**************************************************************************/
+/**
+ * Utility/wrappers for communicating with the SPUs.
+ */
+
+
#include <pthread.h>
#include "cell_spu.h"
@@ -40,6 +45,9 @@ helpful headers:
*/
+/**
+ * Cell/SPU info that's not per-context.
+ */
struct cell_global_info cell_global;
@@ -74,7 +82,11 @@ wait_mbox_message(spe_context_ptr_t ctx)
}
-static void *cell_thread_function(void *arg)
+/**
+ * Called by pthread_create() to spawn an SPU thread.
+ */
+static void *
+cell_thread_function(void *arg)
{
struct cell_init_info *init = (struct cell_init_info *) arg;
unsigned entry = SPE_DEFAULT_ENTRY;
@@ -92,7 +104,10 @@ static void *cell_thread_function(void *arg)
/**
- * Create the SPU threads
+ * Create the SPU threads. This is done once during driver initialization.
+ * This involves setting up the "init" message which is sent to each SPU.
+ * The init message specifies an SPU id, total number of SPUs, location
+ * and number of batch buffers, etc.
*/
void
cell_start_spus(struct cell_context *cell)
@@ -100,7 +115,6 @@ cell_start_spus(struct cell_context *cell)
static boolean one_time_init = FALSE;
uint i, j;
-
if (one_time_init) {
fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
"on Cell.\n");
@@ -120,6 +134,7 @@ cell_start_spus(struct cell_context *cell)
for (i = 0; i < cell->num_spus; i++) {
cell_global.inits[i].id = i;
cell_global.inits[i].num_spus = cell->num_spus;
+ cell_global.inits[i].debug_flags = cell->debug_flags;
cell_global.inits[i].cmd = &cell_global.command[i];
for (j = 0; j < CELL_NUM_BUFFERS; j++) {
cell_global.inits[i].buffers[j] = cell->buffer[j];
@@ -137,14 +152,17 @@ cell_start_spus(struct cell_context *cell)
exit(1);
}
- pthread_create(&cell_global.spe_threads[i], NULL, &cell_thread_function,
- &cell_global.inits[i]);
+ pthread_create(&cell_global.spe_threads[i], /* returned thread handle */
+ NULL, /* pthread attribs */
+ &cell_thread_function, /* start routine */
+ &cell_global.inits[i]); /* thread argument */
}
}
/**
* Tell all the SPUs to stop/exit.
+ * This is done when the driver's exiting / cleaning up.
*/
void
cell_spu_exit(struct cell_context *cell)
diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h
index 82580ea35a..a7771a55a3 100644
--- a/src/gallium/drivers/cell/ppu/cell_state.h
+++ b/src/gallium/drivers/cell/ppu/cell_state.h
@@ -48,19 +48,17 @@
#define CELL_NEW_VERTEX_INFO 0x8000
-void cell_set_vertex_elements(struct pipe_context *,
- unsigned count,
- const struct pipe_vertex_element *);
+extern void
+cell_update_derived( struct cell_context *softpipe );
-void cell_set_vertex_buffers(struct pipe_context *,
- unsigned count,
- const struct pipe_vertex_buffer *);
-void cell_update_derived( struct cell_context *softpipe );
+extern void
+cell_init_shader_functions(struct cell_context *cell);
-void
-cell_init_shader_functions(struct cell_context *cell);
+extern void
+cell_init_vertex_functions(struct cell_context *cell);
+
#endif /* CELL_STATE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c
index 8ab938a02a..efc4f78364 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_derived.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c
@@ -35,21 +35,6 @@
#include "cell_state_emit.h"
-static int
-find_vs_output(const struct cell_vertex_shader_state *vs,
- uint semantic_name,
- uint semantic_index)
-{
- uint i;
- for (i = 0; i < vs->info.num_outputs; i++) {
- if (vs->info.output_semantic_name[i] == semantic_name &&
- vs->info.output_semantic_index[i] == semantic_index)
- return i;
- }
- return -1;
-}
-
-
/**
* Determine how to map vertex program outputs to fragment program inputs.
* Basically, this will be used when computing the triangle interpolation
@@ -58,7 +43,6 @@ find_vs_output(const struct cell_vertex_shader_state *vs,
static void
calculate_vertex_layout( struct cell_context *cell )
{
- const struct cell_vertex_shader_state *vs = cell->vs;
const struct cell_fragment_shader_state *fs = cell->fs;
const enum interp_mode colorInterp
= cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
@@ -82,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell )
vinfo->num_attribs = 0;
/* we always want to emit vertex pos */
- src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
@@ -98,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell )
break;
case TGSI_SEMANTIC_COLOR:
- src = find_vs_output(vs, TGSI_SEMANTIC_COLOR,
- fs->info.input_semantic_index[i]);
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR,
+ fs->info.input_semantic_index[i]);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
break;
case TGSI_SEMANTIC_FOG:
- src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
#if 1
if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */
src = 0;
@@ -116,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell )
case TGSI_SEMANTIC_GENERIC:
/* this includes texcoords and varying vars */
- src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC,
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC,
fs->info.input_semantic_index[i]);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
@@ -163,6 +147,9 @@ compute_cliprect(struct cell_context *sp)
+/**
+ * Update derived state, send current state to SPUs prior to rendering.
+ */
void cell_update_derived( struct cell_context *cell )
{
if (cell->dirty & (CELL_NEW_RASTERIZER |
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 9d88c1cf3d..180b89c1f6 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -27,6 +27,7 @@
#include "util/u_memory.h"
#include "cell_context.h"
+#include "cell_gen_fragment.h"
#include "cell_state.h"
#include "cell_state_emit.h"
#include "cell_state_per_fragment.h"
@@ -47,27 +48,13 @@ emit_state_cmd(struct cell_context *cell, uint cmd,
}
-
+/**
+ * For state marked as 'dirty', construct a state-update command block
+ * and insert it into the current batch buffer.
+ */
void
cell_emit_state(struct cell_context *cell)
{
- if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_BLEND)) {
- struct cell_command_logicop logicop;
-
- if (cell->logic_op.store != NULL) {
- spe_release_func(& cell->logic_op);
- }
-
- cell_generate_logic_op(& cell->logic_op,
- & cell->blend->base,
- cell->framebuffer.cbufs[0]);
-
- logicop.base = (intptr_t) cell->logic_op.store;
- logicop.size = 64 * 4;
- emit_state_cmd(cell, CELL_CMD_STATE_LOGICOP, &logicop,
- sizeof(logicop));
- }
-
if (cell->dirty & CELL_NEW_FRAMEBUFFER) {
struct pipe_surface *cbuf = cell->framebuffer.cbufs[0];
struct pipe_surface *zbuf = cell->framebuffer.zsbuf;
@@ -80,44 +67,33 @@ cell_emit_state(struct cell_context *cell)
fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE;
fb->width = cell->framebuffer.width;
fb->height = cell->framebuffer.height;
+#if 0
+ printf("EMIT color format %s\n", pf_name(fb->color_format));
+ printf("EMIT depth format %s\n", pf_name(fb->depth_format));
+#endif
}
- if (cell->dirty & CELL_NEW_BLEND) {
- struct cell_command_blend blend;
- if (cell->blend != NULL) {
- blend.base = (intptr_t) cell->blend->code.store;
- blend.size = (char *) cell->blend->code.csr
- - (char *) cell->blend->code.store;
- blend.read_fb = TRUE;
- } else {
- blend.base = 0;
- blend.size = 0;
- blend.read_fb = FALSE;
- }
-
- emit_state_cmd(cell, CELL_CMD_STATE_BLEND, &blend, sizeof(blend));
- }
-
- if (cell->dirty & CELL_NEW_DEPTH_STENCIL) {
- struct cell_command_depth_stencil_alpha_test dsat;
-
-
- if (cell->depth_stencil != NULL) {
- dsat.base = (intptr_t) cell->depth_stencil->code.store;
- dsat.size = (char *) cell->depth_stencil->code.csr
- - (char *) cell->depth_stencil->code.store;
- dsat.read_depth = TRUE;
- dsat.read_stencil = FALSE;
- } else {
- dsat.base = 0;
- dsat.size = 0;
- dsat.read_depth = FALSE;
- dsat.read_stencil = FALSE;
- }
-
- emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat,
- sizeof(dsat));
+ if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
+ CELL_NEW_DEPTH_STENCIL |
+ CELL_NEW_BLEND)) {
+ /* XXX we don't want to always do codegen here. We should have
+ * a hash/lookup table to cache previous results...
+ */
+ struct cell_command_fragment_ops *fops
+ = cell_batch_alloc(cell, sizeof(*fops));
+ struct spe_function spe_code;
+
+ /* generate new code */
+ gen_fragment_function(cell, &spe_code);
+ /* put the new code into the batch buffer */
+ fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
+ memcpy(&fops->code, spe_code.store,
+ SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ fops->dsa = cell->depth_stencil->base;
+ fops->blend = cell->blend->base;
+ /* free codegen buffer */
+ spe_release_func(&spe_code);
}
if (cell->dirty & CELL_NEW_SAMPLER) {
@@ -157,7 +133,8 @@ cell_emit_state(struct cell_context *cell)
emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO,
&cell->vertex_info, sizeof(struct vertex_info));
}
-
+
+#if 0
if (cell->dirty & CELL_NEW_VS) {
const struct draw_context *const draw = cell->draw;
struct cell_shader_info info;
@@ -170,7 +147,7 @@ cell_emit_state(struct cell_context *cell)
info.immediates = (uintptr_t) draw->vs.machine.Imms;
info.num_immediates = draw->vs.machine.ImmLimit / 4;
- emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS,
- & info, sizeof(info));
+ emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info));
}
+#endif
}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
index 53ae3aa50e..78cb446c14 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
@@ -132,9 +132,9 @@ emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
/**
+ * Generate code to perform Z testing. Four Z values are tested at once.
* \param dsa Current depth-test state
* \param f Function to which code should be appended
- * \param m Mask of allocated / free SPE registers
* \param mask Index of register to contain depth-pass mask
* \param stored Index of register containing values from depth buffer
* \param calculated Index of register containing per-fragment depth values
@@ -198,6 +198,7 @@ emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
/**
+ * Generate code to apply the stencil operation (after testing).
* \note Emits a maximum of 5 instructions.
*
* \warning
@@ -222,9 +223,13 @@ emit_stencil_op(struct spe_function *f,
spe_il(f, result, ref);
break;
case PIPE_STENCIL_OP_INCR:
+ /* clamp = [0xff, 0xff, 0xff, 0xff] */
spe_il(f, clamp, 0x0ff);
+ /* result[i] = in[i] + 1 */
spe_ai(f, result, in, 1);
+ /* clamp_mask[i] = (result[i] > 0xff) */
spe_clgti(f, clamp_mask, result, 0x0ff);
+ /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
spe_selb(f, result, result, clamp, clamp_mask);
break;
case PIPE_STENCIL_OP_DECR:
@@ -259,10 +264,10 @@ emit_stencil_op(struct spe_function *f,
/**
+ * Generate code to do stencil test. Four pixels are tested at once.
* \param dsa Depth / stencil test state
* \param face 0 for front face, 1 for back face
* \param f Function to append instructions to
- * \param reg_mask Mask of allocated registers
* \param mask Register containing mask of fragments passing the
* alpha test
* \param depth_mask Register containing mask of fragments passing the
@@ -310,13 +315,14 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
switch (dsa->stencil[face].func) {
case PIPE_FUNC_NEVER:
- spe_il(f, stencil_mask, 0);
+ spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
break;
case PIPE_FUNC_NOTEQUAL:
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_EQUAL:
+ /* stencil_mask[i] = (stored[i] == ref) */
spe_ceqi(f, stencil_mask, stored, ref);
break;
@@ -324,6 +330,8 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_GREATER:
+ complement = TRUE;
+ /* stencil_mask[i] = (stored[i] > ref) */
spe_clgti(f, stencil_mask, stored, ref);
break;
@@ -331,8 +339,11 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_GEQUAL:
+ /* stencil_mask[i] = (stored[i] > ref) */
spe_clgti(f, stencil_mask, stored, ref);
+ /* tmp[i] = (stored[i] == ref) */
spe_ceqi(f, tmp, stored, ref);
+ /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
spe_or(f, stencil_mask, stencil_mask, tmp);
break;
@@ -461,7 +472,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
* + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
* up to 64 to make it a happy power-of-two.
*/
- spe_init_func(f, 4 * 64);
+ spe_init_func(f, SPE_INST_SIZE * 64);
/* Allocate registers for the function's input parameters. Cleverly (and
@@ -540,7 +551,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
spe_selb(f, depth, depth, zvals, mask);
}
- spe_bi(f, 0, 0, 0);
+ spe_bi(f, 0, 0, 0); /* return from function call */
#if 0
@@ -956,7 +967,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb)
* + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to
* make it a happy power-of-two.
*/
- spe_init_func(f, 4 * 64);
+ spe_init_func(f, SPE_INST_SIZE * 64);
const int frag[4] = {
@@ -1144,9 +1155,10 @@ cell_generate_alpha_blend(struct cell_blend_state *cb)
}
-int PC_OFFSET(const struct spe_function *f, const void *d)
+static int
+PC_OFFSET(const struct spe_function *f, const void *d)
{
- const intptr_t pc = (intptr_t) f->csr;
+ const intptr_t pc = (intptr_t) &f->store[f->num_inst];
const intptr_t ea = ~0x0f & (intptr_t) d;
return (ea - pc) >> 2;
@@ -1178,7 +1190,7 @@ cell_generate_logic_op(struct spe_function *f,
* bytes (equiv. to 8 instructions) are needed for data storage. Round up
* to 64 to make it a happy power-of-two.
*/
- spe_init_func(f, 4 * 64);
+ spe_init_func(f, SPE_INST_SIZE * 64);
/* Pixel colors in framebuffer format in AoS layout.
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
index 86bcad05e9..97e44eeb1a 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c
@@ -53,7 +53,10 @@ cell_vertex_shader_state(void *shader)
}
-
+/**
+ * Create fragment shader state.
+ * Called via pipe->create_fs_state()
+ */
static void *
cell_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
@@ -77,6 +80,9 @@ cell_create_fs_state(struct pipe_context *pipe,
}
+/**
+ * Called via pipe->bind_fs_state()
+ */
static void
cell_bind_fs_state(struct pipe_context *pipe, void *fs)
{
@@ -88,6 +94,9 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs)
}
+/**
+ * Called via pipe->delete_fs_state()
+ */
static void
cell_delete_fs_state(struct pipe_context *pipe, void *fs)
{
@@ -98,6 +107,10 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs)
}
+/**
+ * Create vertex shader state.
+ * Called via pipe->create_vs_state()
+ */
static void *
cell_create_vs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
@@ -128,6 +141,9 @@ cell_create_vs_state(struct pipe_context *pipe,
}
+/**
+ * Called via pipe->bind_vs_state()
+ */
static void
cell_bind_vs_state(struct pipe_context *pipe, void *vs)
{
@@ -142,6 +158,9 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs)
}
+/**
+ * Called via pipe->delete_vs_state()
+ */
static void
cell_delete_vs_state(struct pipe_context *pipe, void *vs)
{
@@ -154,6 +173,9 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs)
}
+/**
+ * Called via pipe->set_constant_buffer()
+ */
static void
cell_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
@@ -166,7 +188,7 @@ cell_set_constant_buffer(struct pipe_context *pipe,
assert(index == 0);
/* note: reference counting */
- pipe_buffer_reference(ws,
+ winsys_buffer_reference(ws,
&cell->constants[shader].buffer,
buf->buffer);
cell->constants[shader].size = buf->size;
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
index 114684c2a3..fbe55c8472 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
@@ -35,7 +35,7 @@
#include "draw/draw_context.h"
-void
+static void
cell_set_vertex_elements(struct pipe_context *pipe,
unsigned count,
const struct pipe_vertex_element *elements)
@@ -53,7 +53,7 @@ cell_set_vertex_elements(struct pipe_context *pipe,
}
-void
+static void
cell_set_vertex_buffers(struct pipe_context *pipe,
unsigned count,
const struct pipe_vertex_buffer *buffers)
@@ -69,3 +69,11 @@ cell_set_vertex_buffers(struct pipe_context *pipe,
draw_set_vertex_buffers(cell->draw, count, buffers);
}
+
+
+/**
+ * Install the vertex-state setters in the context's pipe_context
+ * function table. cell_set_vertex_buffers() and
+ * cell_set_vertex_elements() are static to this file, so they are
+ * reached only through the pointers assigned here.
+ */
+void
+cell_init_vertex_functions(struct cell_context *cell)
+{
+ cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
+ cell->pipe.set_vertex_elements = cell_set_vertex_elements;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c
index d9e3b510dc..732c64082e 100644
--- a/src/gallium/drivers/cell/ppu/cell_surface.c
+++ b/src/gallium/drivers/cell/ppu/cell_surface.c
@@ -25,108 +25,13 @@
*
**************************************************************************/
-#include "pipe/p_defines.h"
-#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
-#include "util/u_memory.h"
#include "util/u_rect.h"
-#include "util/u_tile.h"
-
#include "cell_context.h"
-#include "cell_surface.h"
-
-
-static void
-cell_surface_copy(struct pipe_context *pipe,
- boolean do_flip,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- struct pipe_surface *src,
- unsigned srcx, unsigned srcy,
- unsigned width, unsigned height)
-{
- assert( dst->cpp == src->cpp );
-
- pipe_copy_rect(pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE),
- &dst->block,
- dst->stride,
- dstx, dsty,
- width, height,
- pipe_surface_map(src, PIPE_BUFFER_USAGE_CPU_READ),
- do_flip ? -src->stride : src->stride,
- srcx, do_flip ? height - 1 - srcy : srcy);
-
- pipe_surface_unmap(src);
- pipe_surface_unmap(dst);
-}
-
-
-static void *
-get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y)
-{
- return (char *)dst_map + y / dst->block.height * dst->stride + x / dst->block.width * dst->block.size;
-}
-
-
-#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8))
-
-
-/**
- * Fill a rectangular sub-region. Need better logic about when to
- * push buffers into AGP - will currently do so whenever possible.
- */
-static void
-cell_surface_fill(struct pipe_context *pipe,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height, unsigned value)
-{
- unsigned i, j;
- void *dst_map = pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE);
-
- assert(dst->stride > 0);
-
- switch (dst->block.size) {
- case 1:
- case 2:
- case 4:
- pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value);
- break;
- case 8:
- {
- /* expand the 4-byte clear value to an 8-byte value */
- ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty);
- ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff);
- ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff);
- ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff);
- ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff);
- val0 = (val0 << 8) | val0;
- val1 = (val1 << 8) | val1;
- val2 = (val2 << 8) | val2;
- val3 = (val3 << 8) | val3;
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- row[j*4+0] = val0;
- row[j*4+1] = val1;
- row[j*4+2] = val2;
- row[j*4+3] = val3;
- }
- row += dst->stride/2;
- }
- }
- break;
- default:
- assert(0);
- break;
- }
-
- pipe_surface_unmap( dst );
-}
+/**
+ * Install the surface copy/fill entry points in the context's
+ * pipe_context function table. The driver-local implementations are
+ * replaced by the shared util_surface_copy()/util_surface_fill()
+ * helpers (presumably declared in util/u_rect.h, the one util header
+ * this file still includes -- TODO confirm).
+ */
void
cell_init_surface_functions(struct cell_context *cell)
{
- cell->pipe.surface_copy = cell_surface_copy;
- cell->pipe.surface_fill = cell_surface_fill;
+ cell->pipe.surface_copy = util_surface_copy;
+ cell->pipe.surface_fill = util_surface_fill;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index 5a0942bbd6..b6590dfb86 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -63,19 +63,30 @@ cell_texture_layout(struct cell_texture * spt)
spt->buffer_size = 0;
for ( level = 0 ; level <= pt->last_level ; level++ ) {
+ unsigned size;
+ unsigned w_tile, h_tile;
+
+ /* width, height, rounded up to tile size */
+ w_tile = align(width, TILE_SIZE);
+ h_tile = align(height, TILE_SIZE);
+
pt->width[level] = width;
pt->height[level] = height;
pt->depth[level] = depth;
- pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
+ pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile);
+ pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile);
spt->stride[level] = pt->nblocksx[level] * pt->block.size;
spt->level_offset[level] = spt->buffer_size;
- spt->buffer_size += (pt->nblocksy[level] *
- ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
- pt->nblocksx[level] * pt->block.size);
+ size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size;
+ if (pt->target == PIPE_TEXTURE_CUBE)
+ size *= 6;
+ else
+ size *= depth;
+
+ spt->buffer_size += size;
width = minify(width);
height = minify(height);
@@ -85,8 +96,8 @@ cell_texture_layout(struct cell_texture * spt)
static struct pipe_texture *
-cell_texture_create_screen(struct pipe_screen *screen,
- const struct pipe_texture *templat)
+cell_texture_create(struct pipe_screen *screen,
+ const struct pipe_texture *templat)
{
struct pipe_winsys *ws = screen->winsys;
struct cell_texture *spt = CALLOC_STRUCT(cell_texture);
@@ -113,8 +124,8 @@ cell_texture_create_screen(struct pipe_screen *screen,
static void
-cell_texture_release_screen(struct pipe_screen *screen,
- struct pipe_texture **pt)
+cell_texture_release(struct pipe_screen *screen,
+ struct pipe_texture **pt)
{
if (!*pt)
return;
@@ -130,7 +141,7 @@ cell_texture_release_screen(struct pipe_screen *screen,
DBG("%s deleting %p\n", __FUNCTION__, (void *) spt);
*/
- pipe_buffer_reference(screen->winsys, &spt->buffer, NULL);
+ pipe_buffer_reference(screen, &spt->buffer, NULL);
FREE(spt);
}
@@ -138,6 +149,7 @@ cell_texture_release_screen(struct pipe_screen *screen,
}
+#if 0
static void
cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture,
uint face, uint levelsMask)
@@ -145,13 +157,14 @@ cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture,
/* XXX TO DO: re-tile the texture data ... */
}
+#endif
static struct pipe_surface *
-cell_get_tex_surface_screen(struct pipe_screen *screen,
- struct pipe_texture *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned usage)
+cell_get_tex_surface(struct pipe_screen *screen,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned usage)
{
struct pipe_winsys *ws = screen->winsys;
struct cell_texture *spt = cell_texture(pt);
@@ -161,7 +174,7 @@ cell_get_tex_surface_screen(struct pipe_screen *screen,
if (ps) {
assert(ps->refcount);
assert(ps->winsys);
- pipe_buffer_reference(ws, &ps->buffer, spt->buffer);
+ winsys_buffer_reference(ws, &ps->buffer, spt->buffer);
ps->format = pt->format;
ps->block = pt->block;
ps->width = pt->width[level];
@@ -174,12 +187,17 @@ cell_get_tex_surface_screen(struct pipe_screen *screen,
/* XXX may need to override usage flags (see sp_texture.c) */
+ pipe_texture_reference(&ps->texture, pt);
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) {
ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) *
ps->nblocksy *
ps->stride;
- } else {
+ }
+ else {
assert(face == 0);
assert(zslice == 0);
}
@@ -189,6 +207,11 @@ cell_get_tex_surface_screen(struct pipe_screen *screen,
+/**
+ * Copy tile data from linear layout to tiled layout.
+ * XXX this should be rolled into the future surface-creation code.
+ * XXX also need "untile" code...
+ */
static void
tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src)
{
@@ -219,6 +242,7 @@ tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src)
/**
* Convert linear texture image data to tiled format for SPU usage.
+ * XXX recast this in terms of pipe_surfaces (aka texture views).
*/
static void
cell_tile_texture(struct cell_context *cell,
@@ -285,6 +309,21 @@ cell_update_texture_mapping(struct cell_context *cell)
}
+/**
+ * Called via screen->tex_surface_release()
+ * Releases a surface handed out by cell_get_tex_surface(): the texture
+ * pointer stored in the surface there is reset to NULL through
+ * pipe_texture_reference() (presumably dropping that reference), then
+ * the surface itself is passed to the winsys surface_release() hook.
+ */
+static void
+cell_tex_surface_release(struct pipe_screen *screen,
+ struct pipe_surface **s)
+{
+ /* Effectively do the texture_update work here - if texture images
+ * needed post-processing to put them into hardware layout, this is
+ * where it would happen. No such post-processing is done here.
+ * NOTE(review): the original wording was "For softpipe, nothing to
+ * do", presumably carried over from the softpipe driver -- confirm
+ * whether the cell driver should re-tile texture data at this point.
+ */
+ assert ((*s)->texture);
+ pipe_texture_reference(&(*s)->texture, NULL);
+
+ screen->winsys->surface_release(screen->winsys, s);
+}
+
+
static void *
cell_surface_map( struct pipe_screen *screen,
struct pipe_surface *surface,
@@ -297,7 +336,7 @@ cell_surface_map( struct pipe_screen *screen,
return NULL;
}
- map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags );
+ map = pipe_buffer_map( screen, surface->buffer, flags );
if (map == NULL)
return NULL;
@@ -323,7 +362,7 @@ static void
cell_surface_unmap(struct pipe_screen *screen,
struct pipe_surface *surface)
{
- screen->winsys->buffer_unmap( screen->winsys, surface->buffer );
+ pipe_buffer_unmap( screen, surface->buffer );
}
@@ -333,12 +372,15 @@ cell_init_texture_functions(struct cell_context *cell)
/*cell->pipe.texture_update = cell_texture_update;*/
}
+
void
cell_init_screen_texture_funcs(struct pipe_screen *screen)
{
- screen->texture_create = cell_texture_create_screen;
- screen->texture_release = cell_texture_release_screen;
- screen->get_tex_surface = cell_get_tex_surface_screen;
+ screen->texture_create = cell_texture_create;
+ screen->texture_release = cell_texture_release;
+
+ screen->get_tex_surface = cell_get_tex_surface;
+ screen->tex_surface_release = cell_tex_surface_release;
screen->surface_map = cell_surface_map;
screen->surface_unmap = cell_surface_unmap;
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c
index e4230c7a5f..aa63435b93 100644
--- a/src/gallium/drivers/cell/ppu/cell_vbuf.c
+++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c
@@ -26,6 +26,11 @@
**************************************************************************/
/**
+ * Vertex buffer code. The draw module transforms vertices to window
+ * coords, etc. and emits the vertices into a buffer supplied by this module.
+ * When a vertex buffer is full, or we flush, we'll send the vertex data
+ * to the SPUs.
+ *
* Authors
* Brian Paul
*/
@@ -113,7 +118,7 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
}
cvbr->vertex_buf = ~0;
- cell_flush_int(&cell->pipe, 0x0);
+ cell_flush_int(cell, 0x0);
assert(vertices == cvbr->vertex_buffer);
cvbr->vertex_buffer = NULL;
@@ -121,12 +126,13 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
+/**
+ * Store the requested primitive type in the cell_vbuf_render state
+ * (cvbr->prim). The return type changes from void to boolean; this
+ * implementation always returns TRUE.
+ */
-static void
+static boolean
cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
{
struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
cvbr->prim = prim;
/*printf("cell_set_prim %u\n", prim);*/
+ return TRUE;
}
@@ -244,7 +250,7 @@ cell_vbuf_draw(struct vbuf_render *vbr,
#if 0
/* helpful for debug */
- cell_flush_int(&cell->pipe, CELL_FLUSH_WAIT);
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
#endif
}
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
index 2ece0250f6..566df7f59e 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
@@ -297,10 +297,9 @@ void cell_update_vertex_fetch(struct draw_context *draw)
/* Each fetch function can be a maximum of 34 instructions (note: this is
- * actually a slight over-estimate). That means (34 * 4) = 136 bytes
- * each maximum.
+ * actually a slight over-estimate).
*/
- spe_init_func(p, 136 * unique_attr_formats);
+ spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);
/* Allocate registers for the function's input parameters.
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
index 3658947715..2b10c116fa 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
@@ -135,7 +135,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw)
vs->num_elts = n;
send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE);
- cell_flush_int(& cell->pipe, CELL_FLUSH_WAIT);
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
}
draw->vs.post_nr = draw->vs.queue_nr;