summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorKeith Whitwell <keith@tungstengraphics.com>2008-09-11 18:32:05 +0100
committerKeith Whitwell <keith@tungstengraphics.com>2008-09-11 18:32:05 +0100
commitcc7dd4fc1b3c765ca1ecd943d189bb156dae529d (patch)
tree1a3560eb6df8a443c4f0e5af0a916f190b1542f6 /src/gallium/drivers
parent685248bea1fef5fd6335982570e34d0f6672030d (diff)
parentd50d68a1c940ed9c8d8c65e8e33667fa90d5baa1 (diff)
Merge commit 'origin/gallium-0.1' into gallium-0.2
Conflicts: Makefile progs/demos/Makefile progs/glsl/Makefile progs/redbook/Makefile progs/samples/Makefile progs/tests/Makefile progs/trivial/Makefile progs/xdemos/Makefile src/gallium/Makefile src/mesa/main/attrib.c src/mesa/main/bufferobj.c src/mesa/vbo/vbo_exec_draw.c
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/cell/common.h43
-rw-r--r--src/gallium/drivers/cell/ppu/Makefile4
-rw-r--r--src/gallium/drivers/cell/ppu/cell_batch.c56
-rw-r--r--src/gallium/drivers/cell/ppu/cell_batch.h3
-rw-r--r--src/gallium/drivers/cell/ppu/cell_clear.c30
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.c44
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h29
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.c42
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.h20
-rw-r--r--src/gallium/drivers/cell/ppu/cell_flush.c14
-rw-r--r--src/gallium/drivers/cell/ppu/cell_flush.h2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.c862
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.h (renamed from src/gallium/drivers/trace/tr_stream.h)41
-rw-r--r--src/gallium/drivers/cell/ppu/cell_pipe_state.c23
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.c28
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state.h16
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_derived.c29
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c89
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_per_fragment.c30
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_shader.c26
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_vertex.c12
-rw-r--r--src/gallium/drivers/cell/ppu/cell_surface.c99
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.c84
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vbuf.c12
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_fetch.c5
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_shader.c2
-rw-r--r--src/gallium/drivers/cell/spu/Makefile9
-rw-r--r--src/gallium/drivers/cell/spu/spu_colorpack.h6
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.c150
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c308
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h88
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c604
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.h58
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.c2
-rw-r--r--src/gallium/drivers/cell/spu/spu_tile.c6
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c99
-rw-r--r--src/gallium/drivers/cell/spu/spu_util.c21
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_fetch.c2
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_shader.c2
-rw-r--r--src/gallium/drivers/i915simple/i915_context.c9
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_vbuf.c38
-rw-r--r--src/gallium/drivers/i915simple/i915_screen.c5
-rw-r--r--src/gallium/drivers/i915simple/i915_texture.c12
-rw-r--r--src/gallium/drivers/i965simple/brw_state_pool.c10
-rw-r--r--src/gallium/drivers/i965simple/brw_tex_layout.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_clear.c31
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c51
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h10
-rw-r--r--src/gallium/drivers/softpipe/sp_draw_arrays.c9
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.c13
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_exec.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_sse.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_headers.h29
-rw-r--r--src/gallium/drivers/softpipe/sp_quad.c102
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_alpha_test.c7
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_blend.c16
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_colormask.c8
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_coverage.c14
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_depth_test.c52
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_earlyz.c12
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_fs.c22
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_occlusion.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_output.c10
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_stencil.c44
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_stipple.c22
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.c375
-rw-r--r--src/gallium/drivers/softpipe/sp_state_fs.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_surface.c122
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c10
-rw-r--r--src/gallium/drivers/trace/SConscript2
-rw-r--r--src/gallium/drivers/trace/tr_dump.c12
-rw-r--r--src/gallium/drivers/trace/tr_stream_stdc.c104
-rw-r--r--src/gallium/drivers/trace/tr_stream_wd.c183
73 files changed, 2681 insertions, 1665 deletions
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index 6bace0bb11..e989d8c2e5 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -84,7 +84,7 @@
#define CELL_CMD_BATCH 5
#define CELL_CMD_RELEASE_VERTS 6
#define CELL_CMD_STATE_FRAMEBUFFER 10
-#define CELL_CMD_STATE_DEPTH_STENCIL 11
+#define CELL_CMD_STATE_FRAGMENT_OPS 11
#define CELL_CMD_STATE_SAMPLER 12
#define CELL_CMD_STATE_TEXTURE 13
#define CELL_CMD_STATE_VERTEX_INFO 14
@@ -92,9 +92,7 @@
#define CELL_CMD_STATE_UNIFORMS 16
#define CELL_CMD_STATE_VS_ARRAY_INFO 17
#define CELL_CMD_STATE_BIND_VS 18
-#define CELL_CMD_STATE_BLEND 19
#define CELL_CMD_STATE_ATTRIB_FETCH 20
-#define CELL_CMD_STATE_LOGICOP 21
#define CELL_CMD_VS_EXECUTE 22
#define CELL_CMD_FLUSH_BUFFER_RANGE 23
@@ -106,30 +104,24 @@
#define CELL_BUFFER_STATUS_USED 20
+#define CELL_DEBUG_CHECKER (1 << 0)
+#define CELL_DEBUG_SYNC (1 << 1)
-/**
- */
-struct cell_command_depth_stencil_alpha_test {
- uint64_t base; /**< Effective address of code start. */
- unsigned size; /**< Size in bytes of SPE code. */
- unsigned read_depth; /**< Flag: should depth be read? */
- unsigned read_stencil; /**< Flag: should stencil be read? */
-};
-/**
- * Upload code to perform framebuffer blend operation
- */
-struct cell_command_blend {
- uint64_t base; /**< Effective address of code start. */
- unsigned size; /**< Size in bytes of SPE code. */
- unsigned read_fb; /**< Flag: should framebuffer be read? */
-};
+/** Max instructions for doing per-fragment operations */
+#define SPU_MAX_FRAGMENT_OPS_INSTS 64
-struct cell_command_logicop {
- uint64_t base; /**< Effective address of code start. */
- unsigned size; /**< Size in bytes of SPE code. */
+/**
+ * Command to specify per-fragment operations state and generated code.
+ */
+struct cell_command_fragment_ops
+{
+ uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
+ struct pipe_depth_stencil_alpha_state dsa;
+ struct pipe_blend_state blend;
+ unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS];
};
@@ -169,13 +161,15 @@ struct cell_array_info
};
-struct cell_attribute_fetch_code {
+struct cell_attribute_fetch_code
+{
uint64_t base;
uint size;
};
-struct cell_buffer_range {
+struct cell_buffer_range
+{
uint64_t base;
unsigned size;
};
@@ -263,6 +257,7 @@ struct cell_init_info
{
unsigned id;
unsigned num_spus;
+ unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */
struct cell_command *cmd;
/** Buffers for command batches, vertex/index data */
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile
index 0389a9554c..8699f3f8ec 100644
--- a/src/gallium/drivers/cell/ppu/Makefile
+++ b/src/gallium/drivers/cell/ppu/Makefile
@@ -5,7 +5,7 @@
TOP = ../../../../..
-include $(TOP)/configs/linux-cell
+include $(TOP)/configs/current
# This is the "top-level" cell PPU driver code, will get pulled into libGL.so
@@ -25,9 +25,9 @@ SOURCES = \
cell_context.c \
cell_draw_arrays.c \
cell_flush.c \
+ cell_gen_fragment.c \
cell_state_derived.c \
cell_state_emit.c \
- cell_state_per_fragment.c \
cell_state_shader.c \
cell_pipe_state.c \
cell_screen.c \
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c
index f45e5f25b6..16882c0129 100644
--- a/src/gallium/drivers/cell/ppu/cell_batch.c
+++ b/src/gallium/drivers/cell/ppu/cell_batch.c
@@ -32,6 +32,13 @@
+/**
+ * Search the buffer pool for an empty/free buffer and return its index.
+ * Buffers are used for storing vertex data, state and commands which
+ * will be sent to the SPUs.
+ * If no empty buffers are available, wait for one.
+ * \return buffer index in [0, CELL_NUM_BUFFERS-1]
+ */
uint
cell_get_empty_buffer(struct cell_context *cell)
{
@@ -74,6 +81,11 @@ cell_get_empty_buffer(struct cell_context *cell)
}
+/**
+ * Flush the current batch buffer to the SPUs.
+ * An empty buffer will be found and set as the new current batch buffer
+ * for subsequent commands/data.
+ */
void
cell_batch_flush(struct cell_context *cell)
{
@@ -93,11 +105,11 @@ cell_batch_flush(struct cell_context *cell)
/*
printf("cell_batch_dispatch: buf %u at %p, size %u\n",
- batch, &cell->batch_buffer[batch][0], size);
+ batch, &cell->buffer[batch][0], size);
*/
/*
- * Build "BATCH" command and sent to all SPUs.
+ * Build "BATCH" command and send to all SPUs.
*/
cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16);
@@ -120,6 +132,9 @@ cell_batch_flush(struct cell_context *cell)
}
+/**
+ * Return the number of bytes free in the current batch buffer.
+ */
uint
cell_batch_free_space(const struct cell_context *cell)
{
@@ -129,7 +144,9 @@ cell_batch_free_space(const struct cell_context *cell)
/**
- * Append data to current batch.
+ * Append data to the current batch buffer.
+ * \param data address of block of bytes to append
+ * \param bytes size of block of bytes
*/
void
cell_batch_append(struct cell_context *cell, const void *data, uint bytes)
@@ -165,6 +182,10 @@ cell_batch_append(struct cell_context *cell, const void *data, uint bytes)
}
+/**
+ * Allocate space in the current batch buffer for 'bytes' space.
+ * \return address in batch buffer to put data
+ */
void *
cell_batch_alloc(struct cell_context *cell, uint bytes)
{
@@ -172,6 +193,10 @@ cell_batch_alloc(struct cell_context *cell, uint bytes)
}
+/**
+ * Same as \sa cell_batch_alloc, but return an address at a particular
+ * alignment.
+ */
void *
cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
uint alignment)
@@ -215,3 +240,28 @@ cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
return pos;
}
+
+
+/**
+ * One-time init of batch buffers.
+ */
+void
+cell_init_batch_buffers(struct cell_context *cell)
+{
+ uint spu, buf;
+
+ /* init command, vertex/index buffer info */
+ for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) {
+ cell->buffer_size[buf] = 0;
+
+ /* init batch buffer status values,
+ * mark 0th buffer as used, rest as free.
+ */
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ if (buf == 0)
+ cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
+ else
+ cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE;
+ }
+ }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h
index a6eee0a8b1..f74dd60079 100644
--- a/src/gallium/drivers/cell/ppu/cell_batch.h
+++ b/src/gallium/drivers/cell/ppu/cell_batch.h
@@ -54,5 +54,8 @@ extern void *
cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
uint alignment);
+extern void
+cell_init_batch_buffers(struct cell_context *cell);
+
#endif /* CELL_BATCH_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c
index a421c95c8e..c9c0c721bb 100644
--- a/src/gallium/drivers/cell/ppu/cell_clear.c
+++ b/src/gallium/drivers/cell/ppu/cell_clear.c
@@ -35,6 +35,7 @@
#include <stdint.h>
#include "pipe/p_inlines.h"
#include "util/u_memory.h"
+#include "util/u_pack_color.h"
#include "cell/common.h"
#include "cell_clear.h"
#include "cell_context.h"
@@ -44,6 +45,27 @@
#include "cell_state.h"
+/**
+ * Convert packed pixel from one format to another.
+ */
+static unsigned
+convert_color(enum pipe_format srcFormat, unsigned srcColor,
+ enum pipe_format dstFormat)
+{
+ ubyte r, g, b, a;
+ unsigned dstColor;
+
+ util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a);
+ util_pack_color_ub(r, g, b, a, dstFormat, &dstColor);
+
+ return dstColor;
+}
+
+
+
+/**
+ * Called via pipe->clear()
+ */
void
cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
unsigned clearValue)
@@ -61,13 +83,21 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
PIPE_BUFFER_USAGE_GPU_WRITE);
if (ps == cell->framebuffer.zsbuf) {
+ /* clear z/stencil buffer */
surfIndex = 1;
}
else {
+ /* clear color buffer */
surfIndex = 0;
+
+ if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) {
+ clearValue = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue,
+ ps->format);
+ }
}
+ /* Build a CLEAR command and place it in the current batch buffer */
{
struct cell_command_clear_surface *clr
= (struct cell_command_clear_surface *)
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
index 9ff4e86943..71f1a3049d 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ b/src/gallium/drivers/cell/ppu/cell_context.c
@@ -43,11 +43,11 @@
#include "draw/draw_private.h"
#include "cell/common.h"
+#include "cell_batch.h"
#include "cell_clear.h"
#include "cell_context.h"
#include "cell_draw_arrays.h"
#include "cell_flush.h"
-#include "cell_render.h"
#include "cell_state.h"
#include "cell_surface.h"
#include "cell_spu.h"
@@ -85,12 +85,20 @@ cell_draw_create(struct cell_context *cell)
}
+#ifdef DEBUG
+static const struct debug_named_value cell_debug_flags[] = {
+ {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */
+ {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */
+ {NULL, 0}
+};
+#endif
+
+
struct pipe_context *
cell_create_context(struct pipe_screen *screen,
struct cell_winsys *cws)
{
struct cell_context *cell;
- uint spu, buf;
/* some fields need to be 16-byte aligned, so align the whole object */
cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16);
@@ -104,15 +112,6 @@ cell_create_context(struct pipe_screen *screen,
cell->pipe.screen = screen;
cell->pipe.destroy = cell_destroy_context;
- /* state setters */
- cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
- cell->pipe.set_vertex_elements = cell_set_vertex_elements;
-
- cell->pipe.draw_arrays = cell_draw_arrays;
- cell->pipe.draw_elements = cell_draw_elements;
- cell->pipe.draw_range_elements = cell_draw_range_elements;
- cell->pipe.set_edgeflags = cell_set_edgeflags;
-
cell->pipe.clear = cell_clear_surface;
cell->pipe.flush = cell_flush;
@@ -122,20 +121,28 @@ cell_create_context(struct pipe_screen *screen,
cell->pipe.wait_query = cell_wait_query;
#endif
+ cell_init_draw_functions(cell);
cell_init_state_functions(cell);
cell_init_shader_functions(cell);
cell_init_surface_functions(cell);
cell_init_texture_functions(cell);
+ cell_init_vertex_functions(cell);
cell->draw = cell_draw_create(cell);
cell_init_vbuf(cell);
+
draw_set_rasterize_stage(cell->draw, cell->vbuf);
/* convert all points/lines to tris for the time being */
draw_wide_point_threshold(cell->draw, 0.0);
draw_wide_line_threshold(cell->draw, 0.0);
+ /* get env vars or read config file to get debug flags */
+ cell->debug_flags = debug_get_flags_option("CELL_DEBUG",
+ cell_debug_flags,
+ 0 );
+
/*
* SPU stuff
*/
@@ -146,20 +153,7 @@ cell_create_context(struct pipe_screen *screen,
cell_start_spus(cell);
- /* init command, vertex/index buffer info */
- for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) {
- cell->buffer_size[buf] = 0;
-
- /* init batch buffer status values,
- * mark 0th buffer as used, rest as free.
- */
- for (spu = 0; spu < cell->num_spus; spu++) {
- if (buf == 0)
- cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
- else
- cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE;
- }
- }
+ cell_init_batch_buffers(cell);
return &cell->pipe;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index f1d1ca89a9..8cec9f45b2 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -39,8 +39,13 @@
#include "rtasm/rtasm_ppc_spe.h"
#include "tgsi/tgsi_scan.h"
+
struct cell_vbuf_render;
+
+/**
+ * Cell vertex shader state, subclass of pipe_shader_state.
+ */
struct cell_vertex_shader_state
{
struct pipe_shader_state shader;
@@ -49,6 +54,9 @@ struct cell_vertex_shader_state
};
+/**
+ * Cell fragment shader state, subclass of pipe_shader_state.
+ */
struct cell_fragment_shader_state
{
struct pipe_shader_state shader;
@@ -57,7 +65,11 @@ struct cell_fragment_shader_state
};
-struct cell_blend_state {
+/**
+ * Cell blend state atom, subclass of pipe_blend_state.
+ */
+struct cell_blend_state
+{
struct pipe_blend_state base;
/**
@@ -67,17 +79,24 @@ struct cell_blend_state {
};
-struct cell_depth_stencil_alpha_state {
- struct pipe_depth_stencil_alpha_state base;
+/**
+ * Cell depth/stencil/alpha state atom, subclass of
+ * pipe_depth_stencil_alpha_state.
+ */
+struct cell_depth_stencil_alpha_state
+{
+ struct pipe_depth_stencil_alpha_state base;
/**
* Generated code to perform alpha, stencil, and depth testing on the SPE
*/
struct spe_function code;
-
};
+/**
+ * Per-context state, subclass of pipe_context.
+ */
struct cell_context
{
struct pipe_context pipe;
@@ -144,6 +163,8 @@ struct cell_context
struct spe_function attrib_fetch;
unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS];
+
+ unsigned debug_flags;
};
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index f02dffe124..880d535320 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -34,6 +34,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_context.h"
#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
#include "cell_context.h"
#include "cell_draw_arrays.h"
@@ -76,14 +77,6 @@ cell_unmap_constant_buffers(struct cell_context *sp)
}
-boolean
-cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
- unsigned start, unsigned count)
-{
- return cell_draw_elements(pipe, NULL, 0, mode, start, count);
-}
-
-
/**
* Draw vertex arrays, with optional indexing.
@@ -92,7 +85,7 @@ cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
*
* XXX should the element buffer be specified/bound with a separate function?
*/
-boolean
+static boolean
cell_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -116,7 +109,7 @@ cell_draw_range_elements(struct pipe_context *pipe,
* Map vertex buffers
*/
for (i = 0; i < sp->num_vertex_buffers; i++) {
- void *buf = pipe->winsys->buffer_map(pipe->winsys,
+ void *buf = pipe_buffer_map(pipe->screen,
sp->vertex_buffer[i].buffer,
PIPE_BUFFER_USAGE_CPU_READ);
cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size);
@@ -124,7 +117,7 @@ cell_draw_range_elements(struct pipe_context *pipe,
}
/* Map index buffer, if present */
if (indexBuffer) {
- void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys,
+ void *mapped_indexes = pipe_buffer_map(pipe->screen,
indexBuffer,
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
@@ -143,11 +136,11 @@ cell_draw_range_elements(struct pipe_context *pipe,
*/
for (i = 0; i < sp->num_vertex_buffers; i++) {
draw_set_mapped_vertex_buffer(draw, i, NULL);
- pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer);
+ pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer);
}
if (indexBuffer) {
draw_set_mapped_element_buffer(draw, 0, NULL);
- pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ pipe_buffer_unmap(pipe->screen, indexBuffer);
}
/* Note: leave drawing surfaces mapped */
@@ -157,7 +150,7 @@ cell_draw_range_elements(struct pipe_context *pipe,
}
-boolean
+static boolean
cell_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -170,10 +163,29 @@ cell_draw_elements(struct pipe_context *pipe,
}
+static boolean
+cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count)
+{
+ return cell_draw_elements(pipe, NULL, 0, mode, start, count);
+}
+
-void
+static void
cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
{
struct cell_context *cell = cell_context(pipe);
draw_set_edgeflags(cell->draw, edgeflags);
}
+
+
+
+void
+cell_init_draw_functions(struct cell_context *cell)
+{
+ cell->pipe.draw_arrays = cell_draw_arrays;
+ cell->pipe.draw_elements = cell_draw_elements;
+ cell->pipe.draw_range_elements = cell_draw_range_elements;
+ cell->pipe.set_edgeflags = cell_set_edgeflags;
+}
+
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
index cd35ec17b4..148873aa67 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
@@ -29,26 +29,8 @@
#define CELL_DRAW_ARRAYS_H
-extern boolean
-cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
- unsigned start, unsigned count);
-
-extern boolean
-cell_draw_elements(struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned mode, unsigned start, unsigned count);
-
-extern boolean
-cell_draw_range_elements(struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned min_index,
- unsigned max_index,
- unsigned mode, unsigned start, unsigned count);
-
extern void
-cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);
+cell_init_draw_functions(struct cell_context *cell);
#endif /* CELL_DRAW_ARRAYS_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c
index 3aaf3de668..6596b72010 100644
--- a/src/gallium/drivers/cell/ppu/cell_flush.c
+++ b/src/gallium/drivers/cell/ppu/cell_flush.c
@@ -34,6 +34,9 @@
#include "draw/draw_context.h"
+/**
+ * Called via pipe->flush()
+ */
void
cell_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
@@ -50,16 +53,19 @@ cell_flush(struct pipe_context *pipe, unsigned flags,
flags |= CELL_FLUSH_WAIT;
draw_flush( cell->draw );
- cell_flush_int(pipe, flags);
+ cell_flush_int(cell, flags);
}
-/** internal flush */
+/**
+ * Cell internal flush function. Send the current batch buffer to all SPUs.
+ * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle.
+ * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero
+ */
void
-cell_flush_int(struct pipe_context *pipe, unsigned flags)
+cell_flush_int(struct cell_context *cell, unsigned flags)
{
static boolean flushing = FALSE; /* recursion catcher */
- struct cell_context *cell = cell_context(pipe);
uint i;
ASSERT(!flushing);
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h
index 8f0645c429..509ae6239a 100644
--- a/src/gallium/drivers/cell/ppu/cell_flush.h
+++ b/src/gallium/drivers/cell/ppu/cell_flush.h
@@ -36,7 +36,7 @@ cell_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence);
extern void
-cell_flush_int(struct pipe_context *pipe, unsigned flags);
+cell_flush_int(struct cell_context *cell, unsigned flags);
extern void
cell_flush_buffer_range(struct cell_context *cell, void *ptr,
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
new file mode 100644
index 0000000000..79a82ef72b
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -0,0 +1,862 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+/**
+ * Generate SPU per-fragment code (actually per-quad code).
+ * \author Brian Paul
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "rtasm/rtasm_ppc_spe.h"
+#include "cell_context.h"
+#include "cell_gen_fragment.h"
+
+
+
+/** Do extra optimizations? */
+#define OPTIMIZATIONS 1
+
+
+/**
+ * Generate SPE code to perform Z/depth testing.
+ *
+ * \param dsa Gallium depth/stencil/alpha state to gen code for
+ * \param f SPE function to append instruction onto.
+ * \param mask_reg register containing quad/pixel "alive" mask (in/out)
+ * \param ifragZ_reg register containing integer fragment Z values (in)
+ * \param ifbZ_reg register containing integer frame buffer Z values (in/out)
+ * \param zmask_reg register containing result of Z test/comparison (out)
+ */
+static void
+gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
+ struct spe_function *f,
+ int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
+{
+ ASSERT(dsa->depth.enabled);
+
+ switch (dsa->depth.func) {
+ case PIPE_FUNC_EQUAL:
+ /* zmask = (ifragZ == ref) */
+ spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & zmask) */
+ spe_and(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_NOTEQUAL:
+ /* zmask = (ifragZ == ref) */
+ spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & ~zmask) */
+ spe_andc(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_GREATER:
+ /* zmask = (ifragZ > ref) */
+ spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & zmask) */
+ spe_and(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_LESS:
+ /* zmask = (ref > ifragZ) */
+ spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
+ /* mask = (mask & zmask) */
+ spe_and(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ /* zmask = (ifragZ > ref) */
+ spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
+ /* mask = (mask & ~zmask) */
+ spe_andc(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_GEQUAL:
+ /* zmask = (ref > ifragZ) */
+ spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
+ /* mask = (mask & ~zmask) */
+ spe_andc(f, mask_reg, mask_reg, zmask_reg);
+ break;
+
+ case PIPE_FUNC_NEVER:
+ spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */
+ spe_move(f, zmask_reg, mask_reg); /* zmask = mask */
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ /* mask unchanged */
+ spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */
+ break;
+
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ if (dsa->depth.writemask) {
+ /*
+ * If (ztest passed) {
+ * framebufferZ = fragmentZ;
+ * }
+ * OR,
+ * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ;
+ */
+ spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
+ }
+}
+
+
+/**
+ * Generate SPE code to perform alpha testing.
+ *
+ * \param dsa Gallium depth/stencil/alpha state to gen code for
+ * \param f SPE function to append instruction onto.
+ * \param mask_reg register containing quad/pixel "alive" mask (in/out)
+ * \param fragA_reg register containing four fragment alpha values (in)
+ */
+static void
+gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
+ struct spe_function *f, int mask_reg, int fragA_reg)
+{
+ int ref_reg = spe_allocate_available_register(f);
+ int amask_reg = spe_allocate_available_register(f);
+
+ ASSERT(dsa->alpha.enabled);
+
+ if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
+ (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
+ /* load/splat the alpha reference float value */
+ spe_load_float(f, ref_reg, dsa->alpha.ref);
+ }
+
+ /* emit code to do the alpha comparison, updating 'mask' */
+ switch (dsa->alpha.func) {
+ case PIPE_FUNC_EQUAL:
+ /* amask = (fragA == ref) */
+ spe_fceq(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & amask) */
+ spe_and(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_NOTEQUAL:
+ /* amask = (fragA == ref) */
+ spe_fceq(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & ~amask) */
+ spe_andc(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_GREATER:
+ /* amask = (fragA > ref) */
+ spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & amask) */
+ spe_and(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_LESS:
+ /* amask = (ref > fragA) */
+ spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
+ /* mask = (mask & amask) */
+ spe_and(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_LEQUAL:
+ /* amask = (fragA > ref) */
+ spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
+ /* mask = (mask & ~amask) */
+ spe_andc(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_GEQUAL:
+ /* amask = (ref > fragA) */
+ spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
+ /* mask = (mask & ~amask) */
+ spe_andc(f, mask_reg, mask_reg, amask_reg);
+ break;
+
+ case PIPE_FUNC_NEVER:
+ spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */
+ break;
+
+ case PIPE_FUNC_ALWAYS:
+ /* no-op, mask unchanged */
+ break;
+
+ default:
+ ASSERT(0);
+ break;
+ }
+
+#if OPTIMIZATIONS
+ /* if mask == {0,0,0,0} we're all done, return */
+ {
+ /* re-use amask reg here */
+ int tmp_reg = amask_reg;
+ /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
+ spe_orx(f, tmp_reg, mask_reg);
+ /* if tmp[0] == 0 then return from function call */
+ spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0);
+ }
+#endif
+
+ spe_release_register(f, ref_reg);
+ spe_release_register(f, amask_reg);
+}
+
+
+
+/**
+ * Generate SPE code to implement the given blend mode for a quad of pixels.
+ * \param f SPE function to append instruction onto.
+ * \param fragR_reg register with fragment red values (float) (in/out)
+ * \param fragG_reg register with fragment green values (float) (in/out)
+ * \param fragB_reg register with fragment blue values (float) (in/out)
+ * \param fragA_reg register with fragment alpha values (float) (in/out)
+ * \param fbRGBA_reg register with packed framebuffer colors (integer) (in)
+ */
+static void
+gen_blend(const struct pipe_blend_state *blend,
+ struct spe_function *f,
+ enum pipe_format color_format,
+ int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
+ int fbRGBA_reg)
+{
+ int term1R_reg = spe_allocate_available_register(f);
+ int term1G_reg = spe_allocate_available_register(f);
+ int term1B_reg = spe_allocate_available_register(f);
+ int term1A_reg = spe_allocate_available_register(f);
+
+ int term2R_reg = spe_allocate_available_register(f);
+ int term2G_reg = spe_allocate_available_register(f);
+ int term2B_reg = spe_allocate_available_register(f);
+ int term2A_reg = spe_allocate_available_register(f);
+
+ int fbR_reg = spe_allocate_available_register(f);
+ int fbG_reg = spe_allocate_available_register(f);
+ int fbB_reg = spe_allocate_available_register(f);
+ int fbA_reg = spe_allocate_available_register(f);
+
+ int one_reg = spe_allocate_available_register(f);
+ int tmp_reg = spe_allocate_available_register(f);
+
+ ASSERT(blend->blend_enable);
+
+ /* Unpack/convert framebuffer colors from four 32-bit packed colors
+ * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
+ * Each 8-bit color component is expanded into a float in [0.0, 1.0].
+ */
+ {
+ int mask_reg = spe_allocate_available_register(f);
+
+ /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
+ spe_fsmbi(f, mask_reg, 0x1111);
+
+ /* XXX there may be more clever ways to implement the following code */
+ switch (color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ /* fbB = fbB & mask */
+ spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbG = fbRGBA & mask */
+ spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
+ /* fbG = fbG >> 8 */
+ spe_roti(f, fbG_reg, fbG_reg, -8);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbR = fbRGBA & mask */
+ spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
+ /* fbR = fbR >> 16 */
+ spe_roti(f, fbR_reg, fbR_reg, -16);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbA = fbRGBA & mask */
+ spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
+ /* fbA = fbA >> 24 */
+ spe_roti(f, fbA_reg, fbA_reg, -24);
+ break;
+
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /* fbA = fbA & mask */
+ spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbR = fbRGBA & mask */
+ spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
+ /* fbR = fbR >> 8 */
+ spe_roti(f, fbR_reg, fbR_reg, -8);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbG = fbRGBA & mask */
+ spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
+ /* fbG = fbG >> 16 */
+ spe_roti(f, fbG_reg, fbG_reg, -16);
+ /* mask = mask << 8 */
+ spe_roti(f, mask_reg, mask_reg, 8);
+
+ /* fbB = fbRGBA & mask */
+ spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
+ /* fbB = fbB >> 24 */
+ spe_roti(f, fbB_reg, fbB_reg, -24);
+ break;
+
+ default:
+ ASSERT(0);
+ }
+
+ /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
+ spe_cuflt(f, fbR_reg, fbR_reg, 8);
+ spe_cuflt(f, fbG_reg, fbG_reg, 8);
+ spe_cuflt(f, fbB_reg, fbB_reg, 8);
+ spe_cuflt(f, fbA_reg, fbA_reg, 8);
+
+ spe_release_register(f, mask_reg);
+ }
+
+
+ /*
+ * Compute Src RGB terms
+ */
+ switch (blend->rgb_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ spe_move(f, term1R_reg, fragR_reg);
+ spe_move(f, term1G_reg, fragG_reg);
+ spe_move(f, term1B_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ spe_zero(f, term1R_reg);
+ spe_zero(f, term1G_reg);
+ spe_zero(f, term1B_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Src Alpha term
+ */
+ switch (blend->alpha_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ spe_move(f, term1A_reg, fragA_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest RGB terms
+ */
+ switch (blend->rgb_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ spe_move(f, term2R_reg, fbR_reg);
+ spe_move(f, term2G_reg, fbG_reg);
+ spe_move(f, term2B_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ spe_zero(f, term2R_reg);
+ spe_zero(f, term2G_reg);
+ spe_zero(f, term2B_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
+ spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
+ spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ /* one = {1.0, 1.0, 1.0, 1.0} */
+ spe_load_float(f, one_reg, 1.0f);
+ /* tmp = one - fragA */
+ spe_fs(f, tmp_reg, one_reg, fragA_reg);
+ /* term = fb * tmp */
+ spe_fm(f, term2R_reg, fbR_reg, tmp_reg);
+ spe_fm(f, term2G_reg, fbG_reg, tmp_reg);
+ spe_fm(f, term2B_reg, fbB_reg, tmp_reg);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest Alpha term
+ */
+ switch (blend->alpha_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ spe_move(f, term2A_reg, fbA_reg);
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ spe_zero(f, term2A_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ /* one = {1.0, 1.0, 1.0, 1.0} */
+ spe_load_float(f, one_reg, 1.0f);
+ /* tmp = one - fragA */
+ spe_fs(f, tmp_reg, one_reg, fragA_reg);
+ /* termA = fbA * tmp */
+ spe_fm(f, term2A_reg, fbA_reg, tmp_reg);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest RGB terms
+ */
+ switch (blend->rgb_func) {
+ case PIPE_BLEND_ADD:
+ spe_fa(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_fa(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_fa(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ spe_fs(f, fragR_reg, term1R_reg, term2R_reg);
+ spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
+ spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest A term
+ */
+ switch (blend->alpha_func) {
+ case PIPE_BLEND_ADD:
+ spe_fa(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ spe_release_register(f, term1R_reg);
+ spe_release_register(f, term1G_reg);
+ spe_release_register(f, term1B_reg);
+ spe_release_register(f, term1A_reg);
+
+ spe_release_register(f, term2R_reg);
+ spe_release_register(f, term2G_reg);
+ spe_release_register(f, term2B_reg);
+ spe_release_register(f, term2A_reg);
+
+ spe_release_register(f, fbR_reg);
+ spe_release_register(f, fbG_reg);
+ spe_release_register(f, fbB_reg);
+ spe_release_register(f, fbA_reg);
+
+ spe_release_register(f, one_reg);
+ spe_release_register(f, tmp_reg);
+}
+
+
+static void
+gen_logicop(const struct pipe_blend_state *blend,
+ struct spe_function *f,
+ int fragRGBA_reg, int fbRGBA_reg)
+{
+ /* XXX to-do */
+ /* operate on 32-bit packed pixels, not float colors */
+}
+
+
+static void
+gen_colormask(uint colormask,
+ struct spe_function *f,
+ int fragRGBA_reg, int fbRGBA_reg)
+{
+ /* XXX to-do */
+ /* operate on 32-bit packed pixels, not float colors */
+}
+
+
+
+/**
+ * Generate code to pack a quad of float colors into a four 32-bit integers.
+ *
+ * \param f SPE function to append instruction onto.
+ * \param color_format the dest color packing format
+ * \param r_reg register containing four red values (in/clobbered)
+ * \param g_reg register containing four green values (in/clobbered)
+ * \param b_reg register containing four blue values (in/clobbered)
+ * \param a_reg register containing four alpha values (in/clobbered)
+ * \param rgba_reg register to store the packed RGBA colors (out)
+ */
+static void
+gen_pack_colors(struct spe_function *f,
+ enum pipe_format color_format,
+ int r_reg, int g_reg, int b_reg, int a_reg,
+ int rgba_reg)
+{
+ /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
+ spe_cfltu(f, r_reg, r_reg, 32);
+ spe_cfltu(f, g_reg, g_reg, 32);
+ spe_cfltu(f, b_reg, b_reg, 32);
+ spe_cfltu(f, a_reg, a_reg, 32);
+
+ /* Shift the most significant bytes to least the significant positions.
+ * I.e.: reg = reg >> 24
+ */
+ spe_rotmi(f, r_reg, r_reg, -24);
+ spe_rotmi(f, g_reg, g_reg, -24);
+ spe_rotmi(f, b_reg, b_reg, -24);
+ spe_rotmi(f, a_reg, a_reg, -24);
+
+ /* Shift the color bytes according to the surface format */
+ if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) {
+ spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */
+ spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */
+ spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */
+ }
+ else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) {
+ spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */
+ spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */
+ spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */
+ }
+ else {
+ ASSERT(0);
+ }
+
+ /* Merge red, green, blue, alpha registers to make packed RGBA colors.
+ * Eg: after shifting according to color_format we might have:
+ * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
+ * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
+ * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
+ * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
+ * OR-ing all those together gives us four packed colors:
+ * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
+ */
+ spe_or(f, rgba_reg, r_reg, g_reg);
+ spe_or(f, rgba_reg, rgba_reg, b_reg);
+ spe_or(f, rgba_reg, rgba_reg, a_reg);
+}
+
+
+
+
+/**
+ * Generate SPE code to implement the fragment operations (alpha test,
+ * depth test, stencil test, blending, colormask, and final
+ * framebuffer write) as specified by the current context state.
+ *
+ * Logically, this code will be called after running the fragment
+ * shader. But under some circumstances we could run some of this
+ * code before the fragment shader to cull fragments/quads that are
+ * totally occluded/discarded.
+ *
+ * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now.
+ *
+ * See the spu_default_fragment_ops() function to see how the per-fragment
+ * operations would be done with ordinary C code.
+ * The code we generate here though has no branches, is SIMD, etc and
+ * should be much faster.
+ *
+ * \param cell the rendering context (in)
+ * \param f the generated function (out)
+ */
+void
+gen_fragment_function(struct cell_context *cell, struct spe_function *f)
+{
+ const struct pipe_depth_stencil_alpha_state *dsa =
+ &cell->depth_stencil->base;
+ const struct pipe_blend_state *blend = &cell->blend->base;
+ const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
+
+ /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
+ const int x_reg = 3; /* uint */
+ const int y_reg = 4; /* uint */
+ const int color_tile_reg = 5; /* tile_t * */
+ const int depth_tile_reg = 6; /* tile_t * */
+ const int fragZ_reg = 7; /* vector float */
+ const int fragR_reg = 8; /* vector float */
+ const int fragG_reg = 9; /* vector float */
+ const int fragB_reg = 10; /* vector float */
+ const int fragA_reg = 11; /* vector float */
+ const int mask_reg = 12; /* vector uint */
+
+ /* offset of quad from start of tile
+ * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
+ */
+ int quad_offset_reg;
+
+ int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */
+ int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */
+
+ spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ spe_allocate_register(f, x_reg);
+ spe_allocate_register(f, y_reg);
+ spe_allocate_register(f, color_tile_reg);
+ spe_allocate_register(f, depth_tile_reg);
+ spe_allocate_register(f, fragZ_reg);
+ spe_allocate_register(f, fragR_reg);
+ spe_allocate_register(f, fragG_reg);
+ spe_allocate_register(f, fragB_reg);
+ spe_allocate_register(f, fragA_reg);
+ spe_allocate_register(f, mask_reg);
+
+ quad_offset_reg = spe_allocate_available_register(f);
+ fbRGBA_reg = spe_allocate_available_register(f);
+ fbZS_reg = spe_allocate_available_register(f);
+
+ /* compute offset of quad from start of tile, in bytes */
+ {
+ int x2_reg = spe_allocate_available_register(f);
+ int y2_reg = spe_allocate_available_register(f);
+
+ ASSERT(TILE_SIZE == 32);
+
+ spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */
+ spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */
+ spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */
+ spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */
+ spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */
+
+ spe_release_register(f, x2_reg);
+ spe_release_register(f, y2_reg);
+ }
+
+
+ if (dsa->alpha.enabled) {
+ gen_alpha_test(dsa, f, mask_reg, fragA_reg);
+ }
+
+ if (dsa->depth.enabled || dsa->stencil[0].enabled) {
+ const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
+ boolean write_depth_stencil;
+
+ int fbZ_reg = spe_allocate_available_register(f); /* Z values */
+ int fbS_reg = spe_allocate_available_register(f); /* Stencil values */
+
+ /* fetch quad of depth/stencil values from tile at (x,y) */
+ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
+ spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+
+ if (dsa->depth.enabled) {
+ /* Extract Z bits from fbZS_reg into fbZ_reg */
+ if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
+ zs_format == PIPE_FORMAT_X8Z24_UNORM) {
+ int mask_reg = spe_allocate_available_register(f);
+ spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */
+ spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */
+ spe_release_register(f, mask_reg);
+ /* OK, fbZ_reg has four 24-bit Z values now */
+ }
+ else {
+ /* XXX handle other z/stencil formats */
+ ASSERT(0);
+ }
+
+ /* Convert fragZ values from float[4] to uint[4] */
+ if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
+ zs_format == PIPE_FORMAT_X8Z24_UNORM ||
+ zs_format == PIPE_FORMAT_Z24S8_UNORM ||
+ zs_format == PIPE_FORMAT_Z24X8_UNORM) {
+ /* 24-bit Z values */
+ int scale_reg = spe_allocate_available_register(f);
+
+ /* scale_reg[0,1,2,3] = float(2^24-1) */
+ spe_load_float(f, scale_reg, (float) 0xffffff);
+
+ /* XXX these two instructions might be combined */
+ spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */
+ spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */
+
+ spe_release_register(f, scale_reg);
+ }
+ else {
+ /* XXX handle 16-bit Z format */
+ ASSERT(0);
+ }
+ }
+
+ if (dsa->stencil[0].enabled) {
+ /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */
+ if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
+ zs_format == PIPE_FORMAT_X8Z24_UNORM) {
+ /* XXX extract with a shift */
+ ASSERT(0);
+ }
+ else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
+ zs_format == PIPE_FORMAT_Z24X8_UNORM) {
+ /* XXX extract with a mask */
+ ASSERT(0);
+ }
+ }
+
+
+ if (dsa->stencil[0].enabled) {
+ /* XXX this may involve depth testing too */
+ // gen_stencil_test(dsa, f, ... );
+ ASSERT(0);
+ }
+ else if (dsa->depth.enabled) {
+ int zmask_reg = spe_allocate_available_register(f);
+ gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
+ spe_release_register(f, zmask_reg);
+ }
+
+ /* do we need to write Z and/or Stencil back into framebuffer? */
+ write_depth_stencil = (dsa->depth.writemask |
+ dsa->stencil[0].write_mask |
+ dsa->stencil[1].write_mask);
+
+ if (write_depth_stencil) {
+ /* Merge latest Z and Stencil values into fbZS_reg.
+ * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
+ * fbS_reg has four 8-bit Z values in bits [7..0].
+ */
+ if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
+ zs_format == PIPE_FORMAT_X8Z24_UNORM) {
+ spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
+ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
+ }
+ else if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
+ zs_format == PIPE_FORMAT_X8Z24_UNORM) {
+ /* XXX to do */
+ ASSERT(0);
+ }
+ else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
+ /* XXX to do */
+ ASSERT(0);
+ }
+ else if (zs_format == PIPE_FORMAT_S8_UNORM) {
+ /* XXX to do */
+ ASSERT(0);
+ }
+ else {
+ /* bad zs_format */
+ ASSERT(0);
+ }
+
+ /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
+ spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
+ }
+
+ spe_release_register(f, fbZ_reg);
+ spe_release_register(f, fbS_reg);
+ }
+
+
+ /* Get framebuffer quad/colors. We'll need these for blending,
+ * color masking, and to obey the quad/pixel mask.
+ * Load: fbRGBA_reg = memory[color_tile + quad_offset]
+ * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
+ * we could skip this load.
+ */
+ spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
+
+
+ if (blend->blend_enable) {
+ gen_blend(blend, f, color_format,
+ fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
+ }
+
+ /*
+ * Write fragment colors to framebuffer/tile.
+ * This involves converting the fragment colors from float[4] to the
+ * tile's specific format and obeying the quad/pixel mask.
+ */
+ {
+ int rgba_reg = spe_allocate_available_register(f);
+
+ /* Pack four float colors as four 32-bit int colors */
+ gen_pack_colors(f, color_format,
+ fragR_reg, fragG_reg, fragB_reg, fragA_reg,
+ rgba_reg);
+
+ if (blend->logicop_enable) {
+ gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
+ }
+
+ if (blend->colormask != 0xf) {
+ gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
+ }
+
+
+ /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
+ * if (mask[i])
+ * rgba[i] = rgba[i];
+ * else
+ * rgba[i] = framebuffer[i];
+ */
+ spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
+
+ /* Store updated quad in tile:
+ * memory[color_tile + quad_offset] = rgba_reg;
+ */
+ spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
+
+ spe_release_register(f, rgba_reg);
+ }
+
+ printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
+
+ spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */
+
+
+ spe_release_register(f, fbRGBA_reg);
+ spe_release_register(f, fbZS_reg);
+ spe_release_register(f, quad_offset_reg);
+}
+
diff --git a/src/gallium/drivers/trace/tr_stream.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
index 6111174d6a..0ea0fc690c 100644
--- a/src/gallium/drivers/trace/tr_stream.h
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
@@ -1,8 +1,8 @@
/**************************************************************************
- *
+ *
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,38 +22,17 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * @file
- * Cross-platform sequential access stream abstraction.
*
- * These are really general purpose file access functions, and might one day
- * be moved into the util module.
- */
-
-#ifndef TR_STREAM_H
-#define TR_STREAM_H
-
-
-#include "pipe/p_compiler.h"
-
-
-struct trace_stream;
+ **************************************************************************/
-struct trace_stream *
-trace_stream_create(const char *filename);
+#ifndef CELL_GEN_FRAGMENT_H
+#define CELL_GEN_FRAGMENT_H
-boolean
-trace_stream_write(struct trace_stream *stream, const void *data, size_t size);
-void
-trace_stream_flush(struct trace_stream *stream);
+extern void
+gen_fragment_function(struct cell_context *cell, struct spe_function *f);
-void
-trace_stream_close(struct trace_stream *stream);
+#endif /* CELL_GEN_FRAGMENT_H */
-#endif /* TR_STREAM_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
index fe5437023b..e04cf5f274 100644
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
@@ -34,6 +34,7 @@
#include "pipe/p_inlines.h"
#include "draw/draw_context.h"
#include "cell_context.h"
+#include "cell_flush.h"
#include "cell_state.h"
#include "cell_texture.h"
#include "cell_state_per_fragment.h"
@@ -130,8 +131,9 @@ cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth)
}
-static void cell_set_clip_state( struct pipe_context *pipe,
- const struct pipe_clip_state *clip )
+static void
+cell_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
{
struct cell_context *cell = cell_context(pipe);
@@ -310,8 +312,21 @@ cell_set_framebuffer_state(struct pipe_context *pipe,
cell->zsbuf_map = NULL;
}
- /* update my state */
- cell->framebuffer = *fb;
+ /* Finish any pending rendering to the current surface before
+ * installing a new surface!
+ */
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
+
+ /* update my state
+ * (this is also where old surfaces will finally get freed)
+ */
+ cell->framebuffer.width = fb->width;
+ cell->framebuffer.height = fb->height;
+ cell->framebuffer.num_cbufs = fb->num_cbufs;
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]);
+ }
+ pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf);
/* map new surfaces */
if (csurf)
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c
index 973c0b1aa1..9508227e29 100644
--- a/src/gallium/drivers/cell/ppu/cell_spu.c
+++ b/src/gallium/drivers/cell/ppu/cell_spu.c
@@ -26,6 +26,11 @@
**************************************************************************/
+/**
+ * Utility/wrappers for communicating with the SPUs.
+ */
+
+
#include <pthread.h>
#include "cell_spu.h"
@@ -40,6 +45,9 @@ helpful headers:
*/
+/**
+ * Cell/SPU info that's not per-context.
+ */
struct cell_global_info cell_global;
@@ -74,7 +82,11 @@ wait_mbox_message(spe_context_ptr_t ctx)
}
-static void *cell_thread_function(void *arg)
+/**
+ * Called by pthread_create() to spawn an SPU thread.
+ */
+static void *
+cell_thread_function(void *arg)
{
struct cell_init_info *init = (struct cell_init_info *) arg;
unsigned entry = SPE_DEFAULT_ENTRY;
@@ -92,7 +104,10 @@ static void *cell_thread_function(void *arg)
/**
- * Create the SPU threads
+ * Create the SPU threads. This is done once during driver initialization.
+ * This involves setting the the "init" message which is sent to each SPU.
+ * The init message specifies an SPU id, total number of SPUs, location
+ * and number of batch buffers, etc.
*/
void
cell_start_spus(struct cell_context *cell)
@@ -100,7 +115,6 @@ cell_start_spus(struct cell_context *cell)
static boolean one_time_init = FALSE;
uint i, j;
-
if (one_time_init) {
fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
"on Cell.\n");
@@ -120,6 +134,7 @@ cell_start_spus(struct cell_context *cell)
for (i = 0; i < cell->num_spus; i++) {
cell_global.inits[i].id = i;
cell_global.inits[i].num_spus = cell->num_spus;
+ cell_global.inits[i].debug_flags = cell->debug_flags;
cell_global.inits[i].cmd = &cell_global.command[i];
for (j = 0; j < CELL_NUM_BUFFERS; j++) {
cell_global.inits[i].buffers[j] = cell->buffer[j];
@@ -137,14 +152,17 @@ cell_start_spus(struct cell_context *cell)
exit(1);
}
- pthread_create(&cell_global.spe_threads[i], NULL, &cell_thread_function,
- &cell_global.inits[i]);
+ pthread_create(&cell_global.spe_threads[i], /* returned thread handle */
+ NULL, /* pthread attribs */
+ &cell_thread_function, /* start routine */
+ &cell_global.inits[i]); /* thread argument */
}
}
/**
* Tell all the SPUs to stop/exit.
+ * This is done when the driver's exiting / cleaning up.
*/
void
cell_spu_exit(struct cell_context *cell)
diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h
index 82580ea35a..a7771a55a3 100644
--- a/src/gallium/drivers/cell/ppu/cell_state.h
+++ b/src/gallium/drivers/cell/ppu/cell_state.h
@@ -48,19 +48,17 @@
#define CELL_NEW_VERTEX_INFO 0x8000
-void cell_set_vertex_elements(struct pipe_context *,
- unsigned count,
- const struct pipe_vertex_element *);
+extern void
+cell_update_derived( struct cell_context *softpipe );
-void cell_set_vertex_buffers(struct pipe_context *,
- unsigned count,
- const struct pipe_vertex_buffer *);
-void cell_update_derived( struct cell_context *softpipe );
+extern void
+cell_init_shader_functions(struct cell_context *cell);
-void
-cell_init_shader_functions(struct cell_context *cell);
+extern void
+cell_init_vertex_functions(struct cell_context *cell);
+
#endif /* CELL_STATE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c
index 8ab938a02a..efc4f78364 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_derived.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c
@@ -35,21 +35,6 @@
#include "cell_state_emit.h"
-static int
-find_vs_output(const struct cell_vertex_shader_state *vs,
- uint semantic_name,
- uint semantic_index)
-{
- uint i;
- for (i = 0; i < vs->info.num_outputs; i++) {
- if (vs->info.output_semantic_name[i] == semantic_name &&
- vs->info.output_semantic_index[i] == semantic_index)
- return i;
- }
- return -1;
-}
-
-
/**
* Determine how to map vertex program outputs to fragment program inputs.
* Basically, this will be used when computing the triangle interpolation
@@ -58,7 +43,6 @@ find_vs_output(const struct cell_vertex_shader_state *vs,
static void
calculate_vertex_layout( struct cell_context *cell )
{
- const struct cell_vertex_shader_state *vs = cell->vs;
const struct cell_fragment_shader_state *fs = cell->fs;
const enum interp_mode colorInterp
= cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
@@ -82,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell )
vinfo->num_attribs = 0;
/* we always want to emit vertex pos */
- src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
@@ -98,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell )
break;
case TGSI_SEMANTIC_COLOR:
- src = find_vs_output(vs, TGSI_SEMANTIC_COLOR,
- fs->info.input_semantic_index[i]);
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR,
+ fs->info.input_semantic_index[i]);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
break;
case TGSI_SEMANTIC_FOG:
- src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
#if 1
if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */
src = 0;
@@ -116,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell )
case TGSI_SEMANTIC_GENERIC:
/* this includes texcoords and varying vars */
- src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC,
+ src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC,
fs->info.input_semantic_index[i]);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
@@ -163,6 +147,9 @@ compute_cliprect(struct cell_context *sp)
+/**
+ * Update derived state, send current state to SPUs prior to rendering.
+ */
void cell_update_derived( struct cell_context *cell )
{
if (cell->dirty & (CELL_NEW_RASTERIZER |
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 9d88c1cf3d..180b89c1f6 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -27,6 +27,7 @@
#include "util/u_memory.h"
#include "cell_context.h"
+#include "cell_gen_fragment.h"
#include "cell_state.h"
#include "cell_state_emit.h"
#include "cell_state_per_fragment.h"
@@ -47,27 +48,13 @@ emit_state_cmd(struct cell_context *cell, uint cmd,
}
-
+/**
+ * For state marked as 'dirty', construct a state-update command block
+ * and insert it into the current batch buffer.
+ */
void
cell_emit_state(struct cell_context *cell)
{
- if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_BLEND)) {
- struct cell_command_logicop logicop;
-
- if (cell->logic_op.store != NULL) {
- spe_release_func(& cell->logic_op);
- }
-
- cell_generate_logic_op(& cell->logic_op,
- & cell->blend->base,
- cell->framebuffer.cbufs[0]);
-
- logicop.base = (intptr_t) cell->logic_op.store;
- logicop.size = 64 * 4;
- emit_state_cmd(cell, CELL_CMD_STATE_LOGICOP, &logicop,
- sizeof(logicop));
- }
-
if (cell->dirty & CELL_NEW_FRAMEBUFFER) {
struct pipe_surface *cbuf = cell->framebuffer.cbufs[0];
struct pipe_surface *zbuf = cell->framebuffer.zsbuf;
@@ -80,44 +67,33 @@ cell_emit_state(struct cell_context *cell)
fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE;
fb->width = cell->framebuffer.width;
fb->height = cell->framebuffer.height;
+#if 0
+ printf("EMIT color format %s\n", pf_name(fb->color_format));
+ printf("EMIT depth format %s\n", pf_name(fb->depth_format));
+#endif
}
- if (cell->dirty & CELL_NEW_BLEND) {
- struct cell_command_blend blend;
- if (cell->blend != NULL) {
- blend.base = (intptr_t) cell->blend->code.store;
- blend.size = (char *) cell->blend->code.csr
- - (char *) cell->blend->code.store;
- blend.read_fb = TRUE;
- } else {
- blend.base = 0;
- blend.size = 0;
- blend.read_fb = FALSE;
- }
-
- emit_state_cmd(cell, CELL_CMD_STATE_BLEND, &blend, sizeof(blend));
- }
-
- if (cell->dirty & CELL_NEW_DEPTH_STENCIL) {
- struct cell_command_depth_stencil_alpha_test dsat;
-
-
- if (cell->depth_stencil != NULL) {
- dsat.base = (intptr_t) cell->depth_stencil->code.store;
- dsat.size = (char *) cell->depth_stencil->code.csr
- - (char *) cell->depth_stencil->code.store;
- dsat.read_depth = TRUE;
- dsat.read_stencil = FALSE;
- } else {
- dsat.base = 0;
- dsat.size = 0;
- dsat.read_depth = FALSE;
- dsat.read_stencil = FALSE;
- }
-
- emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat,
- sizeof(dsat));
+ if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
+ CELL_NEW_DEPTH_STENCIL |
+ CELL_NEW_BLEND)) {
+ /* XXX we don't want to always do codegen here. We should have
+ * a hash/lookup table to cache previous results...
+ */
+ struct cell_command_fragment_ops *fops
+ = cell_batch_alloc(cell, sizeof(*fops));
+ struct spe_function spe_code;
+
+ /* generate new code */
+ gen_fragment_function(cell, &spe_code);
+ /* put the new code into the batch buffer */
+ fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
+ memcpy(&fops->code, spe_code.store,
+ SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ fops->dsa = cell->depth_stencil->base;
+ fops->blend = cell->blend->base;
+ /* free codegen buffer */
+ spe_release_func(&spe_code);
}
if (cell->dirty & CELL_NEW_SAMPLER) {
@@ -157,7 +133,8 @@ cell_emit_state(struct cell_context *cell)
emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO,
&cell->vertex_info, sizeof(struct vertex_info));
}
-
+
+#if 0
if (cell->dirty & CELL_NEW_VS) {
const struct draw_context *const draw = cell->draw;
struct cell_shader_info info;
@@ -170,7 +147,7 @@ cell_emit_state(struct cell_context *cell)
info.immediates = (uintptr_t) draw->vs.machine.Imms;
info.num_immediates = draw->vs.machine.ImmLimit / 4;
- emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS,
- & info, sizeof(info));
+ emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info));
}
+#endif
}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
index 53ae3aa50e..78cb446c14 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
@@ -132,9 +132,9 @@ emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
/**
+ * Generate code to perform Z testing. Four Z values are tested at once.
* \param dsa Current depth-test state
* \param f Function to which code should be appended
- * \param m Mask of allocated / free SPE registers
* \param mask Index of register to contain depth-pass mask
* \param stored Index of register containing values from depth buffer
* \param calculated Index of register containing per-fragment depth values
@@ -198,6 +198,7 @@ emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
/**
+ * Generate code to apply the stencil operation (after testing).
* \note Emits a maximum of 5 instructions.
*
* \warning
@@ -222,9 +223,13 @@ emit_stencil_op(struct spe_function *f,
spe_il(f, result, ref);
break;
case PIPE_STENCIL_OP_INCR:
+ /* clamp = [0xff, 0xff, 0xff, 0xff] */
spe_il(f, clamp, 0x0ff);
+ /* result[i] = in[i] + 1 */
spe_ai(f, result, in, 1);
+ /* clamp_mask[i] = (result[i] > 0xff) */
spe_clgti(f, clamp_mask, result, 0x0ff);
+ /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
spe_selb(f, result, result, clamp, clamp_mask);
break;
case PIPE_STENCIL_OP_DECR:
@@ -259,10 +264,10 @@ emit_stencil_op(struct spe_function *f,
/**
+ * Generate code to do stencil test. Four pixels are tested at once.
* \param dsa Depth / stencil test state
* \param face 0 for front face, 1 for back face
* \param f Function to append instructions to
- * \param reg_mask Mask of allocated registers
* \param mask Register containing mask of fragments passing the
* alpha test
* \param depth_mask Register containing mask of fragments passing the
@@ -310,13 +315,14 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
switch (dsa->stencil[face].func) {
case PIPE_FUNC_NEVER:
- spe_il(f, stencil_mask, 0);
+ spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
break;
case PIPE_FUNC_NOTEQUAL:
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_EQUAL:
+ /* stencil_mask[i] = (stored[i] == ref) */
spe_ceqi(f, stencil_mask, stored, ref);
break;
@@ -324,6 +330,8 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_GREATER:
+ complement = TRUE;
+ /* stencil_mask[i] = (stored[i] > ref) */
spe_clgti(f, stencil_mask, stored, ref);
break;
@@ -331,8 +339,11 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_GEQUAL:
+ /* stencil_mask[i] = (stored[i] > ref) */
spe_clgti(f, stencil_mask, stored, ref);
+ /* tmp[i] = (stored[i] == ref) */
spe_ceqi(f, tmp, stored, ref);
+ /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
spe_or(f, stencil_mask, stencil_mask, tmp);
break;
@@ -461,7 +472,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
* + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
* up to 64 to make it a happy power-of-two.
*/
- spe_init_func(f, 4 * 64);
+ spe_init_func(f, SPE_INST_SIZE * 64);
/* Allocate registers for the function's input parameters. Cleverly (and
@@ -540,7 +551,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
spe_selb(f, depth, depth, zvals, mask);
}
- spe_bi(f, 0, 0, 0);
+ spe_bi(f, 0, 0, 0); /* return from function call */
#if 0
@@ -956,7 +967,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb)
* + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to
* make it a happy power-of-two.
*/
- spe_init_func(f, 4 * 64);
+ spe_init_func(f, SPE_INST_SIZE * 64);
const int frag[4] = {
@@ -1144,9 +1155,10 @@ cell_generate_alpha_blend(struct cell_blend_state *cb)
}
-int PC_OFFSET(const struct spe_function *f, const void *d)
+static int
+PC_OFFSET(const struct spe_function *f, const void *d)
{
- const intptr_t pc = (intptr_t) f->csr;
+ const intptr_t pc = (intptr_t) &f->store[f->num_inst];
const intptr_t ea = ~0x0f & (intptr_t) d;
return (ea - pc) >> 2;
@@ -1178,7 +1190,7 @@ cell_generate_logic_op(struct spe_function *f,
* bytes (equiv. to 8 instructions) are needed for data storage. Round up
* to 64 to make it a happy power-of-two.
*/
- spe_init_func(f, 4 * 64);
+ spe_init_func(f, SPE_INST_SIZE * 64);
/* Pixel colors in framebuffer format in AoS layout.
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
index 86bcad05e9..97e44eeb1a 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c
@@ -53,7 +53,10 @@ cell_vertex_shader_state(void *shader)
}
-
+/**
+ * Create fragment shader state.
+ * Called via pipe->create_fs_state()
+ */
static void *
cell_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
@@ -77,6 +80,9 @@ cell_create_fs_state(struct pipe_context *pipe,
}
+/**
+ * Called via pipe->bind_fs_state()
+ */
static void
cell_bind_fs_state(struct pipe_context *pipe, void *fs)
{
@@ -88,6 +94,9 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs)
}
+/**
+ * Called via pipe->delete_fs_state()
+ */
static void
cell_delete_fs_state(struct pipe_context *pipe, void *fs)
{
@@ -98,6 +107,10 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs)
}
+/**
+ * Create vertex shader state.
+ * Called via pipe->create_vs_state()
+ */
static void *
cell_create_vs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
@@ -128,6 +141,9 @@ cell_create_vs_state(struct pipe_context *pipe,
}
+/**
+ * Called via pipe->bind_vs_state()
+ */
static void
cell_bind_vs_state(struct pipe_context *pipe, void *vs)
{
@@ -142,6 +158,9 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs)
}
+/**
+ * Called via pipe->delete_vs_state()
+ */
static void
cell_delete_vs_state(struct pipe_context *pipe, void *vs)
{
@@ -154,6 +173,9 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs)
}
+/**
+ * Called via pipe->set_constant_buffer()
+ */
static void
cell_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
@@ -166,7 +188,7 @@ cell_set_constant_buffer(struct pipe_context *pipe,
assert(index == 0);
/* note: reference counting */
- pipe_buffer_reference(ws,
+ winsys_buffer_reference(ws,
&cell->constants[shader].buffer,
buf->buffer);
cell->constants[shader].size = buf->size;
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
index 114684c2a3..fbe55c8472 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
@@ -35,7 +35,7 @@
#include "draw/draw_context.h"
-void
+static void
cell_set_vertex_elements(struct pipe_context *pipe,
unsigned count,
const struct pipe_vertex_element *elements)
@@ -53,7 +53,7 @@ cell_set_vertex_elements(struct pipe_context *pipe,
}
-void
+static void
cell_set_vertex_buffers(struct pipe_context *pipe,
unsigned count,
const struct pipe_vertex_buffer *buffers)
@@ -69,3 +69,11 @@ cell_set_vertex_buffers(struct pipe_context *pipe,
draw_set_vertex_buffers(cell->draw, count, buffers);
}
+
+
+void
+cell_init_vertex_functions(struct cell_context *cell)
+{
+ cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
+ cell->pipe.set_vertex_elements = cell_set_vertex_elements;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c
index d9e3b510dc..732c64082e 100644
--- a/src/gallium/drivers/cell/ppu/cell_surface.c
+++ b/src/gallium/drivers/cell/ppu/cell_surface.c
@@ -25,108 +25,13 @@
*
**************************************************************************/
-#include "pipe/p_defines.h"
-#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
-#include "util/u_memory.h"
#include "util/u_rect.h"
-#include "util/u_tile.h"
-
#include "cell_context.h"
-#include "cell_surface.h"
-
-
-static void
-cell_surface_copy(struct pipe_context *pipe,
- boolean do_flip,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- struct pipe_surface *src,
- unsigned srcx, unsigned srcy,
- unsigned width, unsigned height)
-{
- assert( dst->cpp == src->cpp );
-
- pipe_copy_rect(pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE),
- &dst->block,
- dst->stride,
- dstx, dsty,
- width, height,
- pipe_surface_map(src, PIPE_BUFFER_USAGE_CPU_READ),
- do_flip ? -src->stride : src->stride,
- srcx, do_flip ? height - 1 - srcy : srcy);
-
- pipe_surface_unmap(src);
- pipe_surface_unmap(dst);
-}
-
-
-static void *
-get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y)
-{
- return (char *)dst_map + y / dst->block.height * dst->stride + x / dst->block.width * dst->block.size;
-}
-
-
-#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8))
-
-
-/**
- * Fill a rectangular sub-region. Need better logic about when to
- * push buffers into AGP - will currently do so whenever possible.
- */
-static void
-cell_surface_fill(struct pipe_context *pipe,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height, unsigned value)
-{
- unsigned i, j;
- void *dst_map = pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE);
-
- assert(dst->stride > 0);
-
- switch (dst->block.size) {
- case 1:
- case 2:
- case 4:
- pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value);
- break;
- case 8:
- {
- /* expand the 4-byte clear value to an 8-byte value */
- ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty);
- ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff);
- ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff);
- ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff);
- ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff);
- val0 = (val0 << 8) | val0;
- val1 = (val1 << 8) | val1;
- val2 = (val2 << 8) | val2;
- val3 = (val3 << 8) | val3;
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- row[j*4+0] = val0;
- row[j*4+1] = val1;
- row[j*4+2] = val2;
- row[j*4+3] = val3;
- }
- row += dst->stride/2;
- }
- }
- break;
- default:
- assert(0);
- break;
- }
-
- pipe_surface_unmap( dst );
-}
void
cell_init_surface_functions(struct cell_context *cell)
{
- cell->pipe.surface_copy = cell_surface_copy;
- cell->pipe.surface_fill = cell_surface_fill;
+ cell->pipe.surface_copy = util_surface_copy;
+ cell->pipe.surface_fill = util_surface_fill;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index 5a0942bbd6..b6590dfb86 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -63,19 +63,30 @@ cell_texture_layout(struct cell_texture * spt)
spt->buffer_size = 0;
for ( level = 0 ; level <= pt->last_level ; level++ ) {
+ unsigned size;
+ unsigned w_tile, h_tile;
+
+ /* width, height, rounded up to tile size */
+ w_tile = align(width, TILE_SIZE);
+ h_tile = align(height, TILE_SIZE);
+
pt->width[level] = width;
pt->height[level] = height;
pt->depth[level] = depth;
- pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
+ pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile);
+ pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile);
spt->stride[level] = pt->nblocksx[level] * pt->block.size;
spt->level_offset[level] = spt->buffer_size;
- spt->buffer_size += (pt->nblocksy[level] *
- ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
- pt->nblocksx[level] * pt->block.size);
+ size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size;
+ if (pt->target == PIPE_TEXTURE_CUBE)
+ size *= 6;
+ else
+ size *= depth;
+
+ spt->buffer_size += size;
width = minify(width);
height = minify(height);
@@ -85,8 +96,8 @@ cell_texture_layout(struct cell_texture * spt)
static struct pipe_texture *
-cell_texture_create_screen(struct pipe_screen *screen,
- const struct pipe_texture *templat)
+cell_texture_create(struct pipe_screen *screen,
+ const struct pipe_texture *templat)
{
struct pipe_winsys *ws = screen->winsys;
struct cell_texture *spt = CALLOC_STRUCT(cell_texture);
@@ -113,8 +124,8 @@ cell_texture_create_screen(struct pipe_screen *screen,
static void
-cell_texture_release_screen(struct pipe_screen *screen,
- struct pipe_texture **pt)
+cell_texture_release(struct pipe_screen *screen,
+ struct pipe_texture **pt)
{
if (!*pt)
return;
@@ -130,7 +141,7 @@ cell_texture_release_screen(struct pipe_screen *screen,
DBG("%s deleting %p\n", __FUNCTION__, (void *) spt);
*/
- pipe_buffer_reference(screen->winsys, &spt->buffer, NULL);
+ pipe_buffer_reference(screen, &spt->buffer, NULL);
FREE(spt);
}
@@ -138,6 +149,7 @@ cell_texture_release_screen(struct pipe_screen *screen,
}
+#if 0
static void
cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture,
uint face, uint levelsMask)
@@ -145,13 +157,14 @@ cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture,
/* XXX TO DO: re-tile the texture data ... */
}
+#endif
static struct pipe_surface *
-cell_get_tex_surface_screen(struct pipe_screen *screen,
- struct pipe_texture *pt,
- unsigned face, unsigned level, unsigned zslice,
- unsigned usage)
+cell_get_tex_surface(struct pipe_screen *screen,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned usage)
{
struct pipe_winsys *ws = screen->winsys;
struct cell_texture *spt = cell_texture(pt);
@@ -161,7 +174,7 @@ cell_get_tex_surface_screen(struct pipe_screen *screen,
if (ps) {
assert(ps->refcount);
assert(ps->winsys);
- pipe_buffer_reference(ws, &ps->buffer, spt->buffer);
+ winsys_buffer_reference(ws, &ps->buffer, spt->buffer);
ps->format = pt->format;
ps->block = pt->block;
ps->width = pt->width[level];
@@ -174,12 +187,17 @@ cell_get_tex_surface_screen(struct pipe_screen *screen,
/* XXX may need to override usage flags (see sp_texture.c) */
+ pipe_texture_reference(&ps->texture, pt);
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) {
ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) *
ps->nblocksy *
ps->stride;
- } else {
+ }
+ else {
assert(face == 0);
assert(zslice == 0);
}
@@ -189,6 +207,11 @@ cell_get_tex_surface_screen(struct pipe_screen *screen,
+/**
+ * Copy tile data from linear layout to tiled layout.
+ * XXX this should be rolled into the future surface-creation code.
+ * XXX also need "untile" code...
+ */
static void
tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src)
{
@@ -219,6 +242,7 @@ tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src)
/**
* Convert linear texture image data to tiled format for SPU usage.
+ * XXX recast this in terms of pipe_surfaces (aka texture views).
*/
static void
cell_tile_texture(struct cell_context *cell,
@@ -285,6 +309,21 @@ cell_update_texture_mapping(struct cell_context *cell)
}
+static void
+cell_tex_surface_release(struct pipe_screen *screen,
+ struct pipe_surface **s)
+{
+ /* Effectively do the texture_update work here - if texture images
+ * needed post-processing to put them into hardware layout, this is
+ * where it would happen. For softpipe, nothing to do.
+ */
+ assert ((*s)->texture);
+ pipe_texture_reference(&(*s)->texture, NULL);
+
+ screen->winsys->surface_release(screen->winsys, s);
+}
+
+
static void *
cell_surface_map( struct pipe_screen *screen,
struct pipe_surface *surface,
@@ -297,7 +336,7 @@ cell_surface_map( struct pipe_screen *screen,
return NULL;
}
- map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags );
+ map = pipe_buffer_map( screen, surface->buffer, flags );
if (map == NULL)
return NULL;
@@ -323,7 +362,7 @@ static void
cell_surface_unmap(struct pipe_screen *screen,
struct pipe_surface *surface)
{
- screen->winsys->buffer_unmap( screen->winsys, surface->buffer );
+ pipe_buffer_unmap( screen, surface->buffer );
}
@@ -333,12 +372,15 @@ cell_init_texture_functions(struct cell_context *cell)
/*cell->pipe.texture_update = cell_texture_update;*/
}
+
void
cell_init_screen_texture_funcs(struct pipe_screen *screen)
{
- screen->texture_create = cell_texture_create_screen;
- screen->texture_release = cell_texture_release_screen;
- screen->get_tex_surface = cell_get_tex_surface_screen;
+ screen->texture_create = cell_texture_create;
+ screen->texture_release = cell_texture_release;
+
+ screen->get_tex_surface = cell_get_tex_surface;
+ screen->tex_surface_release = cell_tex_surface_release;
screen->surface_map = cell_surface_map;
screen->surface_unmap = cell_surface_unmap;
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c
index e4230c7a5f..aa63435b93 100644
--- a/src/gallium/drivers/cell/ppu/cell_vbuf.c
+++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c
@@ -26,6 +26,11 @@
**************************************************************************/
/**
+ * Vertex buffer code. The draw module transforms vertices to window
+ * coords, etc. and emits the vertices into buffer supplied by this module.
+ * When a vertex buffer is full, or we flush, we'll send the vertex data
+ * to the SPUs.
+ *
* Authors
* Brian Paul
*/
@@ -113,7 +118,7 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
}
cvbr->vertex_buf = ~0;
- cell_flush_int(&cell->pipe, 0x0);
+ cell_flush_int(cell, 0x0);
assert(vertices == cvbr->vertex_buffer);
cvbr->vertex_buffer = NULL;
@@ -121,12 +126,13 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
-static void
+static boolean
cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
{
struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
cvbr->prim = prim;
/*printf("cell_set_prim %u\n", prim);*/
+ return TRUE;
}
@@ -244,7 +250,7 @@ cell_vbuf_draw(struct vbuf_render *vbr,
#if 0
/* helpful for debug */
- cell_flush_int(&cell->pipe, CELL_FLUSH_WAIT);
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
#endif
}
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
index 2ece0250f6..566df7f59e 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
@@ -297,10 +297,9 @@ void cell_update_vertex_fetch(struct draw_context *draw)
/* Each fetch function can be a maximum of 34 instructions (note: this is
- * actually a slight over-estimate). That means (34 * 4) = 136 bytes
- * each maximum.
+ * actually a slight over-estimate).
*/
- spe_init_func(p, 136 * unique_attr_formats);
+ spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);
/* Allocate registers for the function's input parameters.
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
index 3658947715..2b10c116fa 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
@@ -135,7 +135,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw)
vs->num_elts = n;
send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE);
- cell_flush_int(& cell->pipe, CELL_FLUSH_WAIT);
+ cell_flush_int(cell, CELL_FLUSH_WAIT);
}
draw->vs.post_nr = draw->vs.queue_nr;
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
index 8e83610790..1ae0dfb8c1 100644
--- a/src/gallium/drivers/cell/spu/Makefile
+++ b/src/gallium/drivers/cell/spu/Makefile
@@ -5,7 +5,7 @@
TOP = ../../../../..
-include $(TOP)/configs/linux-cell
+include $(TOP)/configs/current
PROG = g3d
@@ -22,12 +22,15 @@ SOURCES = \
spu_render.c \
spu_texture.c \
spu_tile.c \
- spu_tri.c \
+ spu_tri.c
+
+OLD_SOURCES = \
spu_exec.c \
spu_util.c \
spu_vertex_fetch.c \
spu_vertex_shader.c
+
SPU_OBJECTS = $(SOURCES:.c=.o) \
SPU_ASM_OUT = $(SOURCES:.c=.s) \
@@ -43,7 +46,7 @@ INCLUDE_DIRS = \
$(SPU_CC) $(SPU_CFLAGS) -c $<
.c.s:
- $(SPU_CC) $(SPU_CFLAGS) -S $<
+ $(SPU_CC) $(SPU_CFLAGS) -O3 -S $<
# The .a file will be linked into the main/PPU executable
diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h
index e9fee8a3a6..fd8dc6ded3 100644
--- a/src/gallium/drivers/cell/spu/spu_colorpack.h
+++ b/src/gallium/drivers/cell/spu/spu_colorpack.h
@@ -79,14 +79,14 @@ spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle)
static INLINE vector float
-spu_unpack_color(uint color)
+spu_unpack_B8G8R8A8(uint color)
{
vector unsigned int color_u4 = spu_splats(color);
color_u4 = spu_shuffle(color_u4, color_u4,
((vector unsigned char) {
- 0, 0, 0, 0,
- 5, 5, 5, 5,
10, 10, 10, 10,
+ 5, 5, 5, 5,
+ 0, 0, 0, 0,
15, 15, 15, 15}) );
return spu_convtf(color_u4, 32);
}
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 89c61136a4..e27df2dfb3 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -382,10 +382,10 @@ fetch_src_file_channel(
break;
case TGSI_FILE_IMMEDIATE:
- assert( index->i[0] < (int) mach->ImmLimit );
- assert( index->i[1] < (int) mach->ImmLimit );
- assert( index->i[2] < (int) mach->ImmLimit );
- assert( index->i[3] < (int) mach->ImmLimit );
+ ASSERT( index->i[0] < (int) mach->ImmLimit );
+ ASSERT( index->i[1] < (int) mach->ImmLimit );
+ ASSERT( index->i[2] < (int) mach->ImmLimit );
+ ASSERT( index->i[3] < (int) mach->ImmLimit );
chan->f[0] = mach->Imms[index->i[0]][swizzle];
chan->f[1] = mach->Imms[index->i[1]][swizzle];
@@ -409,7 +409,7 @@ fetch_src_file_channel(
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
break;
@@ -422,7 +422,7 @@ fetch_src_file_channel(
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
}
@@ -471,7 +471,7 @@ fetch_source(
index.q = si_shli(index.q, 12);
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
index.i[0] += reg->SrcRegisterDim.Index;
@@ -558,7 +558,7 @@ store_dest(
break;
default:
- assert( 0 );
+ ASSERT( 0 );
return;
}
@@ -582,11 +582,11 @@ store_dest(
break;
case TGSI_SAT_MINUS_PLUS_ONE:
- assert( 0 );
+ ASSERT( 0 );
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
}
@@ -769,7 +769,7 @@ exec_tex(struct spu_exec_machine *mach,
break;
default:
- assert (0);
+ ASSERT (0);
}
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -861,7 +861,7 @@ exec_declaration(struct spu_exec_machine *mach,
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
if( mask == TGSI_WRITEMASK_XYZW ) {
@@ -971,11 +971,11 @@ exec_instruction(
break;
case TGSI_OPCODE_EXP:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_LOG:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_MUL:
@@ -1151,24 +1151,24 @@ exec_instruction(
break;
case TGSI_OPCODE_CND:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_CND0:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_DOT2ADD:
/* TGSI_OPCODE_DP2A */
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_INDEX:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_NEGATE:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_FRAC:
@@ -1181,7 +1181,7 @@ exec_instruction(
break;
case TGSI_OPCODE_CLAMP:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_FLOOR:
@@ -1276,7 +1276,7 @@ exec_instruction(
break;
case TGSI_OPCODE_MULTIPLYMATRIX:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_ABS:
@@ -1290,7 +1290,7 @@ exec_instruction(
break;
case TGSI_OPCODE_RCC:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_DPH:
@@ -1353,23 +1353,23 @@ exec_instruction(
break;
case TGSI_OPCODE_PK2H:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_PK2US:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_PK4B:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_PK4UB:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_RFL:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_SEQ:
@@ -1384,7 +1384,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SFL:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_SGT:
@@ -1429,7 +1429,7 @@ exec_instruction(
break;
case TGSI_OPCODE_STR:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_TEX:
@@ -1452,7 +1452,7 @@ exec_instruction(
/* src[1] = d[strq]/dx */
/* src[2] = d[strq]/dy */
/* src[3] = sampler unit */
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_TXL:
@@ -1470,35 +1470,35 @@ exec_instruction(
break;
case TGSI_OPCODE_UP2H:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_UP2US:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_UP4B:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_UP4UB:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_X2D:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_ARA:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_ARR:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_BRA:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_CAL:
@@ -1507,14 +1507,14 @@ exec_instruction(
/* do the call */
/* push the Cond, Loop, Cont stacks */
- assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+ ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
mach->CondStack[mach->CondStackTop++] = mach->CondMask;
- assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
- assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
- assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
+ ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
/* note that PC was already incremented above */
@@ -1538,13 +1538,13 @@ exec_instruction(
*pc = mach->CallStack[--mach->CallStackTop];
/* pop the Cond, Loop, Cont stacks */
- assert(mach->CondStackTop > 0);
+ ASSERT(mach->CondStackTop > 0);
mach->CondMask = mach->CondStack[--mach->CondStackTop];
- assert(mach->LoopStackTop > 0);
+ ASSERT(mach->LoopStackTop > 0);
mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- assert(mach->ContStackTop > 0);
+ ASSERT(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[--mach->ContStackTop];
- assert(mach->FuncStackTop > 0);
+ ASSERT(mach->FuncStackTop > 0);
mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
UPDATE_EXEC_MASK(mach);
@@ -1552,7 +1552,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SSG:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_CMP:
@@ -1592,11 +1592,11 @@ exec_instruction(
break;
case TGSI_OPCODE_NRM:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_DIV:
- assert( 0 );
+ ASSERT( 0 );
break;
case TGSI_OPCODE_DP2:
@@ -1615,7 +1615,7 @@ exec_instruction(
case TGSI_OPCODE_IF:
/* push CondMask */
- assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+ ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
mach->CondStack[mach->CondStackTop++] = mach->CondMask;
FETCH( &r[0], 0, CHAN_X );
/* update CondMask */
@@ -1639,7 +1639,7 @@ exec_instruction(
/* invert CondMask wrt previous mask */
{
uint prevMask;
- assert(mach->CondStackTop > 0);
+ ASSERT(mach->CondStackTop > 0);
prevMask = mach->CondStack[mach->CondStackTop - 1];
mach->CondMask = ~mach->CondMask & prevMask;
UPDATE_EXEC_MASK(mach);
@@ -1649,7 +1649,7 @@ exec_instruction(
case TGSI_OPCODE_ENDIF:
/* pop CondMask */
- assert(mach->CondStackTop > 0);
+ ASSERT(mach->CondStackTop > 0);
mach->CondMask = mach->CondStack[--mach->CondStackTop];
UPDATE_EXEC_MASK(mach);
break;
@@ -1660,19 +1660,19 @@ exec_instruction(
break;
case TGSI_OPCODE_REP:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_ENDREP:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_PUSHA:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_POPA:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_CEIL:
@@ -1746,7 +1746,7 @@ exec_instruction(
break;
case TGSI_OPCODE_MOD:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_XOR:
@@ -1759,15 +1759,15 @@ exec_instruction(
break;
case TGSI_OPCODE_SAD:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_TXF:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_TXQ:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_EMIT:
@@ -1784,9 +1784,9 @@ exec_instruction(
/* fall-through (for now) */
case TGSI_OPCODE_BGNLOOP2:
/* push LoopMask and ContMasks */
- assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
- assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
break;
@@ -1794,7 +1794,7 @@ exec_instruction(
/* fall-through (for now at least) */
case TGSI_OPCODE_ENDLOOP2:
/* Restore ContMask, but don't pop */
- assert(mach->ContStackTop > 0);
+ ASSERT(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
if (mach->LoopMask) {
/* repeat loop: jump to instruction just past BGNLOOP */
@@ -1802,10 +1802,10 @@ exec_instruction(
}
else {
/* exit loop: pop LoopMask */
- assert(mach->LoopStackTop > 0);
+ ASSERT(mach->LoopStackTop > 0);
mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
/* pop ContMask */
- assert(mach->ContStackTop > 0);
+ ASSERT(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[--mach->ContStackTop];
}
UPDATE_EXEC_MASK(mach);
@@ -1834,26 +1834,26 @@ exec_instruction(
break;
case TGSI_OPCODE_NOISE1:
- assert( 0 );
+ ASSERT( 0 );
break;
case TGSI_OPCODE_NOISE2:
- assert( 0 );
+ ASSERT( 0 );
break;
case TGSI_OPCODE_NOISE3:
- assert( 0 );
+ ASSERT( 0 );
break;
case TGSI_OPCODE_NOISE4:
- assert( 0 );
+ ASSERT( 0 );
break;
case TGSI_OPCODE_NOP:
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
}
@@ -1874,11 +1874,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach )
mach->FuncMask = 0xf;
mach->ExecMask = 0xf;
- mach->CondStackTop = 0; /* temporarily subvert this assertion */
- assert(mach->CondStackTop == 0);
- assert(mach->LoopStackTop == 0);
- assert(mach->ContStackTop == 0);
- assert(mach->CallStackTop == 0);
+ mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */
+ ASSERT(mach->CondStackTop == 0);
+ ASSERT(mach->LoopStackTop == 0);
+ ASSERT(mach->ContStackTop == 0);
+ ASSERT(mach->CallStackTop == 0);
mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index e04ffeb9b1..2a7cb75f59 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -34,6 +34,7 @@
#include "spu_main.h"
#include "spu_render.h"
+#include "spu_per_fragment_op.h"
#include "spu_texture.h"
#include "spu_tile.h"
//#include "spu_test.h"
@@ -46,7 +47,7 @@
/*
helpful headers:
/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
-/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
+/opt/cell/sdk/usr/include/libmisc.h
*/
boolean Debug = FALSE;
@@ -55,17 +56,13 @@ struct spu_global spu;
struct spu_vs_context draw;
-static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
- ALIGN16_ATTRIB;
-static unsigned char depth_stencil_code_buffer[4 * 64]
- ALIGN16_ATTRIB;
-
-static unsigned char fb_blend_code_buffer[4 * 64]
+/**
+ * Buffers containing dynamically generated SPU code:
+ */
+static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
ALIGN16_ATTRIB;
-static unsigned char logicop_code_buffer[4 * 64]
- ALIGN16_ATTRIB;
/**
@@ -136,54 +133,75 @@ really_clear_tiles(uint surfaceIndex)
static void
cmd_clear_surface(const struct cell_command_clear_surface *clear)
{
- const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
- uint i;
-
if (Debug)
printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id,
clear->surface, clear->value);
-#define CLEAR_OPT 1
-#if CLEAR_OPT
- /* set all tile's status to CLEAR */
if (clear->surface == 0) {
- memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
spu.fb.color_clear_value = clear->value;
+ if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
+ uint x = (spu.init.id << 4) | (spu.init.id << 12) |
+ (spu.init.id << 20) | (spu.init.id << 28);
+ spu.fb.color_clear_value ^= x;
+ }
}
else {
- memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
spu.fb.depth_clear_value = clear->value;
}
- return;
-#endif
+#define CLEAR_OPT 1
+#if CLEAR_OPT
+
+ /* Simply set all tiles' status to CLEAR.
+ * When we actually begin rendering into a tile, we'll initialize it to
+ * the clear value. If any tiles go untouched during the frame,
+ * really_clear_tiles() will set them to the clear value.
+ */
if (clear->surface == 0) {
- spu.fb.color_clear_value = clear->value;
- clear_c_tile(&spu.ctile);
+ memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
}
else {
- spu.fb.depth_clear_value = clear->value;
- clear_z_tile(&spu.ztile);
+ memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
}
+#else
+
+ /*
+ * This path clears the whole framebuffer to the clear color right now.
+ */
+
/*
printf("SPU: %s num=%d w=%d h=%d\n",
__FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
*/
- for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
- uint tx = i % spu.fb.width_tiles;
- uint ty = i / spu.fb.width_tiles;
- if (clear->surface == 0)
- put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
- else
- put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
- /* XXX we don't want this here, but it fixes bad tile results */
+ /* init a single tile to the clear value */
+ if (clear->surface == 0) {
+ clear_c_tile(&spu.ctile);
+ }
+ else {
+ clear_z_tile(&spu.ztile);
}
-#if 0
- wait_on_mask(1 << TAG_SURFACE_CLEAR);
-#endif
+ /* walk over my tiles, writing the 'clear' tile's data */
+ {
+ const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+ uint i;
+ for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+ uint tx = i % spu.fb.width_tiles;
+ uint ty = i / spu.fb.width_tiles;
+ if (clear->surface == 0)
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
+ else
+ put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
+ }
+ }
+
+ if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
+ wait_on_mask(1 << TAG_SURFACE_CLEAR);
+ }
+
+#endif /* CLEAR_OPT */
if (Debug)
printf("SPU %u: CLEAR SURF done\n", spu.init.id);
@@ -201,6 +219,31 @@ cmd_release_verts(const struct cell_command_release_verts *release)
}
+/**
+ * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
+ * This involves installing new fragment ops SPU code.
+ * If this function is never called, we'll use a regular C fallback function
+ * for fragment processing.
+ */
+static void
+cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
+{
+ if (Debug)
+ printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);
+ /* Copy SPU code from batch buffer to spu buffer */
+ memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
+ /* Copy state info */
+ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
+ memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
+
+ /* Point function pointer at new code */
+ spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
+
+ spu.read_depth = spu.depth_stencil_alpha.depth.enabled;
+ spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
+}
+
+
static void
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
{
@@ -227,87 +270,24 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
switch (spu.fb.depth_format) {
case PIPE_FORMAT_Z32_UNORM:
+ spu.fb.zsize = 4;
+ spu.fb.zscale = (float) 0xffffffffu;
+ break;
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
spu.fb.zsize = 4;
+ spu.fb.zscale = (float) 0x00ffffffu;
break;
case PIPE_FORMAT_Z16_UNORM:
spu.fb.zsize = 2;
+ spu.fb.zscale = (float) 0xffffu;
break;
default:
spu.fb.zsize = 0;
break;
}
-
- if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM)
- spu.color_shuffle = ((vector unsigned char) {
- 12, 0, 4, 8, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0});
- else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM)
- spu.color_shuffle = ((vector unsigned char) {
- 8, 4, 0, 12, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0});
- else
- ASSERT(0);
-}
-
-
-static void
-cmd_state_blend(const struct cell_command_blend *state)
-{
- if (Debug)
- printf("SPU %u: BLEND: enabled %d\n",
- spu.init.id,
- (state->size != 0));
-
- ASSERT_ALIGN16(state->base);
-
- if (state->size != 0) {
- mfc_get(fb_blend_code_buffer,
- (unsigned int) state->base, /* src */
- ROUNDUP16(state->size),
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
- spu.blend = (blend_func) fb_blend_code_buffer;
- spu.read_fb = state->read_fb;
- } else {
- spu.read_fb = FALSE;
- }
-}
-
-
-static void
-cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state)
-{
- if (Debug)
- printf("SPU %u: DEPTH_STENCIL: ztest %d\n",
- spu.init.id,
- state->read_depth);
-
- ASSERT_ALIGN16(state->base);
-
- if (state->size != 0) {
- mfc_get(depth_stencil_code_buffer,
- (unsigned int) state->base, /* src */
- ROUNDUP16(state->size),
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
- } else {
- /* If there is no code, emit a return instruction.
- */
- depth_stencil_code_buffer[0] = 0x35;
- depth_stencil_code_buffer[1] = 0x00;
- depth_stencil_code_buffer[2] = 0x00;
- depth_stencil_code_buffer[3] = 0x00;
- }
-
- spu.frag_test = (frag_test_func) depth_stencil_code_buffer;
- spu.read_depth = state->read_depth;
- spu.read_stencil = state->read_stencil;
}
@@ -381,6 +361,21 @@ cmd_state_vs_array_info(const struct cell_array_info *vs_info)
static void
+cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
+{
+ mfc_get(attribute_fetch_code_buffer,
+ (unsigned int) code->base, /* src */
+ code->size,
+ TAG_BATCH_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << TAG_BATCH_BUFFER);
+
+ draw.vertex_fetch.code = attribute_fetch_code_buffer;
+}
+
+
+static void
cmd_finish(void)
{
if (Debug)
@@ -395,7 +390,9 @@ cmd_finish(void)
/**
- * Execute a batch of commands
+ * Execute a batch of commands which was sent to us by the PPU.
+ * See the cell_emit_state.c code to see where the commands come from.
+ *
* The opcode param encodes the location of the buffer and its size.
*/
static void
@@ -432,16 +429,14 @@ cmd_batch(uint opcode)
printf("SPU %u: release batch buf %u\n", spu.init.id, buf);
release_buffer(buf);
+ /*
+ * Loop over commands in the batch buffer
+ */
for (pos = 0; pos < usize; /* no incr */) {
switch (buffer[pos]) {
- case CELL_CMD_STATE_FRAMEBUFFER:
- {
- struct cell_command_framebuffer *fb
- = (struct cell_command_framebuffer *) &buffer[pos];
- cmd_state_framebuffer(fb);
- pos += sizeof(*fb) / 8;
- }
- break;
+ /*
+ * rendering commands
+ */
case CELL_CMD_CLEAR_SURFACE:
{
struct cell_command_clear_surface *clr
@@ -459,26 +454,24 @@ cmd_batch(uint opcode)
pos += pos_incr;
}
break;
- case CELL_CMD_RELEASE_VERTS:
+ /*
+ * state-update commands
+ */
+ case CELL_CMD_STATE_FRAMEBUFFER:
{
- struct cell_command_release_verts *release
- = (struct cell_command_release_verts *) &buffer[pos];
- cmd_release_verts(release);
- pos += sizeof(*release) / 8;
+ struct cell_command_framebuffer *fb
+ = (struct cell_command_framebuffer *) &buffer[pos];
+ cmd_state_framebuffer(fb);
+ pos += sizeof(*fb) / 8;
}
break;
- case CELL_CMD_FINISH:
- cmd_finish();
- pos += 1;
- break;
- case CELL_CMD_STATE_BLEND:
- cmd_state_blend((struct cell_command_blend *) &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct cell_command_blend)) / 8);
- break;
- case CELL_CMD_STATE_DEPTH_STENCIL:
- cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *)
- &buffer[pos+1]);
- pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8);
+ case CELL_CMD_STATE_FRAGMENT_OPS:
+ {
+ struct cell_command_fragment_ops *fops
+ = (struct cell_command_fragment_ops *) &buffer[pos];
+ cmd_state_fragment_ops(fops);
+ pos += sizeof(*fops) / 8;
+ }
break;
case CELL_CMD_STATE_SAMPLER:
{
@@ -514,42 +507,32 @@ cmd_batch(uint opcode)
pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
break;
case CELL_CMD_STATE_BIND_VS:
+#if 0
spu_bind_vertex_shader(&draw,
(struct cell_shader_info *) &buffer[pos+1]);
+#endif
pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
break;
- case CELL_CMD_STATE_ATTRIB_FETCH: {
- struct cell_attribute_fetch_code *code =
- (struct cell_attribute_fetch_code *) &buffer[pos+1];
-
- mfc_get(attribute_fetch_code_buffer,
- (unsigned int) code->base, /* src */
- code->size,
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
-
- draw.vertex_fetch.code = attribute_fetch_code_buffer;
+ case CELL_CMD_STATE_ATTRIB_FETCH:
+ cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
+ &buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
break;
- }
- case CELL_CMD_STATE_LOGICOP: {
- struct cell_command_logicop *code =
- (struct cell_command_logicop *) &buffer[pos+1];
-
- mfc_get(logicop_code_buffer,
- (unsigned int) code->base, /* src */
- code->size,
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
-
- spu.logicop = (logicop_func) logicop_code_buffer;
- pos += (1 + ROUNDUP8(sizeof(struct cell_command_logicop)) / 8);
+ /*
+ * misc commands
+ */
+ case CELL_CMD_FINISH:
+ cmd_finish();
+ pos += 1;
+ break;
+ case CELL_CMD_RELEASE_VERTS:
+ {
+ struct cell_command_release_verts *release
+ = (struct cell_command_release_verts *) &buffer[pos];
+ cmd_release_verts(release);
+ pos += sizeof(*release) / 8;
+ }
break;
- }
case CELL_CMD_FLUSH_BUFFER_RANGE: {
struct cell_buffer_range *br = (struct cell_buffer_range *)
&buffer[pos+1];
@@ -618,7 +601,9 @@ main_loop(void)
exitFlag = 1;
break;
case CELL_CMD_VS_EXECUTE:
+#if 0
spu_execute_vertex_shader(&draw, &cmd.vs);
+#endif
break;
case CELL_CMD_BATCH:
cmd_batch(opcode);
@@ -643,6 +628,11 @@ one_time_init(void)
memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
invalidate_tex_cache();
+
+ /* Install default/fallback fragment processing function.
+ * This will normally be overriden by a code-gen'd function.
+ */
+ spu.fragment_ops = spu_fallback_fragment_ops;
}
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index e962e1426c..d40539da83 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -41,6 +41,10 @@
#define MAX_HEIGHT 1024
+/**
+ * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels.
+ * The data may be addressed through several different types.
+ */
typedef union {
ushort us[TILE_SIZE][TILE_SIZE];
uint ui[TILE_SIZE][TILE_SIZE];
@@ -56,38 +60,23 @@ typedef union {
#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */
-struct spu_frag_test_results {
- qword mask;
- qword depth;
- qword stencil;
-};
-
-typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask,
- qword pixel_depth, qword pixel_stencil, qword frag_depth,
- qword frag_alpha, qword facing);
-
-
-struct spu_blend_results {
- qword r;
- qword g;
- qword b;
- qword a;
-};
+/** Function for sampling textures */
+typedef vector float (*spu_sample_texture_func)(uint unit,
+ vector float texcoord);
-typedef struct spu_blend_results (*blend_func)(
- qword frag_r, qword frag_g, qword frag_b, qword frag_a,
- qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
- qword const_r, qword const_g, qword const_b, qword const_a);
+/** Function for performing per-fragment ops */
+typedef void (*spu_fragment_ops_func)(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragRed,
+ vector float fragGreen,
+ vector float fragBlue,
+ vector float fragAlpha,
+ vector unsigned int mask);
-typedef struct spu_blend_results (*logicop_func)(
- qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
- qword frag_r, qword frag_g, qword frag_b, qword frag_a,
- qword frag_mask);
-
-
-typedef vector float (*sample_texture_func)(uint unit, vector float texcoord);
-
-struct spu_framebuffer {
+struct spu_framebuffer
+{
void *color_start; /**< addr of color surface in main memory */
void *depth_start; /**< addr of depth surface in main memory */
enum pipe_format color_format;
@@ -99,6 +88,7 @@ struct spu_framebuffer {
uint depth_clear_value;
uint zsize; /**< 0, 2 or 4 bytes per Z */
+ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */
} ALIGN16_ATTRIB;
@@ -115,35 +105,31 @@ struct spu_texture
/**
- * All SPU global/context state will be in singleton object of this type:
+ * All SPU global/context state will be in a singleton object of this type:
*/
struct spu_global
{
+ /** One-time init/constant info */
struct cell_init_info init;
+ /*
+ * Current state
+ */
struct spu_framebuffer fb;
- boolean read_depth;
- boolean read_stencil;
- frag_test_func frag_test;
-
- boolean read_fb;
- blend_func blend;
- qword const_blend_color[4] ALIGN16_ATTRIB;
-
- logicop_func logicop;
-
+ struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
+ struct pipe_blend_state blend;
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
struct spu_texture texture[PIPE_MAX_SAMPLERS];
-
struct vertex_info vertex_info;
- /* XXX more state to come */
-
-
- /** current color and Z tiles */
+ /** Current color and Z tiles */
tile_t ctile ALIGN16_ATTRIB;
tile_t ztile ALIGN16_ATTRIB;
+ /** Read depth/stencil tiles? */
+ boolean read_depth;
+ boolean read_stencil;
+
/** Current tiles' status */
ubyte cur_ctile_status, cur_ztile_status;
@@ -151,11 +137,13 @@ struct spu_global
ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ /** Current fragment ops machine code */
+ uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS];
+ /** Current fragment ops function */
+ spu_fragment_ops_func fragment_ops;
- /** for converting RGBA to PIPE_FORMAT_x colors */
- vector unsigned char color_shuffle;
-
- sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
+ /** Current texture sampler function */
+ spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
} ALIGN16_ATTRIB;
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
index b4cffeeb32..03dd547845 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -1,211 +1,475 @@
-/*
- * (C) Copyright IBM Corporation 2008
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
/**
- * \file spu_per_fragment_op.c
- * SPU implementation various per-fragment operations.
- *
- * \author Ian Romanick <idr@us.ibm.com>
+ * \author Brian Paul
*/
+
+#include <transpose_matrix4x4.h>
#include "pipe/p_format.h"
#include "spu_main.h"
+#include "spu_colorpack.h"
#include "spu_per_fragment_op.h"
-#define ZERO 0x80
-
-static void
-read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
- enum pipe_format depth_format, qword *depth,
- qword *stencil)
-{
- const int ix = x / 2;
- const int iy = y / 2;
-
- switch (depth_format) {
- case PIPE_FORMAT_Z16_UNORM: {
- qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
-
- const qword shuf_vec = (qword) {
- ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
- ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7
- };
+#define LINEAR_QUAD_LAYOUT 1
- /* At even X values we want the first 4 shorts, and at odd X values we
- * want the second 4 shorts.
- */
- qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3));
- qword bias_mask = si_fsmbi(0x3333);
- qword sv = si_a(shuf_vec, si_and(bias_mask, bias));
-
- *depth = si_shufb(*ptr, *ptr, sv);
- *stencil = si_il(0);
- break;
- }
-
-
- case PIPE_FORMAT_Z32_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
-
- *depth = *ptr;
- *stencil = si_il(0);
- break;
- }
-
- case PIPE_FORMAT_Z24S8_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
- qword mask = si_fsmbi(0xEEEE);
-
- *depth = si_rotmai(si_and(*ptr, mask), -8);
- *stencil = si_andc(*ptr, mask);
- break;
+/**
+ * Called by rasterizer for each quad after the shader has run. Do
+ * all the per-fragment operations including alpha test, z test,
+ * stencil test, blend, colormask and logicops. This is a
+ * fallback/debug function. In reality we'll use a generated function
+ * produced by the PPU. But this function is useful for
+ * debug/validation.
+ */
+void
+spu_fallback_fragment_ops(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragR,
+ vector float fragG,
+ vector float fragB,
+ vector float fragA,
+ vector unsigned int mask)
+{
+ vector float frag_aos[4];
+ unsigned int c0, c1, c2, c3;
+
+ /* do alpha test */
+ if (spu.depth_stencil_alpha.alpha.enabled) {
+ vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
+ vector unsigned int amask;
+
+ switch (spu.depth_stencil_alpha.alpha.func) {
+ case PIPE_FUNC_LESS:
+ amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */
+ break;
+ case PIPE_FUNC_GREATER:
+ amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */
+ break;
+ case PIPE_FUNC_GEQUAL:
+ amask = spu_cmpgt(ref, fragA);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_LEQUAL:
+ amask = spu_cmpgt(fragA, ref);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_EQUAL:
+ amask = spu_cmpeq(ref, fragA);
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ amask = spu_cmpeq(ref, fragA);
+ amask = spu_nor(amask, amask);
+ break;
+ case PIPE_FUNC_ALWAYS:
+ amask = spu_splats(0xffffffffU);
+ break;
+ case PIPE_FUNC_NEVER:
+ amask = spu_splats( 0x0U);
+ break;
+ default:
+ ;
+ }
+
+ mask = spu_and(mask, amask);
}
-
- case PIPE_FORMAT_S8Z24_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
-
- *depth = si_and(*ptr, si_fsmbi(0x7777));
- *stencil = si_andi(si_roti(*ptr, 8), 0x0ff);
- break;
+ /* Z and/or stencil testing... */
+ if (spu.depth_stencil_alpha.depth.enabled ||
+ spu.depth_stencil_alpha.stencil[0].enabled) {
+
+ /* get four Z/Stencil values from tile */
+ vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
+ vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
+ vector unsigned int ifbZ = spu_and(ifbZS, mask24);
+ vector unsigned int ifbS = spu_andc(ifbZS, mask24);
+
+ if (spu.depth_stencil_alpha.stencil[0].enabled) {
+ /* do stencil test */
+ ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM);
+
+ }
+ else if (spu.depth_stencil_alpha.depth.enabled) {
+ /* do depth test */
+
+ ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM ||
+ spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM);
+
+ vector unsigned int ifragZ;
+ vector unsigned int zmask;
+
+ /* convert four fragZ from float to uint */
+ fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
+ ifragZ = spu_convtu(fragZ, 0);
+
+ /* do depth comparison, setting zmask with results */
+ switch (spu.depth_stencil_alpha.depth.func) {
+ case PIPE_FUNC_LESS:
+ zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
+ break;
+ case PIPE_FUNC_GREATER:
+ zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */
+ break;
+ case PIPE_FUNC_GEQUAL:
+ zmask = spu_cmpgt(ifbZ, ifragZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_LEQUAL:
+ zmask = spu_cmpgt(ifragZ, ifbZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_EQUAL:
+ zmask = spu_cmpeq(ifbZ, ifragZ);
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ zmask = spu_cmpeq(ifbZ, ifragZ);
+ zmask = spu_nor(zmask, zmask);
+ break;
+ case PIPE_FUNC_ALWAYS:
+ zmask = spu_splats(0xffffffffU);
+ break;
+ case PIPE_FUNC_NEVER:
+ zmask = spu_splats( 0x0U);
+ break;
+ default:
+ ;
+ }
+
+ mask = spu_and(mask, zmask);
+
+ /* merge framebuffer Z and fragment Z according to the mask */
+ ifbZ = spu_or(spu_and(ifragZ, mask),
+ spu_andc(ifbZ, mask));
+ }
+
+ if (spu_extract(spu_orx(mask), 0)) {
+ /* put new fragment Z/Stencil values back into Z/Stencil tile */
+ depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
+
+ spu.cur_ztile_status = TILE_STATUS_DIRTY;
+ }
}
-
- default:
- assert(0);
- break;
+ if (spu.blend.blend_enable) {
+ /* blending terms, misc regs */
+ vector float term1r, term1g, term1b, term1a;
+ vector float term2r, term2g, term2b, term2a;
+ vector float one, tmp;
+
+ vector float fbRGBA[4]; /* current framebuffer colors */
+
+ /* get colors from framebuffer/tile */
+ {
+ vector float fc[4];
+ uint c0, c1, c2, c3;
+
+#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
+ c0 = colorTile->ui[y][x*2+0];
+ c1 = colorTile->ui[y][x*2+1];
+ c2 = colorTile->ui[y][x*2+2];
+ c3 = colorTile->ui[y][x*2+3];
+#else
+ c0 = colorTile->ui[y+0][x+0];
+ c1 = colorTile->ui[y+0][x+1];
+ c2 = colorTile->ui[y+1][x+0];
+ c3 = colorTile->ui[y+1][x+1];
+#endif
+ switch (spu.fb.color_format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ fc[0] = spu_unpack_B8G8R8A8(c0);
+ fc[1] = spu_unpack_B8G8R8A8(c1);
+ fc[2] = spu_unpack_B8G8R8A8(c2);
+ fc[3] = spu_unpack_B8G8R8A8(c3);
+ break;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ fc[0] = spu_unpack_A8R8G8B8(c0);
+ fc[1] = spu_unpack_A8R8G8B8(c1);
+ fc[2] = spu_unpack_A8R8G8B8(c2);
+ fc[3] = spu_unpack_A8R8G8B8(c3);
+ break;
+ default:
+ ASSERT(0);
+ }
+ _transpose_matrix4x4(fbRGBA, fc);
+ }
+
+ /*
+ * Compute Src RGB terms
+ */
+ switch (spu.blend.rgb_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term1r = fragR;
+ term1g = fragG;
+ term1b = fragB;
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ term1r =
+ term1g =
+ term1b = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term1r = spu_mul(fragR, fragR);
+ term1g = spu_mul(fragG, fragG);
+ term1b = spu_mul(fragB, fragB);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term1r = spu_mul(fragR, fragA);
+ term1g = spu_mul(fragG, fragA);
+ term1b = spu_mul(fragB, fragA);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Src Alpha term
+ */
+ switch (spu.blend.alpha_src_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term1a = fragA;
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term1a = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term1a = spu_mul(fragA, fragA);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest RGB terms
+ */
+ switch (spu.blend.rgb_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term2r = fragR;
+ term2g = fragG;
+ term2b = fragB;
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ term2r =
+ term2g =
+ term2b = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term2r = spu_mul(fbRGBA[0], fragR);
+ term2g = spu_mul(fbRGBA[1], fragG);
+ term2b = spu_mul(fbRGBA[2], fragB);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term2r = spu_mul(fbRGBA[0], fragA);
+ term2g = spu_mul(fbRGBA[1], fragA);
+ term2b = spu_mul(fbRGBA[2], fragA);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ one = spu_splats(1.0f);
+ tmp = spu_sub(one, fragA);
+ term2r = spu_mul(fbRGBA[0], tmp);
+ term2g = spu_mul(fbRGBA[1], tmp);
+ term2b = spu_mul(fbRGBA[2], tmp);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Compute Dest Alpha term
+ */
+ switch (spu.blend.alpha_dst_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term2a = fragA;
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term2a = spu_splats(0.0f);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term2a = spu_mul(fbRGBA[3], fragA);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ one = spu_splats(1.0f);
+ tmp = spu_sub(one, fragA);
+ term2a = spu_mul(fbRGBA[3], tmp);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest RGB terms
+ */
+ switch (spu.blend.rgb_func) {
+ case PIPE_BLEND_ADD:
+ fragR = spu_add(term1r, term2r);
+ fragG = spu_add(term1g, term2g);
+ fragB = spu_add(term1b, term2b);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ fragR = spu_sub(term1r, term2r);
+ fragG = spu_sub(term1g, term2g);
+ fragB = spu_sub(term1b, term2b);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
+
+ /*
+ * Combine Src/Dest A term
+ */
+ switch (spu.blend.alpha_func) {
+ case PIPE_BLEND_ADD:
+ fragA = spu_add(term1a, term2a);
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ fragA = spu_sub(term1a, term2a);
+ break;
+ /* XXX more cases */
+ default:
+ ASSERT(0);
+ }
}
-}
-
-
-static void
-write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
- enum pipe_format depth_format,
- qword depth, qword stencil)
-{
- const int ix = x / 2;
- const int iy = y / 2;
-
- (void) stencil;
- switch (depth_format) {
- case PIPE_FORMAT_Z16_UNORM: {
- qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
- qword sv = ((ix & 0x01) == 0)
- ? (qword) { 2, 3, 6, 7, 10, 11, 14, 15,
- 24, 25, 26, 27, 28, 29, 30, 31 }
- : (qword) { 16, 17, 18, 19, 20 , 21, 22, 23,
- 2, 3, 6, 7, 10, 11, 14, 15 };
- *ptr = si_shufb(depth, *ptr, sv);
- break;
+ /*
+ * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
+ */
+#if 0
+ /* original code */
+ {
+ vector float frag_soa[4];
+ frag_soa[0] = fragR;
+ frag_soa[1] = fragG;
+ frag_soa[2] = fragB;
+ frag_soa[3] = fragA;
+ _transpose_matrix4x4(frag_aos, frag_soa);
}
-
-
- case PIPE_FORMAT_Z32_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
- *ptr = depth;
+#else
+ /* short-cut relying on function parameter layout: */
+ _transpose_matrix4x4(frag_aos, &fragR);
+ (void) fragG;
+ (void) fragB;
+#endif
+
+ /*
+ * Pack float colors into 32-bit RGBA words.
+ */
+ switch (spu.fb.color_format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ c0 = spu_pack_A8R8G8B8(frag_aos[0]);
+ c1 = spu_pack_A8R8G8B8(frag_aos[1]);
+ c2 = spu_pack_A8R8G8B8(frag_aos[2]);
+ c3 = spu_pack_A8R8G8B8(frag_aos[3]);
break;
- }
-
- case PIPE_FORMAT_Z24S8_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
- qword mask = si_fsmbi(0xEEEE);
-
- depth = si_shli(depth, 8);
- *ptr = si_selb(stencil, depth, mask);
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ c0 = spu_pack_B8G8R8A8(frag_aos[0]);
+ c1 = spu_pack_B8G8R8A8(frag_aos[1]);
+ c2 = spu_pack_B8G8R8A8(frag_aos[2]);
+ c3 = spu_pack_B8G8R8A8(frag_aos[3]);
break;
+ default:
+ fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
+ ASSERT(0);
}
- case PIPE_FORMAT_S8Z24_UNORM: {
- qword *ptr = (qword *) &buffer->ui4[iy][ix];
- qword mask = si_fsmbi(0x7777);
-
- stencil = si_shli(stencil, 24);
- *ptr = si_selb(stencil, depth, mask);
- break;
+ /*
+ * Color masking
+ */
+ if (spu.blend.colormask != 0xf) {
+ /* XXX to do */
+ /* apply color mask to 32-bit packed colors */
}
- default:
- assert(0);
- break;
+ /*
+ * Logic Ops
+ */
+ if (spu.blend.logicop_enable) {
+ /* XXX to do */
+ /* apply logicop to 32-bit packed colors */
}
-}
-qword
-spu_do_depth_stencil(int x, int y,
- qword frag_mask, qword frag_depth, qword frag_alpha,
- qword facing)
-{
- struct spu_frag_test_results result;
- qword pixel_depth;
- qword pixel_stencil;
-
- /* All of this preable code (everthing before the call to frag_test) should
- * be generated on the PPU and upload to the SPU.
+ /*
+ * If mask is non-zero, mark tile as dirty.
*/
- if (spu.read_depth || spu.read_stencil) {
- read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
- &pixel_depth, &pixel_stencil);
+ if (spu_extract(spu_orx(mask), 0)) {
+ spu.cur_ctile_status = TILE_STATUS_DIRTY;
}
-
- switch (spu.fb.depth_format) {
- case PIPE_FORMAT_Z16_UNORM:
- frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu)));
- frag_depth = si_cfltu(frag_depth, 0);
- break;
- case PIPE_FORMAT_Z32_UNORM:
- frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu)));
- frag_depth = si_cfltu(frag_depth, 0);
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_S8Z24_UNORM:
- frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu)));
- frag_depth = si_cfltu(frag_depth, 0);
- break;
- default:
- ASSERT(0);
- break;
+ else {
+ return;
}
- result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil,
- frag_depth, frag_alpha, facing);
-
- /* This code (everthing after the call to frag_test) should
- * be generated on the PPU and upload to the SPU.
+ /*
+ * Write new quad colors to the framebuffer/tile.
+ * Only write pixels where the corresponding mask word is set.
*/
- if (spu.read_depth || spu.read_stencil) {
- write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
- result.depth, result.stencil);
- }
-
- return result.mask;
+#if LINEAR_QUAD_LAYOUT
+ /*
+ * Quad layout:
+ * +--+--+--+--+
+ * |p0|p1|p2|p3|
+ * +--+--+--+--+
+ */
+ if (spu_extract(mask, 0))
+ colorTile->ui[y][x*2] = c0;
+ if (spu_extract(mask, 1))
+ colorTile->ui[y][x*2+1] = c1;
+ if (spu_extract(mask, 2))
+ colorTile->ui[y][x*2+2] = c2;
+ if (spu_extract(mask, 3))
+ colorTile->ui[y][x*2+3] = c3;
+#else
+ /*
+ * Quad layout:
+ * +--+--+
+ * |p0|p1|
+ * +--+--+
+ * |p2|p3|
+ * +--+--+
+ */
+ if (spu_extract(mask, 0))
+ colorTile->ui[y+0][x+0] = c0;
+ if (spu_extract(mask, 1))
+ colorTile->ui[y+0][x+1] = c1;
+ if (spu_extract(mask, 2))
+ colorTile->ui[y+1][x+0] = c2;
+ if (spu_extract(mask, 3))
+ colorTile->ui[y+1][x+1] = c3;
+#endif
}
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
index 6571258699..f817abf046 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
@@ -1,32 +1,44 @@
-/*
- * (C) Copyright IBM Corporation 2008
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
#ifndef SPU_PER_FRAGMENT_OP
#define SPU_PER_FRAGMENT_OP
-extern qword
-spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth,
- qword frag_alpha, qword facing);
+
+extern void
+spu_fallback_fragment_ops(uint x, uint y,
+ tile_t *colorTile,
+ tile_t *depthStencilTile,
+ vector float fragZ,
+ vector float fragRed,
+ vector float fragGreen,
+ vector float fragBlue,
+ vector float fragAlpha,
+ vector unsigned int mask);
+
#endif /* SPU_PER_FRAGMENT_OP */
diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c
index 5051774f00..117b8a36f8 100644
--- a/src/gallium/drivers/cell/spu/spu_texture.c
+++ b/src/gallium/drivers/cell/spu/spu_texture.c
@@ -97,7 +97,7 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
const qword offset_y = si_andi((qword) y, 0x1f);
const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row);
- const qword tile_size = (qword) spu_splats(sizeof(tile_t));
+ const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
tile_offset = si_mpy((qword) tile_offset, tile_size);
diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c
index 12dc246328..216a33126b 100644
--- a/src/gallium/drivers/cell/spu/spu_tile.c
+++ b/src/gallium/drivers/cell/spu/spu_tile.c
@@ -31,6 +31,9 @@
#include "spu_main.h"
+/**
+ * Get tile of color or Z values from main memory, put into SPU memory.
+ */
void
get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf)
{
@@ -56,6 +59,9 @@ get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf)
}
+/**
+ * Move tile of color or Z values from SPU memory to main memory.
+ */
void
put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
{
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 2a4e0b423c..f02cdd1f76 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -38,7 +38,6 @@
#include "spu_texture.h"
#include "spu_tile.h"
#include "spu_tri.h"
-#include "spu_per_fragment_op.h"
/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
@@ -209,7 +208,7 @@ clip_emit_quad(struct setup_stage *setup)
/**
* Evaluate attribute coefficients (plane equations) to compute
* attribute values for the four fragments in a quad.
- * Eg: four colors will be compute.
+ * Eg: four colors will be computed (in AoS format).
*/
static INLINE void
eval_coeff(uint slot, float x, float y, vector float result[4])
@@ -255,31 +254,6 @@ eval_z(float x, float y)
}
-static INLINE mask_t
-do_depth_test(int x, int y, mask_t quadmask)
-{
- float4 zvals;
- mask_t mask;
-
- if (spu.fb.depth_format == PIPE_FORMAT_NONE)
- return quadmask;
-
- zvals.v = eval_z((float) x, (float) y);
-
- mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx,
- y - setup.cliprect_miny,
- (qword) quadmask,
- (qword) zvals.v,
- (qword) spu_splats((unsigned char) 0x0ffu),
- (qword) spu_splats((unsigned int) 0x01u));
-
- if (spu_extract(spu_orx(mask), 0))
- spu.cur_ztile_status = TILE_STATUS_DIRTY;
-
- return mask;
-}
-
-
/**
* Emit a quad (pass to next stage). No clipping is done.
* Note: about 1/5 to 1/7 of the time, mask is zero and this function
@@ -289,18 +263,6 @@ do_depth_test(int x, int y, mask_t quadmask)
static INLINE void
emit_quad( int x, int y, mask_t mask )
{
-#if 0
- struct softpipe_context *sp = setup.softpipe;
- setup.quad.x0 = x;
- setup.quad.y0 = y;
- setup.quad.mask = mask;
- sp->quad.first->run(sp->quad.first, &setup.quad);
-#else
-
- if (spu.read_depth) {
- mask = do_depth_test(x, y, mask);
- }
-
/* If any bits in mask are set... */
if (spu_extract(spu_orx(mask), 0)) {
const int ix = x - setup.cliprect_minx;
@@ -308,6 +270,7 @@ emit_quad( int x, int y, mask_t mask )
vector float colors[4];
spu.cur_ctile_status = TILE_STATUS_DIRTY;
+ spu.cur_ztile_status = TILE_STATUS_DIRTY;
if (spu.texture[0].start) {
/* texture mapping */
@@ -355,55 +318,29 @@ emit_quad( int x, int y, mask_t mask )
}
- /* Convert fragment data from AoS to SoA format.
- */
- qword soa_frag[4];
- _transpose_matrix4x4((vec_float4 *) soa_frag, colors);
+ {
+ /* Convert fragment data from AoS to SoA format.
+ * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
+ * This is temporary!
+ */
+ vector float soa_frag[4];
+ _transpose_matrix4x4(soa_frag, colors);
- /* Read the current framebuffer values.
- */
- const qword pix[4] = {
- (qword) spu_splats(spu.ctile.ui[iy+0][ix+0]),
- (qword) spu_splats(spu.ctile.ui[iy+0][ix+1]),
- (qword) spu_splats(spu.ctile.ui[iy+1][ix+0]),
- (qword) spu_splats(spu.ctile.ui[iy+1][ix+1]),
- };
+ float4 fragZ;
- qword soa_pix[4];
+ fragZ.v = eval_z((float) x, (float) y);
- if (spu.read_fb) {
- /* Convert pixel data from AoS to SoA format.
+ /* Do all per-fragment/quad operations here, including:
+ * alpha test, z test, stencil test, blend and framebuffer writing.
*/
- vec_float4 aos_pix[4] = {
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]),
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]),
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]),
- spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]),
- };
-
- _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix);
+ spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile,
+ fragZ.v,
+ soa_frag[0], soa_frag[1],
+ soa_frag[2], soa_frag[3],
+ mask);
}
-
- struct spu_blend_results result =
- (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3],
- soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3],
- spu.const_blend_color[0], spu.const_blend_color[1],
- spu.const_blend_color[2], spu.const_blend_color[3]);
-
-
- /* Convert final pixel data from SoA to AoS format.
- */
- result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3],
- result.r, result.g, result.b, result.a,
- (qword) mask);
-
- spu.ctile.ui[iy+0][ix+0] = spu_extract((vec_uint4) result.r, 0);
- spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0);
- spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0);
- spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0);
}
-#endif
}
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
index b25ca4eafc..b8a0d4a265 100644
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ b/src/gallium/drivers/cell/spu/spu_util.c
@@ -1,4 +1,5 @@
+#include "cell/common.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_debug.h"
#include "tgsi/tgsi_parse.h"
@@ -20,7 +21,7 @@ tgsi_util_get_src_register_swizzle(
case 3:
return reg->SwizzleW;
default:
- assert( 0 );
+ ASSERT( 0 );
}
return 0;
}
@@ -40,7 +41,7 @@ tgsi_util_get_src_register_extswizzle(
case 3:
return reg->ExtSwizzleW;
default:
- assert( 0 );
+ ASSERT( 0 );
}
return 0;
}
@@ -60,12 +61,12 @@ tgsi_util_get_full_src_register_extswizzle(
&reg->SrcRegisterExtSwz,
component );
- assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
- assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
- assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
- assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
- assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
- assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
+ ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
+ ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
+ ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
+ ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
+ ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
+ ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
/*
* Second, calculate the simple swizzle for the unswizzled channel index.
@@ -95,7 +96,7 @@ tgsi_util_get_src_register_extnegate(
case 3:
return reg->NegateW;
default:
- assert( 0 );
+ ASSERT( 0 );
}
return 0;
}
@@ -120,7 +121,7 @@ tgsi_util_set_src_register_extnegate(
reg->NegateW = negate;
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
}
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
index 26f2363749..03375d84a5 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
@@ -92,7 +92,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
unsigned attr;
- assert(count <= 4);
+ ASSERT(count <= 4);
#if DRAW_DBG
printf("SPU: %s count = %u, nr_attrs = %u\n",
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
index f81d19fea1..fbe5b34d39 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
@@ -112,7 +112,7 @@ run_vertex_program(struct spu_vs_context *draw,
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
- assert(count <= 4);
+ ASSERT(count <= 4);
machine->Processor = TGSI_PROCESSOR_VERTEX;
diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c
index c6776716a2..6dd3eda85d 100644
--- a/src/gallium/drivers/i915simple/i915_context.c
+++ b/src/gallium/drivers/i915simple/i915_context.c
@@ -35,6 +35,7 @@
#include "draw/draw_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
#include "util/u_memory.h"
#include "pipe/p_screen.h"
@@ -72,7 +73,7 @@ i915_draw_range_elements(struct pipe_context *pipe,
*/
for (i = 0; i < i915->num_vertex_buffers; i++) {
void *buf
- = pipe->winsys->buffer_map(pipe->winsys,
+ = pipe_buffer_map(pipe->screen,
i915->vertex_buffer[i].buffer,
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_vertex_buffer(draw, i, buf);
@@ -80,7 +81,7 @@ i915_draw_range_elements(struct pipe_context *pipe,
/* Map index buffer, if present */
if (indexBuffer) {
void *mapped_indexes
- = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
+ = pipe_buffer_map(pipe->screen, indexBuffer,
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_element_buffer_range(draw, indexSize,
min_index,
@@ -105,11 +106,11 @@ i915_draw_range_elements(struct pipe_context *pipe,
* unmap vertex/index buffers
*/
for (i = 0; i < i915->num_vertex_buffers; i++) {
- pipe->winsys->buffer_unmap(pipe->winsys, i915->vertex_buffer[i].buffer);
+ pipe_buffer_unmap(pipe->screen, i915->vertex_buffer[i].buffer);
draw_set_mapped_vertex_buffer(draw, i, NULL);
}
if (indexBuffer) {
- pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ pipe_buffer_unmap(pipe->screen, indexBuffer);
draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL);
}
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
index e4ece55098..4fda1ab64f 100644
--- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -115,7 +115,7 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render,
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
struct i915_context *i915 = i915_render->i915;
- struct pipe_winsys *winsys = i915->pipe.winsys;
+ struct pipe_screen *screen = i915->pipe.screen;
size_t size = (size_t)vertex_size * (size_t)nr_vertices;
/* FIXME: handle failure */
@@ -124,20 +124,20 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render,
if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) {
} else {
i915->vbo_flushed = 0;
- pipe_buffer_reference(winsys, &i915_render->vbo, NULL);
+ pipe_buffer_reference(screen, &i915_render->vbo, NULL);
}
if (!i915_render->vbo) {
i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size);
i915_render->vbo_offset = 0;
- i915_render->vbo = winsys->buffer_create(winsys,
- 64,
- I915_BUFFER_USAGE_LIT_VERTEX,
- i915_render->vbo_size);
- i915_render->vbo_ptr = winsys->buffer_map(winsys,
- i915_render->vbo,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- winsys->buffer_unmap(winsys, i915_render->vbo);
+ i915_render->vbo = pipe_buffer_create(screen,
+ 64,
+ I915_BUFFER_USAGE_LIT_VERTEX,
+ i915_render->vbo_size);
+ i915_render->vbo_ptr = pipe_buffer_map(screen,
+ i915_render->vbo,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+ pipe_buffer_unmap(screen, i915_render->vbo);
}
i915->vbo = i915_render->vbo;
@@ -488,7 +488,7 @@ static struct vbuf_render *
i915_vbuf_render_create( struct i915_context *i915 )
{
struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render);
- struct pipe_winsys *winsys = i915->pipe.winsys;
+ struct pipe_screen *screen = i915->pipe.screen;
i915_render->i915 = i915;
@@ -510,14 +510,14 @@ i915_vbuf_render_create( struct i915_context *i915 )
i915_render->vbo_alloc_size = 128 * 4096;
i915_render->vbo_size = i915_render->vbo_alloc_size;
i915_render->vbo_offset = 0;
- i915_render->vbo = winsys->buffer_create(winsys,
- 64,
- I915_BUFFER_USAGE_LIT_VERTEX,
- i915_render->vbo_size);
- i915_render->vbo_ptr = winsys->buffer_map(winsys,
- i915_render->vbo,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- winsys->buffer_unmap(winsys, i915_render->vbo);
+ i915_render->vbo = pipe_buffer_create(screen,
+ 64,
+ I915_BUFFER_USAGE_LIT_VERTEX,
+ i915_render->vbo_size);
+ i915_render->vbo_ptr = pipe_buffer_map(screen,
+ i915_render->vbo,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+ pipe_buffer_unmap(screen, i915_render->vbo);
return &i915_render->base;
}
diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c
index e9e40c3f0b..1c976082df 100644
--- a/src/gallium/drivers/i915simple/i915_screen.c
+++ b/src/gallium/drivers/i915simple/i915_screen.c
@@ -28,6 +28,7 @@
#include "util/u_memory.h"
#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
#include "util/u_string.h"
#include "i915_reg.h"
@@ -207,7 +208,7 @@ i915_surface_map( struct pipe_screen *screen,
struct pipe_surface *surface,
unsigned flags )
{
- char *map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags );
+ char *map = pipe_buffer_map( screen, surface->buffer, flags );
if (map == NULL)
return NULL;
@@ -226,7 +227,7 @@ static void
i915_surface_unmap(struct pipe_screen *screen,
struct pipe_surface *surface)
{
- screen->winsys->buffer_unmap( screen->winsys, surface->buffer );
+ pipe_buffer_unmap( screen, surface->buffer );
}
diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c
index 32344da4d5..bd87217063 100644
--- a/src/gallium/drivers/i915simple/i915_texture.c
+++ b/src/gallium/drivers/i915simple/i915_texture.c
@@ -80,7 +80,7 @@ static unsigned
power_of_two(unsigned x)
{
unsigned value = 1;
- while (value <= x)
+ while (value < x)
value = value << 1;
return value;
}
@@ -207,7 +207,7 @@ i945_miptree_layout_2d( struct i915_texture *tex )
unsigned nblocksy = pt->nblocksy[0];
#if 0 /* used for tiled display targets */
- if (pt->last_level == 0 && pt->cpp == 4)
+ if (pt->last_level == 0 && pt->block.size == 4)
if (i915_displaytarget_layout(tex))
return;
#endif
@@ -645,7 +645,7 @@ i915_texture_release(struct pipe_screen *screen,
DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
*/
- pipe_buffer_reference(screen->winsys, &tex->buffer, NULL);
+ pipe_buffer_reference(screen, &tex->buffer, NULL);
for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
if (tex->image_offset[i])
@@ -684,7 +684,7 @@ i915_get_tex_surface(struct pipe_screen *screen,
ps->refcount = 1;
ps->winsys = ws;
pipe_texture_reference(&ps->texture, pt);
- pipe_buffer_reference(ws, &ps->buffer, tex->buffer);
+ pipe_buffer_reference(screen, &ps->buffer, tex->buffer);
ps->format = pt->format;
ps->width = pt->width[level];
ps->height = pt->height[level];
@@ -728,7 +728,7 @@ i915_texture_blanket(struct pipe_screen * screen,
i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1);
i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
- pipe_buffer_reference(screen->winsys, &tex->buffer, buffer);
+ pipe_buffer_reference(screen, &tex->buffer, buffer);
return &tex->base;
}
@@ -756,7 +756,7 @@ i915_tex_surface_release(struct pipe_screen *screen,
}
pipe_texture_reference(&surf->texture, NULL);
- pipe_buffer_reference(screen->winsys, &surf->buffer, NULL);
+ pipe_buffer_reference(screen, &surf->buffer, NULL);
FREE(surf);
}
diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c
index 78d4c0e411..007dc8f9de 100644
--- a/src/gallium/drivers/i965simple/brw_state_pool.c
+++ b/src/gallium/drivers/i965simple/brw_state_pool.c
@@ -92,10 +92,10 @@ static void brw_init_pool( struct brw_context *brw,
pool->size = size;
pool->brw = brw;
- pool->buffer = brw->pipe.winsys->buffer_create(brw->pipe.winsys,
- 4096,
- 0 /* DRM_BO_FLAG_MEM_TT */,
- size);
+ pool->buffer = pipe_buffer_create(brw->pipe.screen,
+ 4096,
+ 0 /* DRM_BO_FLAG_MEM_TT */,
+ size);
}
static void brw_destroy_pool( struct brw_context *brw,
@@ -103,7 +103,7 @@ static void brw_destroy_pool( struct brw_context *brw,
{
struct brw_mem_pool *pool = &brw->pool[pool_id];
- pipe_buffer_reference( pool->brw->pipe.winsys,
+ pipe_buffer_reference( pool->brw->pipe.screen,
&pool->buffer,
NULL );
}
diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c
index 05eda9d1f2..cc0c665e02 100644
--- a/src/gallium/drivers/i965simple/brw_tex_layout.c
+++ b/src/gallium/drivers/i965simple/brw_tex_layout.c
@@ -330,7 +330,7 @@ brw_texture_release_screen(struct pipe_screen *screen,
DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
*/
- pipe_buffer_reference(ws, &tex->buffer, NULL);
+ winsys_buffer_reference(ws, &tex->buffer, NULL);
for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
if (tex->image_offset[i])
@@ -369,7 +369,7 @@ brw_get_tex_surface_screen(struct pipe_screen *screen,
if (ps) {
assert(ps->format);
assert(ps->refcount);
- pipe_buffer_reference(ws, &ps->buffer, tex->buffer);
+ winsys_buffer_reference(ws, &ps->buffer, tex->buffer);
ps->format = pt->format;
ps->width = pt->width[level];
ps->height = pt->height[level];
diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
index 1236706891..dfa46c9fb7 100644
--- a/src/gallium/drivers/softpipe/sp_clear.c
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -31,6 +31,7 @@
#include "pipe/p_defines.h"
+#include "util/u_pack_color.h"
#include "sp_clear.h"
#include "sp_context.h"
#include "sp_surface.h"
@@ -39,8 +40,28 @@
/**
+ * Convert packed pixel from one format to another.
+ */
+static unsigned
+convert_color(enum pipe_format srcFormat, unsigned srcColor,
+ enum pipe_format dstFormat)
+{
+ ubyte r, g, b, a;
+ unsigned dstColor;
+
+ util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a);
+ util_pack_color_ub(r, g, b, a, dstFormat, &dstColor);
+
+ return dstColor;
+}
+
+
+
+/**
* Clear the given surface to the specified value.
* No masking, no scissor (clear entire buffer).
+ * Note: when clearing a color buffer, the clearValue is always
+ * encoded as PIPE_FORMAT_A8R8G8B8_UNORM.
*/
void
softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps,
@@ -66,7 +87,15 @@ softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps,
for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) {
if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) {
- sp_tile_cache_clear(softpipe->cbuf_cache[i], clearValue);
+ unsigned cv;
+ if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) {
+ cv = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue,
+ ps->format);
+ }
+ else {
+ cv = clearValue;
+ }
+ sp_tile_cache_clear(softpipe->cbuf_cache[i], cv);
softpipe->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_CLEAR;
}
}
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index dda90f760a..cd1e6663d8 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -92,17 +92,19 @@ static void softpipe_destroy( struct pipe_context *pipe )
if (softpipe->draw)
draw_destroy( softpipe->draw );
- softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple );
- softpipe->quad.earlyz->destroy( softpipe->quad.earlyz );
- softpipe->quad.shade->destroy( softpipe->quad.shade );
- softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test );
- softpipe->quad.depth_test->destroy( softpipe->quad.depth_test );
- softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test );
- softpipe->quad.occlusion->destroy( softpipe->quad.occlusion );
- softpipe->quad.coverage->destroy( softpipe->quad.coverage );
- softpipe->quad.blend->destroy( softpipe->quad.blend );
- softpipe->quad.colormask->destroy( softpipe->quad.colormask );
- softpipe->quad.output->destroy( softpipe->quad.output );
+ for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
+ softpipe->quad[i].polygon_stipple->destroy( softpipe->quad[i].polygon_stipple );
+ softpipe->quad[i].earlyz->destroy( softpipe->quad[i].earlyz );
+ softpipe->quad[i].shade->destroy( softpipe->quad[i].shade );
+ softpipe->quad[i].alpha_test->destroy( softpipe->quad[i].alpha_test );
+ softpipe->quad[i].depth_test->destroy( softpipe->quad[i].depth_test );
+ softpipe->quad[i].stencil_test->destroy( softpipe->quad[i].stencil_test );
+ softpipe->quad[i].occlusion->destroy( softpipe->quad[i].occlusion );
+ softpipe->quad[i].coverage->destroy( softpipe->quad[i].coverage );
+ softpipe->quad[i].blend->destroy( softpipe->quad[i].blend );
+ softpipe->quad[i].colormask->destroy( softpipe->quad[i].colormask );
+ softpipe->quad[i].output->destroy( softpipe->quad[i].output );
+ }
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
sp_destroy_tile_cache(softpipe->cbuf_cache[i]);
@@ -113,7 +115,7 @@ static void softpipe_destroy( struct pipe_context *pipe )
for (i = 0; i < Elements(softpipe->constants); i++) {
if (softpipe->constants[i].buffer) {
- pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL);
+ winsys_buffer_reference(ws, &softpipe->constants[i].buffer, NULL);
}
}
@@ -205,17 +207,19 @@ softpipe_create( struct pipe_screen *screen,
/* setup quad rendering stages */
- softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe);
- softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe);
- softpipe->quad.shade = sp_quad_shade_stage(softpipe);
- softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe);
- softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe);
- softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe);
- softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe);
- softpipe->quad.coverage = sp_quad_coverage_stage(softpipe);
- softpipe->quad.blend = sp_quad_blend_stage(softpipe);
- softpipe->quad.colormask = sp_quad_colormask_stage(softpipe);
- softpipe->quad.output = sp_quad_output_stage(softpipe);
+ for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
+ softpipe->quad[i].polygon_stipple = sp_quad_polygon_stipple_stage(softpipe);
+ softpipe->quad[i].earlyz = sp_quad_earlyz_stage(softpipe);
+ softpipe->quad[i].shade = sp_quad_shade_stage(softpipe);
+ softpipe->quad[i].alpha_test = sp_quad_alpha_test_stage(softpipe);
+ softpipe->quad[i].depth_test = sp_quad_depth_test_stage(softpipe);
+ softpipe->quad[i].stencil_test = sp_quad_stencil_test_stage(softpipe);
+ softpipe->quad[i].occlusion = sp_quad_occlusion_stage(softpipe);
+ softpipe->quad[i].coverage = sp_quad_coverage_stage(softpipe);
+ softpipe->quad[i].blend = sp_quad_blend_stage(softpipe);
+ softpipe->quad[i].colormask = sp_quad_colormask_stage(softpipe);
+ softpipe->quad[i].output = sp_quad_output_stage(softpipe);
+ }
/*
* Create drawing context and plug our rendering stage into it.
@@ -257,3 +261,4 @@ softpipe_create( struct pipe_screen *screen,
softpipe_destroy(&softpipe->pipe);
return NULL;
}
+
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 078886f93c..2b9a2a8ee5 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -45,6 +45,10 @@
*/
#define USE_DRAW_STAGE_PSTIPPLE 1
+/* Number of threads working on individual quads.
+ * Setting to 1 disables this feature.
+ */
+#define SP_NUM_QUAD_THREADS 1
struct softpipe_winsys;
struct softpipe_vbuf_render;
@@ -133,7 +137,7 @@ struct softpipe_context {
struct quad_stage *output;
struct quad_stage *first; /**< points to one of the above stages */
- } quad;
+ } quad[SP_NUM_QUAD_THREADS];
/** The primitive drawing context */
struct draw_context *draw;
@@ -151,13 +155,11 @@ struct softpipe_context {
};
-
-
static INLINE struct softpipe_context *
softpipe_context( struct pipe_context *pipe )
{
return (struct softpipe_context *)pipe;
}
-
#endif /* SP_CONTEXT_H */
+
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 12b44a8211..424bd56846 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -34,6 +34,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_context.h"
#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
#include "sp_context.h"
#include "sp_state.h"
@@ -135,7 +136,7 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
*/
for (i = 0; i < sp->num_vertex_buffers; i++) {
void *buf
- = pipe->winsys->buffer_map(pipe->winsys,
+ = pipe_buffer_map(pipe->screen,
sp->vertex_buffer[i].buffer,
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_vertex_buffer(draw, i, buf);
@@ -143,7 +144,7 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
/* Map index buffer, if present */
if (indexBuffer) {
void *mapped_indexes
- = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
+ = pipe_buffer_map(pipe->screen, indexBuffer,
PIPE_BUFFER_USAGE_CPU_READ);
draw_set_mapped_element_buffer_range(draw, indexSize,
min_index,
@@ -164,11 +165,11 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
*/
for (i = 0; i < sp->num_vertex_buffers; i++) {
draw_set_mapped_vertex_buffer(draw, i, NULL);
- pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer);
+ pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer);
}
if (indexBuffer) {
draw_set_mapped_element_buffer(draw, 0, NULL);
- pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+ pipe_buffer_unmap(pipe->screen, indexBuffer);
}
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
index e03994b63b..401764bb43 100644
--- a/src/gallium/drivers/softpipe/sp_flush.c
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -73,6 +73,19 @@ softpipe_flush( struct pipe_context *pipe,
softpipe_unmap_surfaces(softpipe);
}
+ /* Enable to dump BMPs of the color/depth buffers each frame */
+#if 0
+ if(flags & PIPE_FLUSH_FRAME) {
+ static unsigned frame_no = 1;
+ static char filename[256];
+ util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no);
+ debug_dump_surface_bmp(filename, softpipe->framebuffer.cbufs[0]);
+ util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no);
+ debug_dump_surface_bmp(filename, softpipe->framebuffer.zsbuf);
+ ++frame_no;
+ }
+#endif
+
if (fence)
*fence = NULL;
}
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index d0456731be..701ee4c72f 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -106,7 +106,7 @@ exec_run( const struct sp_fragment_shader *base,
/* Compute X, Y, Z, W vals for this quad */
sp_setup_pos_vector(quad->posCoef,
- (float)quad->x0, (float)quad->y0,
+ (float)quad->input.x0, (float)quad->input.y0,
&machine->QuadPos);
return tgsi_exec_machine_run( machine );
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 35653a8e48..496ed43df2 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -88,7 +88,7 @@ fs_sse_run( const struct sp_fragment_shader *base,
/* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */
sp_setup_pos_vector(quad->posCoef,
- (float)quad->x0, (float)quad->y0,
+ (float)quad->input.x0, (float)quad->input.y0,
machine->Temps);
/* init kill mask */
diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h
index ae2ee210fc..4a42cb3c19 100644
--- a/src/gallium/drivers/softpipe/sp_headers.h
+++ b/src/gallium/drivers/softpipe/sp_headers.h
@@ -59,20 +59,31 @@
* Encodes everything we need to know about a 2x2 pixel block. Uses
* "Channel-Serial" or "SoA" layout.
*/
-struct quad_header {
+struct quad_header_input
+{
int x0;
int y0;
- unsigned mask:4;
+ float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */
unsigned facing:1; /**< Front (0) or back (1) facing? */
unsigned prim:2; /**< PRIM_POINT, LINE, TRI */
+};
+
+struct quad_header_inout
+{
+ unsigned mask:4;
+};
- struct {
- /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
- float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
- float depth[QUAD_SIZE];
- } outputs;
+struct quad_header_output
+{
+ /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
+ float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
+ float depth[QUAD_SIZE];
+};
- float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */
+struct quad_header {
+ struct quad_header_input input;
+ struct quad_header_inout inout;
+ struct quad_header_output output;
const struct tgsi_interp_coef *coef;
const struct tgsi_interp_coef *posCoef;
@@ -80,5 +91,5 @@ struct quad_header {
unsigned nr_attrs;
};
-
#endif /* SP_HEADERS_H */
+
diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c
index bc83d78ea1..892ef87ee9 100644
--- a/src/gallium/drivers/softpipe/sp_quad.c
+++ b/src/gallium/drivers/softpipe/sp_quad.c
@@ -33,29 +33,33 @@
static void
sp_push_quad_first(
struct softpipe_context *sp,
- struct quad_stage *quad )
+ struct quad_stage *quad,
+ uint i )
{
- quad->next = sp->quad.first;
- sp->quad.first = quad;
+ quad->next = sp->quad[i].first;
+ sp->quad[i].first = quad;
}
static void
sp_build_depth_stencil(
- struct softpipe_context *sp )
+ struct softpipe_context *sp,
+ uint i )
{
if (sp->depth_stencil->stencil[0].enabled ||
sp->depth_stencil->stencil[1].enabled) {
- sp_push_quad_first( sp, sp->quad.stencil_test );
+ sp_push_quad_first( sp, sp->quad[i].stencil_test, i );
}
else if (sp->depth_stencil->depth.enabled &&
sp->framebuffer.zsbuf) {
- sp_push_quad_first( sp, sp->quad.depth_test );
+ sp_push_quad_first( sp, sp->quad[i].depth_test, i );
}
}
void
sp_build_quad_pipeline(struct softpipe_context *sp)
{
+ uint i;
+
boolean early_depth_test =
sp->depth_stencil->depth.enabled &&
sp->framebuffer.zsbuf &&
@@ -64,49 +68,51 @@ sp_build_quad_pipeline(struct softpipe_context *sp)
!sp->fs->info.writes_z;
/* build up the pipeline in reverse order... */
-
- sp->quad.first = sp->quad.output;
-
- if (sp->blend->colormask != 0xf) {
- sp_push_quad_first( sp, sp->quad.colormask );
- }
-
- if (sp->blend->blend_enable ||
- sp->blend->logicop_enable) {
- sp_push_quad_first( sp, sp->quad.blend );
- }
-
- if (sp->depth_stencil->depth.occlusion_count) {
- sp_push_quad_first( sp, sp->quad.occlusion );
- }
-
- if (sp->rasterizer->poly_smooth ||
- sp->rasterizer->line_smooth ||
- sp->rasterizer->point_smooth) {
- sp_push_quad_first( sp, sp->quad.coverage );
- }
-
- if (!early_depth_test) {
- sp_build_depth_stencil( sp );
- }
-
- if (sp->depth_stencil->alpha.enabled) {
- sp_push_quad_first( sp, sp->quad.alpha_test );
- }
-
- /* XXX always enable shader? */
- if (1) {
- sp_push_quad_first( sp, sp->quad.shade );
- }
-
- if (early_depth_test) {
- sp_build_depth_stencil( sp );
- sp_push_quad_first( sp, sp->quad.earlyz );
- }
+ for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
+ sp->quad[i].first = sp->quad[i].output;
+
+ if (sp->blend->colormask != 0xf) {
+ sp_push_quad_first( sp, sp->quad[i].colormask, i );
+ }
+
+ if (sp->blend->blend_enable ||
+ sp->blend->logicop_enable) {
+ sp_push_quad_first( sp, sp->quad[i].blend, i );
+ }
+
+ if (sp->depth_stencil->depth.occlusion_count) {
+ sp_push_quad_first( sp, sp->quad[i].occlusion, i );
+ }
+
+ if (sp->rasterizer->poly_smooth ||
+ sp->rasterizer->line_smooth ||
+ sp->rasterizer->point_smooth) {
+ sp_push_quad_first( sp, sp->quad[i].coverage, i );
+ }
+
+ if (!early_depth_test) {
+ sp_build_depth_stencil( sp, i );
+ }
+
+ if (sp->depth_stencil->alpha.enabled) {
+ sp_push_quad_first( sp, sp->quad[i].alpha_test, i );
+ }
+
+ /* XXX always enable shader? */
+ if (1) {
+ sp_push_quad_first( sp, sp->quad[i].shade, i );
+ }
+
+ if (early_depth_test) {
+ sp_build_depth_stencil( sp, i );
+ sp_push_quad_first( sp, sp->quad[i].earlyz, i );
+ }
#if !USE_DRAW_STAGE_PSTIPPLE
- if (sp->rasterizer->poly_stipple_enable) {
- sp_push_quad_first( sp, sp->quad.polygon_stipple );
- }
+ if (sp->rasterizer->poly_stipple_enable) {
+ sp_push_quad_first( sp, sp->quad[i].polygon_stipple, i );
+ }
#endif
+ }
}
+
diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
index 7d3580fb4f..5bebd141e9 100644
--- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
@@ -17,11 +17,10 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad)
const float ref = softpipe->depth_stencil->alpha.ref;
unsigned passMask = 0x0, j;
const uint cbuf = 0; /* only output[0].alpha is tested */
- const float *aaaa = quad->outputs.color[cbuf][3];
+ const float *aaaa = quad->output.color[cbuf][3];
switch (softpipe->depth_stencil->alpha.func) {
case PIPE_FUNC_NEVER:
- quad->mask = 0x0;
break;
case PIPE_FUNC_LESS:
/*
@@ -76,9 +75,9 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad)
assert(0);
}
- quad->mask &= passMask;
+ quad->inout.mask &= passMask;
- if (quad->mask)
+ if (quad->inout.mask)
qs->next->run(qs->next, quad);
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index a834accb86..6f64c6e584 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -114,14 +114,14 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad)
struct softpipe_cached_tile *
tile = sp_get_cached_tile(softpipe,
softpipe->cbuf_cache[cbuf],
- quad->x0, quad->y0);
- float (*quadColor)[4] = quad->outputs.color[cbuf];
+ quad->input.x0, quad->input.y0);
+ float (*quadColor)[4] = quad->output.color[cbuf];
uint i, j;
/* get/swizzle dest colors */
for (j = 0; j < QUAD_SIZE; j++) {
- int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1);
- int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1);
+ int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
+ int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
for (i = 0; i < 4; i++) {
dest[i][j] = tile->data.color[y][x][i];
}
@@ -244,14 +244,14 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
struct softpipe_cached_tile *tile
= sp_get_cached_tile(softpipe,
softpipe->cbuf_cache[cbuf],
- quad->x0, quad->y0);
- float (*quadColor)[4] = quad->outputs.color[cbuf];
+ quad->input.x0, quad->input.y0);
+ float (*quadColor)[4] = quad->output.color[cbuf];
uint i, j;
/* get/swizzle dest colors */
for (j = 0; j < QUAD_SIZE; j++) {
- int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1);
- int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1);
+ int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
+ int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
for (i = 0; i < 4; i++) {
dest[i][j] = tile->data.color[y][x][i];
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c
index f72f31db97..f32bdfab78 100644
--- a/src/gallium/drivers/softpipe/sp_quad_colormask.c
+++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c
@@ -56,14 +56,14 @@ colormask_quad(struct quad_stage *qs, struct quad_header *quad)
struct softpipe_cached_tile *tile
= sp_get_cached_tile(softpipe,
softpipe->cbuf_cache[cbuf],
- quad->x0, quad->y0);
- float (*quadColor)[4] = quad->outputs.color[cbuf];
+ quad->input.x0, quad->input.y0);
+ float (*quadColor)[4] = quad->output.color[cbuf];
uint i, j;
/* get/swizzle dest colors */
for (j = 0; j < QUAD_SIZE; j++) {
- int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1);
- int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1);
+ int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
+ int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
for (i = 0; i < 4; i++) {
dest[i][j] = tile->data.color[y][x][i];
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c
index ad907ec25f..ee29aa7dfe 100644
--- a/src/gallium/drivers/softpipe/sp_quad_coverage.c
+++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c
@@ -47,19 +47,19 @@ coverage_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct softpipe_context *softpipe = qs->softpipe;
- if ((softpipe->rasterizer->poly_smooth && quad->prim == PRIM_TRI) ||
- (softpipe->rasterizer->line_smooth && quad->prim == PRIM_LINE) ||
- (softpipe->rasterizer->point_smooth && quad->prim == PRIM_POINT)) {
+ if ((softpipe->rasterizer->poly_smooth && quad->input.prim == PRIM_TRI) ||
+ (softpipe->rasterizer->line_smooth && quad->input.prim == PRIM_LINE) ||
+ (softpipe->rasterizer->point_smooth && quad->input.prim == PRIM_POINT)) {
uint cbuf;
/* loop over colorbuffer outputs */
for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
- float (*quadColor)[4] = quad->outputs.color[cbuf];
+ float (*quadColor)[4] = quad->output.color[cbuf];
unsigned j;
for (j = 0; j < QUAD_SIZE; j++) {
- assert(quad->coverage[j] >= 0.0);
- assert(quad->coverage[j] <= 1.0);
- quadColor[3][j] *= quad->coverage[j];
+ assert(quad->input.coverage[j] >= 0.0);
+ assert(quad->input.coverage[j] <= 1.0);
+ quadColor[3][j] *= quad->input.coverage[j];
}
}
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
index 227cb2014e..523bd3e080 100644
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -60,7 +60,7 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
unsigned zmask = 0;
unsigned j;
struct softpipe_cached_tile *tile
- = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->x0, quad->y0);
+ = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0);
assert(ps); /* shouldn't get here if there's no zbuffer */
@@ -79,12 +79,12 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
float scale = 65535.0;
for (j = 0; j < QUAD_SIZE; j++) {
- qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale);
+ qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
}
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
bzzzz[j] = tile->data.depth16[y][x];
}
}
@@ -94,12 +94,12 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
double scale = (double) (uint) ~0UL;
for (j = 0; j < QUAD_SIZE; j++) {
- qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale);
+ qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
}
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
bzzzz[j] = tile->data.depth32[y][x];
}
}
@@ -111,12 +111,12 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
float scale = (float) ((1 << 24) - 1);
for (j = 0; j < QUAD_SIZE; j++) {
- qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale);
+ qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
}
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
}
}
@@ -128,12 +128,12 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
float scale = (float) ((1 << 24) - 1);
for (j = 0; j < QUAD_SIZE; j++) {
- qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale);
+ qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
}
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
bzzzz[j] = tile->data.depth32[y][x] >> 8;
}
}
@@ -192,14 +192,14 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
assert(0);
}
- quad->mask &= zmask;
+ quad->inout.mask &= zmask;
if (softpipe->depth_stencil->depth.writemask) {
/* This is also efficient with sse / spe instructions:
*/
for (j = 0; j < QUAD_SIZE; j++) {
- if (quad->mask & (1 << j)) {
+ if (quad->inout.mask & (1 << j)) {
bzzzz[j] = qzzzz[j];
}
}
@@ -208,8 +208,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.depth16[y][x] = (ushort) bzzzz[j];
}
break;
@@ -218,15 +218,15 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
/* (yes, this falls through to a different case than above) */
case PIPE_FORMAT_Z32_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.depth32[y][x] = bzzzz[j];
}
break;
case PIPE_FORMAT_S8Z24_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
uint s8z24 = tile->data.depth32[y][x];
s8z24 = (s8z24 & 0xff000000) | bzzzz[j];
tile->data.depth32[y][x] = s8z24;
@@ -234,8 +234,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
break;
case PIPE_FORMAT_Z24S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
uint z24s8 = tile->data.depth32[y][x];
z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8);
tile->data.depth32[y][x] = z24s8;
@@ -243,8 +243,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
break;
case PIPE_FORMAT_Z24X8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.depth32[y][x] = bzzzz[j] << 8;
}
break;
@@ -260,7 +260,7 @@ depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
{
sp_depth_test_quad(qs, quad);
- if (quad->mask)
+ if (quad->inout.mask)
qs->next->run(qs->next, quad);
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c
index 5a66a86699..6e2dde304e 100644
--- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c
+++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c
@@ -45,16 +45,16 @@ earlyz_quad(
struct quad_stage *qs,
struct quad_header *quad )
{
- const float fx = (float) quad->x0;
- const float fy = (float) quad->y0;
+ const float fx = (float) quad->input.x0;
+ const float fy = (float) quad->input.y0;
const float dzdx = quad->posCoef->dadx[2];
const float dzdy = quad->posCoef->dady[2];
const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
- quad->outputs.depth[0] = z0;
- quad->outputs.depth[1] = z0 + dzdx;
- quad->outputs.depth[2] = z0 + dzdy;
- quad->outputs.depth[3] = z0 + dzdx + dzdy;
+ quad->output.depth[0] = z0;
+ quad->output.depth[1] = z0 + dzdx;
+ quad->output.depth[2] = z0 + dzdy;
+ quad->output.depth[3] = z0 + dzdx + dzdy;
qs->next->run( qs->next, quad );
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 5499ba5361..1f0cb3e035 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -85,7 +85,7 @@ shade_quad(
machine->InterpCoefs = quad->coef;
/* run shader */
- quad->mask &= softpipe->fs->run( softpipe->fs,
+ quad->inout.mask &= softpipe->fs->run( softpipe->fs,
&qss->machine,
quad );
@@ -101,16 +101,16 @@ shade_quad(
case TGSI_SEMANTIC_COLOR:
{
uint cbuf = sem_index[i];
- memcpy(quad->outputs.color[cbuf],
+ memcpy(quad->output.color[cbuf],
&machine->Outputs[i].xyzw[0].f[0],
- sizeof(quad->outputs.color[0]) );
+ sizeof(quad->output.color[0]) );
}
break;
case TGSI_SEMANTIC_POSITION:
{
uint j;
for (j = 0; j < 4; j++) {
- quad->outputs.depth[j] = machine->Outputs[0].xyzw[2].f[j];
+ quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j];
}
z_written = TRUE;
}
@@ -122,20 +122,20 @@ shade_quad(
if (!z_written) {
/* compute Z values now, as in the quad earlyz stage */
/* XXX we should really only do this if the earlyz stage is not used */
- const float fx = (float) quad->x0;
- const float fy = (float) quad->y0;
+ const float fx = (float) quad->input.x0;
+ const float fy = (float) quad->input.y0;
const float dzdx = quad->posCoef->dadx[2];
const float dzdy = quad->posCoef->dady[2];
const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
- quad->outputs.depth[0] = z0;
- quad->outputs.depth[1] = z0 + dzdx;
- quad->outputs.depth[2] = z0 + dzdy;
- quad->outputs.depth[3] = z0 + dzdx + dzdy;
+ quad->output.depth[0] = z0;
+ quad->output.depth[1] = z0 + dzdx;
+ quad->output.depth[2] = z0 + dzdy;
+ quad->output.depth[3] = z0 + dzdx + dzdy;
}
/* shader may cull fragments */
- if( quad->mask ) {
+ if( quad->inout.mask ) {
qs->next->run( qs->next, quad );
}
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c
index db13e73ae3..169bd82876 100644
--- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c
+++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c
@@ -54,7 +54,7 @@ occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct softpipe_context *softpipe = qs->softpipe;
- softpipe->occlusion_count += count_bits(quad->mask);
+ softpipe->occlusion_count += count_bits(quad->inout.mask);
qs->next->run(qs->next, quad);
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c
index b64646a449..d05e12d1d9 100644
--- a/src/gallium/drivers/softpipe/sp_quad_output.c
+++ b/src/gallium/drivers/softpipe/sp_quad_output.c
@@ -41,8 +41,8 @@ static void
output_quad(struct quad_stage *qs, struct quad_header *quad)
{
/* in-tile pos: */
- const int itx = quad->x0 % TILE_SIZE;
- const int ity = quad->y0 % TILE_SIZE;
+ const int itx = quad->input.x0 % TILE_SIZE;
+ const int ity = quad->input.y0 % TILE_SIZE;
struct softpipe_context *softpipe = qs->softpipe;
uint cbuf;
@@ -52,13 +52,13 @@ output_quad(struct quad_stage *qs, struct quad_header *quad)
struct softpipe_cached_tile *tile
= sp_get_cached_tile(softpipe,
softpipe->cbuf_cache[cbuf],
- quad->x0, quad->y0);
- float (*quadColor)[4] = quad->outputs.color[cbuf];
+ quad->input.x0, quad->input.y0);
+ float (*quadColor)[4] = quad->output.color[cbuf];
int i, j;
/* get/swizzle dest colors */
for (j = 0; j < QUAD_SIZE; j++) {
- if (quad->mask & (1 << j)) {
+ if (quad->inout.mask & (1 << j)) {
int x = itx + (j & 1);
int y = ity + (j >> 1);
for (i = 0; i < 4; i++) { /* loop over color chans */
diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c
index ce9562e07c..abb5487748 100644
--- a/src/gallium/drivers/softpipe/sp_quad_stencil.c
+++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c
@@ -206,9 +206,9 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
ubyte ref, wrtMask, valMask;
ubyte stencilVals[QUAD_SIZE];
struct softpipe_cached_tile *tile
- = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->x0, quad->y0);
+ = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0);
uint j;
- uint face = quad->facing;
+ uint face = quad->input.facing;
if (!softpipe->depth_stencil->stencil[1].enabled) {
/* single-sided stencil test, use front (face=0) state */
@@ -231,22 +231,22 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
switch (ps->format) {
case PIPE_FORMAT_S8Z24_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
stencilVals[j] = tile->data.depth32[y][x] >> 24;
}
break;
case PIPE_FORMAT_Z24S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
stencilVals[j] = tile->data.depth32[y][x] & 0xff;
}
break;
case PIPE_FORMAT_S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
stencilVals[j] = tile->data.stencil8[y][x];
}
break;
@@ -258,35 +258,35 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
{
unsigned passMask, failMask;
passMask = do_stencil_test(stencilVals, func, ref, valMask);
- failMask = quad->mask & ~passMask;
- quad->mask &= passMask;
+ failMask = quad->inout.mask & ~passMask;
+ quad->inout.mask &= passMask;
if (failOp != PIPE_STENCIL_OP_KEEP) {
apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask);
}
}
- if (quad->mask) {
+ if (quad->inout.mask) {
/* now the pixels that passed the stencil test are depth tested */
if (softpipe->depth_stencil->depth.enabled) {
- const unsigned origMask = quad->mask;
+ const unsigned origMask = quad->inout.mask;
sp_depth_test_quad(qs, quad); /* quad->mask is updated */
/* update stencil buffer values according to z pass/fail result */
if (zFailOp != PIPE_STENCIL_OP_KEEP) {
- const unsigned failMask = origMask & ~quad->mask;
+ const unsigned failMask = origMask & ~quad->inout.mask;
apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask);
}
if (zPassOp != PIPE_STENCIL_OP_KEEP) {
- const unsigned passMask = origMask & quad->mask;
+ const unsigned passMask = origMask & quad->inout.mask;
apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask);
}
}
else {
/* no depth test, apply Zpass operator to stencil buffer values */
- apply_stencil_op(stencilVals, quad->mask, zPassOp, ref, wrtMask);
+ apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask);
}
}
@@ -295,8 +295,8 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
switch (ps->format) {
case PIPE_FORMAT_S8Z24_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
uint s8z24 = tile->data.depth32[y][x];
s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff);
tile->data.depth32[y][x] = s8z24;
@@ -304,8 +304,8 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
break;
case PIPE_FORMAT_Z24S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
uint z24s8 = tile->data.depth32[y][x];
z24s8 = (z24s8 & 0xffffff00) | stencilVals[j];
tile->data.depth32[y][x] = z24s8;
@@ -313,8 +313,8 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
break;
case PIPE_FORMAT_S8_UNORM:
for (j = 0; j < QUAD_SIZE; j++) {
- int x = quad->x0 % TILE_SIZE + (j & 1);
- int y = quad->y0 % TILE_SIZE + (j >> 1);
+ int x = quad->input.x0 % TILE_SIZE + (j & 1);
+ int y = quad->input.y0 % TILE_SIZE + (j >> 1);
tile->data.stencil8[y][x] = stencilVals[j];
}
break;
@@ -322,7 +322,7 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad)
assert(0);
}
- if (quad->mask)
+ if (quad->inout.mask)
qs->next->run(qs->next, quad);
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c
index a39ecc2e9d..ccf37f6be5 100644
--- a/src/gallium/drivers/softpipe/sp_quad_stipple.c
+++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c
@@ -19,17 +19,17 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad)
static const uint bit31 = 1 << 31;
static const uint bit30 = 1 << 30;
- if (quad->prim == PRIM_TRI) {
+ if (quad->input.prim == PRIM_TRI) {
struct softpipe_context *softpipe = qs->softpipe;
/* need to invert Y to index into OpenGL's stipple pattern */
int y0, y1;
uint stipple0, stipple1;
if (softpipe->rasterizer->origin_lower_left) {
- y0 = softpipe->framebuffer.height - 1 - quad->y0;
+ y0 = softpipe->framebuffer.height - 1 - quad->input.y0;
y1 = y0 - 1;
}
else {
- y0 = quad->y0;
+ y0 = quad->input.y0;
y1 = y0 + 1;
}
stipple0 = softpipe->poly_stipple.stipple[y0 % 32];
@@ -37,18 +37,18 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad)
#if 1
{
- const int col0 = quad->x0 % 32;
+ const int col0 = quad->input.x0 % 32;
if ((stipple0 & (bit31 >> col0)) == 0)
- quad->mask &= ~MASK_TOP_LEFT;
+ quad->inout.mask &= ~MASK_TOP_LEFT;
if ((stipple0 & (bit30 >> col0)) == 0)
- quad->mask &= ~MASK_TOP_RIGHT;
+ quad->inout.mask &= ~MASK_TOP_RIGHT;
if ((stipple1 & (bit31 >> col0)) == 0)
- quad->mask &= ~MASK_BOTTOM_LEFT;
+ quad->inout.mask &= ~MASK_BOTTOM_LEFT;
if ((stipple1 & (bit30 >> col0)) == 0)
- quad->mask &= ~MASK_BOTTOM_RIGHT;
+ quad->inout.mask &= ~MASK_BOTTOM_RIGHT;
}
#else
/* We'd like to use this code, but we'd need to redefine
@@ -56,11 +56,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad)
* and similarly for the BOTTOM bits. But that may have undesirable
* side effects elsewhere.
*/
- const int col0 = 30 - (quad->x0 % 32);
- quad->mask &= (((stipple0 >> col0) & 0x3) |
+ const int col0 = 30 - (quad->input.x0 % 32);
+ quad->inout.mask &= (((stipple0 >> col0) & 0x3) |
(((stipple1 >> col0) & 0x3) << 2));
#endif
- if (!quad->mask)
+ if (!quad->inout.mask)
return;
}
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 87336ab6e3..bc8263c33e 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -43,6 +43,7 @@
#include "draw/draw_private.h"
#include "draw/draw_vertex.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_thread.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -61,6 +62,87 @@ struct edge {
int lines; /**< number of lines on this edge */
};
+#if SP_NUM_QUAD_THREADS > 1
+
+/* Set to 1 if you want other threads to be instantly
+ * notified of pending jobs.
+ */
+#define INSTANT_NOTEMPTY_NOTIFY 0
+
+struct thread_info
+{
+ struct setup_context *setup;
+ uint id;
+ pipe_thread handle;
+};
+
+struct quad_job;
+
+typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job );
+
+struct quad_job
+{
+ struct quad_header_input input;
+ struct quad_header_inout inout;
+ quad_job_routine routine;
+};
+
+#define NUM_QUAD_JOBS 64
+
+struct quad_job_que
+{
+ struct quad_job jobs[NUM_QUAD_JOBS];
+ uint first;
+ uint last;
+ pipe_mutex que_mutex;
+ pipe_condvar que_notfull_condvar;
+ pipe_condvar que_notempty_condvar;
+ uint jobs_added;
+ uint jobs_done;
+ pipe_condvar que_done_condvar;
+};
+
+static void
+add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine )
+{
+#if INSTANT_NOTEMPTY_NOTIFY
+ boolean empty;
+#endif
+
+ /* Wait for empty slot, see if the que is empty.
+ */
+ pipe_mutex_lock( que->que_mutex );
+ while ((que->last + 1) % NUM_QUAD_JOBS == que->first) {
+#if !INSTANT_NOTEMPTY_NOTIFY
+ pipe_condvar_broadcast( que->que_notempty_condvar );
+#endif
+ pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex );
+ }
+#if INSTANT_NOTEMPTY_NOTIFY
+ empty = que->last == que->first;
+#endif
+ que->jobs_added++;
+ pipe_mutex_unlock( que->que_mutex );
+
+ /* Submit new job.
+ */
+ que->jobs[que->last].input = quad->input;
+ que->jobs[que->last].inout = quad->inout;
+ que->jobs[que->last].routine = routine;
+ que->last = (que->last + 1) % NUM_QUAD_JOBS;
+
+#if INSTANT_NOTEMPTY_NOTIFY
+ /* If the que was empty, notify consumers there's a job to be done.
+ */
+ if (empty) {
+ pipe_mutex_lock( que->que_mutex );
+ pipe_condvar_broadcast( que->que_notempty_condvar );
+ pipe_mutex_unlock( que->que_mutex );
+ }
+#endif
+}
+
+#endif
/**
* Triangle setup info (derived from draw_stage).
@@ -88,6 +170,11 @@ struct setup_context {
struct tgsi_interp_coef posCoef; /* For Z, W */
struct quad_header quad;
+#if SP_NUM_QUAD_THREADS > 1
+ struct quad_job_que que;
+ struct thread_info threads[SP_NUM_QUAD_THREADS];
+#endif
+
struct {
int left[2]; /**< [0] = row0, [1] = row1 */
int right[2];
@@ -104,7 +191,67 @@ struct setup_context {
unsigned winding; /* which winding to cull */
};
+#if SP_NUM_QUAD_THREADS > 1
+
+static PIPE_THREAD_ROUTINE( quad_thread, param )
+{
+ struct thread_info *info = (struct thread_info *) param;
+ struct quad_job_que *que = &info->setup->que;
+
+ for (;;) {
+ struct quad_job job;
+ boolean full;
+
+ /* Wait for an available job.
+ */
+ pipe_mutex_lock( que->que_mutex );
+ while (que->last == que->first)
+ pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex );
+
+ /* See if the que is full.
+ */
+ full = (que->last + 1) % NUM_QUAD_JOBS == que->first;
+
+ /* Take a job and remove it from que.
+ */
+ job = que->jobs[que->first];
+ que->first = (que->first + 1) % NUM_QUAD_JOBS;
+
+ /* Notify the producer if the que is not full.
+ */
+ if (full)
+ pipe_condvar_signal( que->que_notfull_condvar );
+ pipe_mutex_unlock( que->que_mutex );
+
+ job.routine( info->setup, info->id, &job );
+
+ /* Notify the producer if that's the last finished job.
+ */
+ pipe_mutex_lock( que->que_mutex );
+ que->jobs_done++;
+ if (que->jobs_added == que->jobs_done)
+ pipe_condvar_signal( que->que_done_condvar );
+ pipe_mutex_unlock( que->que_mutex );
+ }
+
+ return NULL;
+}
+
+#define WAIT_FOR_COMPLETION(setup) \
+ do {\
+ pipe_mutex_lock( setup->que.que_mutex );\
+ if (!INSTANT_NOTEMPTY_NOTIFY)\
+ pipe_condvar_broadcast( setup->que.que_notempty_condvar );\
+ while (setup->que.jobs_added != setup->que.jobs_done)\
+ pipe_condvar_wait( setup->que.que_done_condvar, setup->que.que_mutex );\
+ pipe_mutex_unlock( setup->que.que_mutex );\
+ } while (0)
+
+#else
+
+#define WAIT_FOR_COMPLETION(setup) ((void) 0)
+#endif
/**
* Test if x is NaN or +/- infinity.
@@ -143,7 +290,7 @@ static boolean cull_tri( struct setup_context *setup,
* Clip setup->quad against the scissor/surface bounds.
*/
static INLINE void
-quad_clip(struct setup_context *setup)
+quad_clip( struct setup_context *setup, struct quad_header *quad )
{
const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
const int minx = (int) cliprect->minx;
@@ -151,22 +298,22 @@ quad_clip(struct setup_context *setup)
const int miny = (int) cliprect->miny;
const int maxy = (int) cliprect->maxy;
- if (setup->quad.x0 >= maxx ||
- setup->quad.y0 >= maxy ||
- setup->quad.x0 + 1 < minx ||
- setup->quad.y0 + 1 < miny) {
+ if (quad->input.x0 >= maxx ||
+ quad->input.y0 >= maxy ||
+ quad->input.x0 + 1 < minx ||
+ quad->input.y0 + 1 < miny) {
/* totally clipped */
- setup->quad.mask = 0x0;
+ quad->inout.mask = 0x0;
return;
}
- if (setup->quad.x0 < minx)
- setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
- if (setup->quad.y0 < miny)
- setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
- if (setup->quad.x0 == maxx - 1)
- setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
- if (setup->quad.y0 == maxy - 1)
- setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
+ if (quad->input.x0 < minx)
+ quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
+ if (quad->input.y0 < miny)
+ quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
+ if (quad->input.x0 == maxx - 1)
+ quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
+ if (quad->input.y0 == maxy - 1)
+ quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
}
@@ -174,35 +321,59 @@ quad_clip(struct setup_context *setup)
* Emit a quad (pass to next stage) with clipping.
*/
static INLINE void
-clip_emit_quad(struct setup_context *setup)
+clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
{
- quad_clip(setup);
- if (setup->quad.mask) {
+ quad_clip( setup, quad );
+ if (quad->inout.mask) {
struct softpipe_context *sp = setup->softpipe;
- sp->quad.first->run(sp->quad.first, &setup->quad);
+
+ sp->quad[thread].first->run( sp->quad[thread].first, quad );
}
}
+#if SP_NUM_QUAD_THREADS > 1
+
+static void
+clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job )
+{
+ struct quad_header quad;
+
+ quad.input = job->input;
+ quad.inout = job->inout;
+ quad.coef = setup->quad.coef;
+ quad.posCoef = setup->quad.posCoef;
+ quad.nr_attrs = setup->quad.nr_attrs;
+ clip_emit_quad( setup, &quad, thread );
+}
+
+#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, &setup->quad, clip_emit_quad_job )
+
+#else
+
+#define CLIP_EMIT_QUAD(setup) clip_emit_quad( setup, &setup->quad, 0 )
+
+#endif
/**
* Emit a quad (pass to next stage). No clipping is done.
*/
static INLINE void
-emit_quad( struct setup_context *setup, int x, int y, unsigned mask )
+emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
{
struct softpipe_context *sp = setup->softpipe;
- setup->quad.x0 = x;
- setup->quad.y0 = y;
- setup->quad.mask = mask;
+#if DEBUG_FRAGS
+ uint mask = quad->inout.mask;
+#endif
+
#if DEBUG_FRAGS
if (mask & 1) setup->numFragsEmitted++;
if (mask & 2) setup->numFragsEmitted++;
if (mask & 4) setup->numFragsEmitted++;
if (mask & 8) setup->numFragsEmitted++;
#endif
- sp->quad.first->run(sp->quad.first, &setup->quad);
+ sp->quad[thread].first->run( sp->quad[thread].first, quad );
#if DEBUG_FRAGS
- mask = setup->quad.mask;
+ mask = quad->inout.mask;
if (mask & 1) setup->numFragsWritten++;
if (mask & 2) setup->numFragsWritten++;
if (mask & 4) setup->numFragsWritten++;
@@ -210,6 +381,38 @@ emit_quad( struct setup_context *setup, int x, int y, unsigned mask )
#endif
}
+#if SP_NUM_QUAD_THREADS > 1
+
+static void
+emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job )
+{
+ struct quad_header quad;
+
+ quad.input = job->input;
+ quad.inout = job->inout;
+ quad.coef = setup->quad.coef;
+ quad.posCoef = setup->quad.posCoef;
+ quad.nr_attrs = setup->quad.nr_attrs;
+ emit_quad( setup, &quad, thread );
+}
+
+#define EMIT_QUAD(setup,x,y,mask) do {\
+ setup->quad.input.x0 = x;\
+ setup->quad.input.y0 = y;\
+ setup->quad.inout.mask = mask;\
+ add_quad_job( &setup->que, &setup->quad, emit_quad_job );\
+ } while (0)
+
+#else
+
+#define EMIT_QUAD(setup,x,y,mask) do {\
+ setup->quad.input.x0 = x;\
+ setup->quad.input.y0 = y;\
+ setup->quad.inout.mask = mask;\
+ emit_quad( setup, &setup->quad, 0 );\
+ } while (0)
+
+#endif
/**
* Given an X or Y coordinate, return the block/quad coordinate that it
@@ -249,7 +452,7 @@ static void flush_spans( struct setup_context *setup )
mask |= MASK_TOP_RIGHT;
if (x+1 >= xleft1 && x+1 < xright1)
mask |= MASK_BOTTOM_RIGHT;
- emit_quad( setup, x, setup->span.y, mask );
+ EMIT_QUAD( setup, x, setup->span.y, mask );
}
break;
@@ -263,7 +466,7 @@ static void flush_spans( struct setup_context *setup )
mask |= MASK_TOP_LEFT;
if (x+1 >= xleft0 && x+1 < xright0)
mask |= MASK_TOP_RIGHT;
- emit_quad( setup, x, setup->span.y, mask );
+ EMIT_QUAD( setup, x, setup->span.y, mask );
}
break;
@@ -277,7 +480,7 @@ static void flush_spans( struct setup_context *setup )
mask |= MASK_BOTTOM_LEFT;
if (x+1 >= xleft1 && x+1 < xright1)
mask |= MASK_BOTTOM_RIGHT;
- emit_quad( setup, x, setup->span.y, mask );
+ EMIT_QUAD( setup, x, setup->span.y, mask );
}
break;
@@ -398,7 +601,7 @@ static boolean setup_sort_vertices( struct setup_context *setup,
* - the GLSL gl_FrontFacing fragment attribute (bool)
* - two-sided stencil test
*/
- setup->quad.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
+ setup->quad.input.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
return TRUE;
}
@@ -595,7 +798,7 @@ static void setup_tri_coefficients( struct setup_context *setup )
if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
/* FOG.y = front/back facing XXX fix this */
- setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
+ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing;
setup->coef[fragSlot].dadx[1] = 0.0;
setup->coef[fragSlot].dady[1] = 0.0;
}
@@ -765,7 +968,7 @@ void setup_tri( struct setup_context *setup,
setup_tri_coefficients( setup );
setup_tri_edges( setup );
- setup->quad.prim = PRIM_TRI;
+ setup->quad.input.prim = PRIM_TRI;
setup->span.y = 0;
setup->span.y_flags = 0;
@@ -790,6 +993,8 @@ void setup_tri( struct setup_context *setup,
flush_spans( setup );
+ WAIT_FOR_COMPLETION(setup);
+
#if DEBUG_FRAGS
printf("Tri: %u frags emitted, %u written\n",
setup->numFragsEmitted,
@@ -904,7 +1109,7 @@ setup_line_coefficients(struct setup_context *setup,
if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
/* FOG.y = front/back facing XXX fix this */
- setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
+ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing;
setup->coef[fragSlot].dadx[1] = 0.0;
setup->coef[fragSlot].dady[1] = 0.0;
}
@@ -925,20 +1130,20 @@ plot(struct setup_context *setup, int x, int y)
const int quadY = y - iy;
const int mask = (1 << ix) << (2 * iy);
- if (quadX != setup->quad.x0 ||
- quadY != setup->quad.y0)
+ if (quadX != setup->quad.input.x0 ||
+ quadY != setup->quad.input.y0)
{
/* flush prev quad, start new quad */
- if (setup->quad.x0 != -1)
- clip_emit_quad(setup);
+ if (setup->quad.input.x0 != -1)
+ CLIP_EMIT_QUAD(setup);
- setup->quad.x0 = quadX;
- setup->quad.y0 = quadY;
- setup->quad.mask = 0x0;
+ setup->quad.input.x0 = quadX;
+ setup->quad.input.y0 = quadY;
+ setup->quad.inout.mask = 0x0;
}
- setup->quad.mask |= mask;
+ setup->quad.inout.mask |= mask;
}
@@ -999,16 +1204,16 @@ setup_line(struct setup_context *setup,
assert(dx >= 0);
assert(dy >= 0);
- setup->quad.x0 = setup->quad.y0 = -1;
- setup->quad.mask = 0x0;
- setup->quad.prim = PRIM_LINE;
+ setup->quad.input.x0 = setup->quad.input.y0 = -1;
+ setup->quad.inout.mask = 0x0;
+ setup->quad.input.prim = PRIM_LINE;
/* XXX temporary: set coverage to 1.0 so the line appears
* if AA mode happens to be enabled.
*/
- setup->quad.coverage[0] =
- setup->quad.coverage[1] =
- setup->quad.coverage[2] =
- setup->quad.coverage[3] = 1.0;
+ setup->quad.input.coverage[0] =
+ setup->quad.input.coverage[1] =
+ setup->quad.input.coverage[2] =
+ setup->quad.input.coverage[3] = 1.0;
if (dx > dy) {
/*** X-major line ***/
@@ -1052,9 +1257,11 @@ setup_line(struct setup_context *setup,
}
/* draw final quad */
- if (setup->quad.mask) {
- clip_emit_quad(setup);
+ if (setup->quad.inout.mask) {
+ CLIP_EMIT_QUAD(setup);
}
+
+ WAIT_FOR_COMPLETION(setup);
}
@@ -1148,22 +1355,22 @@ setup_point( struct setup_context *setup,
if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
/* FOG.y = front/back facing XXX fix this */
- setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
+ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing;
setup->coef[fragSlot].dadx[1] = 0.0;
setup->coef[fragSlot].dady[1] = 0.0;
}
}
- setup->quad.prim = PRIM_POINT;
+ setup->quad.input.prim = PRIM_POINT;
if (halfSize <= 0.5 && !round) {
/* special case for 1-pixel points */
const int ix = ((int) x) & 1;
const int iy = ((int) y) & 1;
- setup->quad.x0 = (int) x - ix;
- setup->quad.y0 = (int) y - iy;
- setup->quad.mask = (1 << ix) << (2 * iy);
- clip_emit_quad(setup);
+ setup->quad.input.x0 = (int) x - ix;
+ setup->quad.input.y0 = (int) y - iy;
+ setup->quad.inout.mask = (1 << ix) << (2 * iy);
+ CLIP_EMIT_QUAD(setup);
}
else {
if (round) {
@@ -1183,15 +1390,15 @@ setup_point( struct setup_context *setup,
for (ix = ixmin; ix <= ixmax; ix += 2) {
float dx, dy, dist2, cover;
- setup->quad.mask = 0x0;
+ setup->quad.inout.mask = 0x0;
dx = (ix + 0.5f) - x;
dy = (iy + 0.5f) - y;
dist2 = dx * dx + dy * dy;
if (dist2 <= rmax2) {
cover = 1.0F - (dist2 - rmin2) * cscale;
- setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
- setup->quad.mask |= MASK_TOP_LEFT;
+ setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
+ setup->quad.inout.mask |= MASK_TOP_LEFT;
}
dx = (ix + 1.5f) - x;
@@ -1199,8 +1406,8 @@ setup_point( struct setup_context *setup,
dist2 = dx * dx + dy * dy;
if (dist2 <= rmax2) {
cover = 1.0F - (dist2 - rmin2) * cscale;
- setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
- setup->quad.mask |= MASK_TOP_RIGHT;
+ setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
+ setup->quad.inout.mask |= MASK_TOP_RIGHT;
}
dx = (ix + 0.5f) - x;
@@ -1208,8 +1415,8 @@ setup_point( struct setup_context *setup,
dist2 = dx * dx + dy * dy;
if (dist2 <= rmax2) {
cover = 1.0F - (dist2 - rmin2) * cscale;
- setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
- setup->quad.mask |= MASK_BOTTOM_LEFT;
+ setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
+ setup->quad.inout.mask |= MASK_BOTTOM_LEFT;
}
dx = (ix + 1.5f) - x;
@@ -1217,14 +1424,14 @@ setup_point( struct setup_context *setup,
dist2 = dx * dx + dy * dy;
if (dist2 <= rmax2) {
cover = 1.0F - (dist2 - rmin2) * cscale;
- setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
- setup->quad.mask |= MASK_BOTTOM_RIGHT;
+ setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
+ setup->quad.inout.mask |= MASK_BOTTOM_RIGHT;
}
- if (setup->quad.mask) {
- setup->quad.x0 = ix;
- setup->quad.y0 = iy;
- clip_emit_quad(setup);
+ if (setup->quad.inout.mask) {
+ setup->quad.input.x0 = ix;
+ setup->quad.input.y0 = iy;
+ CLIP_EMIT_QUAD(setup);
}
}
}
@@ -1268,14 +1475,16 @@ setup_point( struct setup_context *setup,
mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
}
- setup->quad.mask = mask;
- setup->quad.x0 = ix;
- setup->quad.y0 = iy;
- clip_emit_quad(setup);
+ setup->quad.inout.mask = mask;
+ setup->quad.input.x0 = ix;
+ setup->quad.input.y0 = iy;
+ CLIP_EMIT_QUAD(setup);
}
}
}
}
+
+ WAIT_FOR_COMPLETION(setup);
}
void setup_prepare( struct setup_context *setup )
@@ -1300,7 +1509,9 @@ void setup_prepare( struct setup_context *setup )
/* Note: nr_attrs is only used for debugging (vertex printing) */
setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw);
- sp->quad.first->begin(sp->quad.first);
+ for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
+ sp->quad[i].first->begin( sp->quad[i].first );
+ }
if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
@@ -1328,11 +1539,31 @@ void setup_destroy_context( struct setup_context *setup )
struct setup_context *setup_create_context( struct softpipe_context *softpipe )
{
struct setup_context *setup = CALLOC_STRUCT(setup_context);
+#if SP_NUM_QUAD_THREADS > 1
+ uint i;
+#endif
setup->softpipe = softpipe;
setup->quad.coef = setup->coef;
setup->quad.posCoef = &setup->posCoef;
+#if SP_NUM_QUAD_THREADS > 1
+ setup->que.first = 0;
+ setup->que.last = 0;
+ pipe_mutex_init( setup->que.que_mutex );
+ pipe_condvar_init( setup->que.que_notfull_condvar );
+ pipe_condvar_init( setup->que.que_notempty_condvar );
+ setup->que.jobs_added = 0;
+ setup->que.jobs_done = 0;
+ pipe_condvar_init( setup->que.que_done_condvar );
+ for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
+ setup->threads[i].setup = setup;
+ setup->threads[i].id = i;
+ setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] );
+ }
+#endif
+
return setup;
}
+
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index 1be461b3a4..e5b609cf6c 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -152,7 +152,7 @@ softpipe_set_constant_buffer(struct pipe_context *pipe,
assert(index == 0);
/* note: reference counting */
- pipe_buffer_reference(ws,
+ winsys_buffer_reference(ws,
&softpipe->constants[shader].buffer,
buf ? buf->buffer : NULL);
softpipe->constants[shader].size = buf ? buf->size : 0;
diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c
index 389aceb27c..6ade732698 100644
--- a/src/gallium/drivers/softpipe/sp_surface.c
+++ b/src/gallium/drivers/softpipe/sp_surface.c
@@ -25,132 +25,14 @@
*
**************************************************************************/
-#include "pipe/p_defines.h"
-#include "pipe/p_inlines.h"
-#include "pipe/p_winsys.h"
-#include "util/u_tile.h"
#include "util/u_rect.h"
#include "sp_context.h"
-#include "sp_surface.h"
-/**
- * Copy a rectangular region from one surface to another.
- * Surfaces must have same bpp.
- *
- * Note that it's always the case that Y=0=top of the raster.
- * If do_flip is non-zero, the region being copied will be flipped vertically.
- *
- * Assumes all values are within bounds -- no checking at this level -
- * do it higher up if required.
- */
-static void
-sp_surface_copy(struct pipe_context *pipe,
- boolean do_flip,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- struct pipe_surface *src,
- unsigned srcx, unsigned srcy, unsigned width, unsigned height)
-{
- void *dst_map = pipe->screen->surface_map( pipe->screen,
- dst,
- PIPE_BUFFER_USAGE_CPU_WRITE );
-
- const void *src_map = pipe->screen->surface_map( pipe->screen,
- src,
- PIPE_BUFFER_USAGE_CPU_READ );
-
- assert(dst->block.size == src->block.size);
- assert(dst->block.width == src->block.width);
- assert(dst->block.height == src->block.height);
- assert(src_map);
- assert(dst_map);
-
- /* If do_flip, invert src_y position and pass negative src stride */
- pipe_copy_rect(dst_map,
- &dst->block,
- dst->stride,
- dstx, dsty,
- width, height,
- src_map,
- do_flip ? -(int) src->stride : src->stride,
- srcx, srcy);
-
- pipe->screen->surface_unmap(pipe->screen, src);
- pipe->screen->surface_unmap(pipe->screen, dst);
-}
-
-
-static void *
-get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y)
-{
- return (char *)dst_map + y / dst->block.height * dst->stride + x / dst->block.width * dst->block.size;
-}
-
-
-#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8))
-
-
-/**
- * Fill a rectangular sub-region. Need better logic about when to
- * push buffers into AGP - will currently do so whenever possible.
- */
-static void
-sp_surface_fill(struct pipe_context *pipe,
- struct pipe_surface *dst,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height, unsigned value)
-{
- unsigned i, j;
- void *dst_map = pipe->screen->surface_map( pipe->screen,
- dst,
- PIPE_BUFFER_USAGE_CPU_WRITE );
-
- assert(dst->stride > 0);
-
-
- switch (dst->block.size) {
- case 1:
- case 2:
- case 4:
- pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value);
- break;
- case 8:
- {
- /* expand the 4-byte clear value to an 8-byte value */
- ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty);
- ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff);
- ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff);
- ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff);
- ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff);
- val0 = (val0 << 8) | val0;
- val1 = (val1 << 8) | val1;
- val2 = (val2 << 8) | val2;
- val3 = (val3 << 8) | val3;
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- row[j*4+0] = val0;
- row[j*4+1] = val1;
- row[j*4+2] = val2;
- row[j*4+3] = val3;
- }
- row += dst->stride/2;
- }
- }
- break;
- default:
- assert(0);
- break;
- }
-
- pipe->screen->surface_unmap(pipe->screen, dst);
-}
-
-
void
sp_init_surface_functions(struct softpipe_context *sp)
{
- sp->pipe.surface_copy = sp_surface_copy;
- sp->pipe.surface_fill = sp_surface_fill;
+ sp->pipe.surface_copy = util_surface_copy;
+ sp->pipe.surface_fill = util_surface_fill;
}
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 3a737d6f72..cb48035771 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -192,7 +192,7 @@ softpipe_texture_blanket(struct pipe_screen * screen,
spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);
spt->stride[0] = stride[0];
- pipe_buffer_reference(screen->winsys, &spt->buffer, buffer);
+ pipe_buffer_reference(screen, &spt->buffer, buffer);
return &spt->base;
}
@@ -208,7 +208,7 @@ softpipe_texture_release(struct pipe_screen *screen,
if (--(*pt)->refcount <= 0) {
struct softpipe_texture *spt = softpipe_texture(*pt);
- pipe_buffer_reference(screen->winsys, &spt->buffer, NULL);
+ pipe_buffer_reference(screen, &spt->buffer, NULL);
FREE(spt);
}
*pt = NULL;
@@ -231,7 +231,7 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
if (ps) {
assert(ps->refcount);
assert(ps->winsys);
- pipe_buffer_reference(ws, &ps->buffer, spt->buffer);
+ pipe_buffer_reference(screen, &ps->buffer, spt->buffer);
ps->format = pt->format;
ps->block = pt->block;
ps->width = pt->width[level];
@@ -307,7 +307,7 @@ softpipe_surface_map( struct pipe_screen *screen,
return NULL;
}
- map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags );
+ map = pipe_buffer_map( screen, surface->buffer, flags );
if (map == NULL)
return NULL;
@@ -331,7 +331,7 @@ static void
softpipe_surface_unmap(struct pipe_screen *screen,
struct pipe_surface *surface)
{
- screen->winsys->buffer_unmap( screen->winsys, surface->buffer );
+ pipe_buffer_unmap( screen, surface->buffer );
}
diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript
index 5c49468c4e..0a6bfb8f4c 100644
--- a/src/gallium/drivers/trace/SConscript
+++ b/src/gallium/drivers/trace/SConscript
@@ -9,8 +9,6 @@ trace = env.ConvenienceLibrary(
'tr_dump.c',
'tr_screen.c',
'tr_state.c',
- 'tr_stream_stdc.c',
- 'tr_stream_wd.c',
'tr_texture.c',
'tr_winsys.c',
])
diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c
index 48032c1617..a0ead0ded3 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -48,12 +48,12 @@
#include "pipe/p_debug.h"
#include "util/u_memory.h"
#include "util/u_string.h"
+#include "util/u_stream.h"
-#include "tr_stream.h"
#include "tr_dump.h"
-static struct trace_stream *stream = NULL;
+static struct util_stream *stream = NULL;
static unsigned refcount = 0;
@@ -61,7 +61,7 @@ static INLINE void
trace_dump_write(const char *buf, size_t size)
{
if(stream)
- trace_stream_write(stream, buf, size);
+ util_stream_write(stream, buf, size);
}
@@ -212,7 +212,7 @@ trace_dump_trace_close(void)
{
if(stream) {
trace_dump_writes("</trace>\n");
- trace_stream_close(stream);
+ util_stream_close(stream);
stream = NULL;
refcount = 0;
}
@@ -228,7 +228,7 @@ boolean trace_dump_trace_begin()
if(!stream) {
- stream = trace_stream_create(filename);
+ stream = util_stream_create(filename, 0);
if(!stream)
return FALSE;
@@ -272,7 +272,7 @@ void trace_dump_call_end(void)
trace_dump_indent(1);
trace_dump_tag_end("call");
trace_dump_newline();
- trace_stream_flush(stream);
+ util_stream_flush(stream);
}
void trace_dump_arg_begin(const char *name)
diff --git a/src/gallium/drivers/trace/tr_stream_stdc.c b/src/gallium/drivers/trace/tr_stream_stdc.c
deleted file mode 100644
index 4c19ec0b24..0000000000
--- a/src/gallium/drivers/trace/tr_stream_stdc.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * @file
- * Stream implementation based on the Standard C Library.
- */
-
-#include "pipe/p_config.h"
-
-#if defined(PIPE_OS_LINUX)
-
-#include <stdio.h>
-
-#include "util/u_memory.h"
-
-#include "tr_stream.h"
-
-
-struct trace_stream
-{
- FILE *file;
-};
-
-
-struct trace_stream *
-trace_stream_create(const char *filename)
-{
- struct trace_stream *stream;
-
- stream = CALLOC_STRUCT(trace_stream);
- if(!stream)
- goto error1;
-
- stream->file = fopen(filename, "w");
- if(!stream->file)
- goto error2;
-
- return stream;
-
-error2:
- FREE(stream);
-error1:
- return NULL;
-}
-
-
-boolean
-trace_stream_write(struct trace_stream *stream, const void *data, size_t size)
-{
- if(!stream)
- return FALSE;
-
- return fwrite(data, size, 1, stream->file) == size ? TRUE : FALSE;
-}
-
-
-void
-trace_stream_flush(struct trace_stream *stream)
-{
- if(!stream)
- return;
-
- fflush(stream->file);
-}
-
-
-void
-trace_stream_close(struct trace_stream *stream)
-{
- if(!stream)
- return;
-
- fclose(stream->file);
-
- FREE(stream);
-}
-
-
-#endif
diff --git a/src/gallium/drivers/trace/tr_stream_wd.c b/src/gallium/drivers/trace/tr_stream_wd.c
deleted file mode 100644
index 704eb15bd7..0000000000
--- a/src/gallium/drivers/trace/tr_stream_wd.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * @file
- * Stream implementation for the Windows Display driver.
- */
-
-#include "pipe/p_config.h"
-
-#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
-
-#include <windows.h>
-#include <winddi.h>
-
-#include "util/u_memory.h"
-#include "util/u_string.h"
-
-#include "tr_stream.h"
-
-
-#define MAP_FILE_SIZE (4*1024*1024)
-
-
-struct trace_stream
-{
- char filename[MAX_PATH + 1];
- WCHAR wFileName[MAX_PATH + 1];
- ULONG_PTR iFile;
- char *pMap;
- size_t written;
- unsigned suffix;
-};
-
-
-static INLINE boolean
-trace_stream_map(struct trace_stream *stream)
-{
- ULONG BytesInUnicodeString;
- static char filename[MAX_PATH + 1];
- unsigned filename_len;
-
- filename_len = util_snprintf(filename,
- sizeof(filename),
- "\\??\\%s.%04x",
- stream->filename,
- stream->suffix++);
-
- EngMultiByteToUnicodeN(
- stream->wFileName,
- sizeof(stream->wFileName),
- &BytesInUnicodeString,
- filename,
- filename_len);
-
- stream->pMap = EngMapFile(stream->wFileName, MAP_FILE_SIZE, &stream->iFile);
- if(!stream->pMap)
- return FALSE;
-
- memset(stream->pMap, 0, MAP_FILE_SIZE);
- stream->written = 0;
-
- return TRUE;
-}
-
-
-static INLINE void
-trace_stream_unmap(struct trace_stream *stream)
-{
- EngUnmapFile(stream->iFile);
- if(stream->written < MAP_FILE_SIZE) {
- /* Truncate file size */
- stream->pMap = EngMapFile(stream->wFileName, stream->written, &stream->iFile);
- if(stream->pMap)
- EngUnmapFile(stream->iFile);
- }
-
- stream->pMap = NULL;
-}
-
-
-struct trace_stream *
-trace_stream_create(const char *filename)
-{
- struct trace_stream *stream;
-
- stream = CALLOC_STRUCT(trace_stream);
- if(!stream)
- goto error1;
-
- strncpy(stream->filename, filename, sizeof(stream->filename));
-
- if(!trace_stream_map(stream))
- goto error2;
-
- return stream;
-
-error2:
- FREE(stream);
-error1:
- return NULL;
-}
-
-
-static INLINE void
-trace_stream_copy(struct trace_stream *stream, const char *data, size_t size)
-{
- assert(stream->written + size <= MAP_FILE_SIZE);
- memcpy(stream->pMap + stream->written, data, size);
- stream->written += size;
-}
-
-
-boolean
-trace_stream_write(struct trace_stream *stream, const void *data, size_t size)
-{
- if(!stream)
- return FALSE;
-
- if(!stream->pMap)
- return FALSE;
-
- while(stream->written + size > MAP_FILE_SIZE) {
- size_t step = MAP_FILE_SIZE - stream->written;
- trace_stream_copy(stream, data, step);
- data = (const char *)data + step;
- size -= step;
-
- trace_stream_unmap(stream);
- if(!trace_stream_map(stream))
- return FALSE;
- }
-
- trace_stream_copy(stream, data, size);
-
- return TRUE;
-}
-
-
-void
-trace_stream_flush(struct trace_stream *stream)
-{
- (void)stream;
-}
-
-
-void
-trace_stream_close(struct trace_stream *stream)
-{
- if(!stream)
- return;
-
- trace_stream_unmap(stream);
-
- FREE(stream);
-}
-
-
-#endif