summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/cell/ppu
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/cell/ppu')
-rw-r--r--src/gallium/drivers/cell/ppu/Makefile1
-rw-r--r--src/gallium/drivers/cell/ppu/cell_batch.c43
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.c23
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h26
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.c158
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.h57
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c125
-rw-r--r--src/gallium/drivers/cell/ppu/cell_pipe_state.c41
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.c38
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.h12
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c68
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_shader.c2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.c27
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.h5
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vbuf.c7
15 files changed, 554 insertions, 79 deletions
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile
index b28f4c5c31..9358a47284 100644
--- a/src/gallium/drivers/cell/ppu/Makefile
+++ b/src/gallium/drivers/cell/ppu/Makefile
@@ -24,6 +24,7 @@ SOURCES = \
cell_clear.c \
cell_context.c \
cell_draw_arrays.c \
+ cell_fence.c \
cell_flush.c \
cell_gen_fragment.c \
cell_gen_fp.c \
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c
index 16882c0129..448b723d85 100644
--- a/src/gallium/drivers/cell/ppu/cell_batch.c
+++ b/src/gallium/drivers/cell/ppu/cell_batch.c
@@ -28,6 +28,7 @@
#include "cell_context.h"
#include "cell_batch.h"
+#include "cell_fence.h"
#include "cell_spu.h"
@@ -42,7 +43,9 @@
uint
cell_get_empty_buffer(struct cell_context *cell)
{
- uint buf = 0, tries = 0;
+ static uint prev_buffer = 0;
+ uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS;
+ uint tries = 0;
/* Find a buffer that's marked as free by all SPUs */
while (1) {
@@ -58,8 +61,13 @@ cell_get_empty_buffer(struct cell_context *cell)
cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
}
/*
- printf("PPU: ALLOC BUFFER %u\n", buf);
+ printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries);
*/
+ prev_buffer = buf;
+
+ /* release tex buffer associated w/ prev use of this batch buf */
+ cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]);
+
return buf;
}
}
@@ -82,6 +90,26 @@ cell_get_empty_buffer(struct cell_context *cell)
/**
+ * Append a fence command to the current batch buffer.
+ * Note that we're sure there's always room for this because of the
+ * adjusted size check in cell_batch_free_space().
+ *
+ * The command is written at the current end of the batch, i.e. at byte
+ * offset buffer_size[cur_batch] inside buffer[cur_batch].  Its opcode is
+ * CELL_CMD_FENCE and its fence pointer refers to the fence embedded in
+ * fenced_buffers[cur_batch].
+ *
+ * NOTE(review): this function does not advance buffer_size[batch] past the
+ * command it writes, and it does not change the fence's status words.
+ * Confirm that the caller accounts for the extra
+ * sizeof(struct cell_command_fence) bytes when it submits the batch, and
+ * that something marks the fence as emitted before it is waited on.
+ */
+static void
+emit_fence(struct cell_context *cell)
+{
+ const uint batch = cell->cur_batch;
+ const uint size = cell->buffer_size[batch];
+ struct cell_command_fence *fence_cmd;
+
+ ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE);
+
+ fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size);
+ fence_cmd->opcode = CELL_CMD_FENCE;
+ fence_cmd->fence = &cell->fenced_buffers[batch].fence;
+}
+
+
+/**
* Flush the current batch buffer to the SPUs.
* An empty buffer will be found and set as the new current batch buffer
* for subsequent commands/data.
@@ -99,6 +127,12 @@ cell_batch_flush(struct cell_context *cell)
if (size == 0)
return;
+ /* Before we use this batch buffer, make sure any fenced texture buffers
+ * are released.
+ */
+ if (cell->fenced_buffers[batch].head)
+ emit_fence(cell);
+
flushing = TRUE;
assert(batch < CELL_NUM_BUFFERS);
@@ -139,6 +173,7 @@ uint
cell_batch_free_space(const struct cell_context *cell)
{
uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch];
+ free -= sizeof(struct cell_command_fence);
return free;
}
@@ -169,7 +204,7 @@ cell_batch_append(struct cell_context *cell, const void *data, uint bytes)
size = cell->buffer_size[cell->cur_batch];
- if (size + bytes > CELL_BUFFER_SIZE) {
+ if (bytes > cell_batch_free_space(cell)) {
cell_batch_flush(cell);
size = 0;
}
@@ -223,7 +258,7 @@ cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
padbytes = (alignment - (size % alignment)) % alignment;
- if (padbytes + size + bytes > CELL_BUFFER_SIZE) {
+ if (padbytes + bytes > cell_batch_free_space(cell)) {
cell_batch_flush(cell);
size = 0;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
index b66aa9c9d9..22d552d8e3 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ b/src/gallium/drivers/cell/ppu/cell_context.c
@@ -47,6 +47,7 @@
#include "cell_clear.h"
#include "cell_context.h"
#include "cell_draw_arrays.h"
+#include "cell_fence.h"
#include "cell_flush.h"
#include "cell_state.h"
#include "cell_surface.h"
@@ -93,6 +94,8 @@ static const struct debug_named_value cell_debug_flags[] = {
{"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */
{"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/
{"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/
+ {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */
+ {"cache", CELL_DEBUG_CACHE}, /**< report texture cache stats on exit */
{NULL, 0}
};
@@ -102,6 +105,7 @@ cell_create_context(struct pipe_screen *screen,
struct cell_winsys *cws)
{
struct cell_context *cell;
+ uint i;
/* some fields need to be 16-byte aligned, so align the whole object */
cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16);
@@ -149,13 +153,24 @@ cell_create_context(struct pipe_screen *screen,
cell_debug_flags,
0 );
+ for (i = 0; i < CELL_NUM_BUFFERS; i++)
+ cell_fence_init(&cell->fenced_buffers[i].fence);
+
+
/*
* SPU stuff
*/
- cell->num_spus = 6;
- /* XXX is this in SDK 3.0 only?
- cell->num_spus = spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1);
- */
+ /* This call only works with SDK 3.0. Anyone still using 2.1??? */
+ cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1);
+ cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, 0);
+ if (cell->debug_flags) {
+ printf("Cell: found %d Cell(s) with %u SPUs\n",
+ cell->num_cells, cell->num_spus);
+ }
+ if (getenv("CELL_NUM_SPUS")) {
+ cell->num_spus = atoi(getenv("CELL_NUM_SPUS"));
+ assert(cell->num_spus > 0);
+ }
cell_start_spus(cell);
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index 80a9b3d7e1..4491ae8cdf 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -74,12 +74,26 @@ struct cell_fragment_shader_state
struct cell_fragment_ops_key
{
struct pipe_blend_state blend;
+ struct pipe_blend_color blend_color;
struct pipe_depth_stencil_alpha_state dsa;
enum pipe_format color_format;
enum pipe_format zs_format;
};
+struct cell_buffer_node;
+
+/**
+ * Fenced buffer list. List of buffers which can be unreferenced after
+ * the fence has been executed/signalled.
+ *
+ * Fields:
+ *  - fence: the fence that guards the buffers on this list.
+ *  - head:  first node of a singly-linked list of buffer references;
+ *           NULL when the list is empty.
+ */
+struct cell_buffer_list
+{
+ struct cell_fence fence;
+ struct cell_buffer_node *head;
+};
+
+
/**
* Per-context state, subclass of pipe_context.
*/
@@ -120,6 +134,8 @@ struct cell_context
uint *tex_map;
uint dirty;
+ uint dirty_textures; /* bitmask of texture units */
+ uint dirty_samplers; /* bitmask of sampler units */
/** Cache of code generated for per-fragment ops */
struct keymap *fragment_ops_cache;
@@ -139,7 +155,7 @@ struct cell_context
struct cell_spu_function_info spu_functions ALIGN16_ATTRIB;
- uint num_spus;
+ uint num_cells, num_spus;
/** Buffers for command batches, vertex/index data */
uint buffer_size[CELL_NUM_BUFFERS];
@@ -151,6 +167,14 @@ struct cell_context
uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB;
+ /** Associated with each command/batch buffer is a list of pipe_buffers
+ * that are fenced. When the last command in a buffer is executed, the
+ * fence will be signalled, indicating that any pipe_buffers preceding
+ * that fence can be unreferenced (and probably freed).
+ */
+ struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS];
+
+
struct spe_function attrib_fetch;
unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS];
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c
new file mode 100644
index 0000000000..ffb3bea12b
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_fence.c
@@ -0,0 +1,158 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <unistd.h>
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "cell_context.h"
+#include "cell_batch.h"
+#include "cell_fence.h"
+#include "cell_texture.h"
+
+/**
+ * Put a fence into its initial state: the first status word of each of
+ * the CELL_MAX_SPUS per-SPU slots is set to CELL_FENCE_IDLE.
+ */
+void
+cell_fence_init(struct cell_fence *fence)
+{
+ uint i;
+ for (i = 0; i < CELL_MAX_SPUS; i++) {
+ fence->status[i][0] = CELL_FENCE_IDLE;
+ }
+}
+
+/**
+ * Test whether a fence has been signalled.
+ *
+ * Only the slots of the first cell->num_spus SPUs are examined.  The
+ * fence counts as signalled unless at least one of those slots still
+ * reads CELL_FENCE_EMITTED; in particular a slot that is still
+ * CELL_FENCE_IDLE does not hold the fence back.
+ *
+ * Returns FALSE if any examined slot is CELL_FENCE_EMITTED, else TRUE.
+ */
+boolean
+cell_fence_signalled(const struct cell_context *cell,
+ const struct cell_fence *fence)
+{
+ uint i;
+ for (i = 0; i < cell->num_spus; i++) {
+ //ASSERT(fence->status[i][0] != CELL_FENCE_IDLE);
+ if (fence->status[i][0] == CELL_FENCE_EMITTED)
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/**
+ * Block the calling thread until cell_fence_signalled() reports TRUE for
+ * the given fence.  This is a polling loop that calls usleep(10) between
+ * checks; there is no timeout.
+ */
+void
+cell_fence_finish(const struct cell_context *cell,
+ const struct cell_fence *fence)
+{
+ while (!cell_fence_signalled(cell, fence)) {
+ usleep(10);
+ }
+}
+
+
+
+/**
+ * Node of a singly-linked fenced-buffer list.
+ *  - buffer: the pipe_buffer this node refers to (set through
+ *            pipe_buffer_reference() when the node is created).
+ *  - next:   following node, or NULL at the end of the list.
+ */
+struct cell_buffer_node
+{
+ struct pipe_buffer *buffer;
+ struct cell_buffer_node *next;
+};
+
+/**
+ * Push a reference to 'buffer' onto the front of 'list'.
+ *
+ * A node is allocated with CALLOC_STRUCT() and made to reference the
+ * buffer via pipe_buffer_reference().  If the allocation fails the buffer
+ * is not added and no error is reported.
+ */
+static void
+cell_add_buffer_to_list(struct cell_context *cell,
+ struct cell_buffer_list *list,
+ struct pipe_buffer *buffer)
+{
+ struct pipe_screen *ps = cell->pipe.screen;
+ struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node);
+ /* create new list node which references the buffer, insert at head */
+ if (node) {
+ pipe_buffer_reference(ps, &node->buffer, buffer);
+ node->next = list->head;
+ list->head = node;
+ }
+}
+
+
+/**
+ * Wait for completion of the given fence, then unreference any buffers
+ * on the list.
+ * This typically unrefs/frees texture buffers after any rendering which uses
+ * them has completed.
+ *
+ * Nothing happens (and the fence is not waited on) when the list is empty.
+ * Otherwise, after cell_fence_finish() returns, every node's buffer is
+ * passed to pipe_buffer_unmap(), its reference is dropped, the node is
+ * freed, and finally list->head is reset to NULL.
+ *
+ * NOTE(review): cell_add_fenced_textures() adds a node per bound buffer on
+ * every call without checking for duplicates, so the same buffer may be
+ * unmapped here more than once -- confirm pipe_buffer_unmap() tolerates
+ * that.
+ */
+void
+cell_free_fenced_buffers(struct cell_context *cell,
+ struct cell_buffer_list *list)
+{
+ if (list->head) {
+ struct pipe_screen *ps = cell->pipe.screen;
+ struct cell_buffer_node *node;
+
+ cell_fence_finish(cell, &list->fence);
+
+ /* traverse the list, unreferencing buffers, freeing nodes */
+ node = list->head;
+ while (node) {
+ struct cell_buffer_node *next = node->next;
+ assert(node->buffer);
+ pipe_buffer_unmap(ps, node->buffer);
+#if 0
+ printf("Unref buffer %p\n", node->buffer);
+ if (node->buffer->refcount == 1)
+ printf(" Delete!\n");
+#endif
+ pipe_buffer_reference(ps, &node->buffer, NULL);
+ FREE(node);
+ node = next;
+ }
+ list->head = NULL;
+ }
+}
+
+
+/**
+ * This should be called for each render command.
+ * Any texture buffers that are currently bound will be added to a fenced
+ * list to be freed later when the fence is executed/signalled.
+ *
+ * The list used is the one belonging to the current batch buffer
+ * (fenced_buffers[cur_batch]).  For each of the first num_textures texture
+ * slots that is non-NULL, every mipmap level with a non-NULL tiled_buffer
+ * gets its own list node; no attempt is made to skip buffers that are
+ * already on the list.
+ */
+void
+cell_add_fenced_textures(struct cell_context *cell)
+{
+ struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];
+ uint i;
+
+ for (i = 0; i < cell->num_textures; i++) {
+ struct cell_texture *ct = cell->texture[i];
+ if (ct) {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ if (ct->tiled_buffer[level]) {
+#if 0
+ printf("Adding texture %p buffer %p to list\n",
+ ct, ct->tiled_buffer[level]);
+#endif
+ cell_add_buffer_to_list(cell, list, ct->tiled_buffer[level]);
+ }
+ }
+ }
+ }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h
new file mode 100644
index 0000000000..536b4ba411
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_fence.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CELL_FENCE_H
+#define CELL_FENCE_H
+
+/** Reset every per-SPU status slot of the fence to CELL_FENCE_IDLE. */
+extern void
+cell_fence_init(struct cell_fence *fence);
+
+/** Return FALSE while any active SPU's slot reads CELL_FENCE_EMITTED. */
+extern boolean
+cell_fence_signalled(const struct cell_context *cell,
+ const struct cell_fence *fence);
+
+/** Poll (sleeping between checks) until cell_fence_signalled() is TRUE. */
+extern void
+cell_fence_finish(const struct cell_context *cell,
+ const struct cell_fence *fence);
+
+
+/** If the list is non-empty: wait on its fence, then unref and free all. */
+extern void
+cell_free_fenced_buffers(struct cell_context *cell,
+ struct cell_buffer_list *list);
+
+/** Add the bound textures' tiled buffers to the current batch's list. */
+extern void
+cell_add_fenced_textures(struct cell_context *cell);
+
+
+#endif /* CELL_FENCE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 3dfd5f673d..d4d644d6e8 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -84,6 +84,9 @@ struct codegen
/** Index of execution mask register */
int exec_mask_reg;
+ /** KIL mask: indicates which fragments have been killed */
+ int kill_mask_reg;
+
int frame_size; /**< Stack frame size, in words */
struct spe_function *f;
@@ -346,6 +349,22 @@ store_dest_reg(struct codegen *gen,
int value_reg, int channel,
const struct tgsi_full_dst_register *dest)
{
+ /*
+ * XXX need to implement dst reg clamping/saturation
+ */
+#if 0
+ switch (inst->Instruction.Saturate) {
+ case TGSI_SAT_NONE:
+ break;
+ case TGSI_SAT_ZERO_ONE:
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ break;
+ default:
+ assert( 0 );
+ }
+#endif
+
switch (dest->DstRegister.File) {
case TGSI_FILE_TEMPORARY:
if (gen->if_nesting > 0) {
@@ -431,8 +450,21 @@ emit_prologue(struct codegen *gen)
static void
emit_epilogue(struct codegen *gen)
{
+ const int return_reg = 3;
+
spe_comment(gen->f, -4, "Function epilogue:");
+ spe_comment(gen->f, 0, "return the killed mask");
+ if (gen->kill_mask_reg > 0) {
+ /* shader called KIL, return the "alive" mask */
+ spe_move(gen->f, return_reg, gen->kill_mask_reg);
+ }
+ else {
+ /* return {0,0,0,0} */
+ spe_load_uint(gen->f, return_reg, 0);
+ }
+
+ spe_comment(gen->f, 0, "restore stack and return");
if (gen->frame_size >= 512) {
/* offset is too large for ai instruction */
int offset_reg = spe_allocate_available_register(gen->f);
@@ -1337,16 +1369,33 @@ emit_function_call(struct codegen *gen,
static boolean
-emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- const uint addr = lookup_function(gen->cell, "spu_txp");
+ const uint target = inst->InstructionExtTexture.Texture;
const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ uint addr;
int ch;
int coord_regs[4], d_regs[4];
+ switch (target) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_2D:
+ addr = lookup_function(gen->cell, "spu_tex_2d");
+ break;
+ case TGSI_TEXTURE_3D:
+ addr = lookup_function(gen->cell, "spu_tex_3d");
+ break;
+ case TGSI_TEXTURE_CUBE:
+ addr = lookup_function(gen->cell, "spu_tex_cube");
+ break;
+ default:
+ ASSERT(0 && "unsupported texture target");
+ return FALSE;
+ }
+
assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER);
- spe_comment(gen->f, -4, "CALL txp:");
+ spe_comment(gen->f, -4, "CALL tex:");
/* get src/dst reg info */
for (ch = 0; ch < 4; ch++) {
@@ -1368,7 +1417,7 @@ emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst)
spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
}
- /* setup function arguments */
+ /* setup function arguments (XXX depends on target) */
for (i = 0; i < 4; i++) {
spe_move(gen->f, 3 + i, coord_regs[i]);
}
@@ -1407,6 +1456,68 @@ emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst)
/**
+ * KILL if any of src reg values are less than zero.
+ *
+ * Emits SPE code that builds a mask from the selected components of source
+ * register 0 (per the inline comments, a component contributes ~0 where it
+ * is below zero), ANDs that mask with the execution mask when inside an IF,
+ * and accumulates the result into gen->kill_mask_reg.  The kill-mask
+ * register is allocated on the first KIL (kill_mask_reg <= 0 is treated
+ * as "not allocated yet").  All integer temporaries are released before
+ * returning.  Always returns TRUE.
+ *
+ * NOTE(review): components are selected with the write mask of
+ * FullDstRegisters[0] although the values tested come from
+ * FullSrcRegisters[0] -- confirm that this mask is meaningful for KIL.
+ * NOTE(review): if no component is selected, kil_reg is still -1 when it
+ * is handed to spe_and()/spe_move()/spe_or() -- confirm this cannot occur.
+ */
+static boolean
+emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
+
+ spe_comment(gen->f, -4, "CALL kil:");
+
+ /* zero = {0,0,0,0} */
+ zero_reg = get_itemp(gen);
+ spe_load_uint(gen->f, zero_reg, 0);
+
+ cmp_reg = get_itemp(gen);
+
+ /* get src regs */
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ }
+ }
+
+ /* test if any src regs are < 0 */
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ if (kil_reg >= 0) {
+ /* cmp = (0 > src) ? ~0 : 0 */
+ spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
+ /* kil = kil | cmp */
+ spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
+ }
+ else {
+ kil_reg = get_itemp(gen);
+ /* kil = (0 > src) ? ~0 : 0 */
+ spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
+ }
+ }
+ }
+
+ if (gen->if_nesting) {
+ /* may have been a conditional kil */
+ spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
+ }
+
+ /* allocate the kill mask reg if needed */
+ if (gen->kill_mask_reg <= 0) {
+ gen->kill_mask_reg = spe_allocate_available_register(gen->f);
+ spe_move(gen->f, gen->kill_mask_reg, kil_reg);
+ }
+ else {
+ spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
+ }
+
+ free_itemps(gen);
+
+ return TRUE;
+}
+
+
+
+/**
* Emit max. See emit_SGT for comments.
*/
static boolean
@@ -1674,8 +1785,12 @@ emit_instruction(struct codegen *gen,
/* fall-through for now */
case TGSI_OPCODE_TXB:
/* fall-through for now */
+ case TGSI_OPCODE_TXL:
+ /* fall-through for now */
case TGSI_OPCODE_TXP:
- return emit_TXP(gen, inst);
+ return emit_TEX(gen, inst);
+ case TGSI_OPCODE_KIL:
+ return emit_KIL(gen, inst);
case TGSI_OPCODE_IF:
return emit_IF(gen, inst);
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
index 2e3086c4fa..825110c62b 100644
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
@@ -212,17 +212,24 @@ cell_bind_sampler_states(struct pipe_context *pipe,
unsigned num, void **samplers)
{
struct cell_context *cell = cell_context(pipe);
+ uint i, changed = 0x0;
assert(num <= CELL_MAX_SAMPLERS);
draw_flush(cell->draw);
- memcpy(cell->sampler, samplers, num * sizeof(void *));
- memset(&cell->sampler[num], 0, (CELL_MAX_SAMPLERS - num) *
- sizeof(void *));
- cell->num_samplers = num;
+ for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
+ struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL;
+ if (cell->sampler[i] != new_samp) {
+ cell->sampler[i] = new_samp;
+ changed |= (1 << i);
+ }
+ }
- cell->dirty |= CELL_NEW_SAMPLER;
+ if (changed) {
+ cell->dirty |= CELL_NEW_SAMPLER;
+ cell->dirty_samplers |= changed;
+ }
}
@@ -240,25 +247,25 @@ cell_set_sampler_textures(struct pipe_context *pipe,
unsigned num, struct pipe_texture **texture)
{
struct cell_context *cell = cell_context(pipe);
- uint i;
+ uint i, changed = 0x0;
assert(num <= CELL_MAX_SAMPLERS);
- /* Check for no-op */
- if (num == cell->num_textures &&
- !memcmp(cell->texture, texture, num * sizeof(struct pipe_texture *)))
- return;
-
- draw_flush(cell->draw);
-
for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- struct pipe_texture *tex = i < num ? texture[i] : NULL;
-
- pipe_texture_reference((struct pipe_texture **) &cell->texture[i], tex);
+ struct pipe_texture *new_tex = i < num ? texture[i] : NULL;
+ if ((struct pipe_texture *) cell->texture[i] != new_tex) {
+ pipe_texture_reference((struct pipe_texture **) &cell->texture[i],
+ new_tex);
+ changed |= (1 << i);
+ }
}
+
cell->num_textures = num;
- cell->dirty |= CELL_NEW_TEXTURE;
+ if (changed) {
+ cell->dirty |= CELL_NEW_TEXTURE;
+ cell->dirty_textures |= changed;
+ }
}
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c
index df020c4146..28e5e6d706 100644
--- a/src/gallium/drivers/cell/ppu/cell_spu.c
+++ b/src/gallium/drivers/cell/ppu/cell_spu.c
@@ -53,6 +53,35 @@ struct cell_global_info cell_global;
/**
+ * Scan /proc/cpuinfo to determine the timebase for the system.
+ * This is used by the SPUs to convert 'decrementer' ticks to seconds.
+ * There may be a better way to get this value...
+ *
+ * Returns the integer that follows the ':' on the first line beginning
+ * with "timebase", or 0 if /proc/cpuinfo cannot be opened or contains no
+ * such line.  Callers should treat 0 as "unknown".
+ */
+static unsigned
+get_timebase(void)
+{
+   FILE *f = fopen("/proc/cpuinfo", "r");
+   unsigned timebase = 0;  /* well-defined result when nothing is found */
+   char line[80];
+
+   assert(f);
+   if (!f) {
+      /* assert() is compiled out of release builds; don't read from NULL */
+      return 0;
+   }
+
+   /* Loop on fgets() itself: feof() only becomes true after a read has
+    * already failed, so testing it first would parse a stale (or, on an
+    * empty file, uninitialized) buffer one extra time.
+    */
+   while (fgets(line, sizeof(line), f)) {
+      if (strncmp(line, "timebase", 8) == 0) {
+         const char *colon = strchr(line, ':');
+         if (colon) {
+            /* atoi() skips leading blanks, so start right after the ':'
+             * rather than assuming exactly one separator character.
+             */
+            timebase = atoi(colon + 1);
+            break;
+         }
+      }
+   }
+   fclose(f);
+
+   return timebase;
+}
+
+
+/**
* Write a 1-word message to the given SPE mailbox.
*/
void
@@ -115,6 +144,7 @@ cell_start_spus(struct cell_context *cell)
{
static boolean one_time_init = FALSE;
uint i, j;
+ uint timebase = get_timebase();
if (one_time_init) {
fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
@@ -124,10 +154,7 @@ cell_start_spus(struct cell_context *cell)
one_time_init = TRUE;
- assert(cell->num_spus <= MAX_SPUS);
-
- ASSERT_ALIGN16(&cell_global.command[0]);
- ASSERT_ALIGN16(&cell_global.command[1]);
+ assert(cell->num_spus <= CELL_MAX_SPUS);
ASSERT_ALIGN16(&cell_global.inits[0]);
ASSERT_ALIGN16(&cell_global.inits[1]);
@@ -141,7 +168,8 @@ cell_start_spus(struct cell_context *cell)
cell_global.inits[i].id = i;
cell_global.inits[i].num_spus = cell->num_spus;
cell_global.inits[i].debug_flags = cell->debug_flags;
- cell_global.inits[i].cmd = &cell_global.command[i];
+ cell_global.inits[i].inv_timebase = 1000.0f / timebase;
+
for (j = 0; j < CELL_NUM_BUFFERS; j++) {
cell_global.inits[i].buffers[j] = cell->buffer[j];
}
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h
index 137f26612e..b633880c25 100644
--- a/src/gallium/drivers/cell/ppu/cell_spu.h
+++ b/src/gallium/drivers/cell/ppu/cell_spu.h
@@ -31,13 +31,12 @@
#include <libspe2.h>
#include <libmisc.h>
+#include <pthread.h>
#include "cell/common.h"
#include "cell_context.h"
-#define MAX_SPUS 8
-
/**
* Global vars, for now anyway.
*/
@@ -46,14 +45,13 @@ struct cell_global_info
/**
* SPU/SPE handles, etc
*/
- spe_context_ptr_t spe_contexts[MAX_SPUS];
- pthread_t spe_threads[MAX_SPUS];
+ spe_context_ptr_t spe_contexts[CELL_MAX_SPUS];
+ pthread_t spe_threads[CELL_MAX_SPUS];
/**
- * Data sent to SPUs
+ * Data sent to SPUs at start-up
*/
- struct cell_init_info inits[MAX_SPUS];
- struct cell_command command[MAX_SPUS];
+ struct cell_init_info inits[CELL_MAX_SPUS];
};
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index bb694aa107..dd2d7f7d1e 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -52,6 +52,7 @@ lookup_fragment_ops(struct cell_context *cell)
*/
memset(&key, 0, sizeof(key));
key.blend = *cell->blend;
+ key.blend_color = cell->blend_color;
key.dsa = *cell->depth_stencil;
if (cell->framebuffer.cbufs[0])
@@ -146,6 +147,13 @@ cell_emit_state(struct cell_context *cell)
#endif
}
+ if (cell->dirty & (CELL_NEW_RASTERIZER)) {
+ struct cell_command_rasterizer *rast =
+ cell_batch_alloc(cell, sizeof(*rast));
+ rast->opcode = CELL_CMD_STATE_RASTERIZER;
+ rast->rasterizer = *cell->rasterizer;
+ }
+
if (cell->dirty & (CELL_NEW_FS)) {
/* Send new fragment program to SPUs */
struct cell_command_fragment_program *fp
@@ -193,44 +201,50 @@ cell_emit_state(struct cell_context *cell)
if (cell->dirty & CELL_NEW_SAMPLER) {
uint i;
for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- if (cell->sampler[i]) {
- struct cell_command_sampler *sampler
- = cell_batch_alloc(cell, sizeof(*sampler));
- sampler->opcode = CELL_CMD_STATE_SAMPLER;
- sampler->unit = i;
- sampler->state = *cell->sampler[i];
+ if (cell->dirty_samplers & (1 << i)) {
+ if (cell->sampler[i]) {
+ struct cell_command_sampler *sampler
+ = cell_batch_alloc(cell, sizeof(*sampler));
+ sampler->opcode = CELL_CMD_STATE_SAMPLER;
+ sampler->unit = i;
+ sampler->state = *cell->sampler[i];
+ }
}
}
+ cell->dirty_samplers = 0x0;
}
if (cell->dirty & CELL_NEW_TEXTURE) {
uint i;
for (i = 0;i < CELL_MAX_SAMPLERS; i++) {
- struct cell_command_texture *texture
- = cell_batch_alloc(cell, sizeof(*texture));
- texture->opcode = CELL_CMD_STATE_TEXTURE;
- texture->unit = i;
- if (cell->texture[i]) {
- uint level;
- for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
- texture->start[level] = cell->texture[i]->tiled_data[level];
- texture->width[level] = cell->texture[i]->base.width[level];
- texture->height[level] = cell->texture[i]->base.height[level];
- texture->depth[level] = cell->texture[i]->base.depth[level];
+ if (cell->dirty_textures & (1 << i)) {
+ struct cell_command_texture *texture
+ = cell_batch_alloc(cell, sizeof(*texture));
+ texture->opcode = CELL_CMD_STATE_TEXTURE;
+ texture->unit = i;
+ if (cell->texture[i]) {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ texture->start[level] = cell->texture[i]->tiled_mapped[level];
+ texture->width[level] = cell->texture[i]->base.width[level];
+ texture->height[level] = cell->texture[i]->base.height[level];
+ texture->depth[level] = cell->texture[i]->base.depth[level];
+ }
+ texture->target = cell->texture[i]->base.target;
}
- texture->target = cell->texture[i]->base.target;
- }
- else {
- uint level;
- for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
- texture->start[level] = NULL;
- texture->width[level] = 0;
- texture->height[level] = 0;
- texture->depth[level] = 0;
+ else {
+ uint level;
+ for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
+ texture->start[level] = NULL;
+ texture->width[level] = 0;
+ texture->height[level] = 0;
+ texture->depth[level] = 0;
+ }
+ texture->target = 0;
}
- texture->target = 0;
}
}
+ cell->dirty_textures = 0x0;
}
if (cell->dirty & CELL_NEW_VERTEX_INFO) {
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
index 54a17eaf2b..cda39f8d59 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c
@@ -191,6 +191,8 @@ cell_set_constant_buffer(struct pipe_context *pipe,
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
+ draw_flush(cell->draw);
+
/* note: reference counting */
winsys_buffer_reference(ws,
&cell->constants[shader].buffer,
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index 230e192573..9ac2f3bbb9 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -136,6 +136,9 @@ cell_texture_release(struct pipe_screen *screen,
__FUNCTION__, (void *) *pt, (*pt)->refcount - 1);
*/
if (--(*pt)->refcount <= 0) {
+ /* Delete this texture now.
+ * But note that the underlying pipe_buffer may linger...
+ */
struct cell_texture *ct = cell_texture(*pt);
uint i;
@@ -146,8 +149,12 @@ cell_texture_release(struct pipe_screen *screen,
pipe_buffer_reference(screen, &ct->buffer, NULL);
for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
- if (ct->tiled_data[i]) {
- align_free(ct->tiled_data[i]);
+ /* Unreference the tiled image buffer.
+ * It may not actually be deleted until a fence is hit.
+ */
+ if (ct->tiled_buffer[i]) {
+ ct->tiled_mapped[i] = NULL;
+ winsys_buffer_reference(screen->winsys, &ct->tiled_buffer[i], NULL);
}
}
@@ -228,12 +235,18 @@ cell_twiddle_texture(struct pipe_screen *screen,
int offset = bufWidth * bufHeight * 4 * surface->face;
uint *dst;
- if (!ct->tiled_data[level]) {
- ct->tiled_data[level] =
- align_malloc(bufWidth * bufHeight * 4 * numFaces, 16);
+ if (!ct->tiled_buffer[level]) {
+ /* allocate buffer for tiled data now */
+ struct pipe_winsys *ws = screen->winsys;
+ uint bytes = bufWidth * bufHeight * 4 * numFaces;
+ ct->tiled_buffer[level] = ws->buffer_create(ws, 16,
+ PIPE_BUFFER_USAGE_PIXEL,
+ bytes);
+ /* and map it */
+ ct->tiled_mapped[level] = ws->buffer_map(ws, ct->tiled_buffer[level],
+ PIPE_BUFFER_USAGE_GPU_READ);
}
-
- dst = (uint *) ((ubyte *) ct->tiled_data[level] + offset);
+ dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset);
twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst,
surface->stride, src);
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h
index a0757091b0..2f5fe0dd1b 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.h
+++ b/src/gallium/drivers/cell/ppu/cell_texture.h
@@ -48,7 +48,10 @@ struct cell_texture
struct pipe_buffer *buffer;
unsigned long buffer_size;
- void *tiled_data[CELL_MAX_TEXTURE_LEVELS]; /* XXX this may be temporary */ /*ALIGN16*/
+ /** Texture data in tiled layout is held here */
+ struct pipe_buffer *tiled_buffer[CELL_MAX_TEXTURE_LEVELS];
+ /** Mapped, tiled texture data */
+ void *tiled_mapped[CELL_MAX_TEXTURE_LEVELS];
};
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c
index 578ddf62dc..65ba51b6bb 100644
--- a/src/gallium/drivers/cell/ppu/cell_vbuf.c
+++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c
@@ -38,6 +38,7 @@
#include "cell_batch.h"
#include "cell_context.h"
+#include "cell_fence.h"
#include "cell_flush.h"
#include "cell_spu.h"
#include "cell_vbuf.h"
@@ -108,6 +109,11 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
__FUNCTION__, cvbr->vertex_buf, vertices_used);
*/
+ /* Make sure texture buffers aren't released until we're done rendering
+ * with them.
+ */
+ cell_add_fenced_textures(cell);
+
/* Tell SPUs they can release the vert buf */
if (cvbr->vertex_buf != ~0U) {
struct cell_command_release_verts *release
@@ -214,7 +220,6 @@ cell_vbuf_draw(struct vbuf_render *vbr,
render->opcode = CELL_CMD_RENDER;
render->prim_type = cvbr->prim;
- render->front_winding = cell->rasterizer->front_winding;
render->num_indexes = nr_indices;
render->min_index = min_index;