summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/cell/spu
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/cell/spu')
-rw-r--r--src/gallium/drivers/cell/spu/Makefile2
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.c151
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.h2
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c138
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h8
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c4
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c2
-rw-r--r--src/gallium/drivers/cell/spu/spu_tile.c6
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c2
-rw-r--r--src/gallium/drivers/cell/spu/spu_util.c24
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_fetch.c3
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_shader.c29
12 files changed, 215 insertions, 156 deletions
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
index 8e83610790..d49abb2e82 100644
--- a/src/gallium/drivers/cell/spu/Makefile
+++ b/src/gallium/drivers/cell/spu/Makefile
@@ -5,7 +5,7 @@
TOP = ../../../../..
-include $(TOP)/configs/linux-cell
+include $(TOP)/configs/current
PROG = g3d
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 42e5022f30..e27df2dfb3 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -63,7 +63,6 @@
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
-#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
@@ -383,10 +382,10 @@ fetch_src_file_channel(
break;
case TGSI_FILE_IMMEDIATE:
- assert( index->i[0] < (int) mach->ImmLimit );
- assert( index->i[1] < (int) mach->ImmLimit );
- assert( index->i[2] < (int) mach->ImmLimit );
- assert( index->i[3] < (int) mach->ImmLimit );
+ ASSERT( index->i[0] < (int) mach->ImmLimit );
+ ASSERT( index->i[1] < (int) mach->ImmLimit );
+ ASSERT( index->i[2] < (int) mach->ImmLimit );
+ ASSERT( index->i[3] < (int) mach->ImmLimit );
chan->f[0] = mach->Imms[index->i[0]][swizzle];
chan->f[1] = mach->Imms[index->i[1]][swizzle];
@@ -410,7 +409,7 @@ fetch_src_file_channel(
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
break;
@@ -423,7 +422,7 @@ fetch_src_file_channel(
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
}
@@ -472,7 +471,7 @@ fetch_source(
index.q = si_shli(index.q, 12);
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
index.i[0] += reg->SrcRegisterDim.Index;
@@ -559,7 +558,7 @@ store_dest(
break;
default:
- assert( 0 );
+ ASSERT( 0 );
return;
}
@@ -583,11 +582,11 @@ store_dest(
break;
case TGSI_SAT_MINUS_PLUS_ONE:
- assert( 0 );
+ ASSERT( 0 );
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
}
@@ -770,7 +769,7 @@ exec_tex(struct spu_exec_machine *mach,
break;
default:
- assert (0);
+ ASSERT (0);
}
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -862,7 +861,7 @@ exec_declaration(struct spu_exec_machine *mach,
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
if( mask == TGSI_WRITEMASK_XYZW ) {
@@ -972,11 +971,11 @@ exec_instruction(
break;
case TGSI_OPCODE_EXP:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_LOG:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_MUL:
@@ -1152,24 +1151,24 @@ exec_instruction(
break;
case TGSI_OPCODE_CND:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_CND0:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_DOT2ADD:
/* TGSI_OPCODE_DP2A */
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_INDEX:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_NEGATE:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_FRAC:
@@ -1182,7 +1181,7 @@ exec_instruction(
break;
case TGSI_OPCODE_CLAMP:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_FLOOR:
@@ -1277,7 +1276,7 @@ exec_instruction(
break;
case TGSI_OPCODE_MULTIPLYMATRIX:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_ABS:
@@ -1291,7 +1290,7 @@ exec_instruction(
break;
case TGSI_OPCODE_RCC:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_DPH:
@@ -1354,23 +1353,23 @@ exec_instruction(
break;
case TGSI_OPCODE_PK2H:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_PK2US:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_PK4B:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_PK4UB:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_RFL:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_SEQ:
@@ -1385,7 +1384,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SFL:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_SGT:
@@ -1430,7 +1429,7 @@ exec_instruction(
break;
case TGSI_OPCODE_STR:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_TEX:
@@ -1453,7 +1452,7 @@ exec_instruction(
/* src[1] = d[strq]/dx */
/* src[2] = d[strq]/dy */
/* src[3] = sampler unit */
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_TXL:
@@ -1471,35 +1470,35 @@ exec_instruction(
break;
case TGSI_OPCODE_UP2H:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_UP2US:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_UP4B:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_UP4UB:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_X2D:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_ARA:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_ARR:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_BRA:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_CAL:
@@ -1508,14 +1507,14 @@ exec_instruction(
/* do the call */
/* push the Cond, Loop, Cont stacks */
- assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+ ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
mach->CondStack[mach->CondStackTop++] = mach->CondMask;
- assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
- assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
- assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
+ ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
/* note that PC was already incremented above */
@@ -1539,13 +1538,13 @@ exec_instruction(
*pc = mach->CallStack[--mach->CallStackTop];
/* pop the Cond, Loop, Cont stacks */
- assert(mach->CondStackTop > 0);
+ ASSERT(mach->CondStackTop > 0);
mach->CondMask = mach->CondStack[--mach->CondStackTop];
- assert(mach->LoopStackTop > 0);
+ ASSERT(mach->LoopStackTop > 0);
mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- assert(mach->ContStackTop > 0);
+ ASSERT(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[--mach->ContStackTop];
- assert(mach->FuncStackTop > 0);
+ ASSERT(mach->FuncStackTop > 0);
mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
UPDATE_EXEC_MASK(mach);
@@ -1553,7 +1552,7 @@ exec_instruction(
break;
case TGSI_OPCODE_SSG:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_CMP:
@@ -1593,11 +1592,11 @@ exec_instruction(
break;
case TGSI_OPCODE_NRM:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_DIV:
- assert( 0 );
+ ASSERT( 0 );
break;
case TGSI_OPCODE_DP2:
@@ -1616,7 +1615,7 @@ exec_instruction(
case TGSI_OPCODE_IF:
/* push CondMask */
- assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+ ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
mach->CondStack[mach->CondStackTop++] = mach->CondMask;
FETCH( &r[0], 0, CHAN_X );
/* update CondMask */
@@ -1640,7 +1639,7 @@ exec_instruction(
/* invert CondMask wrt previous mask */
{
uint prevMask;
- assert(mach->CondStackTop > 0);
+ ASSERT(mach->CondStackTop > 0);
prevMask = mach->CondStack[mach->CondStackTop - 1];
mach->CondMask = ~mach->CondMask & prevMask;
UPDATE_EXEC_MASK(mach);
@@ -1650,7 +1649,7 @@ exec_instruction(
case TGSI_OPCODE_ENDIF:
/* pop CondMask */
- assert(mach->CondStackTop > 0);
+ ASSERT(mach->CondStackTop > 0);
mach->CondMask = mach->CondStack[--mach->CondStackTop];
UPDATE_EXEC_MASK(mach);
break;
@@ -1661,19 +1660,19 @@ exec_instruction(
break;
case TGSI_OPCODE_REP:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_ENDREP:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_PUSHA:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_POPA:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_CEIL:
@@ -1747,7 +1746,7 @@ exec_instruction(
break;
case TGSI_OPCODE_MOD:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_XOR:
@@ -1760,15 +1759,15 @@ exec_instruction(
break;
case TGSI_OPCODE_SAD:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_TXF:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_TXQ:
- assert (0);
+ ASSERT (0);
break;
case TGSI_OPCODE_EMIT:
@@ -1785,9 +1784,9 @@ exec_instruction(
/* fall-through (for now) */
case TGSI_OPCODE_BGNLOOP2:
/* push LoopMask and ContMasks */
- assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
- assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
break;
@@ -1795,7 +1794,7 @@ exec_instruction(
/* fall-through (for now at least) */
case TGSI_OPCODE_ENDLOOP2:
/* Restore ContMask, but don't pop */
- assert(mach->ContStackTop > 0);
+ ASSERT(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
if (mach->LoopMask) {
/* repeat loop: jump to instruction just past BGNLOOP */
@@ -1803,10 +1802,10 @@ exec_instruction(
}
else {
/* exit loop: pop LoopMask */
- assert(mach->LoopStackTop > 0);
+ ASSERT(mach->LoopStackTop > 0);
mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
/* pop ContMask */
- assert(mach->ContStackTop > 0);
+ ASSERT(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[--mach->ContStackTop];
}
UPDATE_EXEC_MASK(mach);
@@ -1835,26 +1834,26 @@ exec_instruction(
break;
case TGSI_OPCODE_NOISE1:
- assert( 0 );
+ ASSERT( 0 );
break;
case TGSI_OPCODE_NOISE2:
- assert( 0 );
+ ASSERT( 0 );
break;
case TGSI_OPCODE_NOISE3:
- assert( 0 );
+ ASSERT( 0 );
break;
case TGSI_OPCODE_NOISE4:
- assert( 0 );
+ ASSERT( 0 );
break;
case TGSI_OPCODE_NOP:
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
}
@@ -1875,11 +1874,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach )
mach->FuncMask = 0xf;
mach->ExecMask = 0xf;
- mach->CondStackTop = 0; /* temporarily subvert this assertion */
- assert(mach->CondStackTop == 0);
- assert(mach->LoopStackTop == 0);
- assert(mach->ContStackTop == 0);
- assert(mach->CallStackTop == 0);
+ mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */
+ ASSERT(mach->CondStackTop == 0);
+ ASSERT(mach->LoopStackTop == 0);
+ ASSERT(mach->ContStackTop == 0);
+ ASSERT(mach->CallStackTop == 0);
mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h
index c68f78f59b..8605679940 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.h
+++ b/src/gallium/drivers/cell/spu/spu_exec.h
@@ -99,7 +99,7 @@ struct spu_exec_machine
* 1 address
*/
struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS
- + TGSI_EXEC_NUM_ADDRS + 1]
+ + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]
ALIGN16_ATTRIB;
struct spu_exec_vector *Addrs;
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index e04ffeb9b1..d223f32d94 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -55,6 +55,10 @@ struct spu_global spu;
struct spu_vs_context draw;
+
+/**
+ * Buffers containing dynamically generated SPU code:
+ */
static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
ALIGN16_ATTRIB;
@@ -136,54 +140,75 @@ really_clear_tiles(uint surfaceIndex)
static void
cmd_clear_surface(const struct cell_command_clear_surface *clear)
{
- const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
- uint i;
-
if (Debug)
printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id,
clear->surface, clear->value);
-#define CLEAR_OPT 1
-#if CLEAR_OPT
- /* set all tile's status to CLEAR */
if (clear->surface == 0) {
- memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
spu.fb.color_clear_value = clear->value;
+ if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
+ uint x = (spu.init.id << 4) | (spu.init.id << 12) |
+ (spu.init.id << 20) | (spu.init.id << 28);
+ spu.fb.color_clear_value ^= x;
+ }
}
else {
- memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
spu.fb.depth_clear_value = clear->value;
}
- return;
-#endif
+#define CLEAR_OPT 1
+#if CLEAR_OPT
+
+ /* Simply set all tiles' status to CLEAR.
+ * When we actually begin rendering into a tile, we'll initialize it to
+ * the clear value. If any tiles go untouched during the frame,
+ * really_clear_tiles() will set them to the clear value.
+ */
if (clear->surface == 0) {
- spu.fb.color_clear_value = clear->value;
- clear_c_tile(&spu.ctile);
+ memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
}
else {
- spu.fb.depth_clear_value = clear->value;
- clear_z_tile(&spu.ztile);
+ memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
}
+#else
+
+ /*
+ * This path clears the whole framebuffer to the clear color right now.
+ */
+
/*
printf("SPU: %s num=%d w=%d h=%d\n",
__FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
*/
- for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
- uint tx = i % spu.fb.width_tiles;
- uint ty = i / spu.fb.width_tiles;
- if (clear->surface == 0)
- put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
- else
- put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
- /* XXX we don't want this here, but it fixes bad tile results */
+ /* init a single tile to the clear value */
+ if (clear->surface == 0) {
+ clear_c_tile(&spu.ctile);
+ }
+ else {
+ clear_z_tile(&spu.ztile);
}
-#if 0
- wait_on_mask(1 << TAG_SURFACE_CLEAR);
-#endif
+ /* walk over my tiles, writing the 'clear' tile's data */
+ {
+ const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+ uint i;
+ for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+ uint tx = i % spu.fb.width_tiles;
+ uint ty = i / spu.fb.width_tiles;
+ if (clear->surface == 0)
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
+ else
+ put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
+ }
+ }
+
+ if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
+ wait_on_mask(1 << TAG_SURFACE_CLEAR);
+ }
+
+#endif /* CLEAR_OPT */
if (Debug)
printf("SPU %u: CLEAR SURF done\n", spu.init.id);
@@ -312,6 +337,21 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat
static void
+cmd_state_logicop(const struct cell_command_logicop * code)
+{
+ mfc_get(logicop_code_buffer,
+ (unsigned int) code->base, /* src */
+ code->size,
+ TAG_BATCH_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << TAG_BATCH_BUFFER);
+
+ spu.logicop = (logicop_func) logicop_code_buffer;
+}
+
+
+static void
cmd_state_sampler(const struct cell_command_sampler *sampler)
{
if (Debug)
@@ -381,6 +421,21 @@ cmd_state_vs_array_info(const struct cell_array_info *vs_info)
static void
+cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
+{
+ mfc_get(attribute_fetch_code_buffer,
+ (unsigned int) code->base, /* src */
+ code->size,
+ TAG_BATCH_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+ wait_on_mask(1 << TAG_BATCH_BUFFER);
+
+ draw.vertex_fetch.code = attribute_fetch_code_buffer;
+}
+
+
+static void
cmd_finish(void)
{
if (Debug)
@@ -518,38 +573,15 @@ cmd_batch(uint opcode)
(struct cell_shader_info *) &buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
break;
- case CELL_CMD_STATE_ATTRIB_FETCH: {
- struct cell_attribute_fetch_code *code =
- (struct cell_attribute_fetch_code *) &buffer[pos+1];
-
- mfc_get(attribute_fetch_code_buffer,
- (unsigned int) code->base, /* src */
- code->size,
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
-
- draw.vertex_fetch.code = attribute_fetch_code_buffer;
+ case CELL_CMD_STATE_ATTRIB_FETCH:
+ cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
+ &buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
break;
- }
- case CELL_CMD_STATE_LOGICOP: {
- struct cell_command_logicop *code =
- (struct cell_command_logicop *) &buffer[pos+1];
-
- mfc_get(logicop_code_buffer,
- (unsigned int) code->base, /* src */
- code->size,
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
-
- spu.logicop = (logicop_func) logicop_code_buffer;
+ case CELL_CMD_STATE_LOGICOP:
+ cmd_state_logicop((struct cell_command_logicop *) &buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_command_logicop)) / 8);
break;
- }
case CELL_CMD_FLUSH_BUFFER_RANGE: {
struct cell_buffer_range *br = (struct cell_buffer_range *)
&buffer[pos+1];
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index e962e1426c..4879f8c9c8 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -124,13 +124,13 @@ struct spu_global
struct spu_framebuffer fb;
boolean read_depth;
boolean read_stencil;
- frag_test_func frag_test;
+ frag_test_func frag_test; /**< Current depth/stencil test code */
- boolean read_fb;
- blend_func blend;
+ boolean read_fb; /**< Does current blend mode require framebuffer read? */
+ blend_func blend; /**< Current blend code */
qword const_blend_color[4] ALIGN16_ATTRIB;
- logicop_func logicop;
+ logicop_func logicop; /**< Current logicop code **/
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
struct spu_texture texture[PIPE_MAX_SAMPLERS];
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
index b4cffeeb32..c0a729b3d2 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -95,7 +95,7 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
default:
- assert(0);
+ ASSERT(0);
break;
}
}
@@ -153,7 +153,7 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
default:
- assert(0);
+ ASSERT(0);
break;
}
}
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
index 6df59abd36..305dc98881 100644
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -35,7 +35,7 @@
#include "spu_tri.h"
#include "spu_tile.h"
#include "cell/common.h"
-
+#include "util/u_memory.h"
/**
diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c
index 12dc246328..216a33126b 100644
--- a/src/gallium/drivers/cell/spu/spu_tile.c
+++ b/src/gallium/drivers/cell/spu/spu_tile.c
@@ -31,6 +31,9 @@
#include "spu_main.h"
+/**
+ * Get tile of color or Z values from main memory, put into SPU memory.
+ */
void
get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf)
{
@@ -56,6 +59,9 @@ get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf)
}
+/**
+ * Move tile of color or Z values from SPU memory to main memory.
+ */
void
put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
{
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index ab4ff8160a..2a4e0b423c 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -32,7 +32,7 @@
#include <transpose_matrix4x4.h>
#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
#include "spu_colorpack.h"
#include "spu_main.h"
#include "spu_texture.h"
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
index 74ab2bbd1f..b8a0d4a265 100644
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ b/src/gallium/drivers/cell/spu/spu_util.c
@@ -1,5 +1,7 @@
-#include "pipe/p_util.h"
+
+#include "cell/common.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_debug.h"
#include "tgsi/tgsi_parse.h"
//#include "tgsi_build.h"
#include "tgsi/tgsi_util.h"
@@ -19,7 +21,7 @@ tgsi_util_get_src_register_swizzle(
case 3:
return reg->SwizzleW;
default:
- assert( 0 );
+ ASSERT( 0 );
}
return 0;
}
@@ -39,7 +41,7 @@ tgsi_util_get_src_register_extswizzle(
case 3:
return reg->ExtSwizzleW;
default:
- assert( 0 );
+ ASSERT( 0 );
}
return 0;
}
@@ -59,12 +61,12 @@ tgsi_util_get_full_src_register_extswizzle(
&reg->SrcRegisterExtSwz,
component );
- assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
- assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
- assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
- assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
- assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
- assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
+ ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
+ ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
+ ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
+ ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
+ ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
+ ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
/*
* Second, calculate the simple swizzle for the unswizzled channel index.
@@ -94,7 +96,7 @@ tgsi_util_get_src_register_extnegate(
case 3:
return reg->NegateW;
default:
- assert( 0 );
+ ASSERT( 0 );
}
return 0;
}
@@ -119,7 +121,7 @@ tgsi_util_set_src_register_extnegate(
reg->NegateW = negate;
break;
default:
- assert( 0 );
+ ASSERT( 0 );
}
}
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
index 219fd90cc0..03375d84a5 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
@@ -32,7 +32,6 @@
* Ian Romanick <idr@us.ibm.com>
*/
-#include "pipe/p_util.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
#include "spu_exec.h"
@@ -93,7 +92,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
unsigned attr;
- assert(count <= 4);
+ ASSERT(count <= 4);
#if DRAW_DBG
printf("SPU: %s count = %u, nr_attrs = %u\n",
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
index 3119a78c06..fbe5b34d39 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
@@ -34,16 +34,37 @@
#include <spu_mfcio.h>
-#include "pipe/p_util.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
-#include "spu_vertex_shader.h"
-#include "spu_exec.h"
+#include "util/u_math.h"
#include "draw/draw_private.h"
#include "draw/draw_context.h"
#include "cell/common.h"
+#include "spu_vertex_shader.h"
+#include "spu_exec.h"
#include "spu_main.h"
+
+#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
+
+
+#define CLIP_RIGHT_BIT 0x01
+#define CLIP_LEFT_BIT 0x02
+#define CLIP_TOP_BIT 0x04
+#define CLIP_BOTTOM_BIT 0x08
+#define CLIP_FAR_BIT 0x10
+#define CLIP_NEAR_BIT 0x20
+
+
+static INLINE float
+dot4(const float *a, const float *b)
+{
+ return (a[0]*b[0] +
+ a[1]*b[1] +
+ a[2]*b[2] +
+ a[3]*b[3]);
+}
+
static INLINE unsigned
compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
{
@@ -91,7 +112,7 @@ run_vertex_program(struct spu_vs_context *draw,
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
- assert(count <= 4);
+ ASSERT(count <= 4);
machine->Processor = TGSI_PROCESSOR_VERTEX;