summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/cell/common.h11
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.c2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.c272
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c5
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c115
5 files changed, 337 insertions, 68 deletions
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index 8f08854117..f0ff96eb47 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -104,12 +104,11 @@
#define CELL_BUFFER_STATUS_FREE 10
#define CELL_BUFFER_STATUS_USED 20
-
-#define CELL_DEBUG_CHECKER (1 << 0)
-#define CELL_DEBUG_ASM (1 << 1)
-#define CELL_DEBUG_SYNC (1 << 2)
-
-
+#define CELL_DEBUG_CHECKER (1 << 0)
+#define CELL_DEBUG_ASM (1 << 1)
+#define CELL_DEBUG_SYNC (1 << 2)
+#define CELL_DEBUG_FRAGMENT_OPS (1 << 3)
+#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4)
/** Max instructions for doing per-fragment operations */
#define SPU_MAX_FRAGMENT_OPS_INSTS 64
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
index b418271dca..62e213ea35 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ b/src/gallium/drivers/cell/ppu/cell_context.c
@@ -89,6 +89,8 @@ static const struct debug_named_value cell_debug_flags[] = {
{"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */
{"asm", CELL_DEBUG_ASM}, /**< dump SPU asm code */
{"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */
+ {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/
+ {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/
{NULL, 0}
};
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
index 06219d4e98..2c8c9e0d2c 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -229,7 +229,36 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
spe_release_register(f, amask_reg);
}
+/* This is a convenient and oft-used sequence. It chooses
+ * the smaller of each element of reg1 and reg2, and combines them
+ * into the result register, as follows:
+ *
+ * The Float Compare Greater Than (fcgt) instruction will put
+ * 1s into compare_reg where reg1 > reg2, and 0s where reg1 <= reg2.
+ *
+ * Then the Select Bits (selb) instruction will take bits from
+ * reg1 where compare_reg is 0, and from reg2 where compare_reg is
+ * 1. Ergo, result_reg will have the bits from reg1 where reg1 <= reg2,
+ * and the bits from reg2 where reg1 > reg2, which is exactly the
+ * MIN operation.
+ */
+#define FLOAT_VECTOR_MIN(f, result_reg, reg1, reg2) {\
+ int compare_reg = spe_allocate_available_register(f); \
+ spe_fcgt(f, compare_reg, reg1, reg2); \
+ spe_selb(f, result_reg, reg1, reg2, compare_reg); \
+ spe_release_register(f, compare_reg); \
+}
+/* The FLOAT_VECTOR_MAX sequence is similar to the FLOAT_VECTOR_MIN
+ * sequence above, except that the registers specified when selecting
+ * bits are reversed.
+ */
+#define FLOAT_VECTOR_MAX(f, result_reg, reg1, reg2) {\
+ int compare_reg = spe_allocate_available_register(f); \
+ spe_fcgt(f, compare_reg, reg1, reg2); \
+ spe_selb(f, result_reg, reg2, reg1, compare_reg); \
+ spe_release_register(f, compare_reg); \
+}
/**
* Generate SPE code to implement the given blend mode for a quad of pixels.
@@ -242,6 +271,7 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
*/
static void
gen_blend(const struct pipe_blend_state *blend,
+ const struct pipe_blend_color *blend_color,
struct spe_function *f,
enum pipe_format color_format,
int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
@@ -262,10 +292,53 @@ gen_blend(const struct pipe_blend_state *blend,
int fbB_reg = spe_allocate_available_register(f);
int fbA_reg = spe_allocate_available_register(f);
- int one_reg = spe_allocate_available_register(f);
int tmp_reg = spe_allocate_available_register(f);
- boolean one_reg_set = false; /* avoid setting one_reg more than once */
+ /* These values might or might not eventually get put into
+ * registers. We avoid allocating them and setting them until
+ * they're actually needed; then we avoid setting them more than
+ * once, and release them at the end of code generation.
+ */
+ boolean one_reg_set = false;
+ int one_reg;
+#define SET_ONE_REG_IF_UNSET(f) if (!one_reg_set) {\
+ one_reg = spe_allocate_available_register(f); \
+ spe_load_float(f, one_reg, 1.0f); \
+ one_reg_set = true; \
+}
+#define RELEASE_ONE_REG_IF_USED(f) if (one_reg_set) {\
+ spe_release_register(f, one_reg); \
+}
+
+ boolean const_color_set = false;
+ int constR_reg, constG_reg, constB_reg;
+#define SET_CONST_COLOR_IF_UNSET(f, blend_color) if (!const_color_set) {\
+ constR_reg = spe_allocate_available_register(f); \
+ constG_reg = spe_allocate_available_register(f); \
+ constG_reg = spe_allocate_available_register(f); \
+ spe_load_float(f, constR_reg, blend_color->color[0]); \
+ spe_load_float(f, constG_reg, blend_color->color[1]); \
+ spe_load_float(f, constB_reg, blend_color->color[2]); \
+ const_color_set = true;\
+}
+#define RELEASE_CONST_COLOR_IF_USED(f) if (const_color_set) {\
+ spe_release_register(f, constR_reg); \
+ spe_release_register(f, constG_reg); \
+ spe_release_register(f, constB_reg); \
+}
+
+ boolean const_alpha_set = false;
+ int constA_reg;
+#define SET_CONST_ALPHA_IF_UNSET(f, blend_color) if (!const_alpha_set) {\
+ constA_reg = spe_allocate_available_register(f); \
+ spe_load_float(f, constA_reg, blend_color->color[3]); \
+ const_alpha_set = true; \
+}
+#define RELEASE_CONST_ALPHA_IF_USED(f) if (const_alpha_set) {\
+ spe_release_register(f, constA_reg); \
+}
+
+ /* Real code starts here */
ASSERT(blend->blend_enable);
@@ -348,30 +421,161 @@ gen_blend(const struct pipe_blend_state *blend,
/*
- * Compute Src RGB terms
+ * Compute Src RGB terms. We're actually looking for the value
+ * of (the appropriate RGB factors) * (the incoming source RGB color).
*/
switch (blend->rgb_src_factor) {
case PIPE_BLENDFACTOR_ONE:
+ /* factors = (1,1,1), so term = (R,G,B) */
spe_move(f, term1R_reg, fragR_reg);
spe_move(f, term1G_reg, fragG_reg);
spe_move(f, term1B_reg, fragB_reg);
break;
case PIPE_BLENDFACTOR_ZERO:
- spe_zero(f, term1R_reg);
- spe_zero(f, term1G_reg);
- spe_zero(f, term1B_reg);
+ /* factors = (0,0,0), so term = (0,0,0) */
+ spe_load_float(f, term1R_reg, 0.0f);
+ spe_load_float(f, term1G_reg, 0.0f);
+ spe_load_float(f, term1B_reg, 0.0f);
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
+ /* factors = (R,G,B), so term = (R*R, G*G, B*B) */
spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
+ /* factors = (A,A,A), so term = (R*A, G*A, B*A) */
spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
break;
- /* XXX more cases */
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) */
+ /* we'll need the optional constant {1,1,1,1} register */
+ SET_ONE_REG_IF_UNSET(f)
+ /* tmp = 1 - R */
+ spe_fs(f, tmp_reg, one_reg, fragR_reg);
+ /* term = R * tmp */
+ spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
+ /* repeat for G and B */
+ spe_fs(f, tmp_reg, one_reg, fragG_reg);
+ spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
+ spe_fs(f, tmp_reg, one_reg, fragB_reg);
+ spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */
+ spe_fm(f, term1R_reg, fragR_reg, fbR_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fbG_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fbB_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) */
+ /* we'll need the optional constant {1,1,1,1} register */
+ SET_ONE_REG_IF_UNSET(f)
+ /* tmp = 1 - Rfb */
+ spe_fs(f, tmp_reg, one_reg, fbR_reg);
+ /* term = R * tmp */
+ spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
+ /* repeat for G and B */
+ spe_fs(f, tmp_reg, one_reg, fbG_reg);
+ spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
+ spe_fs(f, tmp_reg, one_reg, fbB_reg);
+ spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) */
+ /* we'll need the optional constant {1,1,1,1} register */
+ SET_ONE_REG_IF_UNSET(f)
+ /* tmp = 1 - A */
+ spe_fs(f, tmp_reg, one_reg, fragA_reg);
+ /* term = R * tmp */
+ spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
+ /* repeat for G and B with the same (1-A) factor */
+ spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
+ spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */
+ spe_fm(f, term1R_reg, fragR_reg, fbA_reg);
+ spe_fm(f, term1G_reg, fragG_reg, fbA_reg);
+ spe_fm(f, term1B_reg, fragB_reg, fbA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) */
+ /* we'll need the optional constant {1,1,1,1} register */
+ SET_ONE_REG_IF_UNSET(f)
+ /* tmp = 1 - A */
+ spe_fs(f, tmp_reg, one_reg, fbA_reg);
+ /* term = R * tmp, G*tmp, and B*tmp */
+ spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
+ spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
+ spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ /* We'll need the optional blend color registers */
+ SET_CONST_COLOR_IF_UNSET(f,blend_color)
+ /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */
+ spe_fm(f, term1R_reg, fragR_reg, constR_reg);
+ spe_fm(f, term1G_reg, fragG_reg, constG_reg);
+ spe_fm(f, term1B_reg, fragB_reg, constB_reg);
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ /* we'll need the optional constant alpha register */
+ SET_CONST_ALPHA_IF_UNSET(f, blend_color)
+ /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */
+ spe_fm(f, term1R_reg, fragR_reg, constA_reg);
+ spe_fm(f, term1G_reg, fragG_reg, constA_reg);
+ spe_fm(f, term1B_reg, fragB_reg, constA_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ /* We need both the optional {1,1,1,1} register, and the optional
+ * constant color registers
+ */
+ SET_ONE_REG_IF_UNSET(f)
+ SET_CONST_COLOR_IF_UNSET(f, blend_color)
+ /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) */
+ spe_fs(f, tmp_reg, one_reg, constR_reg);
+ spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
+ spe_fs(f, tmp_reg, one_reg, constG_reg);
+ spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
+ spe_fs(f, tmp_reg, one_reg, constB_reg);
+ spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ /* We need the optional {1,1,1,1} register and the optional
+ * constant alpha register
+ */
+ SET_ONE_REG_IF_UNSET(f)
+ SET_CONST_ALPHA_IF_UNSET(f, blend_color)
+ /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) */
+ spe_fs(f, tmp_reg, one_reg, constA_reg);
+ spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
+ spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
+ spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ /* We'll need the optional {1,1,1,1} register */
+ SET_ONE_REG_IF_UNSET(f)
+ /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so
+ * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb))
+ */
+ /* tmp = 1 - Afb */
+ spe_fs(f, tmp_reg, one_reg, fbA_reg);
+ /* tmp = min(A,tmp) */
+ FLOAT_VECTOR_MIN(f, tmp_reg, fragA_reg, tmp_reg)
+ /* term = R*tmp */
+ spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
+ spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
+ spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
+ break;
+
+ /* non-OpenGL cases? */
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+
default:
ASSERT(0);
}
@@ -421,6 +625,7 @@ gen_blend(const struct pipe_blend_state *blend,
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
/* one = {1.0, 1.0, 1.0, 1.0} */
if (!one_reg_set) {
+ one_reg = spe_allocate_available_register(f);
spe_load_float(f, one_reg, 1.0f);
one_reg_set = true;
}
@@ -432,6 +637,14 @@ gen_blend(const struct pipe_blend_state *blend,
spe_fm(f, term2B_reg, fbB_reg, tmp_reg);
break;
/* XXX more cases */
+ // GL_ONE_MINUS_SRC_COLOR
+ // GL_DST_COLOR
+ // GL_ONE_MINUS_DST_COLOR
+ // GL_DST_ALPHA
+ // GL_CONSTANT_COLOR
+ // GL_ONE_MINUS_CONSTANT_COLOR
+ // GL_CONSTANT_ALPHA
+ // GL_ONE_MINUS_CONSTANT_ALPHA
default:
ASSERT(0);
}
@@ -452,6 +665,7 @@ gen_blend(const struct pipe_blend_state *blend,
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
/* one = {1.0, 1.0, 1.0, 1.0} */
if (!one_reg_set) {
+ one_reg = spe_allocate_available_register(f);
spe_load_float(f, one_reg, 1.0f);
one_reg_set = true;
}
@@ -461,6 +675,14 @@ gen_blend(const struct pipe_blend_state *blend,
spe_fm(f, term2A_reg, fbA_reg, tmp_reg);
break;
/* XXX more cases */
+ // GL_ONE_MINUS_SRC_COLOR
+ // GL_DST_COLOR
+ // GL_ONE_MINUS_DST_COLOR
+ // GL_DST_ALPHA
+ // GL_CONSTANT_COLOR
+ // GL_ONE_MINUS_CONSTANT_COLOR
+ // GL_CONSTANT_ALPHA
+ // GL_ONE_MINUS_CONSTANT_ALPHA
default:
ASSERT(0);
}
@@ -479,7 +701,21 @@ gen_blend(const struct pipe_blend_state *blend,
spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ spe_fs(f, fragR_reg, term2R_reg, term1R_reg);
+ spe_fs(f, fragG_reg, term2G_reg, term1G_reg);
+ spe_fs(f, fragB_reg, term2B_reg, term1B_reg);
+ break;
+ case PIPE_BLEND_MIN:
+ FLOAT_VECTOR_MIN(f, fragR_reg, term1R_reg, term2R_reg)
+ FLOAT_VECTOR_MIN(f, fragG_reg, term1G_reg, term2G_reg)
+ FLOAT_VECTOR_MIN(f, fragB_reg, term1B_reg, term2B_reg)
+ break;
+ case PIPE_BLEND_MAX:
+ FLOAT_VECTOR_MAX(f, fragR_reg, term1R_reg, term2R_reg)
+ FLOAT_VECTOR_MAX(f, fragG_reg, term1G_reg, term2G_reg)
+ FLOAT_VECTOR_MAX(f, fragB_reg, term1B_reg, term2B_reg)
+ break;
default:
ASSERT(0);
}
@@ -494,7 +730,15 @@ gen_blend(const struct pipe_blend_state *blend,
case PIPE_BLEND_SUBTRACT:
spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
break;
- /* XXX more cases */
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ spe_fs(f, fragA_reg, term2A_reg, term1A_reg);
+ break;
+ case PIPE_BLEND_MIN:
+ FLOAT_VECTOR_MIN(f, fragA_reg, term1A_reg, term2A_reg)
+ break;
+ case PIPE_BLEND_MAX:
+ FLOAT_VECTOR_MAX(f, fragA_reg, term1A_reg, term2A_reg)
+ break;
default:
ASSERT(0);
}
@@ -514,8 +758,12 @@ gen_blend(const struct pipe_blend_state *blend,
spe_release_register(f, fbB_reg);
spe_release_register(f, fbA_reg);
- spe_release_register(f, one_reg);
spe_release_register(f, tmp_reg);
+
+ /* Free any optional registers that actually got used */
+ RELEASE_ONE_REG_IF_USED(f)
+ RELEASE_CONST_COLOR_IF_USED(f)
+ RELEASE_CONST_ALPHA_IF_USED(f)
}
@@ -629,6 +877,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
const struct pipe_depth_stencil_alpha_state *dsa =
&cell->depth_stencil->base;
const struct pipe_blend_state *blend = &cell->blend->base;
+ const struct pipe_blend_color *blend_color = &cell->blend_color;
const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
@@ -651,7 +900,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */
int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */
- spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
spe_allocate_register(f, x_reg);
spe_allocate_register(f, y_reg);
spe_allocate_register(f, color_tile_reg);
@@ -816,7 +1064,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
if (blend->blend_enable) {
- gen_blend(blend, f, color_format,
+ gen_blend(blend, blend_color, f, color_format,
fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 2da3097983..8a389cd6aa 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -100,14 +100,19 @@ cell_emit_state(struct cell_context *cell)
= cell_batch_alloc(cell, sizeof(*fops));
struct spe_function spe_code;
+ /* Prepare the buffer that will hold the generated code. */
+ spe_init_func(&spe_code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+
/* generate new code */
cell_gen_fragment_function(cell, &spe_code);
+
/* put the new code into the batch buffer */
fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
memcpy(&fops->code, spe_code.store,
SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
fops->dsa = cell->depth_stencil->base;
fops->blend = cell->blend->base;
+
/* free codegen buffer */
spe_release_func(&spe_code);
}
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 78260c4259..da2cb08972 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -50,7 +50,31 @@ helpful headers:
/opt/cell/sdk/usr/include/libmisc.h
*/
+/* Set to 0 to disable all extraneous debugging code */
+#define DEBUG 1
+
+#if DEBUG
boolean Debug = FALSE;
+boolean force_fragment_ops_fallback = TRUE;
+
+/* These debug macros use the unusual construction ", ##__VA_ARGS__"
+ * which expands to the expected comma + args if variadic arguments
+ * are supplied, but swallows the comma if there are no variadic
+ * arguments (which avoids syntax errors that would otherwise occur).
+ */
+#define DEBUG_PRINTF(format,...) \
+ if (Debug) \
+ printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__)
+#define D_PRINTF(flag, format,...) \
+ if (spu.init.debug_flags & (flag)) \
+ printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__)
+
+#else
+
+#define DEBUG_PRINTF(...)
+#define D_PRINTF(...)
+
+#endif
struct spu_global spu;
@@ -133,9 +157,7 @@ really_clear_tiles(uint surfaceIndex)
static void
cmd_clear_surface(const struct cell_command_clear_surface *clear)
{
- if (Debug)
- printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id,
- clear->surface, clear->value);
+ DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);
if (clear->surface == 0) {
spu.fb.color_clear_value = clear->value;
@@ -203,17 +225,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear)
#endif /* CLEAR_OPT */
- if (Debug)
- printf("SPU %u: CLEAR SURF done\n", spu.init.id);
+ DEBUG_PRINTF("CLEAR SURF done\n");
}
static void
cmd_release_verts(const struct cell_command_release_verts *release)
{
- if (Debug)
- printf("SPU %u: RELEASE VERTS %u\n",
- spu.init.id, release->vertex_buf);
+ DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf);
ASSERT(release->vertex_buf != ~0U);
release_buffer(release->vertex_buf);
}
@@ -228,16 +247,30 @@ cmd_release_verts(const struct cell_command_release_verts *release)
static void
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
- if (Debug)
- printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);
+ DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n");
/* Copy SPU code from batch buffer to spu buffer */
memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
/* Copy state info (for fallback case only) */
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
- /* Point function pointer at new code */
- spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
+ /* Parity twist! For now, always use the fallback code by default,
+ * only switching to codegen when specifically requested. This
+ * allows us to develop freely without risking taking down the
+ * branch.
+ *
+ * Later, the parity of this check will be reversed, so that
+ * codegen is *always* used, unless we specifically indicate that
+ * we don't want it.
+ *
+ * Eventually, the option will be removed completely, because in
+ * final code we'll always use codegen and won't even provide the
+ * raw state records that the fallback code requires.
+ */
+ if (spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) {
+ spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
+ }
+ /* otherwise, the default fallback code remains in place */
spu.read_depth = spu.depth_stencil_alpha.depth.enabled;
spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
@@ -247,8 +280,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
static void
cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
{
- if (Debug)
- printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id);
+ DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n");
/* Copy SPU code from batch buffer to spu buffer */
memcpy(spu.fragment_program_code, fp->code,
SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
@@ -262,9 +294,7 @@ cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
static void
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
{
- if (Debug)
- printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
- spu.init.id,
+ DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
cmd->width,
cmd->height,
cmd->color_start,
@@ -309,9 +339,7 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
static void
cmd_state_sampler(const struct cell_command_sampler *sampler)
{
- if (Debug)
- printf("SPU %u: SAMPLER [%u]\n",
- spu.init.id, sampler->unit);
+ DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit);
spu.sampler[sampler->unit] = sampler->state;
if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR)
@@ -328,11 +356,9 @@ cmd_state_texture(const struct cell_command_texture *texture)
const uint width = texture->width;
const uint height = texture->height;
- if (Debug) {
- printf("SPU %u: TEXTURE [%u] at %p size %u x %u\n", spu.init.id,
+ DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n",
texture->unit, texture->start,
texture->width, texture->height);
- }
spu.texture[unit].start = texture->start;
spu.texture[unit].width = width;
@@ -351,10 +377,7 @@ cmd_state_texture(const struct cell_command_texture *texture)
static void
cmd_state_vertex_info(const struct vertex_info *vinfo)
{
- if (Debug) {
- printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id,
- vinfo->num_attribs);
- }
+ DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
ASSERT(vinfo->num_attribs >= 1);
ASSERT(vinfo->num_attribs <= 8);
memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
@@ -393,8 +416,7 @@ cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
static void
cmd_finish(void)
{
- if (Debug)
- printf("SPU %u: FINISH\n", spu.init.id);
+ DEBUG_PRINTF("FINISH\n");
really_clear_tiles(0);
/* wait for all outstanding DMAs to finish */
mfc_write_tag_mask(~0);
@@ -419,9 +441,8 @@ cmd_batch(uint opcode)
const unsigned usize = size / sizeof(buffer[0]);
uint pos;
- if (Debug)
- printf("SPU %u: BATCH buffer %u, len %u, from %p\n",
- spu.init.id, buf, size, spu.init.buffers[buf]);
+ DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n",
+ buf, size, spu.init.buffers[buf]);
ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
@@ -440,8 +461,7 @@ cmd_batch(uint opcode)
wait_on_mask(1 << TAG_BATCH_BUFFER);
/* Tell PPU we're done copying the buffer to local store */
- if (Debug)
- printf("SPU %u: release batch buf %u\n", spu.init.id, buf);
+ DEBUG_PRINTF("release batch buf %u\n", buf);
release_buffer(buf);
/*
@@ -571,8 +591,7 @@ cmd_batch(uint opcode)
}
}
- if (Debug)
- printf("SPU %u: BATCH complete\n", spu.init.id);
+ DEBUG_PRINTF("BATCH complete\n");
}
@@ -585,8 +604,7 @@ main_loop(void)
struct cell_command cmd;
int exitFlag = 0;
- if (Debug)
- printf("SPU %u: Enter main loop\n", spu.init.id);
+ DEBUG_PRINTF("Enter main loop\n");
ASSERT((sizeof(struct cell_command) & 0xf) == 0);
ASSERT_ALIGN16(&cmd);
@@ -595,14 +613,12 @@ main_loop(void)
unsigned opcode;
int tag = 0;
- if (Debug)
- printf("SPU %u: Wait for cmd...\n", spu.init.id);
+ DEBUG_PRINTF("Wait for cmd...\n");
/* read/wait from mailbox */
opcode = (unsigned int) spu_read_in_mbox();
- if (Debug)
- printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode);
+ DEBUG_PRINTF("got cmd 0x%x\n", opcode);
/* command payload */
mfc_get(&cmd, /* dest */
@@ -619,8 +635,7 @@ main_loop(void)
switch (opcode & CELL_CMD_OPCODE_MASK) {
case CELL_CMD_EXIT:
- if (Debug)
- printf("SPU %u: EXIT\n", spu.init.id);
+ DEBUG_PRINTF("EXIT\n");
exitFlag = 1;
break;
case CELL_CMD_VS_EXECUTE:
@@ -632,13 +647,12 @@ main_loop(void)
cmd_batch(opcode);
break;
default:
- printf("Bad opcode!\n");
+ printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
}
}
- if (Debug)
- printf("SPU %u: Exit main loop\n", spu.init.id);
+ DEBUG_PRINTF("Exit main loop\n");
spu_dcache_report();
}
@@ -653,7 +667,8 @@ one_time_init(void)
invalidate_tex_cache();
/* Install default/fallback fragment processing function.
- * This will normally be overriden by a code-gen'd function.
+ * This will normally be overriden by a code-gen'd function
+ * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
*/
spu.fragment_ops = spu_fallback_fragment_ops;
}
@@ -685,8 +700,8 @@ main(main_param_t speid, main_param_t argp)
one_time_init();
- if (Debug)
- printf("SPU: main() speid=%lu\n", (unsigned long) speid);
+ DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid);
+ D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n");
mfc_get(&spu.init, /* dest */
(unsigned int) argp, /* src */