summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Robert Ellison <papillo@tungstengraphics.com> 2008-11-11 13:57:10 -0700
committer: Robert Ellison <papillo@tungstengraphics.com> 2008-11-11 13:57:10 -0700
commit: 90027f85786406133a5180998a75fb612b6a221e (patch)
tree: 595a268f7be19e2e763855b22a66efb0566123d2 /src
parent: 2b66417402bc595be301ab9ed7b9ea2a5f79e180 (diff)
CELL: two-sided stencil fixes
With these changes, the tests/stencil_twoside test now works.

- Eliminate blending from the stencil_twoside test, as it produces an unneeded dependency on having blending working.
- The spe_splat() function will now work if the register being splatted and the destination register are the same.
- Separate fragment code is generated for front-facing and back-facing fragments. Often these are the same; if two-sided stenciling is on, they can be different. This is easier and faster than generating code that does both tests and merges the results.
- Fixed a cut/paste bug where, if the back Z-pass stencil operation were different from all the other operations, the back Z-fail results were incorrect.
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c 7
-rw-r--r-- src/gallium/drivers/cell/common.h 6
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_gen_fragment.c 239
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_gen_fragment.h 2
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_state_emit.c 19
-rw-r--r-- src/gallium/drivers/cell/spu/spu_command.c 6
-rw-r--r-- src/gallium/drivers/cell/spu/spu_main.c 6
-rw-r--r-- src/gallium/drivers/cell/spu/spu_main.h 10
-rw-r--r-- src/gallium/drivers/cell/spu/spu_per_fragment_op.c 3
-rw-r--r-- src/gallium/drivers/cell/spu/spu_per_fragment_op.h 3
-rw-r--r-- src/gallium/drivers/cell/spu/spu_tri.c 20
11 files changed, 115 insertions, 206 deletions
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index f8568f690b..1bd9f1c8dd 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -958,9 +958,12 @@ spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsig
void
spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
{
+ /* Use a temporary, just in case rT == rA */
+ unsigned int tmp_reg = spe_allocate_available_register(p);
/* Duplicate bytes 0, 1, 2, and 3 across the whole register */
- spe_ila(p, rT, 0x00010203);
- spe_shufb(p, rT, rA, rA, rT);
+ spe_ila(p, tmp_reg, 0x00010203);
+ spe_shufb(p, rT, rA, rA, tmp_reg);
+ spe_release_register(p, tmp_reg);
}
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index 87488ea2d7..a670ed3c6e 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -130,6 +130,9 @@
#define CELL_FENCE_EMITTED 1
#define CELL_FENCE_SIGNALLED 2
+#define CELL_FACING_FRONT 0
+#define CELL_FACING_BACK 1
+
struct cell_fence
{
/** There's a 16-byte status qword per SPU */
@@ -160,7 +163,8 @@ struct cell_command_fragment_ops
struct pipe_depth_stencil_alpha_state dsa;
struct pipe_blend_state blend;
struct pipe_blend_color blend_color;
- unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS];
+ unsigned code_front[SPU_MAX_FRAGMENT_OPS_INSTS];
+ unsigned code_back[SPU_MAX_FRAGMENT_OPS_INSTS];
};
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
index d9c3ff3f4d..6e425eafaa 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -1412,144 +1412,72 @@ gen_stencil_values(struct spe_function *f, unsigned int stencil_op,
* and released by the corresponding spe_release_register_set() call.
*/
static void
-gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_alpha_state *dsa,
+gen_get_stencil_values(struct spe_function *f, const struct pipe_stencil_state *stencil,
+ const unsigned int depth_enabled,
unsigned int fbS_reg,
unsigned int *fail_reg, unsigned int *zfail_reg,
- unsigned int *zpass_reg, unsigned int *back_fail_reg,
- unsigned int *back_zfail_reg, unsigned int *back_zpass_reg)
+ unsigned int *zpass_reg)
{
- unsigned zfail_op, back_zfail_op;
+ unsigned zfail_op;
/* Stenciling had better be enabled here */
- ASSERT(dsa->stencil[0].enabled);
+ ASSERT(stencil->enabled);
/* If the depth test is not enabled, it is treated as though it always
- * passes. In particular, that means that the "zfail_op" (and the backfacing
- * counterpart, if active) are not considered - a failing stencil test will
- * trigger the "fail_op", and a passing stencil test will trigger the
- * "zpass_op".
+ * passes, which means that the zfail_op is not considered - a
+ * failing stencil test triggers the fail_op, and a passing one
+ * triggers the zpass_op
*
- * By overriding the operations in this case to be PIPE_STENCIL_OP_KEEP,
- * we keep them from being calculated.
+ * As an optimization, override calculation of the zfail_op values
+ * if they aren't going to be used. By setting the value of
+ * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed
+ * to match the incoming stencil values, and no calculation will
+ * be done.
*/
- if (dsa->depth.enabled) {
- zfail_op = dsa->stencil[0].zfail_op;
- back_zfail_op = dsa->stencil[1].zfail_op;
+ if (depth_enabled) {
+ zfail_op = stencil->zfail_op;
}
else {
zfail_op = PIPE_STENCIL_OP_KEEP;
- back_zfail_op = PIPE_STENCIL_OP_KEEP;
}
/* One-sided or front-facing stencil */
- if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP) {
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) {
*fail_reg = fbS_reg;
}
else {
*fail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[0].fail_op, dsa->stencil[0].ref_value,
+ gen_stencil_values(f, stencil->fail_op, stencil->ref_value,
0xff, fbS_reg, *fail_reg);
}
+ /* Check the possibly overridden value, not the structure value */
if (zfail_op == PIPE_STENCIL_OP_KEEP) {
*zfail_reg = fbS_reg;
}
- else if (zfail_op == dsa->stencil[0].fail_op) {
+ else if (zfail_op == stencil->fail_op) {
*zfail_reg = *fail_reg;
}
else {
*zfail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[0].zfail_op, dsa->stencil[0].ref_value,
+ gen_stencil_values(f, stencil->zfail_op, stencil->ref_value,
0xff, fbS_reg, *zfail_reg);
}
- if (dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP) {
+ if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
*zpass_reg = fbS_reg;
}
- else if (dsa->stencil[0].zpass_op == dsa->stencil[0].fail_op) {
+ else if (stencil->zpass_op == stencil->fail_op) {
*zpass_reg = *fail_reg;
}
- else if (dsa->stencil[0].zpass_op == zfail_op) {
+ else if (stencil->zpass_op == zfail_op) {
*zpass_reg = *zfail_reg;
}
else {
*zpass_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[0].zpass_op, dsa->stencil[0].ref_value,
+ gen_stencil_values(f, stencil->zpass_op, stencil->ref_value,
0xff, fbS_reg, *zpass_reg);
}
-
- /* If two-sided stencil is enabled, we have more work to do. */
- if (!dsa->stencil[1].enabled) {
- /* This just flags that the registers need not be deallocated later */
- *back_fail_reg = fbS_reg;
- *back_zfail_reg = fbS_reg;
- *back_zpass_reg = fbS_reg;
- }
- else {
- /* Same calculations as above, but for the back stencil */
- if (dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP) {
- *back_fail_reg = fbS_reg;
- }
- else if (dsa->stencil[1].fail_op == dsa->stencil[0].fail_op) {
- *back_fail_reg = *fail_reg;
- }
- else if (dsa->stencil[1].fail_op == zfail_op) {
- *back_fail_reg = *zfail_reg;
- }
- else if (dsa->stencil[1].fail_op == dsa->stencil[0].zpass_op) {
- *back_fail_reg = *zpass_reg;
- }
- else {
- *back_fail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[1].fail_op, dsa->stencil[1].ref_value,
- 0xff, fbS_reg, *back_fail_reg);
- }
-
- if (back_zfail_op == PIPE_STENCIL_OP_KEEP) {
- *back_zfail_reg = fbS_reg;
- }
- else if (back_zfail_op == dsa->stencil[0].fail_op) {
- *back_zfail_reg = *fail_reg;
- }
- else if (back_zfail_op == zfail_op) {
- *back_zfail_reg = *zfail_reg;
- }
- else if (back_zfail_op == dsa->stencil[0].zpass_op) {
- *back_zfail_reg = *zpass_reg;
- }
- else if (back_zfail_op == dsa->stencil[1].fail_op) {
- *back_zfail_reg = *back_fail_reg;
- }
- else {
- *back_zfail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[1].zfail_op, dsa->stencil[1].ref_value,
- 0xff, fbS_reg, *back_zfail_reg);
- }
-
- if (dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) {
- *back_zpass_reg = fbS_reg;
- }
- else if (dsa->stencil[1].zpass_op == dsa->stencil[0].fail_op) {
- *back_zpass_reg = *fail_reg;
- }
- else if (dsa->stencil[1].zpass_op == zfail_op) {
- *back_zpass_reg = *zfail_reg;
- }
- else if (dsa->stencil[1].zpass_op == dsa->stencil[0].zpass_op) {
- *back_zpass_reg = *zpass_reg;
- }
- else if (dsa->stencil[1].zpass_op == dsa->stencil[1].fail_op) {
- *back_zpass_reg = *back_fail_reg;
- }
- else if (dsa->stencil[1].zpass_op == back_zfail_op) {
- *back_zpass_reg = *back_zfail_reg;
- }
- else {
- *back_zfail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[1].zpass_op, dsa->stencil[1].ref_value,
- 0xff, fbS_reg, *back_zpass_reg);
- }
- } /* End of calculations for back-facing stencil */
}
/* Note that fbZ_reg may *not* be set on entry, if in fact
@@ -1559,7 +1487,7 @@ gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_a
static boolean
gen_stencil_depth_test(struct spe_function *f,
const struct pipe_depth_stencil_alpha_state *dsa,
- const int const facing_reg,
+ const uint facing,
const int mask_reg, const int fragZ_reg,
const int fbZ_reg, const int fbS_reg)
{
@@ -1571,6 +1499,8 @@ gen_stencil_depth_test(struct spe_function *f,
boolean need_to_calculate_stencil_values;
boolean need_to_writemask_stencil_values;
+ struct pipe_stencil_state *stencil;
+
/* Registers. We may or may not actually allocate these, depending
* on whether the state values indicate that we need them.
*/
@@ -1598,6 +1528,20 @@ gen_stencil_depth_test(struct spe_function *f,
spe_comment(f, 0, "Allocating stencil register set");
spe_allocate_register_set(f);
+ /* The facing we're given is the fragment facing; it doesn't
+ * exactly match the stencil facing. If stencil is enabled,
+ * but two-sided stencil is *not* enabled, we use the same
+ * stencil settings for both front- and back-facing fragments.
+ * We only use the "back-facing" stencil for backfacing fragments
+ * if two-sided stenciling is enabled.
+ */
+ if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) {
+ stencil = &dsa->stencil[1];
+ }
+ else {
+ stencil = &dsa->stencil[0];
+ }
+
/* Calculate the writemask. If the writemask is trivial (either
* all 0s, meaning that we don't need to calculate any stencil values
* because they're not going to change the stencil anyway, or all 1s,
@@ -1608,24 +1552,20 @@ gen_stencil_depth_test(struct spe_function *f,
* Note that if the backface stencil is *not* enabled, the backface
* stencil will have the same values as the frontface stencil.
*/
- if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) {
- /* No changes to any stencil values */
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zfail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
need_to_calculate_stencil_values = false;
need_to_writemask_stencil_values = false;
}
- else if (dsa->stencil[0].write_mask == 0x0 && dsa->stencil[1].write_mask == 0x0) {
+ else if (stencil->write_mask == 0x0) {
/* All changes are writemasked out, so no need to calculate
* what those changes might be, and no need to write anything back.
*/
need_to_calculate_stencil_values = false;
need_to_writemask_stencil_values = false;
}
- else if (dsa->stencil[0].write_mask == 0xff && dsa->stencil[1].write_mask == 0xff) {
+ else if (stencil->write_mask == 0xff) {
/* Still trivial, but a little less so. We need to write the stencil
* values, but we don't need to mask them.
*/
@@ -1645,14 +1585,7 @@ gen_stencil_depth_test(struct spe_function *f,
*/
spe_comment(f, 0, "Computing stencil writemask");
stencil_writemask_reg = spe_allocate_available_register(f);
- spe_load_uint(f, stencil_writemask_reg, dsa->stencil[0].write_mask);
- if (dsa->stencil[1].enabled && dsa->stencil[0].write_mask != dsa->stencil[1].write_mask) {
- unsigned int back_write_mask_reg = spe_allocate_available_register(f);
- spe_comment(f, 0, "Resolving two-sided stencil writemask");
- spe_load_uint(f, back_write_mask_reg, dsa->stencil[1].write_mask);
- spe_selb(f, stencil_writemask_reg, stencil_writemask_reg, back_write_mask_reg, facing_reg);
- spe_release_register(f, back_write_mask_reg);
- }
+ spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].write_mask);
}
/* At least one-sided stenciling must be on. Generate code that
@@ -1666,19 +1599,7 @@ gen_stencil_depth_test(struct spe_function *f,
*/
spe_comment(f, 0, "Running basic stencil test");
stencil_pass_reg = spe_allocate_available_register(f);
- gen_stencil_test(f, &dsa->stencil[0], 0xff, mask_reg, fbS_reg, stencil_pass_reg);
-
- /* If two-sided stenciling is on, generate code to run the stencil
- * test on the backfacing stencil as well, and combine the two results
- * into the one correct result based on facing.
- */
- if (dsa->stencil[1].enabled) {
- unsigned int temp_reg = spe_allocate_available_register(f);
- spe_comment(f, 0, "Running backface stencil test");
- gen_stencil_test(f, &dsa->stencil[1], 0xff, mask_reg, fbS_reg, temp_reg);
- spe_selb(f, stencil_pass_reg, stencil_pass_reg, temp_reg, facing_reg);
- spe_release_register(f, temp_reg);
- }
+ gen_stencil_test(f, stencil, 0xff, mask_reg, fbS_reg, stencil_pass_reg);
/* Generate code that, given the mask of valid fragments and the
* mask of valid fragments that passed the stencil test, computes
@@ -1698,9 +1619,6 @@ gen_stencil_depth_test(struct spe_function *f,
/* We may not need to calculate stencil values, if the writemask is off */
if (need_to_calculate_stencil_values) {
- unsigned int back_stencil_fail_values, back_stencil_pass_depth_fail_values, back_stencil_pass_depth_pass_values;
- unsigned int front_stencil_fail_values, front_stencil_pass_depth_fail_values, front_stencil_pass_depth_pass_values;
-
/* Generate code that calculates exactly which stencil values we need,
* without calculating the same value twice (say, if two different
* stencil ops have the same value). This code will work for one-sided
@@ -1715,51 +1633,11 @@ gen_stencil_depth_test(struct spe_function *f,
* This function will allocate a variant number of registers that
* will be released as part of the register set.
*/
- spe_comment(f, 0, "Computing stencil values");
- gen_get_stencil_values(f, dsa, fbS_reg,
- &front_stencil_fail_values, &front_stencil_pass_depth_fail_values,
- &front_stencil_pass_depth_pass_values, &back_stencil_fail_values,
- &back_stencil_pass_depth_fail_values, &back_stencil_pass_depth_pass_values);
-
- /* Tricky, tricky, tricky - the things we do to create optimal
- * code...
- *
- * The various stencil values registers may overlap with each other
- * and with fbS_reg arbitrarily (as any particular operation is
- * only calculated once and stored in one register, no matter
- * how many times it is used). So we can't change the values
- * within those registers directly - if we change a value in a
- * register that's being referenced by two different calculations,
- * we've just unwittingly changed the second value as well...
- *
- * Avoid this by allocating new registers to hold the results
- * (there may be 2, if the depth test is off, or 3, if it is on).
- * These will be released as part of the register set.
- */
- if (!dsa->stencil[1].enabled) {
- /* The easy case: if two-sided stenciling is *not* enabled, we
- * just use the front-sided values.
- */
- stencil_fail_values = front_stencil_fail_values;
- stencil_pass_depth_fail_values = front_stencil_pass_depth_fail_values;
- stencil_pass_depth_pass_values = front_stencil_pass_depth_pass_values;
- }
- else { /* two-sided stencil enabled */
- spe_comment(f, 0, "Resolving backface stencil values");
- /* Allocate new registers for the needed merged values */
- stencil_fail_values = spe_allocate_available_register(f);
- spe_selb(f, stencil_fail_values, front_stencil_fail_values, back_stencil_fail_values, facing_reg);
- if (dsa->depth.enabled) {
- stencil_pass_depth_fail_values = spe_allocate_available_register(f);
- spe_selb(f, stencil_pass_depth_fail_values, front_stencil_pass_depth_fail_values, back_stencil_pass_depth_fail_values, facing_reg);
- }
- else {
- stencil_pass_depth_fail_values = fbS_reg;
- }
- stencil_pass_depth_pass_values = spe_allocate_available_register(f);
- spe_selb(f, stencil_pass_depth_pass_values, front_stencil_pass_depth_pass_values, back_stencil_pass_depth_pass_values, facing_reg);
- }
- }
+ spe_comment(f, 0, facing == CELL_FACING_FRONT ? "Computing front-facing stencil values" : "Computing back-facing stencil values");
+ gen_get_stencil_values(f, stencil, dsa->depth.enabled, fbS_reg,
+ &stencil_fail_values, &stencil_pass_depth_fail_values,
+ &stencil_pass_depth_pass_values);
+ }
/* We now have all the stencil values we need. We also need
* the results of the depth test to figure out which
@@ -1896,10 +1774,12 @@ gen_stencil_depth_test(struct spe_function *f,
* should be much faster.
*
* \param cell the rendering context (in)
+ * \param facing whether the generated code is for front-facing or
+ * back-facing fragments
* \param f the generated function (out)
*/
void
-cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
+cell_gen_fragment_function(struct cell_context *cell, uint facing, struct spe_function *f)
{
const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil;
const struct pipe_blend_state *blend = cell->blend;
@@ -1917,7 +1797,8 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
const int fragB_reg = 10; /* vector float */
const int fragA_reg = 11; /* vector float */
const int mask_reg = 12; /* vector uint */
- const int facing_reg = 13; /* uint */
+
+ ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK);
/* offset of quad from start of tile
* XXX assuming 4-byte pixels for color AND Z/stencil!!!!
@@ -1945,7 +1826,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_allocate_register(f, fragB_reg);
spe_allocate_register(f, fragA_reg);
spe_allocate_register(f, mask_reg);
- spe_allocate_register(f, facing_reg);
quad_offset_reg = spe_allocate_available_register(f);
fbRGBA_reg = spe_allocate_available_register(f);
@@ -1969,6 +1849,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_release_register(f, y2_reg);
}
+ /* Generate the alpha test, if needed. */
if (dsa->alpha.enabled) {
gen_alpha_test(dsa, f, mask_reg, fragA_reg);
}
@@ -2095,7 +1976,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
* gen_stencil_depth_test() function must ignore the
* fbZ_reg register if depth is not enabled.
*/
- write_depth_stencil = gen_stencil_depth_test(f, dsa, facing_reg, mask_reg, fragZ_reg, fbZ_reg, fbS_reg);
+ write_depth_stencil = gen_stencil_depth_test(f, dsa, facing, mask_reg, fragZ_reg, fbZ_reg, fbS_reg);
}
else if (dsa->depth.enabled) {
int zmask_reg = spe_allocate_available_register(f);
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
index b59de198dc..2fabfdfb08 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
@@ -31,7 +31,7 @@
extern void
-cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f);
+cell_gen_fragment_function(struct cell_context *cell, uint facing, struct spe_function *f);
#endif /* CELL_GEN_FRAGMENT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index dd2d7f7d1e..031b27f11f 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -75,23 +75,29 @@ lookup_fragment_ops(struct cell_context *cell)
* If not found, create/save new fragment ops command.
*/
if (!ops) {
- struct spe_function spe_code;
+ struct spe_function spe_code_front, spe_code_back;
if (0)
debug_printf("**** Create New Fragment Ops\n");
/* Prepare the buffer that will hold the generated code. */
- spe_init_func(&spe_code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ spe_init_func(&spe_code_front, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ spe_init_func(&spe_code_back, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
- /* generate new code */
- cell_gen_fragment_function(cell, &spe_code);
+ /* generate new code. Always generate new code for both front-facing
+ * and back-facing fragments, even if it's the same code in both
+ * cases.
+ */
+ cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front);
+ cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back);
/* alloc new fragment ops command */
ops = CALLOC_STRUCT(cell_command_fragment_ops);
/* populate the new cell_command_fragment_ops object */
ops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
- memcpy(ops->code, spe_code.store, spe_code_size(&spe_code));
+ memcpy(ops->code_front, spe_code_front.store, spe_code_size(&spe_code_front));
+ memcpy(ops->code_back, spe_code_back.store, spe_code_size(&spe_code_back));
ops->dsa = *cell->depth_stencil;
ops->blend = *cell->blend;
@@ -99,7 +105,8 @@ lookup_fragment_ops(struct cell_context *cell)
util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL);
/* release rtasm buffer */
- spe_release_func(&spe_code);
+ spe_release_func(&spe_code_front);
+ spe_release_func(&spe_code_back);
}
else {
if (0)
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
index d726622d94..d5faf4e3aa 100644
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -214,7 +214,8 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");
/* Copy SPU code from batch buffer to spu buffer */
- memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
+ memcpy(spu.fragment_ops_code_front, fops->code_front, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
+ memcpy(spu.fragment_ops_code_back, fops->code_back, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
/* Copy state info (for fallback case only) */
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
@@ -234,7 +235,8 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
* raw state records that the fallback code requires.
*/
if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) {
- spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
+ spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) spu.fragment_ops_code_front;
+ spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) spu.fragment_ops_code_back;
}
else {
/* otherwise, the default fallback code remains in place */
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index c8bb251905..7033f6037d 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -63,7 +63,8 @@ one_time_init(void)
* This will normally be overriden by a code-gen'd function
* unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
*/
- spu.fragment_ops = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
}
@@ -90,7 +91,8 @@ main(main_param_t speid, main_param_t argp)
ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4);
ASSERT(sizeof(struct cell_command_render) % 8 == 0);
- ASSERT(((unsigned long) &spu.fragment_ops_code) % 8 == 0);
+ ASSERT(((unsigned long) &spu.fragment_ops_code_front) % 8 == 0);
+ ASSERT(((unsigned long) &spu.fragment_ops_code_back) % 8 == 0);
ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0);
one_time_init();
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 692790c9f3..24cf7d77ce 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -85,8 +85,7 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y,
vector float fragGreen,
vector float fragBlue,
vector float fragAlpha,
- vector unsigned int mask,
- uint facing);
+ vector unsigned int mask);
/** Function for running fragment program */
typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
@@ -170,9 +169,10 @@ struct spu_global
ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
/** Current fragment ops machine code, at 8-byte boundary */
- uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB;
- /** Current fragment ops function */
- spu_fragment_ops_func fragment_ops;
+ uint fragment_ops_code_front[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB;
+ uint fragment_ops_code_back[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB;
+ /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */
+ spu_fragment_ops_func fragment_ops[2];
/** Current fragment program machine code, at 8-byte boundary */
uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB;
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
index f8ffc70492..683664e8a4 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
@@ -75,8 +75,7 @@ spu_fallback_fragment_ops(uint x, uint y,
vector float fragG,
vector float fragB,
vector float fragA,
- vector unsigned int mask,
- uint facing)
+ vector unsigned int mask)
{
vector float frag_aos[4];
unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
index a61689c83a..f817abf046 100644
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
+++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
@@ -38,8 +38,7 @@ spu_fallback_fragment_ops(uint x, uint y,
vector float fragGreen,
vector float fragBlue,
vector float fragAlpha,
- vector unsigned int mask,
- uint facing);
+ vector unsigned int mask);
#endif /* SPU_PER_FRAGMENT_OP */
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 5f908159bb..22e51a86ae 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -275,15 +275,20 @@ emit_quad( int x, int y, mask_t mask)
/* Execute per-fragment/quad operations, including:
* alpha test, z test, stencil test, blend and framebuffer writing.
+ * Note that there are two different fragment operations functions
+ * that can be called, one for front-facing fragments, and one
+ * for back-facing fragments. (Often the two are the same;
+ * but in some cases, like two-sided stenciling, they can be
+ * very different.) So choose the correct function depending
+ * on the calculated facing.
*/
- spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile,
+ spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
fragZ,
outputs[0*4+0],
outputs[0*4+1],
outputs[0*4+2],
outputs[0*4+3],
- mask,
- setup.facing);
+ mask);
}
}
}
@@ -519,7 +524,14 @@ setup_sort_vertices(const struct vertex_header *v0,
setup.oneOverArea = 1.0f / area;
- /* The product of area * sign indicates front/back orientation (0/1) */
+ /* The product of area * sign indicates front/back orientation (0/1).
+ * Just in case someone gets the bright idea of switching the front
+ * and back constants without noticing that we're assuming their
+ * values in this operation, also assert that the values are
+ * what we think they are.
+ */
+ ASSERT(CELL_FACING_FRONT == 0);
+ ASSERT(CELL_FACING_BACK == 1);
setup.facing = (area * sign > 0.0f)
^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);