summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/cell/ppu
diff options
context:
space:
mode:
authorRobert Ellison <papillo@tungstengraphics.com>2008-11-11 13:57:10 -0700
committerRobert Ellison <papillo@tungstengraphics.com>2008-11-11 13:57:10 -0700
commit90027f85786406133a5180998a75fb612b6a221e (patch)
tree595a268f7be19e2e763855b22a66efb0566123d2 /src/gallium/drivers/cell/ppu
parent2b66417402bc595be301ab9ed7b9ea2a5f79e180 (diff)
CELL: two-sided stencil fixes
With these changes, the tests/stencil_twoside test now works. - Eliminate blending from the stencil_twoside test, as it produces an unneeded dependency on having blending working - The spe_splat() function will now work if the register being splatted and the destination register are the same - Separate fragment code generated for front-facing and back-facing fragments. Often these are the same; if two-sided stenciling is on, they can be different. This is easier and faster than generating code that does both tests and merges the results. - Fixed a cut/paste bug where if the back Z-pass stencil operation were different from all the other operations, the back Z-fail results were incorrect.
Diffstat (limited to 'src/gallium/drivers/cell/ppu')
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.c239
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.h2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c19
3 files changed, 74 insertions, 186 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
index d9c3ff3f4d..6e425eafaa 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -1412,144 +1412,72 @@ gen_stencil_values(struct spe_function *f, unsigned int stencil_op,
* and released by the corresponding spe_release_register_set() call.
*/
static void
-gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_alpha_state *dsa,
+gen_get_stencil_values(struct spe_function *f, const struct pipe_stencil_state *stencil,
+ const unsigned int depth_enabled,
unsigned int fbS_reg,
unsigned int *fail_reg, unsigned int *zfail_reg,
- unsigned int *zpass_reg, unsigned int *back_fail_reg,
- unsigned int *back_zfail_reg, unsigned int *back_zpass_reg)
+ unsigned int *zpass_reg)
{
- unsigned zfail_op, back_zfail_op;
+ unsigned zfail_op;
/* Stenciling had better be enabled here */
- ASSERT(dsa->stencil[0].enabled);
+ ASSERT(stencil->enabled);
/* If the depth test is not enabled, it is treated as though it always
- * passes. In particular, that means that the "zfail_op" (and the backfacing
- * counterpart, if active) are not considered - a failing stencil test will
- * trigger the "fail_op", and a passing stencil test will trigger the
- * "zpass_op".
+ * passes, which means that the zfail_op is not considered - a
+ * failing stencil test triggers the fail_op, and a passing one
+ * triggers the zpass_op
*
- * By overriding the operations in this case to be PIPE_STENCIL_OP_KEEP,
- * we keep them from being calculated.
+ * As an optimization, override calculation of the zfail_op values
+ * if they aren't going to be used. By setting the value of
+ * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed
+ * to match the incoming stencil values, and no calculation will
+ * be done.
*/
- if (dsa->depth.enabled) {
- zfail_op = dsa->stencil[0].zfail_op;
- back_zfail_op = dsa->stencil[1].zfail_op;
+ if (depth_enabled) {
+ zfail_op = stencil->zfail_op;
}
else {
zfail_op = PIPE_STENCIL_OP_KEEP;
- back_zfail_op = PIPE_STENCIL_OP_KEEP;
}
/* One-sided or front-facing stencil */
- if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP) {
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) {
*fail_reg = fbS_reg;
}
else {
*fail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[0].fail_op, dsa->stencil[0].ref_value,
+ gen_stencil_values(f, stencil->fail_op, stencil->ref_value,
0xff, fbS_reg, *fail_reg);
}
+ /* Check the possibly overridden value, not the structure value */
if (zfail_op == PIPE_STENCIL_OP_KEEP) {
*zfail_reg = fbS_reg;
}
- else if (zfail_op == dsa->stencil[0].fail_op) {
+ else if (zfail_op == stencil->fail_op) {
*zfail_reg = *fail_reg;
}
else {
*zfail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[0].zfail_op, dsa->stencil[0].ref_value,
+ gen_stencil_values(f, stencil->zfail_op, stencil->ref_value,
0xff, fbS_reg, *zfail_reg);
}
- if (dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP) {
+ if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
*zpass_reg = fbS_reg;
}
- else if (dsa->stencil[0].zpass_op == dsa->stencil[0].fail_op) {
+ else if (stencil->zpass_op == stencil->fail_op) {
*zpass_reg = *fail_reg;
}
- else if (dsa->stencil[0].zpass_op == zfail_op) {
+ else if (stencil->zpass_op == zfail_op) {
*zpass_reg = *zfail_reg;
}
else {
*zpass_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[0].zpass_op, dsa->stencil[0].ref_value,
+ gen_stencil_values(f, stencil->zpass_op, stencil->ref_value,
0xff, fbS_reg, *zpass_reg);
}
-
- /* If two-sided stencil is enabled, we have more work to do. */
- if (!dsa->stencil[1].enabled) {
- /* This just flags that the registers need not be deallocated later */
- *back_fail_reg = fbS_reg;
- *back_zfail_reg = fbS_reg;
- *back_zpass_reg = fbS_reg;
- }
- else {
- /* Same calculations as above, but for the back stencil */
- if (dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP) {
- *back_fail_reg = fbS_reg;
- }
- else if (dsa->stencil[1].fail_op == dsa->stencil[0].fail_op) {
- *back_fail_reg = *fail_reg;
- }
- else if (dsa->stencil[1].fail_op == zfail_op) {
- *back_fail_reg = *zfail_reg;
- }
- else if (dsa->stencil[1].fail_op == dsa->stencil[0].zpass_op) {
- *back_fail_reg = *zpass_reg;
- }
- else {
- *back_fail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[1].fail_op, dsa->stencil[1].ref_value,
- 0xff, fbS_reg, *back_fail_reg);
- }
-
- if (back_zfail_op == PIPE_STENCIL_OP_KEEP) {
- *back_zfail_reg = fbS_reg;
- }
- else if (back_zfail_op == dsa->stencil[0].fail_op) {
- *back_zfail_reg = *fail_reg;
- }
- else if (back_zfail_op == zfail_op) {
- *back_zfail_reg = *zfail_reg;
- }
- else if (back_zfail_op == dsa->stencil[0].zpass_op) {
- *back_zfail_reg = *zpass_reg;
- }
- else if (back_zfail_op == dsa->stencil[1].fail_op) {
- *back_zfail_reg = *back_fail_reg;
- }
- else {
- *back_zfail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[1].zfail_op, dsa->stencil[1].ref_value,
- 0xff, fbS_reg, *back_zfail_reg);
- }
-
- if (dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) {
- *back_zpass_reg = fbS_reg;
- }
- else if (dsa->stencil[1].zpass_op == dsa->stencil[0].fail_op) {
- *back_zpass_reg = *fail_reg;
- }
- else if (dsa->stencil[1].zpass_op == zfail_op) {
- *back_zpass_reg = *zfail_reg;
- }
- else if (dsa->stencil[1].zpass_op == dsa->stencil[0].zpass_op) {
- *back_zpass_reg = *zpass_reg;
- }
- else if (dsa->stencil[1].zpass_op == dsa->stencil[1].fail_op) {
- *back_zpass_reg = *back_fail_reg;
- }
- else if (dsa->stencil[1].zpass_op == back_zfail_op) {
- *back_zpass_reg = *back_zfail_reg;
- }
- else {
- *back_zfail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, dsa->stencil[1].zpass_op, dsa->stencil[1].ref_value,
- 0xff, fbS_reg, *back_zpass_reg);
- }
- } /* End of calculations for back-facing stencil */
}
/* Note that fbZ_reg may *not* be set on entry, if in fact
@@ -1559,7 +1487,7 @@ gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_a
static boolean
gen_stencil_depth_test(struct spe_function *f,
const struct pipe_depth_stencil_alpha_state *dsa,
- const int const facing_reg,
+ const uint facing,
const int mask_reg, const int fragZ_reg,
const int fbZ_reg, const int fbS_reg)
{
@@ -1571,6 +1499,8 @@ gen_stencil_depth_test(struct spe_function *f,
boolean need_to_calculate_stencil_values;
boolean need_to_writemask_stencil_values;
+ struct pipe_stencil_state *stencil;
+
/* Registers. We may or may not actually allocate these, depending
* on whether the state values indicate that we need them.
*/
@@ -1598,6 +1528,20 @@ gen_stencil_depth_test(struct spe_function *f,
spe_comment(f, 0, "Allocating stencil register set");
spe_allocate_register_set(f);
+ /* The facing we're given is the fragment facing; it doesn't
+ * exactly match the stencil facing. If stencil is enabled,
+ * but two-sided stencil is *not* enabled, we use the same
+ * stencil settings for both front- and back-facing fragments.
+ * We only use the "back-facing" stencil for backfacing fragments
+ * if two-sided stenciling is enabled.
+ */
+ if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) {
+ stencil = &dsa->stencil[1];
+ }
+ else {
+ stencil = &dsa->stencil[0];
+ }
+
/* Calculate the writemask. If the writemask is trivial (either
* all 0s, meaning that we don't need to calculate any stencil values
* because they're not going to change the stencil anyway, or all 1s,
@@ -1608,24 +1552,20 @@ gen_stencil_depth_test(struct spe_function *f,
* Note that if the backface stencil is *not* enabled, the backface
* stencil will have the same values as the frontface stencil.
*/
- if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP &&
- dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) {
- /* No changes to any stencil values */
+ if (stencil->fail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zfail_op == PIPE_STENCIL_OP_KEEP &&
+ stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
need_to_calculate_stencil_values = false;
need_to_writemask_stencil_values = false;
}
- else if (dsa->stencil[0].write_mask == 0x0 && dsa->stencil[1].write_mask == 0x0) {
+ else if (stencil->write_mask == 0x0) {
/* All changes are writemasked out, so no need to calculate
* what those changes might be, and no need to write anything back.
*/
need_to_calculate_stencil_values = false;
need_to_writemask_stencil_values = false;
}
- else if (dsa->stencil[0].write_mask == 0xff && dsa->stencil[1].write_mask == 0xff) {
+ else if (stencil->write_mask == 0xff) {
/* Still trivial, but a little less so. We need to write the stencil
* values, but we don't need to mask them.
*/
@@ -1645,14 +1585,7 @@ gen_stencil_depth_test(struct spe_function *f,
*/
spe_comment(f, 0, "Computing stencil writemask");
stencil_writemask_reg = spe_allocate_available_register(f);
- spe_load_uint(f, stencil_writemask_reg, dsa->stencil[0].write_mask);
- if (dsa->stencil[1].enabled && dsa->stencil[0].write_mask != dsa->stencil[1].write_mask) {
- unsigned int back_write_mask_reg = spe_allocate_available_register(f);
- spe_comment(f, 0, "Resolving two-sided stencil writemask");
- spe_load_uint(f, back_write_mask_reg, dsa->stencil[1].write_mask);
- spe_selb(f, stencil_writemask_reg, stencil_writemask_reg, back_write_mask_reg, facing_reg);
- spe_release_register(f, back_write_mask_reg);
- }
+ spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].write_mask);
}
/* At least one-sided stenciling must be on. Generate code that
@@ -1666,19 +1599,7 @@ gen_stencil_depth_test(struct spe_function *f,
*/
spe_comment(f, 0, "Running basic stencil test");
stencil_pass_reg = spe_allocate_available_register(f);
- gen_stencil_test(f, &dsa->stencil[0], 0xff, mask_reg, fbS_reg, stencil_pass_reg);
-
- /* If two-sided stenciling is on, generate code to run the stencil
- * test on the backfacing stencil as well, and combine the two results
- * into the one correct result based on facing.
- */
- if (dsa->stencil[1].enabled) {
- unsigned int temp_reg = spe_allocate_available_register(f);
- spe_comment(f, 0, "Running backface stencil test");
- gen_stencil_test(f, &dsa->stencil[1], 0xff, mask_reg, fbS_reg, temp_reg);
- spe_selb(f, stencil_pass_reg, stencil_pass_reg, temp_reg, facing_reg);
- spe_release_register(f, temp_reg);
- }
+ gen_stencil_test(f, stencil, 0xff, mask_reg, fbS_reg, stencil_pass_reg);
/* Generate code that, given the mask of valid fragments and the
* mask of valid fragments that passed the stencil test, computes
@@ -1698,9 +1619,6 @@ gen_stencil_depth_test(struct spe_function *f,
/* We may not need to calculate stencil values, if the writemask is off */
if (need_to_calculate_stencil_values) {
- unsigned int back_stencil_fail_values, back_stencil_pass_depth_fail_values, back_stencil_pass_depth_pass_values;
- unsigned int front_stencil_fail_values, front_stencil_pass_depth_fail_values, front_stencil_pass_depth_pass_values;
-
/* Generate code that calculates exactly which stencil values we need,
* without calculating the same value twice (say, if two different
* stencil ops have the same value). This code will work for one-sided
@@ -1715,51 +1633,11 @@ gen_stencil_depth_test(struct spe_function *f,
* This function will allocate a variant number of registers that
* will be released as part of the register set.
*/
- spe_comment(f, 0, "Computing stencil values");
- gen_get_stencil_values(f, dsa, fbS_reg,
- &front_stencil_fail_values, &front_stencil_pass_depth_fail_values,
- &front_stencil_pass_depth_pass_values, &back_stencil_fail_values,
- &back_stencil_pass_depth_fail_values, &back_stencil_pass_depth_pass_values);
-
- /* Tricky, tricky, tricky - the things we do to create optimal
- * code...
- *
- * The various stencil values registers may overlap with each other
- * and with fbS_reg arbitrarily (as any particular operation is
- * only calculated once and stored in one register, no matter
- * how many times it is used). So we can't change the values
- * within those registers directly - if we change a value in a
- * register that's being referenced by two different calculations,
- * we've just unwittingly changed the second value as well...
- *
- * Avoid this by allocating new registers to hold the results
- * (there may be 2, if the depth test is off, or 3, if it is on).
- * These will be released as part of the register set.
- */
- if (!dsa->stencil[1].enabled) {
- /* The easy case: if two-sided stenciling is *not* enabled, we
- * just use the front-sided values.
- */
- stencil_fail_values = front_stencil_fail_values;
- stencil_pass_depth_fail_values = front_stencil_pass_depth_fail_values;
- stencil_pass_depth_pass_values = front_stencil_pass_depth_pass_values;
- }
- else { /* two-sided stencil enabled */
- spe_comment(f, 0, "Resolving backface stencil values");
- /* Allocate new registers for the needed merged values */
- stencil_fail_values = spe_allocate_available_register(f);
- spe_selb(f, stencil_fail_values, front_stencil_fail_values, back_stencil_fail_values, facing_reg);
- if (dsa->depth.enabled) {
- stencil_pass_depth_fail_values = spe_allocate_available_register(f);
- spe_selb(f, stencil_pass_depth_fail_values, front_stencil_pass_depth_fail_values, back_stencil_pass_depth_fail_values, facing_reg);
- }
- else {
- stencil_pass_depth_fail_values = fbS_reg;
- }
- stencil_pass_depth_pass_values = spe_allocate_available_register(f);
- spe_selb(f, stencil_pass_depth_pass_values, front_stencil_pass_depth_pass_values, back_stencil_pass_depth_pass_values, facing_reg);
- }
- }
+ spe_comment(f, 0, facing == CELL_FACING_FRONT ? "Computing front-facing stencil values" : "Computing back-facing stencil values");
+ gen_get_stencil_values(f, stencil, dsa->depth.enabled, fbS_reg,
+ &stencil_fail_values, &stencil_pass_depth_fail_values,
+ &stencil_pass_depth_pass_values);
+ }
/* We now have all the stencil values we need. We also need
* the results of the depth test to figure out which
@@ -1896,10 +1774,12 @@ gen_stencil_depth_test(struct spe_function *f,
* should be much faster.
*
* \param cell the rendering context (in)
+ * \param facing whether the generated code is for front-facing or
+ * back-facing fragments
* \param f the generated function (out)
*/
void
-cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
+cell_gen_fragment_function(struct cell_context *cell, uint facing, struct spe_function *f)
{
const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil;
const struct pipe_blend_state *blend = cell->blend;
@@ -1917,7 +1797,8 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
const int fragB_reg = 10; /* vector float */
const int fragA_reg = 11; /* vector float */
const int mask_reg = 12; /* vector uint */
- const int facing_reg = 13; /* uint */
+
+ ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK);
/* offset of quad from start of tile
* XXX assuming 4-byte pixels for color AND Z/stencil!!!!
@@ -1945,7 +1826,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_allocate_register(f, fragB_reg);
spe_allocate_register(f, fragA_reg);
spe_allocate_register(f, mask_reg);
- spe_allocate_register(f, facing_reg);
quad_offset_reg = spe_allocate_available_register(f);
fbRGBA_reg = spe_allocate_available_register(f);
@@ -1969,6 +1849,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_release_register(f, y2_reg);
}
+ /* Generate the alpha test, if needed. */
if (dsa->alpha.enabled) {
gen_alpha_test(dsa, f, mask_reg, fragA_reg);
}
@@ -2095,7 +1976,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
* gen_stencil_depth_test() function must ignore the
* fbZ_reg register if depth is not enabled.
*/
- write_depth_stencil = gen_stencil_depth_test(f, dsa, facing_reg, mask_reg, fragZ_reg, fbZ_reg, fbS_reg);
+ write_depth_stencil = gen_stencil_depth_test(f, dsa, facing, mask_reg, fragZ_reg, fbZ_reg, fbS_reg);
}
else if (dsa->depth.enabled) {
int zmask_reg = spe_allocate_available_register(f);
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
index b59de198dc..2fabfdfb08 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
@@ -31,7 +31,7 @@
extern void
-cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f);
+cell_gen_fragment_function(struct cell_context *cell, uint facing, struct spe_function *f);
#endif /* CELL_GEN_FRAGMENT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index dd2d7f7d1e..031b27f11f 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -75,23 +75,29 @@ lookup_fragment_ops(struct cell_context *cell)
* If not found, create/save new fragment ops command.
*/
if (!ops) {
- struct spe_function spe_code;
+ struct spe_function spe_code_front, spe_code_back;
if (0)
debug_printf("**** Create New Fragment Ops\n");
/* Prepare the buffer that will hold the generated code. */
- spe_init_func(&spe_code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ spe_init_func(&spe_code_front, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ spe_init_func(&spe_code_back, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
- /* generate new code */
- cell_gen_fragment_function(cell, &spe_code);
+ /* generate new code. Always generate new code for both front-facing
+ * and back-facing fragments, even if it's the same code in both
+ * cases.
+ */
+ cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front);
+ cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back);
/* alloc new fragment ops command */
ops = CALLOC_STRUCT(cell_command_fragment_ops);
/* populate the new cell_command_fragment_ops object */
ops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
- memcpy(ops->code, spe_code.store, spe_code_size(&spe_code));
+ memcpy(ops->code_front, spe_code_front.store, spe_code_size(&spe_code_front));
+ memcpy(ops->code_back, spe_code_back.store, spe_code_size(&spe_code_back));
ops->dsa = *cell->depth_stencil;
ops->blend = *cell->blend;
@@ -99,7 +105,8 @@ lookup_fragment_ops(struct cell_context *cell)
util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL);
/* release rtasm buffer */
- spe_release_func(&spe_code);
+ spe_release_func(&spe_code_front);
+ spe_release_func(&spe_code_back);
}
else {
if (0)