summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBrian Paul <brian.paul@tungstengraphics.com>2008-10-16 13:49:42 -0600
committerBrian Paul <brian.paul@tungstengraphics.com>2008-10-16 13:49:42 -0600
commit033c90f4c16c1da517d676282508208319bd5ec5 (patch)
tree0faff761891c2d236d03f0cdfdcf736ac64183f6 /src
parent8bff2fccc9774e3f3af3c0f8ea345037051cf40e (diff)
cell: implement KIL instruction
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c80
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h6
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c5
3 files changed, 87 insertions, 4 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 2b34cf1e23..493ee1a0c9 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -84,6 +84,9 @@ struct codegen
/** Index of execution mask register */
int exec_mask_reg;
+ /** KIL mask: indicates which fragments have been killed */
+ int kill_mask_reg;
+
int frame_size; /**< Stack frame size, in words */
struct spe_function *f;
@@ -431,8 +434,21 @@ emit_prologue(struct codegen *gen)
static void
emit_epilogue(struct codegen *gen)
{
+ const int return_reg = 3;
+
spe_comment(gen->f, -4, "Function epilogue:");
+ spe_comment(gen->f, 0, "return the killed mask");
+ if (gen->kill_mask_reg > 0) {
+ /* shader called KIL, return the mask of killed fragments */
+ spe_move(gen->f, return_reg, gen->kill_mask_reg);
+ }
+ else {
+ /* return {0,0,0,0} */
+ spe_load_uint(gen->f, return_reg, 0);
+ }
+
+ spe_comment(gen->f, 0, "restore stack and return");
if (gen->frame_size >= 512) {
/* offset is too large for ai instruction */
int offset_reg = spe_allocate_available_register(gen->f);
@@ -1424,6 +1440,68 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
/**
+ * Emit code for KIL: kill if any of src reg values are less than zero.
+ *
+ * A channel is tested only when its bit is set in the WriteMask of
+ * inst->FullDstRegisters[0].  The per-channel "0 > src" results are
+ * OR'd together into kil_reg, AND'd with the execution mask register
+ * when gen->if_nesting is non-zero, and finally merged into
+ * gen->kill_mask_reg (which is allocated here on first use).
+ *
+ * NOTE(review): if no channel is enabled, kil_reg stays -1 and is still
+ * passed to spe_and()/spe_move()/spe_or() below -- confirm that an empty
+ * write mask can never reach this function.
+ *
+ * Always returns TRUE.
+ */
+static boolean
+emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
+
+ spe_comment(gen->f, -4, "CALL kil:");
+
+ /* zero = {0,0,0,0} */
+ zero_reg = get_itemp(gen);
+ spe_load_uint(gen->f, zero_reg, 0);
+
+ cmp_reg = get_itemp(gen);
+
+ /* get src regs for the channels enabled in the write mask;
+ * s_regs[] entries for the other channels are left unset */
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ }
+ }
+
+ /* test if any src regs are < 0; the first enabled channel writes
+ * kil_reg directly, later channels go through cmp_reg and are OR'd in */
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ if (kil_reg >= 0) {
+ /* cmp = (0 > src) ? ~0 : 0 */
+ spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
+ /* kil = kil | cmp */
+ spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
+ }
+ else {
+ kil_reg = get_itemp(gen);
+ /* kil = (0 > src) ? ~0 : 0 */
+ spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
+ }
+ }
+ }
+
+ if (gen->if_nesting) {
+ /* may have been a conditional kil: keep only the kill bits that are
+ * also set in the execution mask */
+ spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
+ }
+
+ /* allocate the kill mask reg if needed (kill_mask_reg <= 0 is treated
+ * as "not allocated yet") and initialize it from kil_reg; otherwise
+ * accumulate kil_reg into the existing kill mask */
+ if (gen->kill_mask_reg <= 0) {
+ gen->kill_mask_reg = spe_allocate_available_register(gen->f);
+ spe_move(gen->f, gen->kill_mask_reg, kil_reg);
+ }
+ else {
+ spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
+ }
+
+ free_itemps(gen);
+
+ return TRUE;
+}
+
+
+
+/**
* Emit max. See emit_SGT for comments.
*/
static boolean
@@ -1695,6 +1773,8 @@ emit_instruction(struct codegen *gen,
/* fall-through for now */
case TGSI_OPCODE_TXP:
return emit_TEX(gen, inst);
+ case TGSI_OPCODE_KIL:
+ return emit_KIL(gen, inst);
case TGSI_OPCODE_IF:
return emit_IF(gen, inst);
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 80e9c696f8..95ef4c9244 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -89,9 +89,9 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y,
uint facing);
/** Function for running fragment program; the returned vector is used by emit_quad() as the mask of killed fragments */
-typedef void (*spu_fragment_program_func)(vector float *inputs,
- vector float *outputs,
- vector float *constants);
+typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
+ vector float *outputs,
+ vector float *constants);
struct spu_framebuffer
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index d83085d0f9..4caf7d6b61 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -254,6 +254,7 @@ emit_quad( int x, int y, mask_t mask)
vector float inputs[4*4], outputs[2*4];
vector float fragZ = eval_z((float) x, (float) y);
vector float fragW = eval_w((float) x, (float) y);
+ vector unsigned int kill_mask;
/* setup inputs */
#if 0
@@ -268,7 +269,9 @@ emit_quad( int x, int y, mask_t mask)
ASSERT(spu.fragment_ops);
/* Execute the current fragment program */
- spu.fragment_program(inputs, outputs, spu.constants);
+ kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
+
+ mask = spu_andc(mask, kill_mask);
/* Execute per-fragment/quad operations, including:
* alpha test, z test, stencil test, blend and framebuffer writing.