From 926b8dbb3e86360e5968882df94785ae84d0ad43 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 09:00:05 -0600 Subject: cell: clean up various texture-related things Distinguish among texture targets in codegen. progs/demos/cubemap.c runs correctly now too. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/cell/ppu/cell_gen_fp.c') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 3dfd5f673d..2b34cf1e23 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1337,16 +1337,33 @@ emit_function_call(struct codegen *gen, static boolean -emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) { - const uint addr = lookup_function(gen->cell, "spu_txp"); + const uint target = inst->InstructionExtTexture.Texture; const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint addr; int ch; int coord_regs[4], d_regs[4]; + switch (target) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_2D: + addr = lookup_function(gen->cell, "spu_tex_2d"); + break; + case TGSI_TEXTURE_3D: + addr = lookup_function(gen->cell, "spu_tex_3d"); + break; + case TGSI_TEXTURE_CUBE: + addr = lookup_function(gen->cell, "spu_tex_cube"); + break; + default: + ASSERT(0 && "unsupported texture target"); + return FALSE; + } + assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); - spe_comment(gen->f, -4, "CALL txp:"); + spe_comment(gen->f, -4, "CALL tex:"); /* get src/dst reg info */ for (ch = 0; ch < 4; ch++) { @@ -1368,7 +1385,7 @@ emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); } - /* setup function arguments */ + /* setup function arguments (XXX depends on target) */ for (i = 0; i < 4; 
i++) { spe_move(gen->f, 3 + i, coord_regs[i]); } @@ -1674,8 +1691,10 @@ emit_instruction(struct codegen *gen, /* fall-through for now */ case TGSI_OPCODE_TXB: /* fall-through for now */ + case TGSI_OPCODE_TXL: + /* fall-through for now */ case TGSI_OPCODE_TXP: - return emit_TXP(gen, inst); + return emit_TEX(gen, inst); case TGSI_OPCODE_IF: return emit_IF(gen, inst); -- cgit v1.2.3 From 033c90f4c16c1da517d676282508208319bd5ec5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 13:49:42 -0600 Subject: cell: implement KIL instruction --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 80 ++++++++++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_main.h | 6 +-- src/gallium/drivers/cell/spu/spu_tri.c | 5 +- 3 files changed, 87 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/cell/ppu/cell_gen_fp.c') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 2b34cf1e23..493ee1a0c9 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -84,6 +84,9 @@ struct codegen /** Index of execution mask register */ int exec_mask_reg; + /** KIL mask: indicates which fragments have been killed */ + int kill_mask_reg; + int frame_size; /**< Stack frame size, in words */ struct spe_function *f; @@ -431,8 +434,21 @@ emit_prologue(struct codegen *gen) static void emit_epilogue(struct codegen *gen) { + const int return_reg = 3; + spe_comment(gen->f, -4, "Function epilogue:"); + spe_comment(gen->f, 0, "return the killed mask"); + if (gen->kill_mask_reg > 0) { + /* shader called KIL, return the "alive" mask */ + spe_move(gen->f, return_reg, gen->kill_mask_reg); + } + else { + /* return {0,0,0,0} */ + spe_load_uint(gen->f, return_reg, 0); + } + + spe_comment(gen->f, 0, "restore stack and return"); if (gen->frame_size >= 512) { /* offset is too large for ai instruction */ int offset_reg = spe_allocate_available_register(gen->f); @@ -1423,6 +1439,68 @@ 
emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) } +/** + * KILL if any of the src reg values selected by the dst writemask are less than zero. The per-channel test results are OR'd together and merged into gen->kill_mask_reg. Always returns TRUE. + */ +static boolean +emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + int s_regs[4], kil_reg = -1, cmp_reg, zero_reg; + + spe_comment(gen->f, -4, "CALL kil:"); + + /* zero = {0,0,0,0} */ + zero_reg = get_itemp(gen); + spe_load_uint(gen->f, zero_reg, 0); + + cmp_reg = get_itemp(gen); + + /* get src regs for the channels enabled in the dst writemask */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + } + } + + /* test if any src regs are < 0; NOTE(review): kil_reg is still -1 after this loop if no writemask bit is set, yet it is used below - confirm KIL is never emitted with an empty writemask */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + if (kil_reg >= 0) { + /* cmp = (0 > src) ? ~0 : 0 */ + spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); + /* kil = kil | cmp */ + spe_or(gen->f, kil_reg, kil_reg, cmp_reg); + } + else { + kil_reg = get_itemp(gen); + /* first enabled channel: kil = (0 > src) ? ~0 : 0 */ + spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); + } + } + } + + if (gen->if_nesting) { + /* inside an IF: may have been a conditional kil, so AND with the exec mask */ + spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg); + } + + /* first KIL: allocate the kill mask reg and initialize it; later KILs OR into it */ + if (gen->kill_mask_reg <= 0) { + gen->kill_mask_reg = spe_allocate_available_register(gen->f); + spe_move(gen->f, gen->kill_mask_reg, kil_reg); + } + else { + spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg); + } + + free_itemps(gen); + + return TRUE; +} + + + /** * Emit max. See emit_SGT for comments. 
*/ @@ -1695,6 +1773,8 @@ emit_instruction(struct codegen *gen, /* fall-through for now */ case TGSI_OPCODE_TXP: return emit_TEX(gen, inst); + case TGSI_OPCODE_KIL: + return emit_KIL(gen, inst); case TGSI_OPCODE_IF: return emit_IF(gen, inst); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 80e9c696f8..95ef4c9244 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -89,9 +89,9 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y, uint facing); /** Function for running fragment program */ -typedef void (*spu_fragment_program_func)(vector float *inputs, - vector float *outputs, - vector float *constants); +typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, + vector float *outputs, + vector float *constants); struct spu_framebuffer diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index d83085d0f9..4caf7d6b61 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -254,6 +254,7 @@ emit_quad( int x, int y, mask_t mask) vector float inputs[4*4], outputs[2*4]; vector float fragZ = eval_z((float) x, (float) y); vector float fragW = eval_w((float) x, (float) y); + vector unsigned int kill_mask; /* setup inputs */ #if 0 @@ -268,7 +269,9 @@ emit_quad( int x, int y, mask_t mask) ASSERT(spu.fragment_ops); /* Execute the current fragment program */ - spu.fragment_program(inputs, outputs, spu.constants); + kill_mask = spu.fragment_program(inputs, outputs, spu.constants); + + mask = spu_andc(mask, kill_mask); /* Execute per-fragment/quad operations, including: * alpha test, z test, stencil test, blend and framebuffer writing. 
-- cgit v1.2.3 From 0c1e98d9598bb5a30224583bdf211a1352b96d44 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 08:12:42 -0600 Subject: cell: note that dst reg writing needs clamping --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src/gallium/drivers/cell/ppu/cell_gen_fp.c') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 493ee1a0c9..d4d644d6e8 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -349,6 +349,22 @@ store_dest_reg(struct codegen *gen, int value_reg, int channel, const struct tgsi_full_dst_register *dest) { + /* + * XXX need to implement dst reg clamping/saturation + */ +#if 0 + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + break; + case TGSI_SAT_MINUS_PLUS_ONE: + break; + default: + assert( 0 ); + } +#endif + switch (dest->DstRegister.File) { case TGSI_FILE_TEMPORARY: if (gen->if_nesting > 0) { -- cgit v1.2.3