summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
authorKeith Whitwell <keith@tungstengraphics.com>2008-09-12 10:31:23 +0100
committerKeith Whitwell <keith@tungstengraphics.com>2008-09-12 10:31:23 +0100
commit176c454765b88c71d8b1ef474bc0fd53cb253a08 (patch)
tree76a2aee9ab3ae7f94661faf305d1b7b7b82b5cb3 /src/gallium
parent1f135456795adfb1d739a6fb66ab9540aa79b461 (diff)
parentaa66f08a21b791f338b519f0c2162cd8f7b3aeb0 (diff)
Merge commit 'origin/gallium-0.1' into gallium-0.2
Conflicts: progs/fp/Makefile
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c45
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h4
-rw-r--r--src/gallium/drivers/cell/common.h15
-rw-r--r--src/gallium/drivers/cell/ppu/Makefile1
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h1
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c523
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.h42
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.c18
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.h2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c18
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_shader.c8
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c25
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h15
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c35
-rw-r--r--src/gallium/winsys/xlib/xm_winsys.c19
15 files changed, 742 insertions, 29 deletions
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index 61010e4333..a04cc6c4ff 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -473,21 +473,48 @@ EMIT_R (spe_mtspr, 0x10c);
void
spe_load_float(struct spe_function *p, unsigned rT, float x)
{
- union {
- float f;
- unsigned u;
- } bits;
- bits.f = x;
- spe_ilhu(p, rT, bits.u >> 16);
- spe_iohl(p, rT, bits.u & 0xffff);
+ if (x == 0.0f) {
+ spe_il(p, rT, 0x0);
+ }
+ else if (x == 0.5f) {
+ spe_ilhu(p, rT, 0x3f00);
+ }
+ else if (x == 1.0f) {
+ spe_ilhu(p, rT, 0x3f80);
+ }
+ else if (x == -1.0f) {
+ spe_ilhu(p, rT, 0xbf80);
+ }
+ else {
+ union {
+ float f;
+ unsigned u;
+ } bits;
+ bits.f = x;
+ spe_ilhu(p, rT, bits.u >> 16);
+ spe_iohl(p, rT, bits.u & 0xffff);
+ }
}
void
spe_load_int(struct spe_function *p, unsigned rT, int i)
{
- spe_ilhu(p, rT, i >> 16);
- spe_iohl(p, rT, i & 0xffff);
+ if (-32768 <= i && i <= 32767) {
+ spe_il(p, rT, i);
+ }
+ else {
+ spe_ilhu(p, rT, i >> 16);
+ spe_iohl(p, rT, i & 0xffff);
+ }
+}
+
+
+void
+spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
+{
+ spe_ila(p, rT, 66051);
+ spe_shufb(p, rT, rA, rA, rT);
}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
index dee8c55c4a..d95e5aace3 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
@@ -292,6 +292,10 @@ spe_load_float(struct spe_function *p, unsigned rT, float x);
extern void
spe_load_int(struct spe_function *p, unsigned rT, int i);
+/** Replicate word 0 of rA across rT. */
+extern void
+spe_splat(struct spe_function *p, unsigned rT, unsigned rA);
+
/** Complement/invert all bits in rT. */
extern void
spe_complement(struct spe_function *p, unsigned rT);
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index e989d8c2e5..cb0631baf5 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -92,6 +92,7 @@
#define CELL_CMD_STATE_UNIFORMS 16
#define CELL_CMD_STATE_VS_ARRAY_INFO 17
#define CELL_CMD_STATE_BIND_VS 18
+#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
#define CELL_CMD_STATE_ATTRIB_FETCH 20
#define CELL_CMD_VS_EXECUTE 22
#define CELL_CMD_FLUSH_BUFFER_RANGE 23
@@ -125,6 +126,20 @@ struct cell_command_fragment_ops
};
+/** Max instructions for fragment programs */
+#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128
+
+/**
+ * Command to send a fragment progra to SPUs.
+ */
+struct cell_command_fragment_program
+{
+ uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
+ uint num_inst; /**< Number of instructions */
+ unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
+};
+
+
/**
* Tell SPUs about the framebuffer size, location
*/
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile
index 8699f3f8ec..b28f4c5c31 100644
--- a/src/gallium/drivers/cell/ppu/Makefile
+++ b/src/gallium/drivers/cell/ppu/Makefile
@@ -26,6 +26,7 @@ SOURCES = \
cell_draw_arrays.c \
cell_flush.c \
cell_gen_fragment.c \
+ cell_gen_fp.c \
cell_state_derived.c \
cell_state_emit.c \
cell_state_shader.c \
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index 8cec9f45b2..14914b9c6f 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -61,6 +61,7 @@ struct cell_fragment_shader_state
{
struct pipe_shader_state shader;
struct tgsi_shader_info info;
+ struct spe_function code;
void *data;
};
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
new file mode 100644
index 0000000000..6ffe94eb14
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -0,0 +1,523 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+/**
+ * Generate SPU fragment program/shader code.
+ *
+ * Note that we generate SOA-style code here. So each TGSI instruction
+ * operates on four pixels (and is translated into four SPU instructions,
+ * generally speaking).
+ *
+ * \author Brian Paul
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
+#include "rtasm/rtasm_ppc_spe.h"
+#include "util/u_memory.h"
+#include "cell_context.h"
+#include "cell_gen_fp.h"
+
+
+/** Set to 1 to enable debug/disassembly printfs */
+#define DISASSEM 01
+
+
+/**
+ * Context needed during code generation.
+ */
+struct codegen
+{
+ int inputs_reg; /**< 1st function parameter */
+ int outputs_reg; /**< 2nd function parameter */
+ int constants_reg; /**< 3rd function parameter */
+ int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */
+
+ int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
+
+ /** Per-instruction temps / intermediate temps */
+ int num_itemps;
+ int itemps[3];
+
+ struct spe_function *f;
+ boolean error;
+};
+
+
+/**
+ * Allocate an intermediate temporary register.
+ */
+static int
+get_itemp(struct codegen *gen)
+{
+ int t = spe_allocate_available_register(gen->f);
+ assert(gen->num_itemps < Elements(gen->itemps));
+ gen->itemps[gen->num_itemps++] = t;
+ return t;
+}
+
+/**
+ * Free all intermediate temporary registers. To be called after each
+ * instruction has been emitted.
+ */
+static void
+free_itemps(struct codegen *gen)
+{
+ int i;
+ for (i = 0; i < gen->num_itemps; i++) {
+ spe_release_register(gen->f, gen->itemps[i]);
+ }
+ gen->num_itemps = 0;
+}
+
+
+/**
+ * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
+ * The register is allocated and initialized upon the first call.
+ */
+static int
+get_const_one_reg(struct codegen *gen)
+{
+ if (gen->one_reg <= 0) {
+ gen->one_reg = spe_allocate_available_register(gen->f);
+ }
+
+ /* one = {1.0, 1.0, 1.0, 1.0} */
+ spe_load_float(gen->f, gen->one_reg, 1.0f);
+#if DISASSEM
+ printf("il\tr%d, 1.0f\n", gen->one_reg);
+#endif
+
+ return gen->one_reg;
+}
+
+
+/**
+ * Return the index of the SPU temporary containing the named TGSI
+ * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
+ * just return the corresponding SPE register. If the TGIS register
+ * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
+ * and emit an SPE load instruction.
+ */
+static int
+get_src_reg(struct codegen *gen,
+ int channel,
+ const struct tgsi_full_src_register *src)
+{
+ int reg;
+
+ /* XXX need to examine src swizzle info here.
+ * That will involve changing the channel var...
+ */
+
+
+ switch (src->SrcRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ reg = gen->temp_regs[src->SrcRegister.Index][channel];
+ break;
+ case TGSI_FILE_INPUT:
+ {
+ /* offset is measured in quadwords, not bytes */
+ int offset = src->SrcRegister.Index * 4 + channel;
+ reg = get_itemp(gen);
+ /* Load: reg = memory[(machine_reg) + offset] */
+ spe_lqd(gen->f, reg, gen->inputs_reg, offset);
+#if DISASSEM
+ printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset);
+#endif
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ /* xxx fall-through for now / fix */
+ case TGSI_FILE_CONSTANT:
+ /* xxx fall-through for now / fix */
+ default:
+ assert(0);
+ }
+
+ return reg;
+}
+
+
+/**
+ * Return the index of an SPE register to use for the given TGSI register.
+ * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
+ * corresponding SPE register is returned. If the TGSI register is
+ * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
+ * See store_dest_reg() below...
+ */
+static int
+get_dst_reg(struct codegen *gen,
+ int channel,
+ const struct tgsi_full_dst_register *dest)
+{
+ int reg;
+
+ switch (dest->DstRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ reg = gen->temp_regs[dest->DstRegister.Index][channel];
+ break;
+ case TGSI_FILE_OUTPUT:
+ reg = get_itemp(gen);
+ break;
+ default:
+ assert(0);
+ }
+
+ return reg;
+}
+
+
+/**
+ * When a TGSI instruction is writing to an output register, this
+ * function emits the SPE store instruction to store the value_reg.
+ * \param value_reg the SPE register containing the value to store.
+ * This would have been returned by get_dst_reg().
+ */
+static void
+store_dest_reg(struct codegen *gen,
+ int value_reg, int channel,
+ const struct tgsi_full_dst_register *dest)
+{
+ switch (dest->DstRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ /* no-op */
+ break;
+ case TGSI_FILE_OUTPUT:
+ {
+ /* offset is measured in quadwords, not bytes */
+ int offset = dest->DstRegister.Index * 4 + channel;
+ /* Store: memory[(machine_reg) + offset] = reg */
+ spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
+#if DISASSEM
+ printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset);
+#endif
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+static boolean
+emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ /* XXX we don't always need to actually emit a mov instruction here */
+ spe_move(gen->f, dst_reg, src_reg);
+#if DISASSEM
+ printf("mov\tr%d, r%d\n", dst_reg, src_reg);
+#endif
+ store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+ return true;
+}
+
+
+/**
+ * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
+ * becomes (up to) four SPU "fa" instructions because we're doing SOA
+ * processing.
+ */
+static boolean
+emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ /* Loop over Red/Green/Blue/Alpha channels */
+ for (ch = 0; ch < 4; ch++) {
+ /* If the dest R, G, B or A writemask is enabled... */
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ /* get indexes of the two src, one dest SPE registers */
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ /* Emit actual SPE instruction: d = s1 + s2 */
+ spe_fa(gen->f, d_reg, s1_reg, s2_reg);
+#if DISASSEM
+ printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
+#endif
+
+ /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ /* Free any intermediate temps we allocated */
+ free_itemps(gen);
+ }
+ }
+ return true;
+}
+
+
+/**
+ * Emit multiply. See emit_ADD for comments.
+ */
+static boolean
+emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ /* d = s1 * s2 */
+ spe_fm(gen->f, d_reg, s1_reg, s2_reg);
+#if DISASSEM
+ printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
+#endif
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+ return true;
+}
+
+
+/**
+ * Emit set-if-greater-than.
+ * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
+ * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
+ * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
+ */
+static boolean
+emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ /* d = (s1 > s2) */
+ spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
+#if DISASSEM
+ printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
+#endif
+
+ /* convert d from 0x0/0xffffffff to 0.0/1.0 */
+ /* d = d & one_reg */
+ spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
+#if DISASSEM
+ printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen));
+#endif
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return true;
+}
+
+
+/**
+ * Emit END instruction.
+ * We just return from the shader function at this point.
+ *
+ * Note that there may be more code after this that would be
+ * called by TGSI_OPCODE_CALL.
+ */
+static boolean
+emit_END(struct codegen *gen)
+{
+ /* return from function call */
+ spe_bi(gen->f, SPE_REG_RA, 0, 0);
+#if DISASSEM
+ printf("bi\trRA\n");
+#endif
+ return true;
+}
+
+
+/**
+ * Emit code for the given instruction. Just a big switch stmt.
+ */
+static boolean
+emit_instruction(struct codegen *gen,
+ const struct tgsi_full_instruction *inst)
+{
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_MOV:
+ return emit_MOV(gen, inst);
+ case TGSI_OPCODE_MUL:
+ return emit_MUL(gen, inst);
+ case TGSI_OPCODE_ADD:
+ return emit_ADD(gen, inst);
+ case TGSI_OPCODE_SGT:
+ return emit_SGT(gen, inst);
+ case TGSI_OPCODE_END:
+ return emit_END(gen);
+
+ /* XXX lots more cases to do... */
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+
+
+/**
+ * Emit "code" for a TGSI declaration.
+ * We only care about TGSI TEMPORARY register declarations at this time.
+ * For each TGSI TEMPORARY we allocate four SPE registers.
+ */
+static void
+emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl)
+{
+ int i, ch;
+
+ switch (decl->Declaration.File) {
+ case TGSI_FILE_TEMPORARY:
+#if DISASSEM
+ printf("Declare temp reg %d .. %d\n",
+ decl->DeclarationRange.First,
+ decl->DeclarationRange.Last);
+#endif
+ for (i = decl->DeclarationRange.First;
+ i <= decl->DeclarationRange.Last;
+ i++) {
+ for (ch = 0; ch < 4; ch++) {
+ gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
+ }
+
+ /* XXX if we run out of SPE registers, we need to spill
+ * to SPU memory. someday...
+ */
+
+#if DISASSEM
+ printf(" SPE regs: %d %d %d %d\n",
+ gen->temp_regs[i][0],
+ gen->temp_regs[i][1],
+ gen->temp_regs[i][2],
+ gen->temp_regs[i][3]);
+#endif
+ }
+ break;
+ default:
+ ; /* ignore */
+ }
+}
+
+
+/**
+ * Translate TGSI shader code to SPE instructions. This is done when
+ * the state tracker gives us a new shader (via pipe->create_fs_state()).
+ *
+ * \param cell the rendering context (in)
+ * \param tokens the TGSI shader (in)
+ * \param f the generated function (out)
+ */
+boolean
+cell_gen_fragment_program(struct cell_context *cell,
+ const struct tgsi_token *tokens,
+ struct spe_function *f)
+{
+ struct tgsi_parse_context parse;
+ struct codegen gen;
+
+ memset(&gen, 0, sizeof(gen));
+ gen.f = f;
+
+ /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
+ gen.inputs_reg = 3; /* pointer to inputs array */
+ gen.outputs_reg = 4; /* pointer to outputs array */
+ gen.constants_reg = 5; /* pointer to constants array */
+
+ spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
+ spe_allocate_register(f, gen.inputs_reg);
+ spe_allocate_register(f, gen.outputs_reg);
+ spe_allocate_register(f, gen.constants_reg);
+
+#if DISASSEM
+ printf("Begin %s\n", __FUNCTION__);
+ tgsi_dump(tokens, 0);
+#endif
+
+ tgsi_parse_init(&parse, tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+#if 0
+ if (!note_immediate(&gen, &parse.FullToken.FullImmediate ))
+ goto fail;
+#endif
+ break;
+
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ emit_declaration(&gen, &parse.FullToken.FullDeclaration);
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) {
+ gen.error = true;
+ }
+ break;
+
+ default:
+ assert(0);
+
+ }
+ }
+
+
+ if (gen.error) {
+ /* terminate the SPE code */
+ return emit_END(&gen);
+ }
+
+#if DISASSEM
+ printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
+ printf("End %s\n", __FUNCTION__);
+#endif
+
+ tgsi_parse_free( &parse );
+
+ return !gen.error;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h
new file mode 100644
index 0000000000..99faea7046
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+#ifndef CELL_GEN_FP_H
+#define CELL_GEN_FP_H
+
+
+
+extern boolean
+cell_gen_fragment_program(struct cell_context *cell,
+ const struct tgsi_token *tokens,
+ struct spe_function *f);
+
+
+#endif /* CELL_GEN_FP_H */
+
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
index 79a82ef72b..06219d4e98 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -265,6 +265,8 @@ gen_blend(const struct pipe_blend_state *blend,
int one_reg = spe_allocate_available_register(f);
int tmp_reg = spe_allocate_available_register(f);
+ boolean one_reg_set = false; /* avoid setting one_reg more than once */
+
ASSERT(blend->blend_enable);
/* Unpack/convert framebuffer colors from four 32-bit packed colors
@@ -275,7 +277,7 @@ gen_blend(const struct pipe_blend_state *blend,
int mask_reg = spe_allocate_available_register(f);
/* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
- spe_fsmbi(f, mask_reg, 0x1111);
+ spe_load_int(f, mask_reg, 0xff);
/* XXX there may be more clever ways to implement the following code */
switch (color_format) {
@@ -418,7 +420,10 @@ gen_blend(const struct pipe_blend_state *blend,
break;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
/* one = {1.0, 1.0, 1.0, 1.0} */
- spe_load_float(f, one_reg, 1.0f);
+ if (!one_reg_set) {
+ spe_load_float(f, one_reg, 1.0f);
+ one_reg_set = true;
+ }
/* tmp = one - fragA */
spe_fs(f, tmp_reg, one_reg, fragA_reg);
/* term = fb * tmp */
@@ -446,7 +451,10 @@ gen_blend(const struct pipe_blend_state *blend,
break;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
/* one = {1.0, 1.0, 1.0, 1.0} */
- spe_load_float(f, one_reg, 1.0f);
+ if (!one_reg_set) {
+ spe_load_float(f, one_reg, 1.0f);
+ one_reg_set = true;
+ }
/* tmp = one - fragA */
spe_fs(f, tmp_reg, one_reg, fragA_reg);
/* termA = fbA * tmp */
@@ -616,7 +624,7 @@ gen_pack_colors(struct spe_function *f,
* \param f the generated function (out)
*/
void
-gen_fragment_function(struct cell_context *cell, struct spe_function *f)
+cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
{
const struct pipe_depth_stencil_alpha_state *dsa =
&cell->depth_stencil->base;
@@ -850,7 +858,7 @@ gen_fragment_function(struct cell_context *cell, struct spe_function *f)
spe_release_register(f, rgba_reg);
}
- printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
+ //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
index 0ea0fc690c..b59de198dc 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
@@ -31,7 +31,7 @@
extern void
-gen_fragment_function(struct cell_context *cell, struct spe_function *f);
+cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f);
#endif /* CELL_GEN_FRAGMENT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 180b89c1f6..2da3097983 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -73,6 +73,22 @@ cell_emit_state(struct cell_context *cell)
#endif
}
+ if (cell->dirty & (CELL_NEW_FS)) {
+ /* Send new fragment program to SPUs */
+ struct cell_command_fragment_program *fp
+ = cell_batch_alloc(cell, sizeof(*fp));
+ fp->opcode = CELL_CMD_STATE_FRAGMENT_PROGRAM;
+ fp->num_inst = cell->fs->code.num_inst;
+ memcpy(&fp->code, cell->fs->code.store,
+ SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
+ if (0) {
+ int i;
+ printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n");
+ for (i = 0; i < fp->num_inst; i++) {
+ printf(" %3d: 0x%08x\n", i, fp->code[i]);
+ }
+ }
+ }
if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
CELL_NEW_DEPTH_STENCIL |
@@ -85,7 +101,7 @@ cell_emit_state(struct cell_context *cell)
struct spe_function spe_code;
/* generate new code */
- gen_fragment_function(cell, &spe_code);
+ cell_gen_fragment_function(cell, &spe_code);
/* put the new code into the batch buffer */
fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
memcpy(&fops->code, spe_code.store,
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
index 97e44eeb1a..3a0d066da2 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c
@@ -34,7 +34,7 @@
#include "cell_context.h"
#include "cell_state.h"
-
+#include "cell_gen_fp.h"
/** cast wrapper */
@@ -61,7 +61,7 @@ static void *
cell_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
{
- /*struct cell_context *cell = cell_context(pipe);*/
+ struct cell_context *cell = cell_context(pipe);
struct cell_fragment_shader_state *cfs;
cfs = CALLOC_STRUCT(cell_fragment_shader_state);
@@ -76,6 +76,8 @@ cell_create_fs_state(struct pipe_context *pipe,
tgsi_scan_shader(templ->tokens, &cfs->info);
+ cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code);
+
return cfs;
}
@@ -102,6 +104,8 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs)
{
struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs);
+ spe_release_func(&cfs->code);
+
FREE((void *) cfs->shader.tokens);
FREE(cfs);
}
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 2a7cb75f59..78260c4259 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -232,7 +232,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);
/* Copy SPU code from batch buffer to spu buffer */
memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
- /* Copy state info */
+ /* Copy state info (for fallback case only) */
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
@@ -245,6 +245,21 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
static void
+cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
+{
+ if (Debug)
+ printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id);
+ /* Copy SPU code from batch buffer to spu buffer */
+ memcpy(spu.fragment_program_code, fp->code,
+ SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
+#if 01
+ /* Point function pointer at new code */
+ spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
+#endif
+}
+
+
+static void
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
{
if (Debug)
@@ -473,6 +488,14 @@ cmd_batch(uint opcode)
pos += sizeof(*fops) / 8;
}
break;
+ case CELL_CMD_STATE_FRAGMENT_PROGRAM:
+ {
+ struct cell_command_fragment_program *fp
+ = (struct cell_command_fragment_program *) &buffer[pos];
+ cmd_state_fragment_program(fp);
+ pos += sizeof(*fp) / 8;
+ }
+ break;
case CELL_CMD_STATE_SAMPLER:
{
struct cell_command_sampler *sampler
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index d40539da83..2c7b625840 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -75,6 +75,12 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y,
vector float fragAlpha,
vector unsigned int mask);
+/** Function for running fragment program */
+typedef void (*spu_fragment_program_func)(vector float *inputs,
+ vector float *outputs,
+ vector float *constants);
+
+
struct spu_framebuffer
{
void *color_start; /**< addr of color surface in main memory */
@@ -142,9 +148,18 @@ struct spu_global
/** Current fragment ops function */
spu_fragment_ops_func fragment_ops;
+ /** Current fragment program machine code */
+ uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
+ /** Current fragment ops function */
+ spu_fragment_program_func fragment_program;
+
/** Current texture sampler function */
spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
+ /** Fragment program constants (XXX preliminary/used) */
+#define MAX_CONSTANTS 32
+ vector float constants[MAX_CONSTANTS];
+
} ALIGN16_ATTRIB;
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index f02cdd1f76..8b93878192 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -314,7 +314,42 @@ emit_quad( int x, int y, mask_t mask )
}
else {
/* simple shading */
+#if 0
eval_coeff(1, (float) x, (float) y, colors);
+
+#else
+ /* XXX new fragment program code */
+
+ if (spu.fragment_program) {
+ vector float inputs[4*4], outputs[2*4];
+
+ /* setup inputs */
+ eval_coeff(1, (float) x, (float) y, inputs);
+
+ /* Execute the current fragment program */
+ spu.fragment_program(inputs, outputs, spu.constants);
+
+ /* Copy outputs */
+ colors[0] = outputs[0*4+0];
+ colors[1] = outputs[0*4+1];
+ colors[2] = outputs[0*4+2];
+ colors[3] = outputs[0*4+3];
+
+ if (0 && spu.init.id==0 && y == 48) {
+ printf("colors[0] = %f %f %f %f\n",
+ spu_extract(colors[0], 0),
+ spu_extract(colors[0], 1),
+ spu_extract(colors[0], 2),
+ spu_extract(colors[0], 3));
+ printf("colors[1] = %f %f %f %f\n",
+ spu_extract(colors[1], 0),
+ spu_extract(colors[1], 1),
+ spu_extract(colors[1], 2),
+ spu_extract(colors[1], 3));
+ }
+
+ }
+#endif
}
diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c
index c4a30d3702..2acbc94fc8 100644
--- a/src/gallium/winsys/xlib/xm_winsys.c
+++ b/src/gallium/winsys/xlib/xm_winsys.c
@@ -289,21 +289,19 @@ xm_buffer_destroy(struct pipe_winsys *pws,
* +--+--+
*/
static void
-twiddle_tile(uint *tile)
+twiddle_tile(const uint *tileIn, uint *tileOut)
{
- uint tile2[TILE_SIZE * TILE_SIZE];
int y, x;
for (y = 0; y < TILE_SIZE; y+=2) {
for (x = 0; x < TILE_SIZE; x+=2) {
int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
- tile2[y * TILE_SIZE + (x + 0)] = tile[k];
- tile2[y * TILE_SIZE + (x + 1)] = tile[k+1];
- tile2[(y + 1) * TILE_SIZE + (x + 0)] = tile[k+2];
- tile2[(y + 1) * TILE_SIZE + (x + 1)] = tile[k+3];
+ tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
+ tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
+ tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
+ tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
}
}
- memcpy(tile, tile2, sizeof(tile2));
}
@@ -339,6 +337,7 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
for (y = 0; y < surf->height; y += TILE_SIZE) {
for (x = 0; x < surf->width; x += TILE_SIZE) {
+ uint tmpTile[TILE_SIZE * TILE_SIZE];
int tx = x / TILE_SIZE;
int ty = y / TILE_SIZE;
int offset = ty * tilesPerRow + tx;
@@ -352,9 +351,9 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
offset *= 4 * TILE_SIZE * TILE_SIZE;
- ximage->data = (char *) xm_buf->data + offset;
-
- twiddle_tile((uint *) ximage->data);
+ twiddle_tile((uint *) ((char *) xm_buf->data + offset),
+ tmpTile);
+ ximage->data = (char*) tmpTile;
if (XSHM_ENABLED(xm_buf)) {
#if defined(USE_XSHM) && !defined(XFree86Server)