diff options
Diffstat (limited to 'src/gallium/drivers')
| -rw-r--r-- | src/gallium/drivers/cell/common.h | 15 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/ppu/Makefile | 1 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/ppu/cell_context.h | 1 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/ppu/cell_gen_fp.c | 523 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/ppu/cell_gen_fp.h | 42 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 18 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/ppu/cell_gen_fragment.h | 2 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/ppu/cell_state_emit.c | 18 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/ppu/cell_state_shader.c | 8 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/spu/spu_main.c | 25 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/spu/spu_main.h | 15 | ||||
| -rw-r--r-- | src/gallium/drivers/cell/spu/spu_tri.c | 35 | 
12 files changed, 693 insertions, 10 deletions
| diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index e989d8c2e5..cb0631baf5 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -92,6 +92,7 @@  #define CELL_CMD_STATE_UNIFORMS      16  #define CELL_CMD_STATE_VS_ARRAY_INFO 17  #define CELL_CMD_STATE_BIND_VS       18 +#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19  #define CELL_CMD_STATE_ATTRIB_FETCH  20  #define CELL_CMD_VS_EXECUTE          22  #define CELL_CMD_FLUSH_BUFFER_RANGE  23 @@ -125,6 +126,20 @@ struct cell_command_fragment_ops  }; +/** Max instructions for fragment programs */ +#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128 + +/** + * Command to send a fragment progra to SPUs. + */ +struct cell_command_fragment_program +{ +   uint64_t opcode;      /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */ +   uint num_inst;        /**< Number of instructions */ +   unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; +}; + +  /**   * Tell SPUs about the framebuffer size, location   */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 8699f3f8ec..b28f4c5c31 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -26,6 +26,7 @@ SOURCES = \  	cell_draw_arrays.c \  	cell_flush.c \  	cell_gen_fragment.c \ +	cell_gen_fp.c \  	cell_state_derived.c \  	cell_state_emit.c \  	cell_state_shader.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 8cec9f45b2..14914b9c6f 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -61,6 +61,7 @@ struct cell_fragment_shader_state  {     struct pipe_shader_state shader;     struct tgsi_shader_info info; +   struct spe_function code;     void *data;  }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c new file mode 100644 index 0000000000..6ffe94eb14 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -0,0 +1,523 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + + +/** + * Generate SPU fragment program/shader code. + * + * Note that we generate SOA-style code here.  So each TGSI instruction + * operates on four pixels (and is translated into four SPU instructions, + * generally speaking). + * + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "util/u_memory.h" +#include "cell_context.h" +#include "cell_gen_fp.h" + + +/** Set to 1 to enable debug/disassembly printfs */ +#define DISASSEM 01 + + +/** + * Context needed during code generation. + */ +struct codegen +{ +   int inputs_reg;      /**< 1st function parameter */ +   int outputs_reg;     /**< 2nd function parameter */ +   int constants_reg;   /**< 3rd function parameter */ +   int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */ + +   int one_reg;         /**< register containing {1.0, 1.0, 1.0, 1.0} */ + +   /** Per-instruction temps / intermediate temps */ +   int num_itemps; +   int itemps[3]; + +   struct spe_function *f; +   boolean error; +}; + + +/** + * Allocate an intermediate temporary register. + */ +static int +get_itemp(struct codegen *gen) +{ +   int t = spe_allocate_available_register(gen->f); +   assert(gen->num_itemps < Elements(gen->itemps)); +   gen->itemps[gen->num_itemps++] = t; +   return t; +} + +/** + * Free all intermediate temporary registers.  To be called after each + * instruction has been emitted. + */ +static void +free_itemps(struct codegen *gen) +{ +   int i; +   for (i = 0; i < gen->num_itemps; i++) { +      spe_release_register(gen->f, gen->itemps[i]); +   } +   gen->num_itemps = 0; +} + + +/** + * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}. + * The register is allocated and initialized upon the first call. + */ +static int +get_const_one_reg(struct codegen *gen) +{ +   if (gen->one_reg <= 0) { +      gen->one_reg = spe_allocate_available_register(gen->f); +   } + +   /* one = {1.0, 1.0, 1.0, 1.0} */ +   spe_load_float(gen->f, gen->one_reg, 1.0f); +#if DISASSEM +   printf("il\tr%d, 1.0f\n", gen->one_reg); +#endif + +   return gen->one_reg; +} + + +/** + * Return the index of the SPU temporary containing the named TGSI + * source register.  If the TGSI register is a TGSI_FILE_TEMPORARY we + * just return the corresponding SPE register.  If the TGIS register + * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register + * and emit an SPE load instruction. + */ +static int +get_src_reg(struct codegen *gen, +            int channel, +            const struct tgsi_full_src_register *src) +{ +   int reg; + +   /* XXX need to examine src swizzle info here. +    * That will involve changing the channel var... +    */ + + +   switch (src->SrcRegister.File) { +   case TGSI_FILE_TEMPORARY: +      reg = gen->temp_regs[src->SrcRegister.Index][channel]; +      break; +   case TGSI_FILE_INPUT: +      { +         /* offset is measured in quadwords, not bytes */ +         int offset = src->SrcRegister.Index * 4 + channel; +         reg = get_itemp(gen); +         /* Load:  reg = memory[(machine_reg) + offset] */ +         spe_lqd(gen->f, reg, gen->inputs_reg, offset); +#if DISASSEM +         printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset); +#endif +      } +      break; +   case TGSI_FILE_IMMEDIATE: +      /* xxx fall-through for now / fix */ +   case TGSI_FILE_CONSTANT: +      /* xxx fall-through for now / fix */ +   default: +      assert(0); +   } + +   return reg; +} + + +/** + * Return the index of an SPE register to use for the given TGSI register. + * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the + * corresponding SPE register is returned.  If the TGSI register is + * TGSI_FILE_OUTPUT we allocate an intermediate temporary register. + * See store_dest_reg() below... + */ +static int +get_dst_reg(struct codegen *gen, +            int channel, +            const struct tgsi_full_dst_register *dest) +{ +   int reg; + +   switch (dest->DstRegister.File) { +   case TGSI_FILE_TEMPORARY: +      reg = gen->temp_regs[dest->DstRegister.Index][channel]; +      break; +   case TGSI_FILE_OUTPUT: +      reg = get_itemp(gen); +      break; +   default: +      assert(0); +   } + +   return reg; +} + + +/** + * When a TGSI instruction is writing to an output register, this + * function emits the SPE store instruction to store the value_reg. + * \param value_reg  the SPE register containing the value to store. + *                   This would have been returned by get_dst_reg(). + */ +static void +store_dest_reg(struct codegen *gen, +               int value_reg, int channel, +               const struct tgsi_full_dst_register *dest) +{ +   switch (dest->DstRegister.File) { +   case TGSI_FILE_TEMPORARY: +      /* no-op */ +      break; +   case TGSI_FILE_OUTPUT: +      { +         /* offset is measured in quadwords, not bytes */ +         int offset = dest->DstRegister.Index * 4 + channel; +         /* Store: memory[(machine_reg) + offset] = reg */ +         spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); +#if DISASSEM +         printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset); +#endif +      } +      break; +   default: +      assert(0); +   } +} + + +static boolean +emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch; +   for (ch = 0; ch < 4; ch++) { +      if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { +         int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +         int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +         /* XXX we don't always need to actually emit a mov instruction here */ +         spe_move(gen->f, dst_reg, src_reg); +#if DISASSEM +         printf("mov\tr%d, r%d\n", dst_reg, src_reg); +#endif +         store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]); +         free_itemps(gen); +      } +   } +   return true; +} + + +/** + * Emit addition instructions.  Recall that a single TGSI_OPCODE_ADD + * becomes (up to) four SPU "fa" instructions because we're doing SOA + * processing. + */ +static boolean +emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch; +   /* Loop over Red/Green/Blue/Alpha channels */ +   for (ch = 0; ch < 4; ch++) { +      /* If the dest R, G, B or A writemask is enabled... */ +      if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { +         /* get indexes of the two src, one dest SPE registers */ +         int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +         int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); +         int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + +         /* Emit actual SPE instruction: d = s1 + s2 */ +         spe_fa(gen->f, d_reg, s1_reg, s2_reg); +#if DISASSEM +         printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); +#endif + +         /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ +         store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); +         /* Free any intermediate temps we allocated */ +         free_itemps(gen); +      } +   } +   return true; +} + + +/** + * Emit multiply.  See emit_ADD for comments. + */ +static boolean +emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch; +   for (ch = 0; ch < 4; ch++) { +      if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { +         int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +         int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); +         int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +         /* d = s1 * s2 */ +         spe_fm(gen->f, d_reg, s1_reg, s2_reg); +#if DISASSEM +         printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); +#endif +         store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); +         free_itemps(gen); +      } +   } +   return true; +} + + +/** + * Emit set-if-greater-than. + * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as + * the result but OpenGL/TGSI needs 0.0 and 1.0 results. + * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. + */ +static boolean +emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch; + +   for (ch = 0; ch < 4; ch++) { +      if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { +         int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +         int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); +         int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + +         /* d = (s1 > s2) */ +         spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); +#if DISASSEM +         printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); +#endif + +         /* convert d from 0x0/0xffffffff to 0.0/1.0 */ +         /* d = d & one_reg */ +         spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); +#if DISASSEM +         printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen)); +#endif + +         store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); +         free_itemps(gen); +      } +   } + +   return true; +} + + +/** + * Emit END instruction. + * We just return from the shader function at this point. + * + * Note that there may be more code after this that would be + * called by TGSI_OPCODE_CALL. + */ +static boolean +emit_END(struct codegen *gen) +{ +   /* return from function call */ +   spe_bi(gen->f, SPE_REG_RA, 0, 0); +#if DISASSEM +   printf("bi\trRA\n"); +#endif +   return true; +} + + +/** + * Emit code for the given instruction.  Just a big switch stmt. + */ +static boolean +emit_instruction(struct codegen *gen, +                 const struct tgsi_full_instruction *inst) +{ +   switch (inst->Instruction.Opcode) { +   case TGSI_OPCODE_MOV: +      return emit_MOV(gen, inst); +   case TGSI_OPCODE_MUL: +      return emit_MUL(gen, inst); +   case TGSI_OPCODE_ADD: +      return emit_ADD(gen, inst); +   case TGSI_OPCODE_SGT: +      return emit_SGT(gen, inst); +   case TGSI_OPCODE_END: +      return emit_END(gen); + +   /* XXX lots more cases to do... */ + +   default: +      return false; +   } + +   return true; +} + + + +/** + * Emit "code" for a TGSI declaration. + * We only care about TGSI TEMPORARY register declarations at this time. + * For each TGSI TEMPORARY we allocate four SPE registers. + */ +static void +emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl) +{ +   int i, ch; + +   switch (decl->Declaration.File) { +   case TGSI_FILE_TEMPORARY: +#if DISASSEM +      printf("Declare temp reg %d .. %d\n", +             decl->DeclarationRange.First, +             decl->DeclarationRange.Last); +#endif +      for (i = decl->DeclarationRange.First; +           i <= decl->DeclarationRange.Last; +           i++) { +         for (ch = 0; ch < 4; ch++) { +            gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); +         } + +         /* XXX if we run out of SPE registers, we need to spill +          * to SPU memory.  someday... +          */ + +#if DISASSEM +         printf("  SPE regs: %d %d %d %d\n", +                gen->temp_regs[i][0], +                gen->temp_regs[i][1], +                gen->temp_regs[i][2], +                gen->temp_regs[i][3]); +#endif +      } +      break; +   default: +      ; /* ignore */ +   } +} + + +/** + * Translate TGSI shader code to SPE instructions.  This is done when + * the state tracker gives us a new shader (via pipe->create_fs_state()). + * + * \param cell    the rendering context (in) + * \param tokens  the TGSI shader (in) + * \param f       the generated function (out) + */ +boolean +cell_gen_fragment_program(struct cell_context *cell, +                          const struct tgsi_token *tokens, +                          struct spe_function *f) +{ +   struct tgsi_parse_context parse; +   struct codegen gen; + +   memset(&gen, 0, sizeof(gen)); +   gen.f = f; + +   /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ +   gen.inputs_reg = 3;     /* pointer to inputs array */ +   gen.outputs_reg = 4;    /* pointer to outputs array */ +   gen.constants_reg = 5;  /* pointer to constants array */ + +   spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); +   spe_allocate_register(f, gen.inputs_reg); +   spe_allocate_register(f, gen.outputs_reg); +   spe_allocate_register(f, gen.constants_reg); + +#if DISASSEM +   printf("Begin %s\n", __FUNCTION__); +   tgsi_dump(tokens, 0); +#endif + +   tgsi_parse_init(&parse, tokens); + +   while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { +      tgsi_parse_token(&parse); + +      switch (parse.FullToken.Token.Type) { +      case TGSI_TOKEN_TYPE_IMMEDIATE: +#if 0 +         if (!note_immediate(&gen, &parse.FullToken.FullImmediate )) +            goto fail; +#endif +         break; + +      case TGSI_TOKEN_TYPE_DECLARATION: +         emit_declaration(&gen, &parse.FullToken.FullDeclaration); +         break; + +      case TGSI_TOKEN_TYPE_INSTRUCTION: +         if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) { +            gen.error = true; +         } +         break; + +      default: +         assert(0); + +      } +   } + + +   if (gen.error) { +      /* terminate the SPE code */ +      return emit_END(&gen); +   } + +#if DISASSEM +   printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); +   printf("End %s\n", __FUNCTION__); +#endif + +   tgsi_parse_free( &parse ); + +   return !gen.error; +} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h new file mode 100644 index 0000000000..99faea7046 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.h @@ -0,0 +1,42 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + + +#ifndef CELL_GEN_FP_H +#define CELL_GEN_FP_H + + + +extern boolean +cell_gen_fragment_program(struct cell_context *cell, +                          const struct tgsi_token *tokens, +                          struct spe_function *f); + + +#endif /* CELL_GEN_FP_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 79a82ef72b..06219d4e98 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -265,6 +265,8 @@ gen_blend(const struct pipe_blend_state *blend,     int one_reg = spe_allocate_available_register(f);     int tmp_reg = spe_allocate_available_register(f); +   boolean one_reg_set = false; /* avoid setting one_reg more than once */ +     ASSERT(blend->blend_enable);     /* Unpack/convert framebuffer colors from four 32-bit packed colors @@ -275,7 +277,7 @@ gen_blend(const struct pipe_blend_state *blend,        int mask_reg = spe_allocate_available_register(f);        /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */ -      spe_fsmbi(f, mask_reg, 0x1111); +      spe_load_int(f, mask_reg, 0xff);        /* XXX there may be more clever ways to implement the following code */        switch (color_format) { @@ -418,7 +420,10 @@ gen_blend(const struct pipe_blend_state *blend,        break;     case PIPE_BLENDFACTOR_INV_SRC_ALPHA:        /* one = {1.0, 1.0, 1.0, 1.0} */ -      spe_load_float(f, one_reg, 1.0f); +      if (!one_reg_set) { +         spe_load_float(f, one_reg, 1.0f); +         one_reg_set = true; +      }        /* tmp = one - fragA */        spe_fs(f, tmp_reg, one_reg, fragA_reg);        /* term = fb * tmp */ @@ -446,7 +451,10 @@ gen_blend(const struct pipe_blend_state *blend,        break;     case PIPE_BLENDFACTOR_INV_SRC_ALPHA:        /* one = {1.0, 1.0, 1.0, 1.0} */ -      spe_load_float(f, one_reg, 1.0f); +      if (!one_reg_set) { +         spe_load_float(f, one_reg, 1.0f); +         one_reg_set = true; +      }        /* tmp = one - fragA */        spe_fs(f, tmp_reg, one_reg, fragA_reg);        /* termA = fbA * tmp */ @@ -616,7 +624,7 @@ gen_pack_colors(struct spe_function *f,   * \param f     the generated function (out)   */  void -gen_fragment_function(struct cell_context *cell, struct spe_function *f) +cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)  {     const struct pipe_depth_stencil_alpha_state *dsa =        &cell->depth_stencil->base; @@ -850,7 +858,7 @@ gen_fragment_function(struct cell_context *cell, struct spe_function *f)        spe_release_register(f, rgba_reg);     } -   printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); +   //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);     spe_bi(f, SPE_REG_RA, 0, 0);  /* return from function call */ diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h index 0ea0fc690c..b59de198dc 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h @@ -31,7 +31,7 @@  extern void -gen_fragment_function(struct cell_context *cell, struct spe_function *f); +cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f);  #endif /* CELL_GEN_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 180b89c1f6..2da3097983 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -73,6 +73,22 @@ cell_emit_state(struct cell_context *cell)  #endif     } +   if (cell->dirty & (CELL_NEW_FS)) { +      /* Send new fragment program to SPUs */ +      struct cell_command_fragment_program *fp +            = cell_batch_alloc(cell, sizeof(*fp)); +      fp->opcode = CELL_CMD_STATE_FRAGMENT_PROGRAM; +      fp->num_inst = cell->fs->code.num_inst; +      memcpy(&fp->code, cell->fs->code.store, +             SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); +      if (0) { +         int i; +         printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n"); +         for (i = 0; i < fp->num_inst; i++) { +            printf(" %3d: 0x%08x\n", i, fp->code[i]); +         } +      } +   }     if (cell->dirty & (CELL_NEW_FRAMEBUFFER |                        CELL_NEW_DEPTH_STENCIL | @@ -85,7 +101,7 @@ cell_emit_state(struct cell_context *cell)        struct spe_function spe_code;        /* generate new code */ -      gen_fragment_function(cell, &spe_code); +      cell_gen_fragment_function(cell, &spe_code);        /* put the new code into the batch buffer */        fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;        memcpy(&fops->code, spe_code.store, diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 97e44eeb1a..3a0d066da2 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -34,7 +34,7 @@  #include "cell_context.h"  #include "cell_state.h" - +#include "cell_gen_fp.h"  /** cast wrapper */ @@ -61,7 +61,7 @@ static void *  cell_create_fs_state(struct pipe_context *pipe,                       const struct pipe_shader_state *templ)  { -   /*struct cell_context *cell = cell_context(pipe);*/ +   struct cell_context *cell = cell_context(pipe);     struct cell_fragment_shader_state *cfs;     cfs = CALLOC_STRUCT(cell_fragment_shader_state); @@ -76,6 +76,8 @@ cell_create_fs_state(struct pipe_context *pipe,     tgsi_scan_shader(templ->tokens, &cfs->info); +   cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code); +     return cfs;  } @@ -102,6 +104,8 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs)  {     struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs); +   spe_release_func(&cfs->code); +     FREE((void *) cfs->shader.tokens);     FREE(cfs);  } diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 2a7cb75f59..78260c4259 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -232,7 +232,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)        printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);     /* Copy SPU code from batch buffer to spu buffer */     memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); -   /* Copy state info */ +   /* Copy state info (for fallback case only) */     memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));     memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); @@ -245,6 +245,21 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)  static void +cmd_state_fragment_program(const struct cell_command_fragment_program *fp) +{ +   if (Debug) +      printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id); +   /* Copy SPU code from batch buffer to spu buffer */ +   memcpy(spu.fragment_program_code, fp->code, +          SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); +#if 01 +   /* Point function pointer at new code */ +   spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; +#endif +} + + +static void  cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)  {     if (Debug) @@ -473,6 +488,14 @@ cmd_batch(uint opcode)              pos += sizeof(*fops) / 8;           }           break; +      case CELL_CMD_STATE_FRAGMENT_PROGRAM: +         { +            struct cell_command_fragment_program *fp +               = (struct cell_command_fragment_program *) &buffer[pos]; +            cmd_state_fragment_program(fp); +            pos += sizeof(*fp) / 8; +         } +         break;        case CELL_CMD_STATE_SAMPLER:           {              struct cell_command_sampler *sampler diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index d40539da83..2c7b625840 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -75,6 +75,12 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y,                                        vector float fragAlpha,                                        vector unsigned int mask); +/** Function for running fragment program */ +typedef void (*spu_fragment_program_func)(vector float *inputs, +                                          vector float *outputs, +                                          vector float *constants); + +  struct spu_framebuffer  {     void *color_start;              /**< addr of color surface in main memory */ @@ -142,9 +148,18 @@ struct spu_global     /** Current fragment ops function */     spu_fragment_ops_func fragment_ops; +   /** Current fragment program machine code */ +   uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; +   /** Current fragment ops function */ +   spu_fragment_program_func fragment_program; +     /** Current texture sampler function */     spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; +   /** Fragment program constants (XXX preliminary/used) */ +#define MAX_CONSTANTS 32 +   vector float constants[MAX_CONSTANTS]; +  } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index f02cdd1f76..8b93878192 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -314,7 +314,42 @@ emit_quad( int x, int y, mask_t mask )        }        else {           /* simple shading */ +#if 0           eval_coeff(1, (float) x, (float) y, colors); + +#else +         /* XXX new fragment program code */ + +         if (spu.fragment_program) { +            vector float inputs[4*4], outputs[2*4]; + +            /* setup inputs */ +            eval_coeff(1, (float) x, (float) y, inputs); + +            /* Execute the current fragment program */ +            spu.fragment_program(inputs, outputs, spu.constants); + +            /* Copy outputs */ +            colors[0] = outputs[0*4+0]; +            colors[1] = outputs[0*4+1]; +            colors[2] = outputs[0*4+2]; +            colors[3] = outputs[0*4+3]; + +            if (0 && spu.init.id==0 && y == 48) { +               printf("colors[0] = %f %f %f %f\n", +                      spu_extract(colors[0], 0), +                      spu_extract(colors[0], 1), +                      spu_extract(colors[0], 2), +                      spu_extract(colors[0], 3)); +               printf("colors[1] = %f %f %f %f\n", +                      spu_extract(colors[1], 0), +                      spu_extract(colors[1], 1), +                      spu_extract(colors[1], 2), +                      spu_extract(colors[1], 3)); +            } + +         } +#endif        } | 
