diff options
| author | Brian <brian.paul@tungstengraphics.com> | 2007-08-21 16:24:38 -0600 | 
|---|---|---|
| committer | Brian <brian.paul@tungstengraphics.com> | 2007-08-21 16:24:38 -0600 | 
| commit | 3af1f3b9220733f5e3a76fe38fbc397974678234 (patch) | |
| tree | c72bc7a37b1cadb570c00500d6f5584b66148dc2 /src | |
| parent | d640198b2d52c104c707522e79d53a36f708ccd0 (diff) | |
Initial check-in of i915 fragment program translation (from tgsi).
Diffstat (limited to 'src')
| -rw-r--r-- | src/mesa/pipe/i915simple/i915_fpc.c | 183 | ||||
| -rw-r--r-- | src/mesa/pipe/i915simple/i915_fpc.h | 339 | ||||
| -rw-r--r-- | src/mesa/pipe/i915simple/i915_fpc_debug.c | 346 | ||||
| -rw-r--r-- | src/mesa/pipe/i915simple/i915_fpc_emit.c | 430 | ||||
| -rw-r--r-- | src/mesa/pipe/i915simple/i915_fpc_translate.c | 838 | 
5 files changed, 2136 insertions, 0 deletions
| diff --git a/src/mesa/pipe/i915simple/i915_fpc.c b/src/mesa/pipe/i915simple/i915_fpc.c new file mode 100644 index 0000000000..fd0bbbc482 --- /dev/null +++ b/src/mesa/pipe/i915simple/i915_fpc.c @@ -0,0 +1,183 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#if 0 +#include <strings.h> + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#endif + +#include "i915_fpc.h" + + + +void +i915_program_error(struct i915_fp_compile *p, const char *msg) +{ +   fprintf(stderr, "i915_program_error: %s", msg); +   p->fp->error = 1; +} + + +static struct i915_fp_compile * +i915_init_compile(struct i915_context *i915, struct i915_fragment_program *fp) +{ +   struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); + +   p->fp = fp; +#if 0 +   p->env_param = NULL; /*i915->intel.ctx.FragmentProgram.Parameters;*/ +#endif +   p->constants = i915->fs.constants; +   p->nr_tex_indirect = 1;      /* correct? */ +   p->nr_tex_insn = 0; +   p->nr_alu_insn = 0; +   p->nr_decl_insn = 0; + +   memset(p->constant_flags, 0, sizeof(p->constant_flags)); + +   p->csr = p->program; +   p->decl = p->declarations; +   p->decl_s = 0; +   p->decl_t = 0; +   p->temp_flag = 0xffff000; +   p->utemp_flag = ~0x7; + +#if 0 +   p->fp->translated = 0; +   p->fp->error = 0; +   p->fp->nr_constants = 0; +#endif +   p->fp->wpos_tex = -1; +   p->fp->nr_params = 0; + +   *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; + +   return p; +} + +/* Copy compile results to the fragment program struct and destroy the + * compilation context. 
+ */ +static void +i915_fini_compile(struct i915_fp_compile *p) +{ +   uint program_size = p->csr - p->program; +   uint decl_size = p->decl - p->declarations; + +   if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) +      i915_program_error(p, "Exceeded max nr indirect texture lookups"); + +   if (p->nr_tex_insn > I915_MAX_TEX_INSN) +      i915_program_error(p, "Exceeded max TEX instructions"); + +   if (p->nr_alu_insn > I915_MAX_ALU_INSN) +      i915_program_error(p, "Exceeded max ALU instructions"); + +   if (p->nr_decl_insn > I915_MAX_DECL_INSN) +      i915_program_error(p, "Exceeded max DECL instructions"); + +   if (p->fp->error) { +      p->fp->NumNativeInstructions = 0; +      p->fp->NumNativeAluInstructions = 0; +      p->fp->NumNativeTexInstructions = 0; +      p->fp->NumNativeTexIndirections = 0; +      return; +   } +   else { +      p->fp->NumNativeInstructions = (p->nr_alu_insn + +                                      p->nr_tex_insn + +                                      p->nr_decl_insn); +      p->fp->NumNativeAluInstructions = p->nr_alu_insn; +      p->fp->NumNativeTexInstructions = p->nr_tex_insn; +      p->fp->NumNativeTexIndirections = p->nr_tex_indirect; +   } + +   p->declarations[0] |= program_size + decl_size - 2; + +   /* Copy compilation results to fragment program struct:  +    */ +   memcpy(p->fp->program,  +	  p->declarations,  +	  decl_size * sizeof(uint)); + +   memcpy(p->fp->program + decl_size,  +	  p->program,  +	  program_size * sizeof(uint)); +       +   p->fp->program_size = program_size + decl_size; + +   /* Release the compilation struct:  +    */ +   free(p); +} + + +/** + * Find an unused texture coordinate slot to use for fragment WPOS. + * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found). 
+ */ +static void +find_wpos_space(struct i915_fp_compile *p) +{ +   const uint inputs = p->shader->inputs_read; +   uint i; + +   p->fp->wpos_tex = -1; + +   if (inputs & FRAG_BIT_WPOS) { +      for (i = 0; i < I915_TEX_UNITS; i++) { +	 if ((inputs & (FRAG_BIT_TEX0 << i)) == 0) { +	    p->fp->wpos_tex = i; +	    return; +	 } +      } + +      i915_program_error(p, "No free texcoord for wpos value"); +   } +} + + + +void i915_compile_fragment_program( struct i915_context *i915, +				    struct i915_fragment_program *fp ) +{ +   struct i915_fp_compile *p = i915_init_compile(i915, fp); +   struct tgsi_token *tokens = i915->fs.tokens; + +   find_wpos_space(p); + +   i915_translate_program(p, tokens); +   i915_fixup_depth_write(p); + +   i915_fini_compile(p); +#if 0 +   fp->translated = 1; +#endif +} diff --git a/src/mesa/pipe/i915simple/i915_fpc.h b/src/mesa/pipe/i915simple/i915_fpc.h new file mode 100644 index 0000000000..0a8bffcd9a --- /dev/null +++ b/src/mesa/pipe/i915simple/i915_fpc.h @@ -0,0 +1,339 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef I915_FPC_H +#define I915_FPC_H + +#include "pipe/p_util.h" + +#include "i915_context.h" +#include "i915_reg.h" + + + +#define I915_PROGRAM_SIZE 192 +#define I915_MAX_CONSTANT  32 + +#define MAX_VARYING 8 + +enum +{ +   FRAG_ATTRIB_WPOS = 0, +   FRAG_ATTRIB_COL0 = 1, +   FRAG_ATTRIB_COL1 = 2, +   FRAG_ATTRIB_FOGC = 3, +   FRAG_ATTRIB_TEX0 = 4, +   FRAG_ATTRIB_TEX1 = 5, +   FRAG_ATTRIB_TEX2 = 6, +   FRAG_ATTRIB_TEX3 = 7, +   FRAG_ATTRIB_TEX4 = 8, +   FRAG_ATTRIB_TEX5 = 9, +   FRAG_ATTRIB_TEX6 = 10, +   FRAG_ATTRIB_TEX7 = 11, +   FRAG_ATTRIB_VAR0 = 12,  /**< shader varying */ +   FRAG_ATTRIB_MAX = (FRAG_ATTRIB_VAR0 + MAX_VARYING) +}; + +/** + * Bitflags for fragment program input attributes. 
+ */ +/*@{*/ +#define FRAG_BIT_WPOS  (1 << FRAG_ATTRIB_WPOS) +#define FRAG_BIT_COL0  (1 << FRAG_ATTRIB_COL0) +#define FRAG_BIT_COL1  (1 << FRAG_ATTRIB_COL1) +#define FRAG_BIT_FOGC  (1 << FRAG_ATTRIB_FOGC) +#define FRAG_BIT_TEX0  (1 << FRAG_ATTRIB_TEX0) +#define FRAG_BIT_TEX1  (1 << FRAG_ATTRIB_TEX1) +#define FRAG_BIT_TEX2  (1 << FRAG_ATTRIB_TEX2) +#define FRAG_BIT_TEX3  (1 << FRAG_ATTRIB_TEX3) +#define FRAG_BIT_TEX4  (1 << FRAG_ATTRIB_TEX4) +#define FRAG_BIT_TEX5  (1 << FRAG_ATTRIB_TEX5) +#define FRAG_BIT_TEX6  (1 << FRAG_ATTRIB_TEX6) +#define FRAG_BIT_TEX7  (1 << FRAG_ATTRIB_TEX7) +#define FRAG_BIT_VAR0  (1 << FRAG_ATTRIB_VAR0) + +#define MAX_DRAW_BUFFERS 4 + +enum +{ +   FRAG_RESULT_COLR = 0, +   FRAG_RESULT_COLH = 1, +   FRAG_RESULT_DEPR = 2, +   FRAG_RESULT_DATA0 = 3, +   FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) +}; + + + +#if 1 /*XXX temp */ +/* Hardware version of a parsed fragment program.  "Derived" from the + * mesa fragment_program struct. + */ +struct i915_fragment_program +{ +#if 0 +   struct gl_fragment_program Base; +#else +   uint NumNativeInstructions; +   uint NumNativeAluInstructions; +   uint NumNativeTexInstructions; +   uint NumNativeTexIndirections; +#endif + +   boolean error;      /**< Set if i915_program_error() is called */ +#if 0 +   uint id;            /**< String id */ +   boolean translated; +#endif + +   /* Decls + instructions:  +    */ +   uint program[I915_PROGRAM_SIZE]; +   uint program_size; +    +#if 0 +   /* Constant buffer: +    */ +   float constant[I915_MAX_CONSTANT][4]; +   uint nr_constants; +#endif + +   /* Some of which are parameters:  +    */ +   struct +   { +      uint reg;               /* Hardware constant idx */ +      const float *values;    /* Pointer to tracked values */ +   } param[I915_MAX_CONSTANT]; +   uint nr_params; + +#if 0 +   uint param_state; +#endif +   uint wpos_tex; +}; +#endif + + +/*********************************************************************** + * Public interface for the 
compiler + */ + +void i915_compile_fragment_program( struct i915_context *i915, +				    struct i915_fragment_program *fp ); + + +/*********************************************************************** + * Private details of the compiler + */ + +struct i915_fp_compile { +   struct i915_fragment_program *fp; + +   struct pipe_shader_state *shader; + +   uint declarations[I915_PROGRAM_SIZE]; +   uint program[I915_PROGRAM_SIZE]; + +   uint constant_flags[I915_MAX_CONSTANT]; + +   struct pipe_constant_buffer *constants; + +   uint *csr;                 /* Cursor, points into program. +                                 */ + +   uint *decl;                /* Cursor, points into declarations. +                                 */ + +   uint decl_s;               /* flags for which s regs need to be decl'd */ +   uint decl_t;               /* flags for which t regs need to be decl'd */ + +   uint temp_flag;            /* Tracks temporary regs which are in +                                 * use. +                                 */ + +   uint utemp_flag;           /* Tracks TYPE_U temporary regs which are in +                                 * use. +                                 */ + +   uint nr_tex_indirect; +   uint nr_tex_insn; +   uint nr_alu_insn; +   uint nr_decl_insn; + +#if 0 +   float (*env_param)[4]; +#endif +}; + + +/* Having zero and one in here makes the definition of swizzle a lot + * easier. 
+ */ +#define UREG_TYPE_SHIFT               29 +#define UREG_NR_SHIFT                 24 +#define UREG_CHANNEL_X_NEGATE_SHIFT   23 +#define UREG_CHANNEL_X_SHIFT          20 +#define UREG_CHANNEL_Y_NEGATE_SHIFT   19 +#define UREG_CHANNEL_Y_SHIFT          16 +#define UREG_CHANNEL_Z_NEGATE_SHIFT   15 +#define UREG_CHANNEL_Z_SHIFT          12 +#define UREG_CHANNEL_W_NEGATE_SHIFT   11 +#define UREG_CHANNEL_W_SHIFT          8 +#define UREG_CHANNEL_ZERO_NEGATE_MBZ  5 +#define UREG_CHANNEL_ZERO_SHIFT       4 +#define UREG_CHANNEL_ONE_NEGATE_MBZ   1 +#define UREG_CHANNEL_ONE_SHIFT        0 + +#define UREG_BAD          0xffffffff    /* not a valid ureg */ + +#define X    SRC_X +#define Y    SRC_Y +#define Z    SRC_Z +#define W    SRC_W +#define ZERO SRC_ZERO +#define ONE  SRC_ONE + +/* Construct a ureg: + */ +#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) |		\ +			  ((nr)  << UREG_NR_SHIFT) |		\ +			  (X     << UREG_CHANNEL_X_SHIFT) |	\ +			  (Y     << UREG_CHANNEL_Y_SHIFT) |	\ +			  (Z     << UREG_CHANNEL_Z_SHIFT) |	\ +			  (W     << UREG_CHANNEL_W_SHIFT) |	\ +			  (ZERO  << UREG_CHANNEL_ZERO_SHIFT) |	\ +			  (ONE   << UREG_CHANNEL_ONE_SHIFT)) + +/* Move the selector nibble numbered 'channel' up into the X slot ... */ +#define GET_CHANNEL_SRC( reg, channel ) (((reg)<<((channel)*4)) & (0xf<<20)) +/* ... and move an X-slot nibble down into the slot numbered 'channel'. */ +#define CHANNEL_SRC( src, channel ) ((src)>>((channel)*4)) + +#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&REG_TYPE_MASK) +#define GET_UREG_NR(reg)   (((reg)>>UREG_NR_SHIFT)&REG_NR_MASK) + + + +#define UREG_XYZW_CHANNEL_MASK 0x00ffff00 + +/* One neat thing about the UREG representation: + * every channel selector (X..W, ZERO, ONE) names a nibble of the ureg, + * so a swizzle just copies the selected nibbles into the XYZW slots. + * ZERO and ONE are valid selectors, hence the bound is SRC_ONE. + */ +static INLINE int +swizzle(int reg, int x, int y, int z, int w) +{ +   assert(x >= 0 && x <= SRC_ONE); +   assert(y >= 0 && y <= SRC_ONE); +   assert(z >= 0 && z <= SRC_ONE); +   assert(w >= 0 && w <= SRC_ONE); +   return ((reg & ~UREG_XYZW_CHANNEL_MASK) | +           CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) | +           CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) | +           CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) | +           CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3)); +} + +/* Another neat thing about the UREG representation:   + 
* flipping per-channel negation is a single XOR of the negate bits; + * only the low bit of each of x, y, z, w is used. + */ +static INLINE int +negate(int reg, int x, int y, int z, int w) +{ +   return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | +                 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | +                 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | +                 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); +} + + +/*====================================================================== + * i915_fpc_emit.c + */ +extern uint i915_get_temp(struct i915_fp_compile *p); +extern uint i915_get_utemp(struct i915_fp_compile *p); +extern void i915_release_utemps(struct i915_fp_compile *p); + + +extern uint i915_emit_texld(struct i915_fp_compile *p, +                              uint dest, +                              uint destmask, +                              uint sampler, uint coord, uint op); + +extern uint i915_emit_arith(struct i915_fp_compile *p, +                              uint op, +                              uint dest, +                              uint mask, +                              uint saturate, +                              uint src0, uint src1, uint src2); + +extern uint i915_emit_decl(struct i915_fp_compile *p, +                             uint type, uint nr, uint d0_flags); + + +extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0); + +extern uint i915_emit_const2f(struct i915_fp_compile *p, +                                float c0, float c1); + +extern uint i915_emit_const4fv(struct i915_fp_compile *p, +                                 const float * c); + +extern uint i915_emit_const4f(struct i915_fp_compile *p, +                                float c0, float c1, +                                float c2, float c3); + + +#if 0 +extern uint i915_emit_param4fv(struct i915_fp_compile *p, +                                 const float * values); +#endif + + + +/*====================================================================== + * i915_fpc.c + */ +extern void i915_program_error(struct i915_fp_compile *p, +                               const char *msg); + + 
+/*====================================================================== + * i915_fpc_debug.c + */ +extern void i915_disassemble_program(const uint * program, uint sz); + +#if 0 +extern void i915_print_mesa_instructions( const struct prog_instruction *insn, +					  uint nr ); +#endif + +/*====================================================================== + * i915_fpc_translate.c + */ +void i915_fixup_depth_write(struct i915_fp_compile *p); + +extern void +i915_translate_program(struct i915_fp_compile *p, const struct tgsi_token *token); + + + +#endif diff --git a/src/mesa/pipe/i915simple/i915_fpc_debug.c b/src/mesa/pipe/i915simple/i915_fpc_debug.c new file mode 100644 index 0000000000..77deab38bb --- /dev/null +++ b/src/mesa/pipe/i915simple/i915_fpc_debug.c @@ -0,0 +1,346 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#if 0 +#include <stdio.h> +#endif + +#include "i915_reg.h" +#include "i915_fpc.h" + +#if 0 +#include "shader/program.h" +#include "shader/prog_instruction.h" +#include "shader/prog_print.h" +#endif + +static const char *opcodes[0x20] = { +   "NOP", +   "ADD", +   "MOV", +   "MUL", +   "MAD", +   "DP2ADD", +   "DP3", +   "DP4", +   "FRC", +   "RCP", +   "RSQ", +   "EXP", +   "LOG", +   "CMP", +   "MIN", +   "MAX", +   "FLR", +   "MOD", +   "TRC", +   "SGE", +   "SLT", +   "TEXLD", +   "TEXLDP", +   "TEXLDB", +   "TEXKILL", +   "DCL", +   "0x1a", +   "0x1b", +   "0x1c", +   "0x1d", +   "0x1e", +   "0x1f", +}; + + +static const int args[0x20] = { +   0,                           /* 0 nop */ +   2,                           /* 1 add */ +   1,                           /* 2 mov */ +   2,                           /* 3 m ul */ +   3,                           /* 4 mad */ +   3,                           /* 5 dp2add */ +   2,                           /* 6 dp3 */ +   2,                           /* 7 dp4 */ +   1,                           /* 8 frc */ +   1,                           /* 9 rcp */ +   1,                           /* a rsq */ +   1,                           /* b exp */ +   1,                           /* c log */ +   3,                           /* d cmp */ +   2,                           /* e min */ +   2,                           /* f max */ +   1,                           /* 10 flr */ +   1,                           /* 11 mod */ +   1,                           /* 12 trc */ +   2,                           /* 13 sge */ +   2,                           /* 14 slt */ +   1, +   1, +   1, +   
1, +   0, +   0, +   0, +   0, +   0, +   0, +   0, +}; + + +static const char *regname[0x8] = { +   "R", +   "T", +   "CONST", +   "S", +   "OC", +   "OD", +   "U", +   "UNKNOWN", +}; + +static void +print_reg_type_nr(uint type, uint nr) +{ +   switch (type) { +   case REG_TYPE_T: +      switch (nr) { +      case T_DIFFUSE: +         printf("T_DIFFUSE"); +         return; +      case T_SPECULAR: +         printf("T_SPECULAR"); +         return; +      case T_FOG_W: +         printf("T_FOG_W"); +         return; +      default: +         printf("T_TEX%d", nr); +         return; +      } +   case REG_TYPE_OC: +      if (nr == 0) { +         printf("oC"); +         return; +      } +      break; +   case REG_TYPE_OD: +      if (nr == 0) { +         printf("oD"); +         return; +      } +      break; +   default: +      break; +   } + +   printf("%s[%d]", regname[type], nr); +} + +#define REG_SWIZZLE_MASK 0x7777 +#define REG_NEGATE_MASK 0x8888 + +#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) |	\ +		      (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) |	\ +		      (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) |	\ +		      (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) + + +static void +print_reg_neg_swizzle(uint reg) +{ +   int i; + +   if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && +       (reg & REG_NEGATE_MASK) == 0) +      return; + +   printf("."); + +   for (i = 3; i >= 0; i--) { +      if (reg & (1 << ((i * 4) + 3))) +         printf("-"); + +      switch ((reg >> (i * 4)) & 0x7) { +      case 0: +         printf("x"); +         break; +      case 1: +         printf("y"); +         break; +      case 2: +         printf("z"); +         break; +      case 3: +         printf("w"); +         break; +      case 4: +         printf("0"); +         break; +      case 5: +         printf("1"); +         break; +      default: +         printf("?"); +         break; +      } +   } +} + + +static void +print_src_reg(uint dword) +{ +   uint nr = (dword >> A2_SRC2_NR_SHIFT) & 
REG_NR_MASK; +   uint type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; +   print_reg_type_nr(type, nr); +   print_reg_neg_swizzle(dword); +} + + +static void +print_dest_reg(uint dword) +{ +   uint nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; +   uint type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; +   print_reg_type_nr(type, nr); +   if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) +      return; +   printf("."); +   if (dword & A0_DEST_CHANNEL_X) +      printf("x"); +   if (dword & A0_DEST_CHANNEL_Y) +      printf("y"); +   if (dword & A0_DEST_CHANNEL_Z) +      printf("z"); +   if (dword & A0_DEST_CHANNEL_W) +      printf("w"); +} + + +#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) +#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) +#define GET_SRC2_REG(r)      (r) + + +static void +print_arith_op(uint opcode, const uint * program) +{ +   if (opcode != A0_NOP) { +      print_dest_reg(program[0]); +      if (program[0] & A0_DEST_SATURATE) +         printf(" = SATURATE "); +      else +         printf(" = "); +   } + +   printf("%s ", opcodes[opcode]); + +   print_src_reg(GET_SRC0_REG(program[0], program[1])); +   if (args[opcode] == 1) { +      printf("\n"); +      return; +   } + +   printf(", "); +   print_src_reg(GET_SRC1_REG(program[1], program[2])); +   if (args[opcode] == 2) { +      printf("\n"); +      return; +   } + +   printf(", "); +   print_src_reg(GET_SRC2_REG(program[2])); +   printf("\n"); +   return; +} + + +static void +print_tex_op(uint opcode, const uint * program) +{ +   print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); +   printf(" = "); + +   printf("%s ", opcodes[opcode]); + +   printf("S[%d],", program[0] & T0_SAMPLER_NR_MASK); + +   print_reg_type_nr((program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & +                     REG_TYPE_MASK, +                     (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); +   printf("\n"); +} + +static void +print_dcl_op(uint opcode, const 
uint * program) +{ +   printf("%s ", opcodes[opcode]); +   print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); +   printf("\n"); +} + + +void +i915_disassemble_program(const uint * program, uint sz) +{ +   uint size = program[0] & 0x1ff; +   int i; + +   printf("\t\tBEGIN\n"); + +   assert(size + 2 == sz); + +   program++; +   for (i = 1; i < sz; i += 3, program += 3) { +      uint opcode = program[0] & (0x1f << 24); + +      printf("\t\t"); + +      if ((int) opcode >= A0_NOP && opcode <= A0_SLT) +         print_arith_op(opcode >> 24, program); +      else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL) +         print_tex_op(opcode >> 24, program); +      else if (opcode == D0_DCL) +         print_dcl_op(opcode >> 24, program); +      else +         printf("Unknown opcode 0x%x\n", opcode); +   } + +   printf("\t\tEND\n\n"); +} + + +#if 0 +void i915_print_mesa_instructions( const struct prog_instruction *insn, +				   uint nr ) +{ +   uint i; +   for (i = 0; i < nr; i++, insn++) { +      printf("%3d: ", i); +      print_instruction(insn); +   } +} +#endif diff --git a/src/mesa/pipe/i915simple/i915_fpc_emit.c b/src/mesa/pipe/i915simple/i915_fpc_emit.c new file mode 100644 index 0000000000..7259bb503d --- /dev/null +++ b/src/mesa/pipe/i915simple/i915_fpc_emit.c @@ -0,0 +1,430 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#if 0 +#include <strings.h> +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#endif + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + + +#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) +#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT) +#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT) +#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT) +#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT) + +/* These are special, and don't have swizzle/negate bits. + */ +#define T0_SAMPLER( reg )     (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT) +#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \ +			       (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT)) + + +/* Macros for translating UREG's into the various register fields used + * by the I915 programmable unit. 
+ */ +#define UREG_A0_DEST_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT) +#define UREG_A0_SRC0_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT) +#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) +#define UREG_A1_SRC1_SHIFT_LEFT  (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT) +#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) +#define UREG_A2_SRC2_SHIFT_LEFT  (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT) + +#define UREG_MASK         0xffffff00 +#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \ +  			   (REG_NR_MASK << UREG_NR_SHIFT)) + + +#define I915_CONSTFLAG_PARAM 0x1f + +uint +i915_get_temp(struct i915_fp_compile *p) +{ +   int bit = ffs(~p->temp_flag); +   if (!bit) { +      i915_program_error(p, "i915_get_temp: out of temporaries\n"); +      return 0; +   } + +   p->temp_flag |= 1 << (bit - 1); +   return UREG(REG_TYPE_R, (bit - 1)); +} + + +uint +i915_get_utemp(struct i915_fp_compile * p) +{ +   int bit = ffs(~p->utemp_flag); +   if (!bit) { +      i915_program_error(p, "i915_get_utemp: out of temporaries\n"); +      return 0; +   } + +   p->utemp_flag |= 1 << (bit - 1); +   return UREG(REG_TYPE_U, (bit - 1)); +} + +void +i915_release_utemps(struct i915_fp_compile *p) +{ +   p->utemp_flag = ~0x7; +} + + +uint +i915_emit_decl(struct i915_fp_compile *p, +               uint type, uint nr, uint d0_flags) +{ +   uint reg = UREG(type, nr); + +   if (type == REG_TYPE_T) { +      if (p->decl_t & (1 << nr)) +         return reg; + +      p->decl_t |= (1 << nr); +   } +   else if (type == REG_TYPE_S) { +      if (p->decl_s & (1 << nr)) +         return reg; + +      p->decl_s |= (1 << nr); +   } +   else +      return reg; + +   *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); +   *(p->decl++) = D1_MBZ; +   *(p->decl++) = D2_MBZ; + +   p->nr_decl_insn++; +   return reg; +} + +uint +i915_emit_arith(struct i915_fp_compile * p, +                uint op, +                uint dest, +                
uint mask, +                uint saturate, uint src0, uint src1, uint src2) +{ +   uint c[3]; +   uint nr_const = 0; + +   assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); +   dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); +   assert(dest); + +   if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) +      c[nr_const++] = 0; +   if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) +      c[nr_const++] = 1; +   if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) +      c[nr_const++] = 2; + +   /* Recursively call this function to MOV additional const values +    * into temporary registers.  Use utemp registers for this - +    * currently shouldn't be possible to run out, but keep an eye on +    * this. +    */ +   if (nr_const > 1) { +      uint s[3], first, i, old_utemp_flag; + +      s[0] = src0; +      s[1] = src1; +      s[2] = src2; +      old_utemp_flag = p->utemp_flag; + +      first = GET_UREG_NR(s[c[0]]); +      for (i = 1; i < nr_const; i++) { +         if (GET_UREG_NR(s[c[i]]) != first) { +            uint tmp = i915_get_utemp(p); + +            i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, +                            s[c[i]], 0, 0); +            s[c[i]] = tmp; +         } +      } + +      src0 = s[0]; +      src1 = s[1]; +      src2 = s[2]; +      p->utemp_flag = old_utemp_flag;   /* restore */ +   } + +   *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); +   *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); +   *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); + +   p->nr_alu_insn++; +   return dest; +} + +uint i915_emit_texld( struct i915_fp_compile *p, +			uint dest, +			uint destmask, +			uint sampler, +			uint coord, +			uint op ) +{ +   if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) { +      /* No real way to work around this in the general case - need to +       * allocate and declare a new temporary register (a utemp won't +       * do).  Will fallback for now. 
+       */ +      i915_program_error(p, "Can't (yet) swizzle TEX arguments"); +      return 0; +   } + +   /* Don't worry about saturate as we only support   +    */ +   if (destmask != A0_DEST_CHANNEL_ALL) { +      uint tmp = i915_get_utemp(p); +      i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op ); +      i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); +      return dest; +   } +   else { +      assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); +      assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + +      if (GET_UREG_TYPE(coord) != REG_TYPE_T) { +	 p->nr_tex_indirect++; +      } + +      *(p->csr++) = (op |  +		     T0_DEST( dest ) | +		     T0_SAMPLER( sampler )); + +      *(p->csr++) = T1_ADDRESS_REG( coord ); +      *(p->csr++) = T2_MBZ; + +      p->nr_tex_insn++; +      return dest; +   } +} + + +uint +i915_emit_const1f(struct i915_fp_compile * p, float c0) +{ +   int reg, idx; + +   if (c0 == 0.0) +      return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); +   if (c0 == 1.0) +      return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); + +   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { +      if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM) +         continue; +      for (idx = 0; idx < 4; idx++) { +#if 0 +         if (!(p->constant_flags[reg] & (1 << idx)) || +             p->fp->constant[reg][idx] == c0) { +            p->fp->constant[reg][idx] = c0; +            p->constant_flags[reg] |= 1 << idx; +            if (reg + 1 > p->fp->nr_constants) +               p->fp->nr_constants = reg + 1; +            return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); +         } +#else +         if (!(p->constant_flags[reg] & (1 << idx)) || +             p->constants->constant[reg][idx] == c0) { +            p->constants->constant[reg][idx] = c0; +            p->constant_flags[reg] |= 1 << idx; +            if (reg + 1 > p->constants->nr_constants) +               p->constants->nr_constants = reg 
+ 1; +            return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); +         } +#endif +      } +   } + +   i915_program_error(p, "i915_emit_const1f: out of constants\n"); +   return 0; +} + +uint +i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) +{ +   int reg, idx; + +   if (c0 == 0.0) +      return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); +   if (c0 == 1.0) +      return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); + +   if (c1 == 0.0) +      return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); +   if (c1 == 1.0) +      return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); + +   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { +      if (p->constant_flags[reg] == 0xf || +          p->constant_flags[reg] == I915_CONSTFLAG_PARAM) +         continue; +      for (idx = 0; idx < 3; idx++) { +         if (!(p->constant_flags[reg] & (3 << idx))) { +#if 0 +            p->fp->constant[reg][idx] = c0; +            p->fp->constant[reg][idx + 1] = c1; +            p->constant_flags[reg] |= 3 << idx; +            if (reg + 1 > p->fp->nr_constants) +               p->fp->nr_constants = reg + 1; +            return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, +                           ONE); +#else +            p->constants->constant[reg][idx + 0] = c0; +            p->constants->constant[reg][idx + 1] = c1; +            p->constant_flags[reg] |= 3 << idx; +            if (reg + 1 > p->constants->nr_constants) +               p->constants->nr_constants = reg + 1; +            return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, +                           ONE); +#endif +         } +      } +   } + +   i915_program_error(p, "i915_emit_const2f: out of constants\n"); +   return 0; +} + + + +uint +i915_emit_const4f(struct i915_fp_compile * p, +                  float c0, float c1, float c2, float c3) +{ +   int reg; + +   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { +      if (p->constant_flags[reg] == 0xf 
&& +#if 0 +          p->fp->constant[reg][0] == c0 && +          p->fp->constant[reg][1] == c1 && +          p->fp->constant[reg][2] == c2 &&  +	  p->fp->constant[reg][3] == c3 +#else +          p->constants->constant[reg][0] == c0 && +          p->constants->constant[reg][1] == c1 && +          p->constants->constant[reg][2] == c2 && +          p->constants->constant[reg][3] == c3 +#endif +          ) { +         return UREG(REG_TYPE_CONST, reg); +      } +      else if (p->constant_flags[reg] == 0) { +#if 0 +         p->fp->constant[reg][0] = c0; +         p->fp->constant[reg][1] = c1; +         p->fp->constant[reg][2] = c2; +         p->fp->constant[reg][3] = c3; +#else +         p->constants->constant[reg][0] = c0; +         p->constants->constant[reg][1] = c1; +         p->constants->constant[reg][2] = c2; +         p->constants->constant[reg][3] = c3; +#endif +         p->constant_flags[reg] = 0xf; +#if 0 +         if (reg + 1 > p->fp->nr_constants) +            p->fp->nr_constants = reg + 1; +#else +         if (reg + 1 > p->constants->nr_constants) +            p->constants->nr_constants = reg + 1; +#endif +         return UREG(REG_TYPE_CONST, reg); +      } +   } + +   i915_program_error(p, "i915_emit_const4f: out of constants\n"); +   return 0; +} + + +uint +i915_emit_const4fv(struct i915_fp_compile * p, const float * c) +{ +   return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); +} + + +#if 00000/*UNUSED*/ +/* Reserve a slot in the constant file for a Mesa state parameter. + * These will later need to be tracked on statechanges, but that is + * done elsewhere. 
+ */ +uint +i915_emit_param4fv(struct i915_fp_compile * p, const float * values) +{ +   struct i915_fragment_program *fp = p->fp; +   int i; + +   for (i = 0; i < fp->nr_params; i++) { +      if (fp->param[i].values == values) +         return UREG(REG_TYPE_CONST, fp->param[i].reg); +   } + +#if 0 +   if (fp->nr_constants == I915_MAX_CONSTANT || +       fp->nr_params == I915_MAX_CONSTANT) { +#else +   if (p->constants->nr_constants == I915_MAX_CONSTANT || +       fp->nr_params == I915_MAX_CONSTANT) { +#endif +      i915_program_error(p, "i915_emit_param4fv: out of constants\n"); +      return 0; +   } + +   { +#if 0 +      int reg = fp->nr_constants++; +#else +      int reg = p->constants->nr_constants++; +#endif +      int i = fp->nr_params++; + +      assert (p->constant_flags[reg] == 0); +      p->constant_flags[reg] = I915_CONSTFLAG_PARAM; + +      fp->param[i].values = values; +      fp->param[i].reg = reg; + +      return UREG(REG_TYPE_CONST, reg); +   } +} +#endif diff --git a/src/mesa/pipe/i915simple/i915_fpc_translate.c b/src/mesa/pipe/i915simple/i915_fpc_translate.c new file mode 100644 index 0000000000..a034e734c3 --- /dev/null +++ b/src/mesa/pipe/i915simple/i915_fpc_translate.c @@ -0,0 +1,838 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + +#include "pipe/tgsi/core/tgsi_parse.h" + + +/* 1, -1/3!, 1/5!, -1/7! */ +static const float sin_constants[4] = { 1.0, +   -1.0 / (3 * 2 * 1), +   1.0 / (5 * 4 * 3 * 2 * 1), +   -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1) +}; + +/* 1, -1/2!, 1/4!, -1/6! */ +static const float cos_constants[4] = { 1.0, +   -1.0 / (2 * 1), +   1.0 / (4 * 3 * 2 * 1), +   -1.0 / (6 * 5 * 4 * 3 * 2 * 1) +}; + + +/** + * Construct a ureg for the given source register.  Will emit + * constants, apply swizzling and negation as needed. 
+ */ +static uint +src_vector(struct i915_fp_compile *p, +           const struct tgsi_full_src_register *source) +{ +   const uint index = source->SrcRegister.Index; +   uint src; + +   switch (source->SrcRegisterInd.File) { +   case TGSI_FILE_TEMPORARY: +      if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { +         i915_program_error(p, "Exceeded max temporary reg"); +         return 0; +      } +      src = UREG(REG_TYPE_R, index); +      break; +   case TGSI_FILE_INPUT: +      /* XXX: Packing COL1, FOGC into a single attribute works for +       * texenv programs, but will fail for real fragment programs +       * that use these attributes and expect them to be a full 4 +       * components wide.  Could use a texcoord to pass these +       * attributes if necessary, but that won't work in the general +       * case. +       *  +       * We also use a texture coordinate to pass wpos when possible. +       */ +      switch (index) { +      case FRAG_ATTRIB_WPOS: +         src = i915_emit_decl(p, REG_TYPE_T, p->fp->wpos_tex, D0_CHANNEL_ALL); +         break; +      case FRAG_ATTRIB_COL0: +         src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); +         break; +      case FRAG_ATTRIB_COL1: +         src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); +         src = swizzle(src, X, Y, Z, ONE); +         break; +      case FRAG_ATTRIB_FOGC: +         src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); +         src = swizzle(src, W, W, W, W); +         break; +      case FRAG_ATTRIB_TEX0: +      case FRAG_ATTRIB_TEX1: +      case FRAG_ATTRIB_TEX2: +      case FRAG_ATTRIB_TEX3: +      case FRAG_ATTRIB_TEX4: +      case FRAG_ATTRIB_TEX5: +      case FRAG_ATTRIB_TEX6: +      case FRAG_ATTRIB_TEX7: +         src = i915_emit_decl(p, REG_TYPE_T, +                              T_TEX0 + (index - FRAG_ATTRIB_TEX0), +                              D0_CHANNEL_ALL); +         break; + +      default: +         i915_program_error(p, 
"Bad source->Index"); +         return 0; +      } +      break; + +      /* Various parameters and env values.  All emitted to +       * hardware as program constants. +       */ +#if 0 +   case PROGRAM_LOCAL_PARAM: +      src = i915_emit_param4fv(p, program->Base.LocalParams[index]); +      break; +   case PROGRAM_ENV_PARAM: +      src = i915_emit_param4fv(p, p->env_param[index]); +      break; +   case PROGRAM_CONSTANT: +   case PROGRAM_STATE_VAR: +   case PROGRAM_NAMED_PARAM: +      src = i915_emit_param4fv( +	 p, program->Base.Parameters->ParameterValues[index]); +      break; +#else +   case TGSI_FILE_CONSTANT: +      src = UREG(REG_TYPE_CONST, index); +      break; +#endif + +   default: +      i915_program_error(p, "Bad source->File"); +      return 0; +   } + +   src = swizzle(src, +                 source->SrcRegister.SwizzleX, +                 source->SrcRegister.SwizzleY, +                 source->SrcRegister.SwizzleZ, +                 source->SrcRegister.SwizzleW); + +   assert(!source->SrcRegister.Negate); +   assert(!source->SrcRegisterExtSwz.NegateX); +   assert(!source->SrcRegisterExtSwz.NegateY); +   assert(!source->SrcRegisterExtSwz.NegateZ); +   assert(!source->SrcRegisterExtSwz.NegateW); +   assert(!source->SrcRegisterExtMod.Absolute); +   assert(!source->SrcRegisterExtMod.Negate); +#if 0 +   if (source->SrcRegister.Negate) +      negate all  + +   if (extended source swiz per component) +      src = negate(src, +                   source->SrcRegisterExtSwz.NegateX, +                   source->SrcRegisterExtSwz.NegateY, +                   source->SrcRegisterExtSwz.NegateZ, +                   source->SrcRegisterExtSwz.NegateW); +   if (mod.abs) +      absolute value + +   if (mod.negate) +      another negate; +#endif +   return src; +} + + +static uint +get_result_vector(struct i915_fp_compile *p, +                  const struct tgsi_full_dst_register *dest) +{ +   switch (dest->DstRegister.File) { +   case TGSI_FILE_OUTPUT: +      switch 
(dest->DstRegister.Index) { +      case FRAG_RESULT_COLR: +         return UREG(REG_TYPE_OC, 0); +      case FRAG_RESULT_DEPR: +         return UREG(REG_TYPE_OD, 0); +      default: +         i915_program_error(p, "Bad inst->DstReg.Index"); +         return 0; +      } +   case TGSI_FILE_TEMPORARY: +      return UREG(REG_TYPE_R, dest->DstRegister.Index); +   default: +      i915_program_error(p, "Bad inst->DstReg.File"); +      return 0; +   } +} + + +/** + * Compute flags for saturation and writemask. + */ +static uint +get_result_flags(const struct tgsi_full_instruction *inst) +{ +   const uint writeMask +      = inst->FullDstRegisters[0].DstRegister.WriteMask; +   uint flags = 0x0; + +   if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) +      flags |= A0_DEST_SATURATE; + +   if (writeMask & TGSI_WRITEMASK_X) +      flags |= A0_DEST_CHANNEL_X; +   if (writeMask & TGSI_WRITEMASK_Y) +      flags |= A0_DEST_CHANNEL_Y; +   if (writeMask & TGSI_WRITEMASK_Z) +      flags |= A0_DEST_CHANNEL_Z; +   if (writeMask & TGSI_WRITEMASK_W) +      flags |= A0_DEST_CHANNEL_W; + +   return flags; +} + + +/** + * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token + */ +static uint +translate_tex_src_target(struct i915_fp_compile *p, uint tex) +{ +   switch (tex) { +   case TGSI_TEXTURE_1D: +      return D0_SAMPLE_TYPE_2D; +   case TGSI_TEXTURE_2D: +      return D0_SAMPLE_TYPE_2D; +   case TGSI_TEXTURE_RECT: +      return D0_SAMPLE_TYPE_2D; +   case TGSI_TEXTURE_3D: +      return D0_SAMPLE_TYPE_VOLUME; +   case TGSI_TEXTURE_CUBE: +      return D0_SAMPLE_TYPE_CUBE; +   default: +      i915_program_error(p, "TexSrc type"); +      return 0; +   } +} + + +/** + * Generate texel lookup instruction. 
+ */ +static void +emit_tex(struct i915_fp_compile *p, +         const struct tgsi_full_instruction *inst, +         uint opcode) +{ +   uint texture = inst->InstructionExtTexture.Texture; +   uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; +   uint tex = translate_tex_src_target( p, texture ); +   uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); +   uint coord = src_vector( p, &inst->FullSrcRegisters[0]); + +   i915_emit_texld( p, +                    get_result_vector( p, &inst->FullDstRegisters[0] ), +                    get_result_flags( inst ), +                    sampler, +                    coord, +                    opcode); +} + + +/** + * Generate a simple arithmetic instruction + * \param opcode  the i915 opcode + * \param numArgs  the number of input/src arguments + */ +static void +emit_simple_arith(struct i915_fp_compile *p, +                  const struct tgsi_full_instruction *inst, +                  uint opcode, uint numArgs) +{ +   uint arg1, arg2, arg3; + +   assert(numArgs <= 3); + +   arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); +   arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); +   arg3 = (numArgs < 3) ? 
0 : src_vector( p, &inst->FullSrcRegisters[2] ); + +   i915_emit_arith( p, +                    opcode, +                    get_result_vector( p, &inst->FullDstRegisters[0]), +                    get_result_flags( inst ), 0, +                    arg1, +                    arg2, +                    arg3 ); +} + + +#define EMIT_1ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 1) +#define EMIT_2ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 2) +#define EMIT_3ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 3) + + + +static void +i915_translate_instruction(struct i915_fp_compile *p, +                           const struct tgsi_full_instruction *inst) +{ +   uint writemask; +   uint src0, src1, src2, flags; +   uint tmp = 0; + +   switch (inst->Instruction.Opcode) { +   case TGSI_OPCODE_ABS: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      i915_emit_arith(p, +                      A0_MAX, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      src0, negate(src0, 1, 1, 1, 1), 0); +      break; + +   case TGSI_OPCODE_ADD: +      EMIT_2ARG_ARITH(A0_ADD); +      break; + +   case TGSI_OPCODE_CMP: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); +      src2 = src_vector(p, &inst->FullSrcRegisters[2]); +      i915_emit_arith(p, A0_CMP,  +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst),  +                      0, src0, src2, src1);   /* NOTE: order of src2, src1 */ +      break; + +   case TGSI_OPCODE_COS: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      tmp = i915_get_utemp(p); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_X, 0, +                      src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0); + +      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 
0); + +      /* By choosing different taylor constants, could get rid of this mul: +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_X, 0, +                      tmp, i915_emit_const1f(p, (M_PI * 2)), 0); + +      /*  +       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1 +       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 +       * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1 +       * result = DP4 t0, cos_constants +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_XY, 0, +                      swizzle(tmp, X, X, ONE, ONE), +                      swizzle(tmp, X, ONE, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_XYZ, 0, +                      swizzle(tmp, X, Y, X, ONE), +                      swizzle(tmp, X, X, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_XYZ, 0, +                      swizzle(tmp, X, X, Z, ONE), +                      swizzle(tmp, Z, ONE, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_DP4, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(tmp, ONE, Z, Y, X), +                      i915_emit_const4fv(p, cos_constants), 0); +      break; + +   case TGSI_OPCODE_DP3: +      EMIT_2ARG_ARITH(A0_DP3); +      break; + +   case TGSI_OPCODE_DP4: +      EMIT_2ARG_ARITH(A0_DP4); +      break; + +   case TGSI_OPCODE_DPH: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); + +      i915_emit_arith(p, +                      A0_DP4, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(src0, X, Y, Z, ONE), src1, 0); 
+      break; + +   case TGSI_OPCODE_DST: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); + +      /* result[0] = 1    * 1; +       * result[1] = a[1] * b[1]; +       * result[2] = a[2] * 1; +       * result[3] = 1    * b[3]; +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(src0, ONE, Y, Z, ONE), +                      swizzle(src1, ONE, Y, ONE, W), 0); +      break; + +   case TGSI_OPCODE_EX2: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); + +      i915_emit_arith(p, +                      A0_EXP, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(src0, X, X, X, X), 0, 0); +      break; + +   case TGSI_OPCODE_FLR: +      EMIT_1ARG_ARITH(A0_FLR); +      break; + +   case TGSI_OPCODE_FRC: +      EMIT_1ARG_ARITH(A0_FRC); +      break; + +   case TGSI_OPCODE_KIL: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      tmp = i915_get_utemp(p); + +      i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL,   /* use a dummy dest reg */ +                      0, src0, T0_TEXKILL); +      break; + +   case TGSI_OPCODE_LG2: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); + +      i915_emit_arith(p, +                      A0_LOG, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(src0, X, X, X, X), 0, 0); +      break; + +   case TGSI_OPCODE_LIT: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      tmp = i915_get_utemp(p); + +      /* tmp = max( a.xyzw, a.00zw ) +       * XXX: Clamp tmp.w to -128..128 +       * tmp.y = log(tmp.y) +       * tmp.y = tmp.w * tmp.y +       * tmp.y = exp(tmp.y) +       * result = 
cmp (a.11-x1, a.1x01, a.1xy1 ) +       */ +      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, +                      src0, swizzle(src0, ZERO, ZERO, Z, W), 0); + +      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, +                      swizzle(tmp, Y, Y, Y, Y), 0, 0); + +      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, +                      swizzle(tmp, ZERO, Y, ZERO, ZERO), +                      swizzle(tmp, ZERO, W, ZERO, ZERO), 0); + +      i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, +                      swizzle(tmp, Y, Y, Y, Y), 0, 0); + +      i915_emit_arith(p, A0_CMP, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), +                      swizzle(tmp, ONE, X, ZERO, ONE), +                      swizzle(tmp, ONE, X, Y, ONE)); + +      break; + +   case TGSI_OPCODE_LRP: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); +      src2 = src_vector(p, &inst->FullSrcRegisters[2]); +      flags = get_result_flags(inst); +      tmp = i915_get_utemp(p); + +      /* b*a + c*(1-a) +       * +       * b*a + c - ca  +       * +       * tmp = b*a + c,  +       * result = (-c)*a + tmp  +       */ +      i915_emit_arith(p, A0_MAD, tmp, +                      flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); + +      i915_emit_arith(p, A0_MAD, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); +      break; + +   case TGSI_OPCODE_MAD: +      EMIT_3ARG_ARITH(A0_MAD); +      break; + +   case TGSI_OPCODE_MAX: +      EMIT_2ARG_ARITH(A0_MAX); +      break; + +   case TGSI_OPCODE_MIN: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); +      tmp = i915_get_utemp(p); +      
flags = get_result_flags(inst); + +      i915_emit_arith(p, +                      A0_MAX, +                      tmp, flags & A0_DEST_CHANNEL_ALL, 0, +                      negate(src0, 1, 1, 1, 1), +                      negate(src1, 1, 1, 1, 1), 0); + +      i915_emit_arith(p, +                      A0_MOV, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); +      break; + +   case TGSI_OPCODE_MOV: +      /* aka TGSI_OPCODE_SWZ */ +      EMIT_1ARG_ARITH(A0_MOV); +      break; + +   case TGSI_OPCODE_MUL: +      EMIT_2ARG_ARITH(A0_MUL); +      break; + +   case TGSI_OPCODE_POW: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); +      tmp = i915_get_utemp(p); +      flags = get_result_flags(inst); + +      /* XXX: masking on intermediate values, here and elsewhere. +       */ +      i915_emit_arith(p, +                      A0_LOG, +                      tmp, A0_DEST_CHANNEL_X, 0, +                      swizzle(src0, X, X, X, X), 0, 0); + +      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); + +      i915_emit_arith(p, +                      A0_EXP, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      flags, 0, swizzle(tmp, X, X, X, X), 0, 0); +      break; +       +   case TGSI_OPCODE_RCP: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); + +      i915_emit_arith(p, +                      A0_RCP, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                         get_result_flags(inst), 0, +                      swizzle(src0, X, X, X, X), 0, 0); +      break; + +   case TGSI_OPCODE_RSQ: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); + +      i915_emit_arith(p, +                      A0_RSQ, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      
get_result_flags(inst), 0, +                      swizzle(src0, X, X, X, X), 0, 0); +      break; + +   case TGSI_OPCODE_SCS: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      tmp = i915_get_utemp(p); + +      /*  +       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1 +       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x +       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x +       * scs.x = DP4 t1, sin_constants +       * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1 +       * scs.y = DP4 t1, cos_constants +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_XY, 0, +                      swizzle(src0, X, X, ONE, ONE), +                      swizzle(src0, X, ONE, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_ALL, 0, +                      swizzle(tmp, X, Y, X, Y), +                      swizzle(tmp, X, X, ONE, ONE), 0); + +      writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + +      if (writemask & TGSI_WRITEMASK_Y) { +         uint tmp1; + +         if (writemask & TGSI_WRITEMASK_X) +            tmp1 = i915_get_utemp(p); +         else +            tmp1 = tmp; + +         i915_emit_arith(p, +                         A0_MUL, +                         tmp1, A0_DEST_CHANNEL_ALL, 0, +                         swizzle(tmp, X, Y, Y, W), +                         swizzle(tmp, X, Z, ONE, ONE), 0); + +         i915_emit_arith(p, +                         A0_DP4, +                         get_result_vector(p, &inst->FullDstRegisters[0]), +                         A0_DEST_CHANNEL_Y, 0, +                         swizzle(tmp1, W, Z, Y, X), +                         i915_emit_const4fv(p, sin_constants), 0); +      } + +      if (writemask & TGSI_WRITEMASK_X) { +         i915_emit_arith(p, +                         A0_MUL, +                         tmp, A0_DEST_CHANNEL_XYZ, 0, +                         swizzle(tmp, X, X, Z, 
ONE), +                         swizzle(tmp, Z, ONE, ONE, ONE), 0); + +         i915_emit_arith(p, +                         A0_DP4, +                         get_result_vector(p, &inst->FullDstRegisters[0]), +                         A0_DEST_CHANNEL_X, 0, +                         swizzle(tmp, ONE, Z, Y, X), +                         i915_emit_const4fv(p, cos_constants), 0); +      } +      break; + +   case TGSI_OPCODE_SGE: +      EMIT_2ARG_ARITH(A0_SGE); +      break; + +   case TGSI_OPCODE_SIN: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      tmp = i915_get_utemp(p); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_X, 0, +                      src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0); + +      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + +      /* By choosing different taylor constants, could get rid of this mul: +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_X, 0, +                      tmp, i915_emit_const1f(p, (M_PI * 2)), 0); + +      /*  +       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1 +       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x +       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x +       * result = DP4 t1.wzyx, sin_constants +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_XY, 0, +                      swizzle(tmp, X, X, ONE, ONE), +                      swizzle(tmp, X, ONE, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_ALL, 0, +                      swizzle(tmp, X, Y, X, Y), +                      swizzle(tmp, X, X, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_ALL, 0, +                      swizzle(tmp, X, Y, Y, W), +                      swizzle(tmp, X, Z, ONE, ONE), 0); 
+ +      i915_emit_arith(p, +                      A0_DP4, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(tmp, W, Z, Y, X), +                      i915_emit_const4fv(p, sin_constants), 0); +      break; + +   case TGSI_OPCODE_SLT: +      EMIT_2ARG_ARITH(A0_SLT); +      break; + +   case TGSI_OPCODE_SUB: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); + +      i915_emit_arith(p, +                      A0_ADD, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      src0, negate(src1, 1, 1, 1, 1), 0); +      break; + +   case TGSI_OPCODE_TEX: +      emit_tex(p, inst, T0_TEXLD); +      break; + +   case TGSI_OPCODE_TXB: +      emit_tex(p, inst, T0_TEXLDB); +      break; + +   case TGSI_OPCODE_TXP: +      emit_tex(p, inst, T0_TEXLDP); +      break; + +   case TGSI_OPCODE_XPD: +      /* Cross product: +       *      result.x = src0.y * src1.z - src0.z * src1.y; +       *      result.y = src0.z * src1.x - src0.x * src1.z; +       *      result.z = src0.x * src1.y - src0.y * src1.x; +       *      result.w = undef; +       */ +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); +      tmp = i915_get_utemp(p); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_ALL, 0, +                      swizzle(src0, Z, X, Y, ONE), +                      swizzle(src1, Y, Z, X, ONE), 0); + +      i915_emit_arith(p, +                      A0_MAD, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(src0, Y, Z, X, ONE), +                      swizzle(src1, Z, X, Y, ONE), +                      negate(tmp, 1, 1, 1, 0)); +  
    break; + +   default: +      i915_program_error(p, "bad opcode"); +      return; +   } + +   i915_release_utemps(p); +} + + +/** + * Translate TGSI fragment shader into i915 hardware instructions. + * + * Possible concerns: + * + * SIN, COS -- could use another taylor step? + * LIT      -- results seem a little different to sw mesa + * LOG      -- different to mesa on negative numbers, but this is conformant. + *  + * Parse failures -- Mesa doesn't currently give a good indication + * internally whether a particular program string parsed or not.  This + * can lead to confusion -- hopefully we cope with it ok now. + */ +void +i915_translate_program(struct i915_fp_compile *p, +                       const struct tgsi_token *tokens) +{ +   struct tgsi_parse_context parse; + +   tgsi_parse_init( &parse, tokens ); + +   while( !tgsi_parse_end_of_tokens( &parse ) ) { + +      tgsi_parse_token( &parse ); + +      switch( parse.FullToken.Token.Type ) { +      case TGSI_TOKEN_TYPE_DECLARATION: +         assert(0); +         break; + +      case TGSI_TOKEN_TYPE_IMMEDIATE: +         assert(0); +         break; + +      case TGSI_TOKEN_TYPE_INSTRUCTION: +         i915_translate_instruction(p, &parse.FullToken.FullInstruction); +         break; + +      default: +         assert( 0 ); +      } + +   } /* while */ + +   tgsi_parse_free (&parse); +} + + + + +/* Rather than trying to intercept and jiggle depth writes during + * emit, just move the value into its correct position at the end of + * the program: + */ +void +i915_fixup_depth_write(struct i915_fp_compile *p) +{ +   if (p->shader->outputs_written & (1<<FRAG_RESULT_DEPR)) { +      uint depth = UREG(REG_TYPE_OD, 0); + +      i915_emit_arith(p, +                      A0_MOV, +                      depth, A0_DEST_CHANNEL_W, 0, +                      swizzle(depth, X, Y, Z, Z), 0, 0); +   } +} + + + + | 
