summaryrefslogtreecommitdiff
path: root/src/mesa/pipe/i915simple
diff options
context:
space:
mode:
authorBrian <brian.paul@tungstengraphics.com>2007-08-21 16:24:38 -0600
committerBrian <brian.paul@tungstengraphics.com>2007-08-21 16:24:38 -0600
commit3af1f3b9220733f5e3a76fe38fbc397974678234 (patch)
treec72bc7a37b1cadb570c00500d6f5584b66148dc2 /src/mesa/pipe/i915simple
parentd640198b2d52c104c707522e79d53a36f708ccd0 (diff)
Initial check-in of i915 fragment program translation (from tgsi).
Diffstat (limited to 'src/mesa/pipe/i915simple')
-rw-r--r--src/mesa/pipe/i915simple/i915_fpc.c183
-rw-r--r--src/mesa/pipe/i915simple/i915_fpc.h339
-rw-r--r--src/mesa/pipe/i915simple/i915_fpc_debug.c346
-rw-r--r--src/mesa/pipe/i915simple/i915_fpc_emit.c430
-rw-r--r--src/mesa/pipe/i915simple/i915_fpc_translate.c838
5 files changed, 2136 insertions, 0 deletions
diff --git a/src/mesa/pipe/i915simple/i915_fpc.c b/src/mesa/pipe/i915simple/i915_fpc.c
new file mode 100644
index 0000000000..fd0bbbc482
--- /dev/null
+++ b/src/mesa/pipe/i915simple/i915_fpc.c
@@ -0,0 +1,183 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#if 0
+#include <strings.h>
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#endif
+
+#include "i915_fpc.h"
+
+
+
+void
+i915_program_error(struct i915_fp_compile *p, const char *msg)
+{
+ fprintf(stderr, "i915_program_error: %s", msg);
+ p->fp->error = 1;
+}
+
+
+static struct i915_fp_compile *
+i915_init_compile(struct i915_context *i915, struct i915_fragment_program *fp)
+{
+ struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
+
+ p->fp = fp;
+#if 0
+ p->env_param = NULL; /*i915->intel.ctx.FragmentProgram.Parameters;*/
+#endif
+ p->constants = i915->fs.constants;
+ p->nr_tex_indirect = 1; /* correct? */
+ p->nr_tex_insn = 0;
+ p->nr_alu_insn = 0;
+ p->nr_decl_insn = 0;
+
+ memset(p->constant_flags, 0, sizeof(p->constant_flags));
+
+ p->csr = p->program;
+ p->decl = p->declarations;
+ p->decl_s = 0;
+ p->decl_t = 0;
+ p->temp_flag = 0xffff000;
+ p->utemp_flag = ~0x7;
+
+#if 0
+ p->fp->translated = 0;
+ p->fp->error = 0;
+ p->fp->nr_constants = 0;
+#endif
+ p->fp->wpos_tex = -1;
+ p->fp->nr_params = 0;
+
+ *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
+
+ return p;
+}
+
+/* Copy compile results to the fragment program struct and destroy the
+ * compilation context.
+ */
+static void
+i915_fini_compile(struct i915_fp_compile *p)
+{
+ uint program_size = p->csr - p->program;
+ uint decl_size = p->decl - p->declarations;
+
+ if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
+ i915_program_error(p, "Exceeded max nr indirect texture lookups");
+
+ if (p->nr_tex_insn > I915_MAX_TEX_INSN)
+ i915_program_error(p, "Exceeded max TEX instructions");
+
+ if (p->nr_alu_insn > I915_MAX_ALU_INSN)
+ i915_program_error(p, "Exceeded max ALU instructions");
+
+ if (p->nr_decl_insn > I915_MAX_DECL_INSN)
+ i915_program_error(p, "Exceeded max DECL instructions");
+
+ if (p->fp->error) {
+ p->fp->NumNativeInstructions = 0;
+ p->fp->NumNativeAluInstructions = 0;
+ p->fp->NumNativeTexInstructions = 0;
+ p->fp->NumNativeTexIndirections = 0;
+ return;
+ }
+ else {
+ p->fp->NumNativeInstructions = (p->nr_alu_insn +
+ p->nr_tex_insn +
+ p->nr_decl_insn);
+ p->fp->NumNativeAluInstructions = p->nr_alu_insn;
+ p->fp->NumNativeTexInstructions = p->nr_tex_insn;
+ p->fp->NumNativeTexIndirections = p->nr_tex_indirect;
+ }
+
+ p->declarations[0] |= program_size + decl_size - 2;
+
+ /* Copy compilation results to fragment program struct:
+ */
+ memcpy(p->fp->program,
+ p->declarations,
+ decl_size * sizeof(uint));
+
+ memcpy(p->fp->program + decl_size,
+ p->program,
+ program_size * sizeof(uint));
+
+ p->fp->program_size = program_size + decl_size;
+
+ /* Release the compilation struct:
+ */
+ free(p);
+}
+
+
+/**
+ * Find an unused texture coordinate slot to use for fragment WPOS.
+ * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found).
+ */
+static void
+find_wpos_space(struct i915_fp_compile *p)
+{
+ const uint inputs = p->shader->inputs_read;
+ uint i;
+
+ p->fp->wpos_tex = -1;
+
+ if (inputs & FRAG_BIT_WPOS) {
+ for (i = 0; i < I915_TEX_UNITS; i++) {
+ if ((inputs & (FRAG_BIT_TEX0 << i)) == 0) {
+ p->fp->wpos_tex = i;
+ return;
+ }
+ }
+
+ i915_program_error(p, "No free texcoord for wpos value");
+ }
+}
+
+
+
+void i915_compile_fragment_program( struct i915_context *i915,
+ struct i915_fragment_program *fp )
+{
+ struct i915_fp_compile *p = i915_init_compile(i915, fp);
+ struct tgsi_token *tokens = i915->fs.tokens;
+
+ find_wpos_space(p);
+
+ i915_translate_program(p, tokens);
+ i915_fixup_depth_write(p);
+
+ i915_fini_compile(p);
+#if 0
+ fp->translated = 1;
+#endif
+}
diff --git a/src/mesa/pipe/i915simple/i915_fpc.h b/src/mesa/pipe/i915simple/i915_fpc.h
new file mode 100644
index 0000000000..0a8bffcd9a
--- /dev/null
+++ b/src/mesa/pipe/i915simple/i915_fpc.h
@@ -0,0 +1,339 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef I915_FPC_H
+#define I915_FPC_H
+
+#include "pipe/p_util.h"
+
+#include "i915_context.h"
+#include "i915_reg.h"
+
+
+
+#define I915_PROGRAM_SIZE 192
+#define I915_MAX_CONSTANT 32
+
+#define MAX_VARYING 8
+
+enum
+{
+ FRAG_ATTRIB_WPOS = 0,
+ FRAG_ATTRIB_COL0 = 1,
+ FRAG_ATTRIB_COL1 = 2,
+ FRAG_ATTRIB_FOGC = 3,
+ FRAG_ATTRIB_TEX0 = 4,
+ FRAG_ATTRIB_TEX1 = 5,
+ FRAG_ATTRIB_TEX2 = 6,
+ FRAG_ATTRIB_TEX3 = 7,
+ FRAG_ATTRIB_TEX4 = 8,
+ FRAG_ATTRIB_TEX5 = 9,
+ FRAG_ATTRIB_TEX6 = 10,
+ FRAG_ATTRIB_TEX7 = 11,
+ FRAG_ATTRIB_VAR0 = 12, /**< shader varying */
+ FRAG_ATTRIB_MAX = (FRAG_ATTRIB_VAR0 + MAX_VARYING)
+};
+
+/**
+ * Bitflags for fragment program input attributes.
+ */
+/*@{*/
+#define FRAG_BIT_WPOS (1 << FRAG_ATTRIB_WPOS)
+#define FRAG_BIT_COL0 (1 << FRAG_ATTRIB_COL0)
+#define FRAG_BIT_COL1 (1 << FRAG_ATTRIB_COL1)
+#define FRAG_BIT_FOGC (1 << FRAG_ATTRIB_FOGC)
+#define FRAG_BIT_TEX0 (1 << FRAG_ATTRIB_TEX0)
+#define FRAG_BIT_TEX1 (1 << FRAG_ATTRIB_TEX1)
+#define FRAG_BIT_TEX2 (1 << FRAG_ATTRIB_TEX2)
+#define FRAG_BIT_TEX3 (1 << FRAG_ATTRIB_TEX3)
+#define FRAG_BIT_TEX4 (1 << FRAG_ATTRIB_TEX4)
+#define FRAG_BIT_TEX5 (1 << FRAG_ATTRIB_TEX5)
+#define FRAG_BIT_TEX6 (1 << FRAG_ATTRIB_TEX6)
+#define FRAG_BIT_TEX7 (1 << FRAG_ATTRIB_TEX7)
+#define FRAG_BIT_VAR0 (1 << FRAG_ATTRIB_VAR0)
+
+#define MAX_DRAW_BUFFERS 4
+
+enum
+{
+ FRAG_RESULT_COLR = 0,
+ FRAG_RESULT_COLH = 1,
+ FRAG_RESULT_DEPR = 2,
+ FRAG_RESULT_DATA0 = 3,
+ FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
+};
+
+
+
+#if 1 /*XXX temp */
+/* Hardware version of a parsed fragment program. "Derived" from the
+ * mesa fragment_program struct.
+ */
+struct i915_fragment_program
+{
+#if 0
+ struct gl_fragment_program Base;
+#else
+ uint NumNativeInstructions;
+ uint NumNativeAluInstructions;
+ uint NumNativeTexInstructions;
+ uint NumNativeTexIndirections;
+#endif
+
+ boolean error; /**< Set if i915_program_error() is called */
+#if 0
+ uint id; /**< String id */
+ boolean translated;
+#endif
+
+ /* Decls + instructions:
+ */
+ uint program[I915_PROGRAM_SIZE];
+ uint program_size;
+
+#if 0
+ /* Constant buffer:
+ */
+ float constant[I915_MAX_CONSTANT][4];
+ uint nr_constants;
+#endif
+
+ /* Some of which are parameters:
+ */
+ struct
+ {
+ uint reg; /* Hardware constant idx */
+ const float *values; /* Pointer to tracked values */
+ } param[I915_MAX_CONSTANT];
+ uint nr_params;
+
+#if 0
+ uint param_state;
+#endif
+ uint wpos_tex;
+};
+#endif
+
+
+/***********************************************************************
+ * Public interface for the compiler
+ */
+
+void i915_compile_fragment_program( struct i915_context *i915,
+ struct i915_fragment_program *fp );
+
+
+/***********************************************************************
+ * Private details of the compiler
+ */
+
+struct i915_fp_compile {
+ struct i915_fragment_program *fp;
+
+ struct pipe_shader_state *shader;
+
+ uint declarations[I915_PROGRAM_SIZE];
+ uint program[I915_PROGRAM_SIZE];
+
+ uint constant_flags[I915_MAX_CONSTANT];
+
+ struct pipe_constant_buffer *constants;
+
+ uint *csr; /* Cursor, points into program.
+ */
+
+ uint *decl; /* Cursor, points into declarations.
+ */
+
+ uint decl_s; /* flags for which s regs need to be decl'd */
+ uint decl_t; /* flags for which t regs need to be decl'd */
+
+ uint temp_flag; /* Tracks temporary regs which are in
+ * use.
+ */
+
+ uint utemp_flag; /* Tracks TYPE_U temporary regs which are in
+ * use.
+ */
+
+ uint nr_tex_indirect;
+ uint nr_tex_insn;
+ uint nr_alu_insn;
+ uint nr_decl_insn;
+
+#if 0
+ float (*env_param)[4];
+#endif
+};
+
+
+/* Having zero and one in here makes the definition of swizzle a lot
+ * easier.
+ */
+#define UREG_TYPE_SHIFT 29
+#define UREG_NR_SHIFT 24
+#define UREG_CHANNEL_X_NEGATE_SHIFT 23
+#define UREG_CHANNEL_X_SHIFT 20
+#define UREG_CHANNEL_Y_NEGATE_SHIFT 19
+#define UREG_CHANNEL_Y_SHIFT 16
+#define UREG_CHANNEL_Z_NEGATE_SHIFT 15
+#define UREG_CHANNEL_Z_SHIFT 12
+#define UREG_CHANNEL_W_NEGATE_SHIFT 11
+#define UREG_CHANNEL_W_SHIFT 8
+#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5
+#define UREG_CHANNEL_ZERO_SHIFT 4
+#define UREG_CHANNEL_ONE_NEGATE_MBZ 1
+#define UREG_CHANNEL_ONE_SHIFT 0
+
+#define UREG_BAD 0xffffffff /* not a valid ureg */
+
+#define X SRC_X
+#define Y SRC_Y
+#define Z SRC_Z
+#define W SRC_W
+#define ZERO SRC_ZERO
+#define ONE SRC_ONE
+
+/* Construct a ureg:
+ */
+#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \
+ ((nr) << UREG_NR_SHIFT) | \
+ (X << UREG_CHANNEL_X_SHIFT) | \
+ (Y << UREG_CHANNEL_Y_SHIFT) | \
+ (Z << UREG_CHANNEL_Z_SHIFT) | \
+ (W << UREG_CHANNEL_W_SHIFT) | \
+ (ZERO << UREG_CHANNEL_ZERO_SHIFT) | \
+ (ONE << UREG_CHANNEL_ONE_SHIFT))
+
+#define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & (0xf<<20))
+#define CHANNEL_SRC( src, channel ) (src>>(channel*4))
+
+#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&REG_TYPE_MASK)
+#define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)&REG_NR_MASK)
+
+
+
+#define UREG_XYZW_CHANNEL_MASK 0x00ffff00
+
+/* One neat thing about the UREG representation:
+ */
+static INLINE int
+swizzle(int reg, int x, int y, int z, int w)
+{
+ assert(x < 4);
+ assert(y < 4);
+ assert(z < 4);
+ assert(w < 4);
+ return ((reg & ~UREG_XYZW_CHANNEL_MASK) |
+ CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) |
+ CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) |
+ CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) |
+ CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3));
+}
+
+/* Another neat thing about the UREG representation:
+ */
+static INLINE int
+negate(int reg, int x, int y, int z, int w)
+{
+ return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
+ ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
+ ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
+ ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
+}
+
+
+extern uint i915_get_temp(struct i915_fp_compile *p);
+extern uint i915_get_utemp(struct i915_fp_compile *p);
+extern void i915_release_utemps(struct i915_fp_compile *p);
+
+
+extern uint i915_emit_texld(struct i915_fp_compile *p,
+ uint dest,
+ uint destmask,
+ uint sampler, uint coord, uint op);
+
+extern uint i915_emit_arith(struct i915_fp_compile *p,
+ uint op,
+ uint dest,
+ uint mask,
+ uint saturate,
+ uint src0, uint src1, uint src2);
+
+extern uint i915_emit_decl(struct i915_fp_compile *p,
+ uint type, uint nr, uint d0_flags);
+
+
+extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0);
+
+extern uint i915_emit_const2f(struct i915_fp_compile *p,
+ float c0, float c1);
+
+extern uint i915_emit_const4fv(struct i915_fp_compile *p,
+ const float * c);
+
+extern uint i915_emit_const4f(struct i915_fp_compile *p,
+ float c0, float c1,
+ float c2, float c3);
+
+
+#if 0
+extern uint i915_emit_param4fv(struct i915_fp_compile *p,
+ const float * values);
+#endif
+
+
+
+/*======================================================================
+ * i915_fpc_debug.c
+ */
+extern void i915_program_error(struct i915_fp_compile *p,
+ const char *msg);
+
+
+/*======================================================================
+ * i915_fpc_debug.c
+ */
+extern void i915_disassemble_program(const uint * program, uint sz);
+
+#if 0
+extern void i915_print_mesa_instructions( const struct prog_instruction *insn,
+ uint nr );
+#endif
+
+/*======================================================================
+ * i915_fpc_translate.c
+ */
+void i915_fixup_depth_write(struct i915_fp_compile *p);
+
+extern void
+i915_translate_program(struct i915_fp_compile *p, const struct tgsi_token *token);
+
+
+
+#endif
diff --git a/src/mesa/pipe/i915simple/i915_fpc_debug.c b/src/mesa/pipe/i915simple/i915_fpc_debug.c
new file mode 100644
index 0000000000..77deab38bb
--- /dev/null
+++ b/src/mesa/pipe/i915simple/i915_fpc_debug.c
@@ -0,0 +1,346 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#if 0
+#include <stdio.h>
+#endif
+
+#include "i915_reg.h"
+#include "i915_fpc.h"
+
+#if 0
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_print.h"
+#endif
+
+static const char *opcodes[0x20] = {
+ "NOP",
+ "ADD",
+ "MOV",
+ "MUL",
+ "MAD",
+ "DP2ADD",
+ "DP3",
+ "DP4",
+ "FRC",
+ "RCP",
+ "RSQ",
+ "EXP",
+ "LOG",
+ "CMP",
+ "MIN",
+ "MAX",
+ "FLR",
+ "MOD",
+ "TRC",
+ "SGE",
+ "SLT",
+ "TEXLD",
+ "TEXLDP",
+ "TEXLDB",
+ "TEXKILL",
+ "DCL",
+ "0x1a",
+ "0x1b",
+ "0x1c",
+ "0x1d",
+ "0x1e",
+ "0x1f",
+};
+
+
+static const int args[0x20] = {
+ 0, /* 0 nop */
+ 2, /* 1 add */
+ 1, /* 2 mov */
+ 2, /* 3 m ul */
+ 3, /* 4 mad */
+ 3, /* 5 dp2add */
+ 2, /* 6 dp3 */
+ 2, /* 7 dp4 */
+ 1, /* 8 frc */
+ 1, /* 9 rcp */
+ 1, /* a rsq */
+ 1, /* b exp */
+ 1, /* c log */
+ 3, /* d cmp */
+ 2, /* e min */
+ 2, /* f max */
+ 1, /* 10 flr */
+ 1, /* 11 mod */
+ 1, /* 12 trc */
+ 2, /* 13 sge */
+ 2, /* 14 slt */
+ 1,
+ 1,
+ 1,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+};
+
+
+static const char *regname[0x8] = {
+ "R",
+ "T",
+ "CONST",
+ "S",
+ "OC",
+ "OD",
+ "U",
+ "UNKNOWN",
+};
+
+static void
+print_reg_type_nr(uint type, uint nr)
+{
+ switch (type) {
+ case REG_TYPE_T:
+ switch (nr) {
+ case T_DIFFUSE:
+ printf("T_DIFFUSE");
+ return;
+ case T_SPECULAR:
+ printf("T_SPECULAR");
+ return;
+ case T_FOG_W:
+ printf("T_FOG_W");
+ return;
+ default:
+ printf("T_TEX%d", nr);
+ return;
+ }
+ case REG_TYPE_OC:
+ if (nr == 0) {
+ printf("oC");
+ return;
+ }
+ break;
+ case REG_TYPE_OD:
+ if (nr == 0) {
+ printf("oD");
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+
+ printf("%s[%d]", regname[type], nr);
+}
+
+#define REG_SWIZZLE_MASK 0x7777
+#define REG_NEGATE_MASK 0x8888
+
+#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \
+ (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \
+ (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \
+ (SRC_W << A2_SRC2_CHANNEL_W_SHIFT))
+
+
+static void
+print_reg_neg_swizzle(uint reg)
+{
+ int i;
+
+ if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW &&
+ (reg & REG_NEGATE_MASK) == 0)
+ return;
+
+ printf(".");
+
+ for (i = 3; i >= 0; i--) {
+ if (reg & (1 << ((i * 4) + 3)))
+ printf("-");
+
+ switch ((reg >> (i * 4)) & 0x7) {
+ case 0:
+ printf("x");
+ break;
+ case 1:
+ printf("y");
+ break;
+ case 2:
+ printf("z");
+ break;
+ case 3:
+ printf("w");
+ break;
+ case 4:
+ printf("0");
+ break;
+ case 5:
+ printf("1");
+ break;
+ default:
+ printf("?");
+ break;
+ }
+ }
+}
+
+
+static void
+print_src_reg(uint dword)
+{
+ uint nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK;
+ uint type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK;
+ print_reg_type_nr(type, nr);
+ print_reg_neg_swizzle(dword);
+}
+
+
+static void
+print_dest_reg(uint dword)
+{
+ uint nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK;
+ uint type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK;
+ print_reg_type_nr(type, nr);
+ if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL)
+ return;
+ printf(".");
+ if (dword & A0_DEST_CHANNEL_X)
+ printf("x");
+ if (dword & A0_DEST_CHANNEL_Y)
+ printf("y");
+ if (dword & A0_DEST_CHANNEL_Z)
+ printf("z");
+ if (dword & A0_DEST_CHANNEL_W)
+ printf("w");
+}
+
+
+#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT))
+#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT))
+#define GET_SRC2_REG(r) (r)
+
+
+static void
+print_arith_op(uint opcode, const uint * program)
+{
+ if (opcode != A0_NOP) {
+ print_dest_reg(program[0]);
+ if (program[0] & A0_DEST_SATURATE)
+ printf(" = SATURATE ");
+ else
+ printf(" = ");
+ }
+
+ printf("%s ", opcodes[opcode]);
+
+ print_src_reg(GET_SRC0_REG(program[0], program[1]));
+ if (args[opcode] == 1) {
+ printf("\n");
+ return;
+ }
+
+ printf(", ");
+ print_src_reg(GET_SRC1_REG(program[1], program[2]));
+ if (args[opcode] == 2) {
+ printf("\n");
+ return;
+ }
+
+ printf(", ");
+ print_src_reg(GET_SRC2_REG(program[2]));
+ printf("\n");
+ return;
+}
+
+
+static void
+print_tex_op(uint opcode, const uint * program)
+{
+ print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
+ printf(" = ");
+
+ printf("%s ", opcodes[opcode]);
+
+ printf("S[%d],", program[0] & T0_SAMPLER_NR_MASK);
+
+ print_reg_type_nr((program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) &
+ REG_TYPE_MASK,
+ (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK);
+ printf("\n");
+}
+
+static void
+print_dcl_op(uint opcode, const uint * program)
+{
+ printf("%s ", opcodes[opcode]);
+ print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
+ printf("\n");
+}
+
+
+void
+i915_disassemble_program(const uint * program, uint sz)
+{
+ uint size = program[0] & 0x1ff;
+ int i;
+
+ printf("\t\tBEGIN\n");
+
+ assert(size + 2 == sz);
+
+ program++;
+ for (i = 1; i < sz; i += 3, program += 3) {
+ uint opcode = program[0] & (0x1f << 24);
+
+ printf("\t\t");
+
+ if ((int) opcode >= A0_NOP && opcode <= A0_SLT)
+ print_arith_op(opcode >> 24, program);
+ else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL)
+ print_tex_op(opcode >> 24, program);
+ else if (opcode == D0_DCL)
+ print_dcl_op(opcode >> 24, program);
+ else
+ printf("Unknown opcode 0x%x\n", opcode);
+ }
+
+ printf("\t\tEND\n\n");
+}
+
+
+#if 0
+void i915_print_mesa_instructions( const struct prog_instruction *insn,
+ uint nr )
+{
+ uint i;
+ for (i = 0; i < nr; i++, insn++) {
+ printf("%3d: ", i);
+ print_instruction(insn);
+ }
+}
+#endif
diff --git a/src/mesa/pipe/i915simple/i915_fpc_emit.c b/src/mesa/pipe/i915simple/i915_fpc_emit.c
new file mode 100644
index 0000000000..7259bb503d
--- /dev/null
+++ b/src/mesa/pipe/i915simple/i915_fpc_emit.c
@@ -0,0 +1,430 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#if 0
+#include <strings.h>
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#endif
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_fpc.h"
+
+
+#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT)
+#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT)
+#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT)
+#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT)
+#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT)
+
+/* These are special, and don't have swizzle/negate bits.
+ */
+#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT)
+#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \
+ (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT))
+
+
+/* Macros for translating UREG's into the various register fields used
+ * by the I915 programmable unit.
+ */
+#define UREG_A0_DEST_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT)
+#define UREG_A0_SRC0_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT)
+#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
+#define UREG_A1_SRC1_SHIFT_LEFT (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT)
+#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
+#define UREG_A2_SRC2_SHIFT_LEFT (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT)
+
+#define UREG_MASK 0xffffff00
+#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \
+ (REG_NR_MASK << UREG_NR_SHIFT))
+
+
+#define I915_CONSTFLAG_PARAM 0x1f
+
+uint
+i915_get_temp(struct i915_fp_compile *p)
+{
+ int bit = ffs(~p->temp_flag);
+ if (!bit) {
+ i915_program_error(p, "i915_get_temp: out of temporaries\n");
+ return 0;
+ }
+
+ p->temp_flag |= 1 << (bit - 1);
+ return UREG(REG_TYPE_R, (bit - 1));
+}
+
+
+uint
+i915_get_utemp(struct i915_fp_compile * p)
+{
+ int bit = ffs(~p->utemp_flag);
+ if (!bit) {
+ i915_program_error(p, "i915_get_utemp: out of temporaries\n");
+ return 0;
+ }
+
+ p->utemp_flag |= 1 << (bit - 1);
+ return UREG(REG_TYPE_U, (bit - 1));
+}
+
+void
+i915_release_utemps(struct i915_fp_compile *p)
+{
+ p->utemp_flag = ~0x7;
+}
+
+
+uint
+i915_emit_decl(struct i915_fp_compile *p,
+ uint type, uint nr, uint d0_flags)
+{
+ uint reg = UREG(type, nr);
+
+ if (type == REG_TYPE_T) {
+ if (p->decl_t & (1 << nr))
+ return reg;
+
+ p->decl_t |= (1 << nr);
+ }
+ else if (type == REG_TYPE_S) {
+ if (p->decl_s & (1 << nr))
+ return reg;
+
+ p->decl_s |= (1 << nr);
+ }
+ else
+ return reg;
+
+ *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags);
+ *(p->decl++) = D1_MBZ;
+ *(p->decl++) = D2_MBZ;
+
+ p->nr_decl_insn++;
+ return reg;
+}
+
+uint
+i915_emit_arith(struct i915_fp_compile * p,
+ uint op,
+ uint dest,
+ uint mask,
+ uint saturate, uint src0, uint src1, uint src2)
+{
+ uint c[3];
+ uint nr_const = 0;
+
+ assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
+ dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));
+ assert(dest);
+
+ if (GET_UREG_TYPE(src0) == REG_TYPE_CONST)
+ c[nr_const++] = 0;
+ if (GET_UREG_TYPE(src1) == REG_TYPE_CONST)
+ c[nr_const++] = 1;
+ if (GET_UREG_TYPE(src2) == REG_TYPE_CONST)
+ c[nr_const++] = 2;
+
+ /* Recursively call this function to MOV additional const values
+ * into temporary registers. Use utemp registers for this -
+ * currently shouldn't be possible to run out, but keep an eye on
+ * this.
+ */
+ if (nr_const > 1) {
+ uint s[3], first, i, old_utemp_flag;
+
+ s[0] = src0;
+ s[1] = src1;
+ s[2] = src2;
+ old_utemp_flag = p->utemp_flag;
+
+ first = GET_UREG_NR(s[c[0]]);
+ for (i = 1; i < nr_const; i++) {
+ if (GET_UREG_NR(s[c[i]]) != first) {
+ uint tmp = i915_get_utemp(p);
+
+ i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
+ s[c[i]], 0, 0);
+ s[c[i]] = tmp;
+ }
+ }
+
+ src0 = s[0];
+ src1 = s[1];
+ src2 = s[2];
+ p->utemp_flag = old_utemp_flag; /* restore */
+ }
+
+ *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));
+ *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
+ *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
+
+ p->nr_alu_insn++;
+ return dest;
+}
+
+uint i915_emit_texld( struct i915_fp_compile *p,
+ uint dest,
+ uint destmask,
+ uint sampler,
+ uint coord,
+ uint op )
+{
+ if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) {
+ /* No real way to work around this in the general case - need to
+ * allocate and declare a new temporary register (a utemp won't
+ * do). Will fallback for now.
+ */
+ i915_program_error(p, "Can't (yet) swizzle TEX arguments");
+ return 0;
+ }
+
+ /* Don't worry about saturate as we only support
+ */
+ if (destmask != A0_DEST_CHANNEL_ALL) {
+ uint tmp = i915_get_utemp(p);
+ i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
+ i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
+ return dest;
+ }
+ else {
+ assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
+ assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
+
+ if (GET_UREG_TYPE(coord) != REG_TYPE_T) {
+ p->nr_tex_indirect++;
+ }
+
+ *(p->csr++) = (op |
+ T0_DEST( dest ) |
+ T0_SAMPLER( sampler ));
+
+ *(p->csr++) = T1_ADDRESS_REG( coord );
+ *(p->csr++) = T2_MBZ;
+
+ p->nr_tex_insn++;
+ return dest;
+ }
+}
+
+
+uint
+i915_emit_const1f(struct i915_fp_compile * p, float c0)
+{
+ int reg, idx;
+
+ if (c0 == 0.0)
+ return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
+ if (c0 == 1.0)
+ return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
+
+ for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
+ if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+ continue;
+ for (idx = 0; idx < 4; idx++) {
+#if 0
+ if (!(p->constant_flags[reg] & (1 << idx)) ||
+ p->fp->constant[reg][idx] == c0) {
+ p->fp->constant[reg][idx] = c0;
+ p->constant_flags[reg] |= 1 << idx;
+ if (reg + 1 > p->fp->nr_constants)
+ p->fp->nr_constants = reg + 1;
+ return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
+ }
+#else
+ if (!(p->constant_flags[reg] & (1 << idx)) ||
+ p->constants->constant[reg][idx] == c0) {
+ p->constants->constant[reg][idx] = c0;
+ p->constant_flags[reg] |= 1 << idx;
+ if (reg + 1 > p->constants->nr_constants)
+ p->constants->nr_constants = reg + 1;
+ return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
+ }
+#endif
+ }
+ }
+
+ i915_program_error(p, "i915_emit_const1f: out of constants\n");
+ return 0;
+}
+
+uint
+i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
+{
+ int reg, idx;
+
+ if (c0 == 0.0)
+ return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
+ if (c0 == 1.0)
+ return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);
+
+ if (c1 == 0.0)
+ return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
+ if (c1 == 1.0)
+ return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);
+
+ for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
+ if (p->constant_flags[reg] == 0xf ||
+ p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+ continue;
+ for (idx = 0; idx < 3; idx++) {
+ if (!(p->constant_flags[reg] & (3 << idx))) {
+#if 0
+ p->fp->constant[reg][idx] = c0;
+ p->fp->constant[reg][idx + 1] = c1;
+ p->constant_flags[reg] |= 3 << idx;
+ if (reg + 1 > p->fp->nr_constants)
+ p->fp->nr_constants = reg + 1;
+ return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO,
+ ONE);
+#else
+ p->constants->constant[reg][idx + 0] = c0;
+ p->constants->constant[reg][idx + 1] = c1;
+ p->constant_flags[reg] |= 3 << idx;
+ if (reg + 1 > p->constants->nr_constants)
+ p->constants->nr_constants = reg + 1;
+ return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO,
+ ONE);
+#endif
+ }
+ }
+ }
+
+ i915_program_error(p, "i915_emit_const2f: out of constants\n");
+ return 0;
+}
+
+
+
+uint
+i915_emit_const4f(struct i915_fp_compile * p,
+ float c0, float c1, float c2, float c3)
+{
+ int reg;
+
+ for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
+ if (p->constant_flags[reg] == 0xf &&
+#if 0
+ p->fp->constant[reg][0] == c0 &&
+ p->fp->constant[reg][1] == c1 &&
+ p->fp->constant[reg][2] == c2 &&
+ p->fp->constant[reg][3] == c3
+#else
+ p->constants->constant[reg][0] == c0 &&
+ p->constants->constant[reg][1] == c1 &&
+ p->constants->constant[reg][2] == c2 &&
+ p->constants->constant[reg][3] == c3
+#endif
+ ) {
+ return UREG(REG_TYPE_CONST, reg);
+ }
+ else if (p->constant_flags[reg] == 0) {
+#if 0
+ p->fp->constant[reg][0] = c0;
+ p->fp->constant[reg][1] = c1;
+ p->fp->constant[reg][2] = c2;
+ p->fp->constant[reg][3] = c3;
+#else
+ p->constants->constant[reg][0] = c0;
+ p->constants->constant[reg][1] = c1;
+ p->constants->constant[reg][2] = c2;
+ p->constants->constant[reg][3] = c3;
+#endif
+ p->constant_flags[reg] = 0xf;
+#if 0
+ if (reg + 1 > p->fp->nr_constants)
+ p->fp->nr_constants = reg + 1;
+#else
+ if (reg + 1 > p->constants->nr_constants)
+ p->constants->nr_constants = reg + 1;
+#endif
+ return UREG(REG_TYPE_CONST, reg);
+ }
+ }
+
+ i915_program_error(p, "i915_emit_const4f: out of constants\n");
+ return 0;
+}
+
+
+uint
+i915_emit_const4fv(struct i915_fp_compile * p, const float * c)
+{
+ return i915_emit_const4f(p, c[0], c[1], c[2], c[3]);
+}
+
+
+#if 00000/*UNUSED*/
+/* Reserve a slot in the constant file for a Mesa state parameter.
+ * These will later need to be tracked on statechanges, but that is
+ * done elsewhere.
+ */
+uint
+i915_emit_param4fv(struct i915_fp_compile * p, const float * values)
+{
+ struct i915_fragment_program *fp = p->fp;
+ int i;
+
+ for (i = 0; i < fp->nr_params; i++) {
+ if (fp->param[i].values == values)
+ return UREG(REG_TYPE_CONST, fp->param[i].reg);
+ }
+
+#if 0
+ if (fp->nr_constants == I915_MAX_CONSTANT ||
+ fp->nr_params == I915_MAX_CONSTANT) {
+#else
+ if (p->constants->nr_constants == I915_MAX_CONSTANT ||
+ fp->nr_params == I915_MAX_CONSTANT) {
+#endif
+ i915_program_error(p, "i915_emit_param4fv: out of constants\n");
+ return 0;
+ }
+
+ {
+#if 0
+ int reg = fp->nr_constants++;
+#else
+ int reg = p->constants->nr_constants++;
+#endif
+ int i = fp->nr_params++;
+
+ assert (p->constant_flags[reg] == 0);
+ p->constant_flags[reg] = I915_CONSTFLAG_PARAM;
+
+ fp->param[i].values = values;
+ fp->param[i].reg = reg;
+
+ return UREG(REG_TYPE_CONST, reg);
+ }
+}
+#endif
diff --git a/src/mesa/pipe/i915simple/i915_fpc_translate.c b/src/mesa/pipe/i915simple/i915_fpc_translate.c
new file mode 100644
index 0000000000..a034e734c3
--- /dev/null
+++ b/src/mesa/pipe/i915simple/i915_fpc_translate.c
@@ -0,0 +1,838 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_fpc.h"
+
+#include "pipe/tgsi/core/tgsi_parse.h"
+
+
+/* 1, -1/3!, 1/5!, -1/7! */
+static const float sin_constants[4] = { 1.0,
+ -1.0 / (3 * 2 * 1),
+ 1.0 / (5 * 4 * 3 * 2 * 1),
+ -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
+};
+
+/* 1, -1/2!, 1/4!, -1/6! */
+static const float cos_constants[4] = { 1.0,
+ -1.0 / (2 * 1),
+ 1.0 / (4 * 3 * 2 * 1),
+ -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
+};
+
+
+/**
+ * Construct a ureg for the given source register. Will emit
+ * constants, apply swizzling and negation as needed.
+ */
+static uint
+src_vector(struct i915_fp_compile *p,
+ const struct tgsi_full_src_register *source)
+{
+ const uint index = source->SrcRegister.Index;
+ uint src;
+
+ switch (source->SrcRegisterInd.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) {
+ i915_program_error(p, "Exceeded max temporary reg");
+ return 0;
+ }
+ src = UREG(REG_TYPE_R, index);
+ break;
+ case TGSI_FILE_INPUT:
+ /* XXX: Packing COL1, FOGC into a single attribute works for
+ * texenv programs, but will fail for real fragment programs
+ * that use these attributes and expect them to be a full 4
+ * components wide. Could use a texcoord to pass these
+ * attributes if necessary, but that won't work in the general
+ * case.
+ *
+ * We also use a texture coordinate to pass wpos when possible.
+ */
+ switch (index) {
+ case FRAG_ATTRIB_WPOS:
+ src = i915_emit_decl(p, REG_TYPE_T, p->fp->wpos_tex, D0_CHANNEL_ALL);
+ break;
+ case FRAG_ATTRIB_COL0:
+ src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
+ break;
+ case FRAG_ATTRIB_COL1:
+ src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
+ src = swizzle(src, X, Y, Z, ONE);
+ break;
+ case FRAG_ATTRIB_FOGC:
+ src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
+ src = swizzle(src, W, W, W, W);
+ break;
+ case FRAG_ATTRIB_TEX0:
+ case FRAG_ATTRIB_TEX1:
+ case FRAG_ATTRIB_TEX2:
+ case FRAG_ATTRIB_TEX3:
+ case FRAG_ATTRIB_TEX4:
+ case FRAG_ATTRIB_TEX5:
+ case FRAG_ATTRIB_TEX6:
+ case FRAG_ATTRIB_TEX7:
+ src = i915_emit_decl(p, REG_TYPE_T,
+ T_TEX0 + (index - FRAG_ATTRIB_TEX0),
+ D0_CHANNEL_ALL);
+ break;
+
+ default:
+ i915_program_error(p, "Bad source->Index");
+ return 0;
+ }
+ break;
+
+ /* Various parameters and env values. All emitted to
+ * hardware as program constants.
+ */
+#if 0
+ case PROGRAM_LOCAL_PARAM:
+ src = i915_emit_param4fv(p, program->Base.LocalParams[index]);
+ break;
+ case PROGRAM_ENV_PARAM:
+ src = i915_emit_param4fv(p, p->env_param[index]);
+ break;
+ case PROGRAM_CONSTANT:
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_NAMED_PARAM:
+ src = i915_emit_param4fv(
+ p, program->Base.Parameters->ParameterValues[index]);
+ break;
+#else
+ case TGSI_FILE_CONSTANT:
+ src = UREG(REG_TYPE_CONST, index);
+ break;
+#endif
+
+ default:
+ i915_program_error(p, "Bad source->File");
+ return 0;
+ }
+
+ src = swizzle(src,
+ source->SrcRegister.SwizzleX,
+ source->SrcRegister.SwizzleY,
+ source->SrcRegister.SwizzleZ,
+ source->SrcRegister.SwizzleW);
+
+ assert(!source->SrcRegister.Negate);
+ assert(!source->SrcRegisterExtSwz.NegateX);
+ assert(!source->SrcRegisterExtSwz.NegateY);
+ assert(!source->SrcRegisterExtSwz.NegateZ);
+ assert(!source->SrcRegisterExtSwz.NegateW);
+ assert(!source->SrcRegisterExtMod.Absolute);
+ assert(!source->SrcRegisterExtMod.Negate);
+#if 0
+ if (source->SrcRegister.Negate)
+ negate all
+
+ if (extended source swiz per component)
+ src = negate(src,
+ source->SrcRegisterExtSwz.NegateX,
+ source->SrcRegisterExtSwz.NegateY,
+ source->SrcRegisterExtSwz.NegateZ,
+ source->SrcRegisterExtSwz.NegateW);
+ if (mod.abs)
+ absolute value
+
+ if (mod.negate)
+ another negate;
+#endif
+ return src;
+}
+
+
+static uint
+get_result_vector(struct i915_fp_compile *p,
+ const struct tgsi_full_dst_register *dest)
+{
+ switch (dest->DstRegister.File) {
+ case TGSI_FILE_OUTPUT:
+ switch (dest->DstRegister.Index) {
+ case FRAG_RESULT_COLR:
+ return UREG(REG_TYPE_OC, 0);
+ case FRAG_RESULT_DEPR:
+ return UREG(REG_TYPE_OD, 0);
+ default:
+ i915_program_error(p, "Bad inst->DstReg.Index");
+ return 0;
+ }
+ case TGSI_FILE_TEMPORARY:
+ return UREG(REG_TYPE_R, dest->DstRegister.Index);
+ default:
+ i915_program_error(p, "Bad inst->DstReg.File");
+ return 0;
+ }
+}
+
+
+/**
+ * Compute flags for saturation and writemask.
+ */
+static uint
+get_result_flags(const struct tgsi_full_instruction *inst)
+{
+ const uint writeMask
+ = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ uint flags = 0x0;
+
+ if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
+ flags |= A0_DEST_SATURATE;
+
+ if (writeMask & TGSI_WRITEMASK_X)
+ flags |= A0_DEST_CHANNEL_X;
+ if (writeMask & TGSI_WRITEMASK_Y)
+ flags |= A0_DEST_CHANNEL_Y;
+ if (writeMask & TGSI_WRITEMASK_Z)
+ flags |= A0_DEST_CHANNEL_Z;
+ if (writeMask & TGSI_WRITEMASK_W)
+ flags |= A0_DEST_CHANNEL_W;
+
+ return flags;
+}
+
+
+/**
+ * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
+ */
+static uint
+translate_tex_src_target(struct i915_fp_compile *p, uint tex)
+{
+ switch (tex) {
+ case TGSI_TEXTURE_1D:
+ return D0_SAMPLE_TYPE_2D;
+ case TGSI_TEXTURE_2D:
+ return D0_SAMPLE_TYPE_2D;
+ case TGSI_TEXTURE_RECT:
+ return D0_SAMPLE_TYPE_2D;
+ case TGSI_TEXTURE_3D:
+ return D0_SAMPLE_TYPE_VOLUME;
+ case TGSI_TEXTURE_CUBE:
+ return D0_SAMPLE_TYPE_CUBE;
+ default:
+ i915_program_error(p, "TexSrc type");
+ return 0;
+ }
+}
+
+
+/**
+ * Generate texel lookup instruction.
+ */
+static void
+emit_tex(struct i915_fp_compile *p,
+ const struct tgsi_full_instruction *inst,
+ uint opcode)
+{
+ uint texture = inst->InstructionExtTexture.Texture;
+ uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ uint tex = translate_tex_src_target( p, texture );
+ uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
+ uint coord = src_vector( p, &inst->FullSrcRegisters[0]);
+
+ i915_emit_texld( p,
+ get_result_vector( p, &inst->FullDstRegisters[0] ),
+ get_result_flags( inst ),
+ sampler,
+ coord,
+ opcode);
+}
+
+
+/**
+ * Generate a simple arithmetic instruction
+ * \param opcode the i915 opcode
+ * \param numArgs the number of input/src arguments
+ */
+static void
+emit_simple_arith(struct i915_fp_compile *p,
+ const struct tgsi_full_instruction *inst,
+ uint opcode, uint numArgs)
+{
+ uint arg1, arg2, arg3;
+
+ assert(numArgs <= 3);
+
+ arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] );
+ arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] );
+ arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] );
+
+ i915_emit_arith( p,
+ opcode,
+ get_result_vector( p, &inst->FullDstRegisters[0]),
+ get_result_flags( inst ), 0,
+ arg1,
+ arg2,
+ arg3 );
+}
+
+
+#define EMIT_1ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 1)
+#define EMIT_2ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 2)
+#define EMIT_3ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 3)
+
+
+
+static void
+i915_translate_instruction(struct i915_fp_compile *p,
+ const struct tgsi_full_instruction *inst)
+{
+ uint writemask;
+ uint src0, src1, src2, flags;
+ uint tmp = 0;
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ i915_emit_arith(p,
+ A0_MAX,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ src0, negate(src0, 1, 1, 1, 1), 0);
+ break;
+
+ case TGSI_OPCODE_ADD:
+ EMIT_2ARG_ARITH(A0_ADD);
+ break;
+
+ case TGSI_OPCODE_CMP:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+ i915_emit_arith(p, A0_CMP,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst),
+ 0, src0, src2, src1); /* NOTE: order of src2, src1 */
+ break;
+
+ case TGSI_OPCODE_COS:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ tmp = i915_get_utemp(p);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_X, 0,
+ src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0);
+
+ i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+ /* By choosing different taylor constants, could get rid of this mul:
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_X, 0,
+ tmp, i915_emit_const1f(p, (M_PI * 2)), 0);
+
+ /*
+ * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
+ * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
+ * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
+ * result = DP4 t0, cos_constants
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XY, 0,
+ swizzle(tmp, X, X, ONE, ONE),
+ swizzle(tmp, X, ONE, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XYZ, 0,
+ swizzle(tmp, X, Y, X, ONE),
+ swizzle(tmp, X, X, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XYZ, 0,
+ swizzle(tmp, X, X, Z, ONE),
+ swizzle(tmp, Z, ONE, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_DP4,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(tmp, ONE, Z, Y, X),
+ i915_emit_const4fv(p, cos_constants), 0);
+ break;
+
+ case TGSI_OPCODE_DP3:
+ EMIT_2ARG_ARITH(A0_DP3);
+ break;
+
+ case TGSI_OPCODE_DP4:
+ EMIT_2ARG_ARITH(A0_DP4);
+ break;
+
+ case TGSI_OPCODE_DPH:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+
+ i915_emit_arith(p,
+ A0_DP4,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, Y, Z, ONE), src1, 0);
+ break;
+
+ case TGSI_OPCODE_DST:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+
+ /* result[0] = 1 * 1;
+ * result[1] = a[1] * b[1];
+ * result[2] = a[2] * 1;
+ * result[3] = 1 * b[3];
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, ONE, Y, Z, ONE),
+ swizzle(src1, ONE, Y, ONE, W), 0);
+ break;
+
+ case TGSI_OPCODE_EX2:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+ i915_emit_arith(p,
+ A0_EXP,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, X, X, X), 0, 0);
+ break;
+
+ case TGSI_OPCODE_FLR:
+ EMIT_1ARG_ARITH(A0_FLR);
+ break;
+
+ case TGSI_OPCODE_FRC:
+ EMIT_1ARG_ARITH(A0_FRC);
+ break;
+
+ case TGSI_OPCODE_KIL:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ tmp = i915_get_utemp(p);
+
+ i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
+ 0, src0, T0_TEXKILL);
+ break;
+
+ case TGSI_OPCODE_LG2:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+ i915_emit_arith(p,
+ A0_LOG,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, X, X, X), 0, 0);
+ break;
+
+ case TGSI_OPCODE_LIT:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ tmp = i915_get_utemp(p);
+
+ /* tmp = max( a.xyzw, a.00zw )
+ * XXX: Clamp tmp.w to -128..128
+ * tmp.y = log(tmp.y)
+ * tmp.y = tmp.w * tmp.y
+ * tmp.y = exp(tmp.y)
+ * result = cmp (a.11-x1, a.1x01, a.1xy1 )
+ */
+ i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
+ src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
+
+ i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
+ swizzle(tmp, Y, Y, Y, Y), 0, 0);
+
+ i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
+ swizzle(tmp, ZERO, Y, ZERO, ZERO),
+ swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
+
+ i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
+ swizzle(tmp, Y, Y, Y, Y), 0, 0);
+
+ i915_emit_arith(p, A0_CMP,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
+ swizzle(tmp, ONE, X, ZERO, ONE),
+ swizzle(tmp, ONE, X, Y, ONE));
+
+ break;
+
+ case TGSI_OPCODE_LRP:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+ flags = get_result_flags(inst);
+ tmp = i915_get_utemp(p);
+
+ /* b*a + c*(1-a)
+ *
+ * b*a + c - ca
+ *
+ * tmp = b*a + c,
+ * result = (-c)*a + tmp
+ */
+ i915_emit_arith(p, A0_MAD, tmp,
+ flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
+
+ i915_emit_arith(p, A0_MAD,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
+ break;
+
+ case TGSI_OPCODE_MAD:
+ EMIT_3ARG_ARITH(A0_MAD);
+ break;
+
+ case TGSI_OPCODE_MAX:
+ EMIT_2ARG_ARITH(A0_MAX);
+ break;
+
+ case TGSI_OPCODE_MIN:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ tmp = i915_get_utemp(p);
+ flags = get_result_flags(inst);
+
+ i915_emit_arith(p,
+ A0_MAX,
+ tmp, flags & A0_DEST_CHANNEL_ALL, 0,
+ negate(src0, 1, 1, 1, 1),
+ negate(src1, 1, 1, 1, 1), 0);
+
+ i915_emit_arith(p,
+ A0_MOV,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
+ break;
+
+ case TGSI_OPCODE_MOV:
+ /* aka TGSI_OPCODE_SWZ */
+ EMIT_1ARG_ARITH(A0_MOV);
+ break;
+
+ case TGSI_OPCODE_MUL:
+ EMIT_2ARG_ARITH(A0_MUL);
+ break;
+
+ case TGSI_OPCODE_POW:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ tmp = i915_get_utemp(p);
+ flags = get_result_flags(inst);
+
+ /* XXX: masking on intermediate values, here and elsewhere.
+ */
+ i915_emit_arith(p,
+ A0_LOG,
+ tmp, A0_DEST_CHANNEL_X, 0,
+ swizzle(src0, X, X, X, X), 0, 0);
+
+ i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
+
+ i915_emit_arith(p,
+ A0_EXP,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
+ break;
+
+ case TGSI_OPCODE_RCP:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+ i915_emit_arith(p,
+ A0_RCP,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, X, X, X), 0, 0);
+ break;
+
+ case TGSI_OPCODE_RSQ:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+ i915_emit_arith(p,
+ A0_RSQ,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, X, X, X), 0, 0);
+ break;
+
+ case TGSI_OPCODE_SCS:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ tmp = i915_get_utemp(p);
+
+ /*
+ * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
+ * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
+ * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
+ * scs.x = DP4 t1, sin_constants
+ * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
+ * scs.y = DP4 t1, cos_constants
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XY, 0,
+ swizzle(src0, X, X, ONE, ONE),
+ swizzle(src0, X, ONE, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_ALL, 0,
+ swizzle(tmp, X, Y, X, Y),
+ swizzle(tmp, X, X, ONE, ONE), 0);
+
+ writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+
+ if (writemask & TGSI_WRITEMASK_Y) {
+ uint tmp1;
+
+ if (writemask & TGSI_WRITEMASK_X)
+ tmp1 = i915_get_utemp(p);
+ else
+ tmp1 = tmp;
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp1, A0_DEST_CHANNEL_ALL, 0,
+ swizzle(tmp, X, Y, Y, W),
+ swizzle(tmp, X, Z, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_DP4,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ A0_DEST_CHANNEL_Y, 0,
+ swizzle(tmp1, W, Z, Y, X),
+ i915_emit_const4fv(p, sin_constants), 0);
+ }
+
+ if (writemask & TGSI_WRITEMASK_X) {
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XYZ, 0,
+ swizzle(tmp, X, X, Z, ONE),
+ swizzle(tmp, Z, ONE, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_DP4,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ A0_DEST_CHANNEL_X, 0,
+ swizzle(tmp, ONE, Z, Y, X),
+ i915_emit_const4fv(p, cos_constants), 0);
+ }
+ break;
+
+ case TGSI_OPCODE_SGE:
+ EMIT_2ARG_ARITH(A0_SGE);
+ break;
+
+ case TGSI_OPCODE_SIN:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ tmp = i915_get_utemp(p);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_X, 0,
+ src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0);
+
+ i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+ /* By choosing different taylor constants, could get rid of this mul:
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_X, 0,
+ tmp, i915_emit_const1f(p, (M_PI * 2)), 0);
+
+ /*
+ * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
+ * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
+ * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
+ * result = DP4 t1.wzyx, sin_constants
+ */
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_XY, 0,
+ swizzle(tmp, X, X, ONE, ONE),
+ swizzle(tmp, X, ONE, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_ALL, 0,
+ swizzle(tmp, X, Y, X, Y),
+ swizzle(tmp, X, X, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_ALL, 0,
+ swizzle(tmp, X, Y, Y, W),
+ swizzle(tmp, X, Z, ONE, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_DP4,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(tmp, W, Z, Y, X),
+ i915_emit_const4fv(p, sin_constants), 0);
+ break;
+
+ case TGSI_OPCODE_SLT:
+ EMIT_2ARG_ARITH(A0_SLT);
+ break;
+
+ case TGSI_OPCODE_SUB:
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+
+ i915_emit_arith(p,
+ A0_ADD,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ src0, negate(src1, 1, 1, 1, 1), 0);
+ break;
+
+ case TGSI_OPCODE_TEX:
+ emit_tex(p, inst, T0_TEXLD);
+ break;
+
+ case TGSI_OPCODE_TXB:
+ emit_tex(p, inst, T0_TEXLDB);
+ break;
+
+ case TGSI_OPCODE_TXP:
+ emit_tex(p, inst, T0_TEXLDP);
+ break;
+
+ case TGSI_OPCODE_XPD:
+ /* Cross product:
+ * result.x = src0.y * src1.z - src0.z * src1.y;
+ * result.y = src0.z * src1.x - src0.x * src1.z;
+ * result.z = src0.x * src1.y - src0.y * src1.x;
+ * result.w = undef;
+ */
+ src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ tmp = i915_get_utemp(p);
+
+ i915_emit_arith(p,
+ A0_MUL,
+ tmp, A0_DEST_CHANNEL_ALL, 0,
+ swizzle(src0, Z, X, Y, ONE),
+ swizzle(src1, Y, Z, X, ONE), 0);
+
+ i915_emit_arith(p,
+ A0_MAD,
+ get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_flags(inst), 0,
+ swizzle(src0, Y, Z, X, ONE),
+ swizzle(src1, Z, X, Y, ONE),
+ negate(tmp, 1, 1, 1, 0));
+ break;
+
+ default:
+ i915_program_error(p, "bad opcode");
+ return;
+ }
+
+ i915_release_utemps(p);
+}
+
+
+/**
+ * Translate TGSI fragment shader into i915 hardware instructions.
+ *
+ * Possible concerns:
+ *
+ * SIN, COS -- could use another taylor step?
+ * LIT -- results seem a little different to sw mesa
+ * LOG -- different to mesa on negative numbers, but this is conformant.
+ *
+ * Parse failures -- Mesa doesn't currently give a good indication
+ * internally whether a particular program string parsed or not. This
+ * can lead to confusion -- hopefully we cope with it ok now.
+ */
+void
+i915_translate_program(struct i915_fp_compile *p,
+ const struct tgsi_token *tokens)
+{
+ struct tgsi_parse_context parse;
+
+ tgsi_parse_init( &parse, tokens );
+
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ assert(0);
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ assert(0);
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ i915_translate_instruction(p, &parse.FullToken.FullInstruction);
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ } /* while */
+
+ tgsi_parse_free (&parse);
+}
+
+
+
+
+/* Rather than trying to intercept and jiggle depth writes during
+ * emit, just move the value into its correct position at the end of
+ * the program:
+ */
+void
+i915_fixup_depth_write(struct i915_fp_compile *p)
+{
+ if (p->shader->outputs_written & (1<<FRAG_RESULT_DEPR)) {
+ uint depth = UREG(REG_TYPE_OD, 0);
+
+ i915_emit_arith(p,
+ A0_MOV,
+ depth, A0_DEST_CHANNEL_W, 0,
+ swizzle(depth, X, Y, Z, Z), 0, 0);
+ }
+}
+
+
+
+