summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
diff options
context:
space:
mode:
authorZack Rusin <zackr@vmware.com>2010-02-08 18:05:22 -0500
committerZack Rusin <zackr@vmware.com>2010-02-08 18:22:11 -0500
commitc61bf363937f40624a5632745630d4f2b9907082 (patch)
treebb2388269117d6783400e17957ff8b0a68dfb5b5 /src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
parent74d23546c79fe8c8d87588fa423040e3b19a8fc6 (diff)
llvmpipe: export the tgsi translation code to a common layer
the llvmpipe tgsi translation is a lot more complete than what was in gallivm so replacing the latter with the former. this is needed since the draw llvm paths will use the same code. effectively the proven llvmpipe code becomes gallivm.
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c1467
1 files changed, 1467 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
new file mode 100644
index 0000000000..85e3b1bdd4
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -0,0 +1,1467 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * TGSI to LLVM IR translation -- SoA.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ *
+ * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
+ * Brian Paul, and others.
+ */
+
+#include "pipe/p_config.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_flow.h"
+#include "lp_bld_tgsi.h"
+
+
+#define LP_MAX_TEMPS 256
+#define LP_MAX_IMMEDIATES 256
+
+
+#define FOR_EACH_CHANNEL( CHAN )\
+ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
+
+#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+ ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
+
+#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
+ FOR_EACH_CHANNEL( CHAN )\
+ IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
+
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+#define QUAD_TOP_LEFT 0
+#define QUAD_TOP_RIGHT 1
+#define QUAD_BOTTOM_LEFT 2
+#define QUAD_BOTTOM_RIGHT 3
+
+
+struct lp_build_tgsi_soa_context
+{
+ struct lp_build_context base;
+
+ LLVMValueRef consts_ptr;
+ const LLVMValueRef *pos;
+ const LLVMValueRef (*inputs)[NUM_CHANNELS];
+ LLVMValueRef (*outputs)[NUM_CHANNELS];
+
+ struct lp_build_sampler_soa *sampler;
+
+ LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
+ LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
+
+ struct lp_build_mask_context *mask;
+};
+
+
+static const unsigned char
+swizzle_left[4] = {
+ QUAD_TOP_LEFT, QUAD_TOP_LEFT,
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
+};
+
+static const unsigned char
+swizzle_right[4] = {
+ QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
+ QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
+};
+
+static const unsigned char
+swizzle_top[4] = {
+ QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
+ QUAD_TOP_LEFT, QUAD_TOP_RIGHT
+};
+
+static const unsigned char
+swizzle_bottom[4] = {
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
+};
+
+
+static LLVMValueRef
+emit_ddx(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef src)
+{
+ LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
+ LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
+ return lp_build_sub(&bld->base, src_right, src_left);
+}
+
+
+static LLVMValueRef
+emit_ddy(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef src)
+{
+ LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
+ LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
+ return lp_build_sub(&bld->base, src_top, src_bottom);
+}
+
+
+/**
+ * Register fetch.
+ */
+static LLVMValueRef
+emit_fetch(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ unsigned index,
+ const unsigned chan_index )
+{
+ const struct tgsi_full_src_register *reg = &inst->Src[index];
+ unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
+ LLVMValueRef res;
+
+ switch (swizzle) {
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
+
+ switch (reg->Register.File) {
+ case TGSI_FILE_CONSTANT: {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
+ LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
+ LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+ res = lp_build_broadcast_scalar(&bld->base, scalar);
+ break;
+ }
+
+ case TGSI_FILE_IMMEDIATE:
+ res = bld->immediates[reg->Register.Index][swizzle];
+ assert(res);
+ break;
+
+ case TGSI_FILE_INPUT:
+ res = bld->inputs[reg->Register.Index][swizzle];
+ assert(res);
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ res = bld->temps[reg->Register.Index][swizzle];
+ if(!res)
+ return bld->base.undef;
+ break;
+
+ default:
+ assert( 0 );
+ return bld->base.undef;
+ }
+ break;
+
+ default:
+ assert( 0 );
+ return bld->base.undef;
+ }
+
+ switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
+ case TGSI_UTIL_SIGN_CLEAR:
+ res = lp_build_abs( &bld->base, res );
+ break;
+
+ case TGSI_UTIL_SIGN_SET:
+ /* TODO: Use bitwese OR for floating point */
+ res = lp_build_abs( &bld->base, res );
+ res = LLVMBuildNeg( bld->base.builder, res, "" );
+ break;
+
+ case TGSI_UTIL_SIGN_TOGGLE:
+ res = LLVMBuildNeg( bld->base.builder, res, "" );
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+ }
+
+ return res;
+}
+
+
+/**
+ * Register fetch with derivatives.
+ */
+static void
+emit_fetch_deriv(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ unsigned index,
+ const unsigned chan_index,
+ LLVMValueRef *res,
+ LLVMValueRef *ddx,
+ LLVMValueRef *ddy)
+{
+ LLVMValueRef src;
+
+ src = emit_fetch(bld, inst, index, chan_index);
+
+ if(res)
+ *res = src;
+
+ /* TODO: use interpolation coeffs for inputs */
+
+ if(ddx)
+ *ddx = emit_ddx(bld, src);
+
+ if(ddy)
+ *ddy = emit_ddy(bld, src);
+}
+
+
+/**
+ * Register store.
+ */
+static void
+emit_store(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ unsigned index,
+ unsigned chan_index,
+ LLVMValueRef value)
+{
+ const struct tgsi_full_dst_register *reg = &inst->Dst[index];
+
+ switch( inst->Instruction.Saturate ) {
+ case TGSI_SAT_NONE:
+ break;
+
+ case TGSI_SAT_ZERO_ONE:
+ value = lp_build_max(&bld->base, value, bld->base.zero);
+ value = lp_build_min(&bld->base, value, bld->base.one);
+ break;
+
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
+ value = lp_build_min(&bld->base, value, bld->base.one);
+ break;
+
+ default:
+ assert(0);
+ }
+
+ switch( reg->Register.File ) {
+ case TGSI_FILE_OUTPUT:
+ bld->outputs[reg->Register.Index][chan_index] = value;
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ bld->temps[reg->Register.Index][chan_index] = value;
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ /* FIXME */
+ assert(0);
+ break;
+
+ default:
+ assert( 0 );
+ }
+}
+
+
+/**
+ * High-level instruction translators.
+ */
+
+
+static void
+emit_tex( struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ boolean apply_lodbias,
+ boolean projected,
+ LLVMValueRef *texel)
+{
+ const uint unit = inst->Src[1].Register.Index;
+ LLVMValueRef lodbias;
+ LLVMValueRef oow = NULL;
+ LLVMValueRef coords[3];
+ unsigned num_coords;
+ unsigned i;
+
+ switch (inst->Texture.Texture) {
+ case TGSI_TEXTURE_1D:
+ num_coords = 1;
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ num_coords = 2;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ num_coords = 3;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ if(apply_lodbias)
+ lodbias = emit_fetch( bld, inst, 0, 3 );
+ else
+ lodbias = bld->base.zero;
+
+ if (projected) {
+ oow = emit_fetch( bld, inst, 0, 3 );
+ oow = lp_build_rcp(&bld->base, oow);
+ }
+
+ for (i = 0; i < num_coords; i++) {
+ coords[i] = emit_fetch( bld, inst, 0, i );
+ if (projected)
+ coords[i] = lp_build_mul(&bld->base, coords[i], oow);
+ }
+ for (i = num_coords; i < 3; i++) {
+ coords[i] = bld->base.undef;
+ }
+
+ bld->sampler->emit_fetch_texel(bld->sampler,
+ bld->base.builder,
+ bld->base.type,
+ unit, num_coords, coords, lodbias,
+ texel);
+}
+
+
+static void
+emit_kil(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst )
+{
+ const struct tgsi_full_src_register *reg = &inst->Src[0];
+ LLVMValueRef terms[NUM_CHANNELS];
+ LLVMValueRef mask;
+ unsigned chan_index;
+
+ memset(&terms, 0, sizeof terms);
+
+ FOR_EACH_CHANNEL( chan_index ) {
+ unsigned swizzle;
+
+ /* Unswizzle channel */
+ swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
+
+ /* Check if the component has not been already tested. */
+ assert(swizzle < NUM_CHANNELS);
+ if( !terms[swizzle] )
+ /* TODO: change the comparison operator instead of setting the sign */
+ terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
+ }
+
+ mask = NULL;
+ FOR_EACH_CHANNEL( chan_index ) {
+ if(terms[chan_index]) {
+ LLVMValueRef chan_mask;
+
+ chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
+
+ if(mask)
+ mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
+ else
+ mask = chan_mask;
+ }
+ }
+
+ if(mask)
+ lp_build_mask_update(bld->mask, mask);
+}
+
+
+/**
+ * Check if inst src/dest regs use indirect addressing into temporary
+ * register file.
+ */
+static boolean
+indirect_temp_reference(const struct tgsi_full_instruction *inst)
+{
+ uint i;
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *reg = &inst->Src[i];
+ if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+ reg->Register.Indirect)
+ return TRUE;
+ }
+ for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+ const struct tgsi_full_dst_register *reg = &inst->Dst[i];
+ if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+ reg->Register.Indirect)
+ return TRUE;
+ }
+ return FALSE;
+}
+
+
+static int
+emit_instruction(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ const struct tgsi_opcode_info *info)
+{
+ unsigned chan_index;
+ LLVMValueRef src0, src1, src2;
+ LLVMValueRef tmp0, tmp1, tmp2;
+ LLVMValueRef tmp3 = NULL;
+ LLVMValueRef tmp4 = NULL;
+ LLVMValueRef tmp5 = NULL;
+ LLVMValueRef tmp6 = NULL;
+ LLVMValueRef tmp7 = NULL;
+ LLVMValueRef res;
+ LLVMValueRef dst0[NUM_CHANNELS];
+
+ /* we can't handle indirect addressing into temp register file yet */
+ if (indirect_temp_reference(inst))
+ return FALSE;
+
+ assert(info->num_dst <= 1);
+ if(info->num_dst) {
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.undef;
+ }
+ }
+
+ switch (inst->Instruction.Opcode) {
+#if 0
+ case TGSI_OPCODE_ARL:
+ /* FIXME */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ emit_flr(bld, 0, 0);
+ emit_f2it( bld, 0 );
+ dst0[chan_index] = tmp0;
+ }
+ break;
+#endif
+
+ case TGSI_OPCODE_MOV:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_LIT:
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
+ dst0[CHAN_X] = bld->base.one;
+ }
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+ dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
+ }
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
+ /* XMM[1] = SrcReg[0].yyyy */
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
+ /* XMM[1] = max(XMM[1], 0) */
+ tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
+ /* XMM[2] = SrcReg[0].wwww */
+ tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
+ tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
+ dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
+ }
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
+ dst0[CHAN_W] = bld->base.one;
+ }
+ break;
+
+ case TGSI_OPCODE_RCP:
+ /* TGSI_OPCODE_RECIP */
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+ res = lp_build_rcp(&bld->base, src0);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = res;
+ }
+ break;
+
+ case TGSI_OPCODE_RSQ:
+ /* TGSI_OPCODE_RECIPSQRT */
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+ src0 = lp_build_abs(&bld->base, src0);
+ res = lp_build_rsqrt(&bld->base, src0);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = res;
+ }
+ break;
+
+ case TGSI_OPCODE_EXP:
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
+ LLVMValueRef *p_exp2_int_part = NULL;
+ LLVMValueRef *p_frac_part = NULL;
+ LLVMValueRef *p_exp2 = NULL;
+
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
+ p_exp2_int_part = &tmp0;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
+ p_frac_part = &tmp1;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
+ p_exp2 = &tmp2;
+
+ lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
+
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
+ dst0[CHAN_X] = tmp0;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
+ dst0[CHAN_Y] = tmp1;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
+ dst0[CHAN_Z] = tmp2;
+ }
+ /* dst.w = 1.0 */
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
+ dst0[CHAN_W] = bld->base.one;
+ }
+ break;
+
+ case TGSI_OPCODE_LOG:
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
+ LLVMValueRef *p_floor_log2 = NULL;
+ LLVMValueRef *p_exp = NULL;
+ LLVMValueRef *p_log2 = NULL;
+
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+ src0 = lp_build_abs( &bld->base, src0 );
+
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
+ p_floor_log2 = &tmp0;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
+ p_exp = &tmp1;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
+ p_log2 = &tmp2;
+
+ lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
+
+ /* dst.x = floor(lg2(abs(src.x))) */
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
+ dst0[CHAN_X] = tmp0;
+ /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
+ dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
+ }
+ /* dst.z = lg2(abs(src.x)) */
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
+ dst0[CHAN_Z] = tmp2;
+ }
+ /* dst.w = 1.0 */
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
+ dst0[CHAN_W] = bld->base.one;
+ }
+ break;
+
+ case TGSI_OPCODE_MUL:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
+ }
+ break;
+
+ case TGSI_OPCODE_ADD:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
+ }
+ break;
+
+ case TGSI_OPCODE_DP3:
+ /* TGSI_OPCODE_DOT3 */
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
+ }
+ break;
+
+ case TGSI_OPCODE_DP4:
+ /* TGSI_OPCODE_DOT4 */
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
+ }
+ break;
+
+ case TGSI_OPCODE_DST:
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
+ dst0[CHAN_X] = bld->base.one;
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
+ dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
+ dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
+ dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_MIN:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
+ }
+ break;
+
+ case TGSI_OPCODE_MAX:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
+ }
+ break;
+
+ case TGSI_OPCODE_SLT:
+ /* TGSI_OPCODE_SETLT */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
+ break;
+
+ case TGSI_OPCODE_SGE:
+ /* TGSI_OPCODE_SETGE */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
+ break;
+
+ case TGSI_OPCODE_MAD:
+ /* TGSI_OPCODE_MADD */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ tmp1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
+ dst0[chan_index] = tmp0;
+ }
+ break;
+
+ case TGSI_OPCODE_SUB:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ tmp1 = emit_fetch( bld, inst, 1, chan_index );
+ dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
+ }
+ break;
+
+ case TGSI_OPCODE_LRP:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp0 = lp_build_sub( &bld->base, src1, src2 );
+ tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
+ dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
+ }
+ break;
+
+ case TGSI_OPCODE_CND:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
+ }
+ break;
+
+ case TGSI_OPCODE_DP2A:
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
+ tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
+ }
+ break;
+
+ case TGSI_OPCODE_FRC:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ tmp0 = lp_build_floor(&bld->base, src0);
+ tmp0 = lp_build_sub(&bld->base, src0, tmp0);
+ dst0[chan_index] = tmp0;
+ }
+ break;
+
+ case TGSI_OPCODE_CLAMP:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp0 = lp_build_max(&bld->base, tmp0, src1);
+ tmp0 = lp_build_min(&bld->base, tmp0, src2);
+ dst0[chan_index] = tmp0;
+ }
+ break;
+
+ case TGSI_OPCODE_FLR:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
+ }
+ break;
+
+ case TGSI_OPCODE_ROUND:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_round(&bld->base, tmp0);
+ }
+ break;
+
+ case TGSI_OPCODE_EX2: {
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp0 = lp_build_exp2( &bld->base, tmp0);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
+ }
+ break;
+ }
+
+ case TGSI_OPCODE_LG2:
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp0 = lp_build_log2( &bld->base, tmp0);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
+ }
+ break;
+
+ case TGSI_OPCODE_POW:
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+ src1 = emit_fetch( bld, inst, 1, CHAN_X );
+ res = lp_build_pow( &bld->base, src0, src1 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = res;
+ }
+ break;
+
+ case TGSI_OPCODE_XPD:
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
+ tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
+ }
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
+ tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
+ tmp2 = tmp0;
+ tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
+ tmp5 = tmp3;
+ tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
+ tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
+ dst0[CHAN_X] = tmp2;
+ }
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
+ tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
+ tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
+ tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
+ dst0[CHAN_Y] = tmp3;
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
+ tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
+ tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
+ dst0[CHAN_Z] = tmp5;
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
+ dst0[CHAN_W] = bld->base.one;
+ }
+ break;
+
+ case TGSI_OPCODE_ABS:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
+ }
+ break;
+
+ case TGSI_OPCODE_RCC:
+ /* deprecated? */
+ assert(0);
+ return 0;
+
+ case TGSI_OPCODE_DPH:
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
+ }
+ break;
+
+ case TGSI_OPCODE_COS:
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp0 = lp_build_cos( &bld->base, tmp0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
+ }
+ break;
+
+ case TGSI_OPCODE_DDX:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
+ }
+ break;
+
+ case TGSI_OPCODE_DDY:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
+ }
+ break;
+
+ case TGSI_OPCODE_KILP:
+ /* predicated kill */
+ /* FIXME */
+ return 0;
+ break;
+
+ case TGSI_OPCODE_KIL:
+ /* conditional kill */
+ emit_kil( bld, inst );
+ break;
+
+ case TGSI_OPCODE_PK2H:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_PK2US:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_PK4B:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_PK4UB:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_RFL:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_SEQ:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
+ break;
+
+ case TGSI_OPCODE_SFL:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.zero;
+ }
+ break;
+
+ case TGSI_OPCODE_SGT:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
+ break;
+
+ case TGSI_OPCODE_SIN:
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp0 = lp_build_sin( &bld->base, tmp0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
+ }
+ break;
+
+ case TGSI_OPCODE_SLE:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
+ break;
+
+ case TGSI_OPCODE_SNE:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
+ break;
+
+ case TGSI_OPCODE_STR:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.one;
+ }
+ break;
+
+ case TGSI_OPCODE_TEX:
+ emit_tex( bld, inst, FALSE, FALSE, dst0 );
+ break;
+
+ case TGSI_OPCODE_TXD:
+ /* FIXME */
+ return 0;
+ break;
+
+ case TGSI_OPCODE_UP2H:
+ /* deprecated */
+ assert (0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_UP2US:
+ /* deprecated */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_UP4B:
+ /* deprecated */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_UP4UB:
+ /* deprecated */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_X2D:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ARA:
+ /* deprecated */
+ assert(0);
+ return 0;
+ break;
+
+#if 0
+ case TGSI_OPCODE_ARR:
+ /* FIXME */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ emit_rnd( bld, 0, 0 );
+ emit_f2it( bld, 0 );
+ dst0[chan_index] = tmp0;
+ }
+ break;
+#endif
+
+ case TGSI_OPCODE_BRA:
+ /* deprecated */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_CAL:
+ /* FIXME */
+ return 0;
+ break;
+
+ case TGSI_OPCODE_RET:
+ /* FIXME */
+ return 0;
+ break;
+
+ case TGSI_OPCODE_END:
+ break;
+
+ case TGSI_OPCODE_SSG:
+ /* TGSI_OPCODE_SGN */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
+ }
+ break;
+
+ case TGSI_OPCODE_CMP:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
+ }
+ break;
+
+ case TGSI_OPCODE_SCS:
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
+ dst0[CHAN_Z] = bld->base.zero;
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
+ dst0[CHAN_W] = bld->base.one;
+ }
+ break;
+
+ case TGSI_OPCODE_TXB:
+ emit_tex( bld, inst, TRUE, FALSE, dst0 );
+ break;
+
+ case TGSI_OPCODE_NRM:
+ /* fall-through */
+ case TGSI_OPCODE_NRM4:
+ /* 3 or 4-component normalization */
+ {
+ uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
+
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
+ IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
+ IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
+ (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
+
+ /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
+
+ /* xmm4 = src.x */
+ /* xmm0 = src.x * src.x */
+ tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
+ tmp4 = tmp0;
+ }
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
+
+ /* xmm5 = src.y */
+ /* xmm0 = xmm0 + src.y * src.y */
+ tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
+ tmp5 = tmp1;
+ }
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+
+ /* xmm6 = src.z */
+ /* xmm0 = xmm0 + src.z * src.z */
+ tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
+ tmp6 = tmp1;
+ }
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+
+ if (dims == 4) {
+ /* xmm7 = src.w */
+ /* xmm0 = xmm0 + src.w * src.w */
+ tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
+ tmp7 = tmp1;
+ }
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ }
+
+ /* xmm1 = 1 / sqrt(xmm0) */
+ tmp1 = lp_build_rsqrt( &bld->base, tmp0);
+
+ /* dst.x = xmm1 * src.x */
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
+ dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
+ }
+
+ /* dst.y = xmm1 * src.y */
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
+ dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
+ }
+
+ /* dst.z = xmm1 * src.z */
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
+ dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
+ }
+
+ /* dst.w = xmm1 * src.w */
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
+ dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
+ }
+ }
+
+ /* dst.w = 1.0 */
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
+ dst0[CHAN_W] = bld->base.one;
+ }
+ }
+ break;
+
+ case TGSI_OPCODE_DIV:
+ /* deprecated */
+ assert( 0 );
+ return 0;
+ break;
+
+ case TGSI_OPCODE_DP2:
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
+ }
+ break;
+
+ case TGSI_OPCODE_TXL:
+ emit_tex( bld, inst, TRUE, FALSE, dst0 );
+ break;
+
+ case TGSI_OPCODE_TXP:
+ emit_tex( bld, inst, FALSE, TRUE, dst0 );
+ break;
+
+ case TGSI_OPCODE_BRK:
+ /* FIXME */
+ return 0;
+ break;
+
+ case TGSI_OPCODE_IF:
+ /* FIXME */
+ return 0;
+ break;
+
+ case TGSI_OPCODE_BGNFOR:
+ /* deprecated */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_REP:
+ /* deprecated */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ELSE:
+ /* FIXME */
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ENDIF:
+ /* FIXME */
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ENDFOR:
+ /* deprecated */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ENDREP:
+ /* deprecated */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_PUSHA:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_POPA:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_CEIL:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
+ }
+ break;
+
+ case TGSI_OPCODE_I2F:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_NOT:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_TRUNC:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
+ }
+ break;
+
+ case TGSI_OPCODE_SHL:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ISHR:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_AND:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_OR:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_MOD:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_XOR:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_SAD:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_TXF:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_TXQ:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_CONT:
+ /* deprecated? */
+ assert(0);
+ return 0;
+ break;
+
+ case TGSI_OPCODE_EMIT:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ENDPRIM:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_NOP:
+ break;
+
+ default:
+ return 0;
+ }
+
+ if(info->num_dst) {
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
+ }
+ }
+
+ return 1;
+}
+
+
+void
+lp_build_tgsi_soa(LLVMBuilderRef builder,
+ const struct tgsi_token *tokens,
+ struct lp_type type,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef consts_ptr,
+ const LLVMValueRef *pos,
+ const LLVMValueRef (*inputs)[NUM_CHANNELS],
+ LLVMValueRef (*outputs)[NUM_CHANNELS],
+ struct lp_build_sampler_soa *sampler)
+{
+ struct lp_build_tgsi_soa_context bld;
+ struct tgsi_parse_context parse;
+ uint num_immediates = 0;
+ unsigned i;
+
+ /* Setup build context */
+ memset(&bld, 0, sizeof bld);
+ lp_build_context_init(&bld.base, builder, type);
+ bld.mask = mask;
+ bld.pos = pos;
+ bld.inputs = inputs;
+ bld.outputs = outputs;
+ bld.consts_ptr = consts_ptr;
+ bld.sampler = sampler;
+
+ tgsi_parse_init( &parse, tokens );
+
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ /* Inputs already interpolated */
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
+ const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
+ if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
+ _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+ info ? info->mnemonic : "<invalid>");
+ }
+
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ /* simply copy the immediate values into the next immediates[] slot */
+ {
+ const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+ assert(size <= 4);
+ assert(num_immediates < LP_MAX_IMMEDIATES);
+ for( i = 0; i < size; ++i )
+ bld.immediates[num_immediates][i] =
+ lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
+ for( i = size; i < 4; ++i )
+ bld.immediates[num_immediates][i] = bld.base.undef;
+ num_immediates++;
+ }
+ break;
+
+ default:
+ assert( 0 );
+ }
+ }
+
+ tgsi_parse_free( &parse );
+}
+