summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJosé Fonseca <jfonseca@vmware.com>2010-09-02 12:45:50 +0100
committerJosé Fonseca <jfonseca@vmware.com>2010-09-11 13:25:49 +0100
commit93158622e26df1227f6eca8d619b5521f4cb1368 (patch)
tree289177e80a42c16ec34e44b0cb52be4e90e301e8
parent58daea741fa21fe3f89fd7bf106df1545c5b21af (diff)
gallivm: Basic AoS TGSI -> LLVM IR.
Essentially a variation of the SoA version.
-rw-r--r--src/gallium/auxiliary/Makefile1
-rw-r--r--src/gallium/auxiliary/SConscript1
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.h29
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c1125
4 files changed, 1156 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index eb86d83d2a..5388f4ecd5 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -171,6 +171,7 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_sample_soa.c \
gallivm/lp_bld_struct.c \
gallivm/lp_bld_swizzle.c \
+ gallivm/lp_bld_tgsi_aos.c \
gallivm/lp_bld_tgsi_soa.c \
gallivm/lp_bld_type.c \
draw/draw_llvm.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 6210ada990..ba8be2efd1 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -222,6 +222,7 @@ if env['llvm']:
'gallivm/lp_bld_sample_soa.c',
'gallivm/lp_bld_struct.c',
'gallivm/lp_bld_swizzle.c',
+ 'gallivm/lp_bld_tgsi_aos.c',
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
'draw/draw_llvm.c',
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index e60ad1d904..bc9140852f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -82,6 +82,24 @@ struct lp_build_sampler_soa
};
+/**
+ * Sampler code generator interface for the AoS TGSI translation.
+ *
+ * lp_build_tgsi_aos() calls emit_fetch_texel() for each texture opcode it
+ * translates; a NULL sampler makes texture opcodes yield an undefined value.
+ */
+struct lp_build_sampler_aos
+{
+ /* NOTE(review): presumably releases the generator; it is not called by the
+ * AoS translator itself -- confirm ownership with the callers. */
+ void
+ (*destroy)( struct lp_build_sampler_aos *sampler );
+
+ /*
+ * Emit the code for one texel fetch and return the fetched value.
+ *
+ * The translator passes its builder and vector type, the instruction's
+ * texture target, the sampler unit taken from the sampler source register,
+ * the coordinates operand, the derivatives (explicit operands for
+ * LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, placeholder values otherwise) and
+ * the modifier selected by the opcode.
+ */
+ LLVMValueRef
+ (*emit_fetch_texel)( const struct lp_build_sampler_aos *sampler,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ unsigned target, /* TGSI_TEXTURE_* */
+ unsigned unit,
+ LLVMValueRef coords,
+ LLVMValueRef ddx,
+ LLVMValueRef ddy,
+ enum lp_build_tex_modifier modifier);
+};
+
+
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
@@ -95,4 +113,15 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_shader_info *info);
+/**
+ * Translate a TGSI shader into LLVM IR using AoS (array of structures)
+ * vectors, i.e. one xyzw register per vector.
+ *
+ * \param builder     LLVM builder the IR is emitted with
+ * \param tokens      TGSI token stream of the shader
+ * \param type        vector type of the shader registers
+ * \param consts_ptr  pointer to the constants, indexed per scalar
+ *                    (register index * 4 + channel)
+ * \param inputs      input register values, indexed by register index
+ * \param outputs     receives, per declared output register, a pointer to
+ *                    the stack slot holding its value
+ * \param sampler     texture sampling code generator; may be NULL, in which
+ *                    case texture opcodes produce an undefined value
+ * \param info        shader info; only indirect_files is read
+ */
+void
+lp_build_tgsi_aos(LLVMBuilderRef builder,
+ const struct tgsi_token *tokens,
+ struct lp_type type,
+ LLVMValueRef consts_ptr,
+ const LLVMValueRef *inputs,
+ LLVMValueRef *outputs,
+ struct lp_build_sampler_aos *sampler,
+ const struct tgsi_shader_info *info);
+
+
#endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
new file mode 100644
index 0000000000..2793a1a397
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -0,0 +1,1125 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * TGSI to LLVM IR translation -- AoS.
+ *
+ * FIXME:
+ * - No control flow support: the existing control flow code should be factored
+ * out of the SoA code into a common module and shared.
+ * - No derivatives. Derivative logic should be pluggable, just like the samplers.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_config.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_scan.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_flow.h"
+#include "lp_bld_quad.h"
+#include "lp_bld_tgsi.h"
+#include "lp_bld_limits.h"
+#include "lp_bld_debug.h"
+
+
+#define LP_MAX_INSTRUCTIONS 256
+
+
+/**
+ * State carried through the translation of one TGSI shader to AoS LLVM IR.
+ */
+struct lp_build_tgsi_aos_context
+{
+ /* Builder for the shader's register vector type */
+ struct lp_build_context base;
+
+ /* Builder for integer masks and indices */
+ struct lp_build_context int_bld;
+
+ /* Caller-supplied constants pointer, input values and output slots */
+ LLVMValueRef consts_ptr;
+ const LLVMValueRef *inputs;
+ LLVMValueRef *outputs;
+
+ /* Texture sampling code generator; NULL when none was supplied */
+ const struct lp_build_sampler_aos *sampler;
+
+ /* immediates[] holds constant vectors; temps[], addr[] and preds[] hold
+ * pointers to the stack slots allocated when the register is declared.
+ */
+ LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
+ LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
+ LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
+ LLVMValueRef preds[LP_MAX_TGSI_PREDS];
+
+ /* We allocate this array of temps if (1 << TGSI_FILE_TEMPORARY) is
+ * set in the indirect_files field.
+ * The temps[] array above is not filled in then.
+ */
+ LLVMValueRef temps_array;
+
+ /** bitmask indicating which register files are accessed indirectly */
+ unsigned indirect_files;
+
+ /* Copy of the shader's instructions, and the capacity (in instructions)
+ * of that heap array.
+ */
+ struct tgsi_full_instruction *instructions;
+ uint max_instructions;
+};
+
+
+/**
+ * Fetch one source operand of an instruction as an AoS vector.
+ *
+ * The value is read from the operand's register file, then the sign
+ * modifiers (absolute value, negate) and finally the source swizzle are
+ * applied.  Indirect addressing is not supported.
+ *
+ * \param bld     translation context
+ * \param inst    instruction owning the operand
+ * \param src_op  index of the operand in inst->Src[]
+ * \return the operand value, or bld->base.undef when the register file or
+ *         the swizzle is not supported
+ */
+static LLVMValueRef
+emit_fetch(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_instruction *inst,
+   unsigned src_op)
+{
+   const struct tgsi_full_src_register *src = &inst->Src[src_op];
+   LLVMBuilderRef builder = bld->base.builder;
+   const unsigned length = bld->base.type.length;
+   unsigned char swz[4];
+   LLVMValueRef value;
+   unsigned c;
+
+   assert(!src->Register.Indirect);
+
+   /*
+    * Read the register from its file.
+    */
+
+   switch (src->Register.File) {
+   case TGSI_FILE_CONSTANT:
+      /* Gather the four scalars of the constant into elements 0..3. */
+      value = bld->base.undef;
+      for (c = 0; c < 4; ++c) {
+         LLVMValueRef offset = LLVMConstInt(LLVMInt32Type(),
+                                            src->Register.Index*4 + c, 0);
+         LLVMValueRef elem_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
+                                              &offset, 1, "");
+         LLVMValueRef elem = LLVMBuildLoad(builder, elem_ptr, "");
+         LLVMValueRef lane = LLVMConstInt(LLVMInt32Type(), c, 0);
+
+         lp_build_name(elem, "const[%u].%c", src->Register.Index, "xyzw"[c]);
+
+         value = LLVMBuildInsertElement(builder, value, elem, lane, "");
+      }
+
+      /*
+       * Replicate the first xyzw group into every other group of the vector.
+       *
+       * XXX: could be factored into a reusable function.
+       */
+      if (length > 4) {
+         LLVMValueRef lanes[LP_MAX_VECTOR_LENGTH];
+         unsigned i;
+
+         for (i = 0; i < length; ++i) {
+            lanes[i] = LLVMConstInt(LLVMInt32Type(), i % 4, 0);
+         }
+
+         value = LLVMBuildShuffleVector(builder,
+                                        value, bld->base.undef,
+                                        LLVMConstVector(lanes, length),
+                                        "");
+      }
+      break;
+
+   case TGSI_FILE_IMMEDIATE:
+      value = bld->immediates[src->Register.Index];
+      assert(value);
+      break;
+
+   case TGSI_FILE_INPUT:
+      value = bld->inputs[src->Register.Index];
+      assert(value);
+      break;
+
+   case TGSI_FILE_TEMPORARY:
+      value = LLVMBuildLoad(builder, bld->temps[src->Register.Index], "");
+      if (!value) {
+         return bld->base.undef;
+      }
+      break;
+
+   default:
+      assert(0 && "invalid src register in emit_fetch()");
+      return bld->base.undef;
+   }
+
+   /*
+    * Sign modifiers: absolute value first, then negation.
+    */
+
+   if (src->Register.Absolute) {
+      value = lp_build_abs(&bld->base, value);
+   }
+
+   if (src->Register.Negate) {
+      value = lp_build_negate(&bld->base, value);
+   }
+
+   /*
+    * Collect and validate the per-channel swizzle, then apply it.
+    */
+
+   for (c = 0; c < 4; ++c) {
+      const unsigned sel = tgsi_util_get_full_src_register_swizzle(src, c);
+
+      if (sel > 3) {
+         assert(0 && "invalid swizzle in emit_fetch()");
+         return bld->base.undef;
+      }
+      swz[c] = sel;
+   }
+
+   return lp_build_swizzle_aos(&bld->base, value, swz);
+}
+
+
+/**
+ * Register store.
+ *
+ * Writes an instruction result to its destination register.  The value is
+ * first saturated as requested by the instruction; then, if the instruction
+ * is predicated and/or has a partial writemask, it is merged with the
+ * register's current contents so that only the enabled channels change.
+ * Indirect addressing is not supported.
+ *
+ * \param bld    translation context
+ * \param inst   instruction being translated
+ * \param index  index of the destination in inst->Dst[]
+ * \param value  result to store
+ */
+static void
+emit_store(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_instruction *inst,
+   unsigned index,
+   LLVMValueRef value)
+{
+   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
+   LLVMValueRef mask = NULL;
+   LLVMValueRef ptr;
+
+   /*
+    * Saturate the value
+    */
+
+   switch (inst->Instruction.Saturate) {
+   case TGSI_SAT_NONE:
+      break;
+
+   case TGSI_SAT_ZERO_ONE:
+      value = lp_build_max(&bld->base, value, bld->base.zero);
+      value = lp_build_min(&bld->base, value, bld->base.one);
+      break;
+
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
+      value = lp_build_min(&bld->base, value, bld->base.one);
+      break;
+
+   default:
+      assert(0);
+   }
+
+   /*
+    * Translate the register file
+    */
+
+   assert(!reg->Register.Indirect);
+
+   switch (reg->Register.File) {
+   case TGSI_FILE_OUTPUT:
+      ptr = bld->outputs[reg->Register.Index];
+      break;
+
+   case TGSI_FILE_TEMPORARY:
+      ptr = bld->temps[reg->Register.Index];
+      break;
+
+   case TGSI_FILE_ADDRESS:
+      /*
+       * The destination is the address register itself, so it is selected
+       * by Register.Index (the same index emit_declaration() allocates the
+       * slot under).  reg->Indirect describes an indirection, which is
+       * asserted to be absent above.
+       */
+      ptr = bld->addr[reg->Register.Index];
+      break;
+
+   case TGSI_FILE_PREDICATE:
+      ptr = bld->preds[reg->Register.Index];
+      break;
+
+   default:
+      assert(0);
+      return;
+   }
+
+   /*
+    * Predicate
+    */
+
+   if (inst->Instruction.Predicate) {
+      unsigned char swizzles[4];
+      LLVMValueRef pred;
+
+      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
+
+      pred = LLVMBuildLoad(bld->base.builder,
+                           bld->preds[inst->Predicate.Index], "");
+
+      /*
+       * Convert the value to an integer mask.
+       */
+      pred = lp_build_compare(bld->base.builder,
+                              bld->base.type,
+                              PIPE_FUNC_NOTEQUAL,
+                              pred,
+                              bld->base.zero);
+
+      if (inst->Predicate.Negate) {
+         pred = LLVMBuildNot(bld->base.builder, pred, "");
+      }
+
+      swizzles[0] = inst->Predicate.SwizzleX;
+      swizzles[1] = inst->Predicate.SwizzleY;
+      swizzles[2] = inst->Predicate.SwizzleZ;
+      swizzles[3] = inst->Predicate.SwizzleW;
+
+      pred = lp_build_swizzle_aos(&bld->base, pred, swizzles);
+
+      /* No other mask has been built yet at this point. */
+      mask = pred;
+   }
+
+   /*
+    * Writemask
+    */
+
+   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
+      LLVMValueRef writemask;
+
+      writemask = lp_build_const_mask_aos(bld->base.type, reg->Register.WriteMask);
+
+      if (mask) {
+         mask = LLVMBuildAnd(bld->base.builder, mask, writemask, "");
+      } else {
+         mask = writemask;
+      }
+   }
+
+   /*
+    * Masked store: keep the register's current value in disabled channels.
+    */
+
+   if (mask) {
+      LLVMValueRef orig_value;
+
+      orig_value = LLVMBuildLoad(bld->base.builder, ptr, "");
+      value = lp_build_select(&bld->base,
+                              mask, value, orig_value);
+   }
+
+   LLVMBuildStore(bld->base.builder, value, ptr);
+}
+
+
+/**
+ * High-level instruction translators.
+ */
+
+/**
+ * Translate a texture sampling instruction by delegating to the sampler
+ * code generator.
+ *
+ * Operand 0 holds the coordinates.  With explicit derivatives, operands 1
+ * and 2 are ddx/ddy and operand 3 names the sampler unit; otherwise operand
+ * 1 names the sampler unit and placeholder derivatives are passed.
+ *
+ * \return the sampled value, or bld->base.undef when no sampler generator
+ *         was supplied
+ */
+static LLVMValueRef
+emit_tex(struct lp_build_tgsi_aos_context *bld,
+         const struct tgsi_full_instruction *inst,
+         enum lp_build_tex_modifier modifier)
+{
+   const struct lp_build_sampler_aos *sampler = bld->sampler;
+   LLVMValueRef texcoord;
+   LLVMValueRef dx;
+   LLVMValueRef dy;
+   unsigned sampler_src;
+
+   if (sampler == NULL) {
+      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
+      return bld->base.undef;
+   }
+
+   texcoord = emit_fetch(bld, inst, 0);
+
+   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
+      dx = emit_fetch(bld, inst, 1);
+      dy = emit_fetch(bld, inst, 2);
+      sampler_src = 3;
+   } else {
+      /* TODO: compute real derivatives of the coordinates. */
+      dx = bld->base.one;
+      dy = bld->base.one;
+      sampler_src = 1;
+   }
+
+   return sampler->emit_fetch_texel(sampler,
+                                    bld->base.builder,
+                                    bld->base.type,
+                                    inst->Texture.Texture,
+                                    inst->Src[sampler_src].Register.Index,
+                                    texcoord, dx, dy,
+                                    modifier);
+}
+
+
+/**
+ * Allocate storage for the registers named by a TGSI declaration.
+ *
+ * Temporaries, outputs, address and predicate registers each get a stack
+ * slot of the shader's vector type.  When temporaries are accessed
+ * indirectly, a single array covering registers 0..last is allocated in
+ * bld->temps_array instead of the individual bld->temps[] slots.  Other
+ * register files need no storage.
+ *
+ * \param bld   translation context
+ * \param decl  declaration to process
+ */
+static void
+emit_declaration(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_declaration *decl)
+{
+   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
+
+   const unsigned first = decl->Range.First;
+   const unsigned last = decl->Range.Last;
+   unsigned idx;
+
+   if (decl->Declaration.File == TGSI_FILE_TEMPORARY &&
+       (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
+      /*
+       * The array does not depend on the individual register, so emit it
+       * once per declaration rather than once per register in the range.
+       */
+      if (first <= last) {
+         LLVMValueRef array_size;
+
+         assert(last < LP_MAX_TGSI_TEMPS);
+
+         array_size = LLVMConstInt(LLVMInt32Type(), last + 1, 0);
+         bld->temps_array = lp_build_array_alloca(bld->base.builder,
+                                                  vec_type, array_size, "");
+      }
+      return;
+   }
+
+   for (idx = first; idx <= last; ++idx) {
+      switch (decl->Declaration.File) {
+      case TGSI_FILE_TEMPORARY:
+         assert(idx < LP_MAX_TGSI_TEMPS);
+         bld->temps[idx] = lp_build_alloca(bld->base.builder,
+                                           vec_type, "");
+         break;
+
+      case TGSI_FILE_OUTPUT:
+         bld->outputs[idx] = lp_build_alloca(bld->base.builder,
+                                             vec_type, "");
+         break;
+
+      case TGSI_FILE_ADDRESS:
+         assert(idx < LP_MAX_TGSI_ADDRS);
+         bld->addr[idx] = lp_build_alloca(bld->base.builder,
+                                          vec_type, "");
+         break;
+
+      case TGSI_FILE_PREDICATE:
+         assert(idx < LP_MAX_TGSI_PREDS);
+         bld->preds[idx] = lp_build_alloca(bld->base.builder,
+                                           vec_type, "");
+         break;
+
+      default:
+         /* don't need to declare other vars */
+         break;
+      }
+   }
+}
+
+
+/**
+ * Fetch a source operand and replicate its X channel to all channels.
+ */
+static LLVMValueRef
+emit_fetch_scalar_x(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_instruction *inst,
+   unsigned src_op)
+{
+   LLVMValueRef src = emit_fetch(bld, inst, src_op);
+
+   return lp_build_swizzle_scalar_aos(&bld->base, src, TGSI_SWIZZLE_X);
+}
+
+
+/**
+ * Emit a "set on compare" opcode: 1.0 in the channels where
+ * src0 <func> src1 holds, 0.0 elsewhere.
+ */
+static LLVMValueRef
+emit_set_on_compare(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_instruction *inst,
+   unsigned func)
+{
+   LLVMValueRef src0 = emit_fetch(bld, inst, 0);
+   LLVMValueRef src1 = emit_fetch(bld, inst, 1);
+   LLVMValueRef cond = lp_build_cmp(&bld->base, func, src0, src1);
+
+   return lp_build_select(&bld->base, cond, bld->base.one, bld->base.zero);
+}
+
+
+/**
+ * Emit LLVM for one TGSI instruction.
+ *
+ * \param bld   translation context
+ * \param inst  instruction to translate
+ * \param info  opcode description; num_dst tells whether a result is stored
+ * \param pc    program counter; advanced to the next instruction, or set to
+ *              -1 when the END opcode is reached
+ * \return TRUE for success, FALSE if the opcode is not supported
+ */
+static boolean
+emit_instruction(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_instruction *inst,
+   const struct tgsi_opcode_info *info,
+   int *pc)
+{
+   LLVMValueRef src0, src1, src2;
+   LLVMValueRef tmp0, tmp1;
+   LLVMValueRef dst0 = NULL;
+
+   /*
+    * Stores and write masks are handled in a general fashion after the long
+    * instruction opcode switch statement.
+    *
+    * Although not strictly necessary, we avoid generating instructions for
+    * channels which won't be stored, in cases where that's easy. For some
+    * complex instructions, like texture sampling, it is more convenient to
+    * assume a full writemask and then let LLVM optimization passes eliminate
+    * redundant code.
+    */
+
+   (*pc)++;
+
+   assert(info->num_dst <= 1);
+   if (info->num_dst) {
+      dst0 = bld->base.undef;
+   }
+
+   switch (inst->Instruction.Opcode) {
+
+   /*
+    * Moves and rounding
+    */
+
+   case TGSI_OPCODE_MOV:
+      dst0 = emit_fetch(bld, inst, 0);
+      break;
+
+   case TGSI_OPCODE_ARL:
+   case TGSI_OPCODE_FLR:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_floor(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_ARR:
+   case TGSI_OPCODE_ROUND:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_round(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_CEIL:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_ceil(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_TRUNC:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_trunc(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_FRC:
+      src0 = emit_fetch(bld, inst, 0);
+      tmp0 = lp_build_floor(&bld->base, src0);
+      dst0 = lp_build_sub(&bld->base, src0, tmp0);
+      break;
+
+   case TGSI_OPCODE_ABS:
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_abs(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_SSG:
+      /* TGSI_OPCODE_SGN */
+      tmp0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_sgn(&bld->base, tmp0);
+      break;
+
+   /*
+    * Component-wise arithmetic
+    */
+
+   case TGSI_OPCODE_RCP:
+      /* TGSI_OPCODE_RECIP */
+      src0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_rcp(&bld->base, src0);
+      break;
+
+   case TGSI_OPCODE_RSQ:
+      /* TGSI_OPCODE_RECIPSQRT */
+      src0 = emit_fetch(bld, inst, 0);
+      tmp0 = lp_build_abs(&bld->base, src0);
+      dst0 = lp_build_rsqrt(&bld->base, tmp0);
+      break;
+
+   case TGSI_OPCODE_MUL:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      dst0 = lp_build_mul(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_ADD:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      dst0 = lp_build_add(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_SUB:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      dst0 = lp_build_sub(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_MIN:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      dst0 = lp_build_min(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_MAX:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      dst0 = lp_build_max(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_MAD:
+      /* TGSI_OPCODE_MADD: src0 * src1 + src2 */
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      src2 = emit_fetch(bld, inst, 2);
+      tmp0 = lp_build_mul(&bld->base, src0, src1);
+      dst0 = lp_build_add(&bld->base, tmp0, src2);
+      break;
+
+   case TGSI_OPCODE_LRP:
+      /* src0 * (src1 - src2) + src2 */
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      src2 = emit_fetch(bld, inst, 2);
+      tmp0 = lp_build_sub(&bld->base, src1, src2);
+      tmp0 = lp_build_mul(&bld->base, src0, tmp0);
+      dst0 = lp_build_add(&bld->base, tmp0, src2);
+      break;
+
+   case TGSI_OPCODE_CLAMP:
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      src2 = emit_fetch(bld, inst, 2);
+      tmp0 = lp_build_max(&bld->base, src0, src1);
+      dst0 = lp_build_min(&bld->base, tmp0, src2);
+      break;
+
+   /*
+    * Scalar functions: operate on the X channel replicated to all channels
+    */
+
+   case TGSI_OPCODE_EX2:
+      tmp0 = emit_fetch_scalar_x(bld, inst, 0);
+      dst0 = lp_build_exp2(&bld->base, tmp0);
+      break;
+
+   case TGSI_OPCODE_LG2:
+      tmp0 = emit_fetch_scalar_x(bld, inst, 0);
+      dst0 = lp_build_log2(&bld->base, tmp0);
+      break;
+
+   case TGSI_OPCODE_POW:
+      src0 = emit_fetch_scalar_x(bld, inst, 0);
+      src1 = emit_fetch_scalar_x(bld, inst, 1);
+      dst0 = lp_build_pow(&bld->base, src0, src1);
+      break;
+
+   case TGSI_OPCODE_COS:
+      tmp0 = emit_fetch_scalar_x(bld, inst, 0);
+      dst0 = lp_build_cos(&bld->base, tmp0);
+      break;
+
+   case TGSI_OPCODE_SIN:
+      tmp0 = emit_fetch_scalar_x(bld, inst, 0);
+      dst0 = lp_build_sin(&bld->base, tmp0);
+      break;
+
+   /*
+    * Comparisons and selects
+    */
+
+   case TGSI_OPCODE_SLT:
+      /* TGSI_OPCODE_SETLT */
+      dst0 = emit_set_on_compare(bld, inst, PIPE_FUNC_LESS);
+      break;
+
+   case TGSI_OPCODE_SGE:
+      /* TGSI_OPCODE_SETGE */
+      dst0 = emit_set_on_compare(bld, inst, PIPE_FUNC_GEQUAL);
+      break;
+
+   case TGSI_OPCODE_SEQ:
+      dst0 = emit_set_on_compare(bld, inst, PIPE_FUNC_EQUAL);
+      break;
+
+   case TGSI_OPCODE_SGT:
+      dst0 = emit_set_on_compare(bld, inst, PIPE_FUNC_GREATER);
+      break;
+
+   case TGSI_OPCODE_SLE:
+      dst0 = emit_set_on_compare(bld, inst, PIPE_FUNC_LEQUAL);
+      break;
+
+   case TGSI_OPCODE_SNE:
+      dst0 = emit_set_on_compare(bld, inst, PIPE_FUNC_NOTEQUAL);
+      break;
+
+   case TGSI_OPCODE_SFL:
+      dst0 = bld->base.zero;
+      break;
+
+   case TGSI_OPCODE_STR:
+      dst0 = bld->base.one;
+      break;
+
+   case TGSI_OPCODE_CND:
+      /* src2 > 0.5 ? src0 : src1 */
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      src2 = emit_fetch(bld, inst, 2);
+      tmp1 = lp_build_const_vec(bld->base.type, 0.5);
+      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
+      dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
+      break;
+
+   case TGSI_OPCODE_CMP:
+      /* src0 < 0 ? src1 : src2 */
+      src0 = emit_fetch(bld, inst, 0);
+      src1 = emit_fetch(bld, inst, 1);
+      src2 = emit_fetch(bld, inst, 2);
+      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
+      dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
+      break;
+
+   /*
+    * Texture sampling
+    */
+
+   case TGSI_OPCODE_TEX:
+      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
+      break;
+
+   case TGSI_OPCODE_TXD:
+      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
+      break;
+
+   case TGSI_OPCODE_TXB:
+      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
+      break;
+
+   case TGSI_OPCODE_TXL:
+      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
+      break;
+
+   case TGSI_OPCODE_TXP:
+      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
+      break;
+
+   /*
+    * Program flow
+    */
+
+   case TGSI_OPCODE_END:
+      *pc = -1;
+      break;
+
+   case TGSI_OPCODE_NOP:
+      break;
+
+   /*
+    * Valid opcodes which are not implemented for AoS yet (dot products and
+    * other cross-channel operations, derivatives, kills, packing, control
+    * flow, geometry shader opcodes).
+    */
+
+   case TGSI_OPCODE_LIT:
+   case TGSI_OPCODE_EXP:
+   case TGSI_OPCODE_LOG:
+   case TGSI_OPCODE_DP3:
+   case TGSI_OPCODE_DP4:
+   case TGSI_OPCODE_DST:
+   case TGSI_OPCODE_DP2A:
+   case TGSI_OPCODE_XPD:
+   case TGSI_OPCODE_DPH:
+   case TGSI_OPCODE_DDX:
+   case TGSI_OPCODE_DDY:
+   case TGSI_OPCODE_KILP:
+   case TGSI_OPCODE_KIL:
+   case TGSI_OPCODE_PK2H:
+   case TGSI_OPCODE_PK2US:
+   case TGSI_OPCODE_PK4B:
+   case TGSI_OPCODE_PK4UB:
+   case TGSI_OPCODE_RFL:
+   case TGSI_OPCODE_CAL:
+   case TGSI_OPCODE_RET:
+   case TGSI_OPCODE_SCS:
+   case TGSI_OPCODE_NRM:
+   case TGSI_OPCODE_NRM4:
+   case TGSI_OPCODE_DP2:
+   case TGSI_OPCODE_BRK:
+   case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_BGNSUB:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_ENDIF:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_ENDSUB:
+   case TGSI_OPCODE_CONT:
+   case TGSI_OPCODE_EMIT:
+   case TGSI_OPCODE_ENDPRIM:
+      return FALSE;
+
+   /*
+    * Opcodes considered deprecated (or possibly so): not expected to be
+    * seen, hence the assertion in debug builds.
+    */
+
+   case TGSI_OPCODE_RCC:
+   case TGSI_OPCODE_UP2H:
+   case TGSI_OPCODE_UP2US:
+   case TGSI_OPCODE_UP4B:
+   case TGSI_OPCODE_UP4UB:
+   case TGSI_OPCODE_X2D:
+   case TGSI_OPCODE_ARA:
+   case TGSI_OPCODE_BRA:
+   case TGSI_OPCODE_DIV:
+   case TGSI_OPCODE_PUSHA:
+   case TGSI_OPCODE_POPA:
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_NOT:
+   case TGSI_OPCODE_SHL:
+   case TGSI_OPCODE_ISHR:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_MOD:
+   case TGSI_OPCODE_XOR:
+   case TGSI_OPCODE_SAD:
+   case TGSI_OPCODE_TXF:
+   case TGSI_OPCODE_TXQ:
+      assert(0);
+      return FALSE;
+
+   default:
+      return FALSE;
+   }
+
+   if (info->num_dst) {
+      emit_store(bld, inst, 0, dst0);
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Translate a TGSI shader into LLVM IR using AoS vectors.
+ *
+ * The token stream is parsed in a first pass: declarations allocate register
+ * storage, immediates become constant vectors, and instructions are copied
+ * into a growable array.  The saved instructions are then translated in
+ * order until the END opcode is reached or the array is exhausted.
+ * Instructions that cannot be translated are reported with a debug warning
+ * and skipped.
+ *
+ * \param builder     LLVM builder the IR is emitted with
+ * \param tokens      TGSI token stream of the shader
+ * \param type        vector type of the shader registers
+ * \param consts_ptr  pointer to the constants, indexed per scalar
+ * \param inputs      input register values, indexed by register index
+ * \param outputs     receives a pointer to the stack slot of each declared
+ *                    output register
+ * \param sampler     texture sampling code generator, may be NULL
+ * \param info        shader info; only indirect_files is read
+ */
+void
+lp_build_tgsi_aos(LLVMBuilderRef builder,
+                  const struct tgsi_token *tokens,
+                  struct lp_type type,
+                  LLVMValueRef consts_ptr,
+                  const LLVMValueRef *inputs,
+                  LLVMValueRef *outputs,
+                  struct lp_build_sampler_aos *sampler,
+                  const struct tgsi_shader_info *info)
+{
+   struct lp_build_tgsi_aos_context bld;
+   struct tgsi_parse_context parse;
+   uint num_immediates = 0;
+   uint num_instructions = 0;
+   unsigned chan;
+   int pc = 0;
+
+   /* Setup build context */
+   memset(&bld, 0, sizeof bld);
+   lp_build_context_init(&bld.base, builder, type);
+   lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
+   bld.inputs = inputs;
+   bld.outputs = outputs;
+   bld.consts_ptr = consts_ptr;
+   bld.sampler = sampler;
+   bld.indirect_files = info->indirect_files;
+   bld.instructions = (struct tgsi_full_instruction *)
+                      MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
+   bld.max_instructions = LP_MAX_INSTRUCTIONS;
+
+   if (!bld.instructions) {
+      return;
+   }
+
+   /*
+    * First pass: process declarations and immediates, save instructions.
+    */
+
+   tgsi_parse_init(&parse, tokens);
+
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+
+      switch(parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         /* Inputs already interpolated */
+         emit_declaration(&bld, &parse.FullToken.FullDeclaration);
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         {
+            /* save expanded instruction */
+            if (num_instructions == bld.max_instructions) {
+               struct tgsi_full_instruction *instructions;
+               instructions = REALLOC(bld.instructions,
+                                      bld.max_instructions
+                                      * sizeof(struct tgsi_full_instruction),
+                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
+                                      * sizeof(struct tgsi_full_instruction));
+               if (!instructions) {
+                  /* Out of memory: this instruction is dropped. */
+                  break;
+               }
+               bld.instructions = instructions;
+               bld.max_instructions += LP_MAX_INSTRUCTIONS;
+            }
+
+            memcpy(bld.instructions + num_instructions,
+                   &parse.FullToken.FullInstruction,
+                   sizeof(bld.instructions[0]));
+
+            num_instructions++;
+         }
+
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* simply copy the immediate values into the next immediates[] slot */
+         {
+            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+            float rgba[4];
+            assert(size <= 4);
+            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
+            for (chan = 0; chan < size; ++chan) {
+               rgba[chan] = parse.FullToken.FullImmediate.u[chan].Float;
+            }
+            /* Missing trailing components default to zero. */
+            for (chan = size; chan < 4; ++chan) {
+               rgba[chan] = 0.0f;
+            }
+            bld.immediates[num_immediates] =
+                  lp_build_const_aos(type,
+                                     rgba[0], rgba[1], rgba[2], rgba[3],
+                                     NULL);
+            num_immediates++;
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         break;
+
+      default:
+         assert(0);
+      }
+   }
+
+   /*
+    * Second pass: translate the saved instructions.
+    *
+    * The loop normally ends when END sets pc to -1.  It is additionally
+    * bounded by the number of saved instructions, so that a shader without
+    * END (or one whose END was dropped above) cannot make us read past the
+    * end of the instruction array.
+    */
+
+   while (pc != -1 && (uint)pc < num_instructions) {
+      struct tgsi_full_instruction *instr = bld.instructions + pc;
+      const struct tgsi_opcode_info *opcode_info =
+         tgsi_get_opcode_info(instr->Instruction.Opcode);
+      if (!emit_instruction(&bld, instr, opcode_info, &pc))
+         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+                       opcode_info->mnemonic);
+   }
+
+   /* Debugging aid: dump the TGSI and the generated function. */
+   if (0) {
+      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
+      LLVMValueRef function = LLVMGetBasicBlockParent(block);
+      debug_printf("11111111111111111111111111111 \n");
+      tgsi_dump(tokens, 0);
+      lp_debug_dump_value(function);
+      debug_printf("2222222222222222222222222222 \n");
+   }
+   tgsi_parse_free(&parse);
+
+   /* Debugging aid: dump the whole module. */
+   if (0) {
+      LLVMModuleRef module = LLVMGetGlobalParent(
+         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
+      LLVMDumpModule(module);
+   }
+
+   FREE(bld.instructions);
+}
+