summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
authorZack Rusin <zackr@vmware.com>2010-02-22 22:02:58 -0500
committerZack Rusin <zackr@vmware.com>2010-02-22 22:02:58 -0500
commitc5c5cd7132e18f4aad8e73d8ee879f8823c4c1e7 (patch)
treef36680fba5d44945075e4b6f7d4269154b3c1e7d /src/gallium
parent902ccfcb40f21e1a5fca2f1bec1cbbabb053d8cf (diff)
gallium/draw: initial code to properly support llvm in the draw module
code generate big chunks of the vertex pipeline in order to speed up software vertex processing.
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/SConscript2
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c26
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h10
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c311
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h154
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm_translate.c653
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h11
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h1
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c432
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c6
12 files changed, 1601 insertions, 18 deletions
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index b531ad2dbd..51c4a0cbbe 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -194,6 +194,8 @@ if drawllvm:
'gallivm/lp_bld_swizzle.c',
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
+ 'draw/draw_llvm.c',
+ 'draw/draw_pt_fetch_shade_pipeline_llvm.c'
]
gallium = env.ConvenienceLibrary(
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index d5ddc4a6a9..6fa73ad56b 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -44,6 +44,18 @@ struct draw_context *draw_create( void )
if (draw == NULL)
goto fail;
+ if (!draw_init(draw))
+ goto fail;
+
+ return draw;
+
+fail:
+ draw_destroy( draw );
+ return NULL;
+}
+
+boolean draw_init(struct draw_context *draw)
+{
ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 );
ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 );
ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 );
@@ -57,22 +69,18 @@ struct draw_context *draw_create( void )
if (!draw_pipeline_init( draw ))
- goto fail;
+ return FALSE;
if (!draw_pt_init( draw ))
- goto fail;
+ return FALSE;
if (!draw_vs_init( draw ))
- goto fail;
+ return FALSE;
if (!draw_gs_init( draw ))
- goto fail;
+ return FALSE;
- return draw;
-
-fail:
- draw_destroy( draw );
- return NULL;
+ return TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index acd81b9712..d42e400318 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -40,6 +40,9 @@
#include "pipe/p_state.h"
+#ifdef DRAW_LLVM
+#include <llvm-c/ExecutionEngine.h>
+#endif
struct pipe_context;
struct draw_context;
@@ -197,6 +200,11 @@ boolean draw_need_pipeline(const struct draw_context *draw,
const struct pipe_rasterizer_state *rasterizer,
unsigned prim );
-
+#ifdef DRAW_LLVM
+/*******************************************************************************
+ * LLVM integration
+ */
+struct draw_context *draw_create_with_llvm(LLVMExecutionEngineRef engine);
+#endif
#endif /* DRAW_CONTEXT_H */
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
new file mode 100644
index 0000000000..6b0ddfd064
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -0,0 +1,311 @@
+#include "draw_llvm.h"
+
+#include "draw_context.h"
+#include "draw_vs.h"
+
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_interp.h"
+#include "gallivm/lp_bld_struct.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_tgsi.h"
+
+#include "util/u_cpu_detect.h"
+
+#include <llvm-c/Transforms/Scalar.h>
+
+static void
+init_globals(struct draw_llvm *llvm)
+{
+ LLVMTypeRef vertex_header;
+ LLVMTypeRef texture_type;
+
+ /* struct vertex_header */
+ {
+ LLVMTypeRef elem_types[3];
+
+ elem_types[0] = LLVMIntType(32);
+ elem_types[1] = LLVMArrayType(LLVMFloatType(), 4);
+ elem_types[2] = LLVMArrayType(elem_types[1], 0);
+
+ vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ /* these are bit-fields and we can't take address of them
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_CLIPMASK);
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_EDGEFLAG);
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_PAD);
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_VERTEX_ID);
+ */
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_CLIP);
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
+ llvm->target, vertex_header,
+ DRAW_JIT_VERTEX_DATA);
+
+ LP_CHECK_STRUCT_SIZE(struct vertex_header,
+ llvm->target, vertex_header);
+
+ LLVMAddTypeName(llvm->module, "vertex_header", vertex_header);
+
+ llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
+ }
+ /* struct draw_jit_texture */
+ {
+ LLVMTypeRef elem_types[4];
+
+ elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0);
+
+ texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_WIDTH);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_HEIGHT);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_DATA);
+ LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
+ llvm->target, texture_type);
+
+ LLVMAddTypeName(llvm->module, "texture", texture_type);
+ }
+
+
+ /* struct draw_jit_context */
+ {
+ LLVMTypeRef elem_types[3];
+ LLVMTypeRef context_type;
+
+ elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
+ elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
+ elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
+
+ context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
+ llvm->target, context_type, 0);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
+ llvm->target, context_type, 1);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
+ llvm->target, context_type,
+ DRAW_JIT_CONTEXT_TEXTURES_INDEX);
+ LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
+ llvm->target, context_type);
+
+ LLVMAddTypeName(llvm->module, "context", context_type);
+
+ llvm->context_ptr_type = LLVMPointerType(context_type, 0);
+ }
+}
+
+struct draw_llvm *
+draw_llvm_create(struct draw_context *draw)
+{
+ struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm );
+
+ util_cpu_detect();
+
+ llvm->draw = draw;
+ llvm->engine = draw->engine;
+
+ debug_assert(llvm->engine);
+
+ llvm->module = LLVMModuleCreateWithName("draw_llvm");
+ llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module);
+
+ LLVMAddModuleProvider(llvm->engine, llvm->provider);
+
+ llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine);
+
+ llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
+ LLVMAddTargetData(llvm->target, llvm->pass);
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ /* TODO: Add more passes */
+ LLVMAddConstantPropagationPass(llvm->pass);
+ if(util_cpu_caps.has_sse4_1) {
+ /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+ * and sitofp (necessary for trunc/floor/ceil/round implementation)
+ * somehow becomes invalid code.
+ */
+ LLVMAddInstructionCombiningPass(llvm->pass);
+ }
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+ LLVMAddGVNPass(llvm->pass);
+ LLVMAddCFGSimplificationPass(llvm->pass);
+
+ init_globals(llvm);
+
+
+#if 1
+ LLVMDumpModule(llvm->module);
+#endif
+
+ return llvm;
+}
+
+void
+draw_llvm_destroy(struct draw_llvm *llvm)
+{
+ free(llvm);
+}
+
+void
+draw_llvm_prepare(struct draw_llvm *llvm)
+{
+}
+
+
+struct draw_context *draw_create_with_llvm(LLVMExecutionEngineRef engine)
+{
+ struct draw_context *draw = CALLOC_STRUCT( draw_context );
+ if (draw == NULL)
+ goto fail;
+ draw->engine = engine;
+
+ if (!draw_init(draw))
+ goto fail;
+
+ return draw;
+
+fail:
+ draw_destroy( draw );
+ return NULL;
+}
+
+static void
+generate_vs(struct draw_llvm *llvm,
+ LLVMBuilderRef builder,
+ LLVMValueRef context_ptr,
+ LLVMValueRef io)
+{
+ const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
+ struct lp_type vs_type = lp_type_float(32);
+ LLVMValueRef vs_consts;
+ const LLVMValueRef (*inputs)[NUM_CHANNELS];
+ LLVMValueRef (*outputs)[NUM_CHANNELS];
+
+ lp_build_tgsi_soa(builder,
+ tokens,
+ vs_type,
+ NULL /*struct lp_build_mask_context *mask*/,
+ vs_consts,
+ NULL /*pos*/,
+ inputs,
+ outputs,
+ NULL/*sampler*/);
+}
+
+void
+draw_llvm_generate(struct draw_llvm *llvm)
+{
+ LLVMTypeRef arg_types[5];
+ LLVMTypeRef func_type;
+ LLVMValueRef context_ptr;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ LLVMValueRef function;
+ LLVMValueRef start, end, count, stride, step;
+ LLVMValueRef io_ptr;
+ unsigned i;
+ unsigned chan;
+ struct lp_build_context bld;
+ struct lp_build_loop_state lp_loop;
+ struct lp_type vs_type = lp_type_float(32);
+
+ arg_types[0] = llvm->context_ptr_type; /* context */
+ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
+ arg_types[2] = LLVMInt32Type(); /* start */
+ arg_types[3] = LLVMInt32Type(); /* count */
+ arg_types[4] = LLVMInt32Type(); /* stride */
+
+ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+ function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);
+ LLVMSetFunctionCallConv(function, LLVMCCallConv);
+ for(i = 0; i < Elements(arg_types); ++i)
+ if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+ LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
+
+ context_ptr = LLVMGetParam(function, 0);
+ io_ptr = LLVMGetParam(function, 1);
+ start = LLVMGetParam(function, 2);
+ count = LLVMGetParam(function, 3);
+ stride = LLVMGetParam(function, 4);
+
+ lp_build_name(context_ptr, "context");
+ lp_build_name(io_ptr, "io");
+ lp_build_name(start, "start");
+ lp_build_name(count, "count");
+ lp_build_name(stride, "stride");
+
+ /*
+ * Function body
+ */
+
+ block = LLVMAppendBasicBlock(function, "entry");
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ lp_build_context_init(&bld, builder, vs_type);
+
+ end = lp_build_add(&bld, start, count);
+
+ step = LLVMConstInt(LLVMInt32Type(), 1, 0);
+ lp_build_loop_begin(builder, start, &lp_loop);
+ {
+ LLVMValueRef io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, "");
+
+ generate_vs(llvm,
+ builder,
+ context_ptr,
+ io);
+ }
+ lp_build_loop_end(builder, end, step, &lp_loop);
+
+
+ LLVMBuildRetVoid(builder);
+
+ LLVMDisposeBuilder(builder);
+
+ /*
+ * Translate the LLVM IR into machine code.
+ */
+
+#ifdef DEBUG
+ if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
+ LLVMDumpValue(function);
+ assert(0);
+ }
+#endif
+
+ LLVMRunFunctionPassManager(llvm->pass, function);
+
+ if (1) {
+ LLVMDumpValue(function);
+ debug_printf("\n");
+ }
+
+ llvm->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, function);
+
+ if (1)
+ lp_disassemble(llvm->jit_func);
+}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
new file mode 100644
index 0000000000..0a1845f1fb
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -0,0 +1,154 @@
+#ifndef DRAW_LLVM_H
+#define DRAW_LLVM_H
+
+#include "draw/draw_private.h"
+
+#include "pipe/p_context.h"
+
+#include <llvm-c/Core.h>
+#include <llvm-c/Analysis.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/ExecutionEngine.h>
+
+struct draw_jit_texture
+{
+ uint32_t width;
+ uint32_t height;
+ uint32_t stride;
+ const void *data;
+};
+
+enum {
+ DRAW_JIT_TEXTURE_WIDTH = 0,
+ DRAW_JIT_TEXTURE_HEIGHT,
+ DRAW_JIT_TEXTURE_STRIDE,
+ DRAW_JIT_TEXTURE_DATA
+};
+
+enum {
+ DRAW_JIT_VERTEX_VERTEX_ID = 0,
+ DRAW_JIT_VERTEX_CLIP,
+ DRAW_JIT_VERTEX_DATA
+};
+
+/**
+ * This structure is passed directly to the generated vertex shader.
+ *
+ * It contains the derived state.
+ *
+ * Changes here must be reflected in the draw_jit_context_* macros.
+ * Changes to the ordering should be avoided.
+ *
+ * Only use types with a clear size and padding here, in particular prefer the
+ * stdint.h types to the basic integer types.
+ */
+struct draw_jit_context
+{
+ const float *vs_constants;
+ const float *gs_constants;
+
+
+ struct draw_jit_texture textures[PIPE_MAX_SAMPLERS];
+};
+
+
+#define draw_jit_context_vs_constants(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, 0, "vs_constants")
+
+#define draw_jit_context_gs_constants(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, 1, "gs_constants")
+
+#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2
+
+#define draw_jit_context_textures(_builder, _ptr) \
+ lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures")
+
+/* we are construction a function of the form:
+
+struct vertex_header {
+ uint32 vertex_id;
+
+ float clip[4];
+ float data[][4];
+};
+
+struct draw_jit_context
+{
+ const float *vs_constants;
+ const float *gs_constants;
+
+ struct draw_jit_texture textures[PIPE_MAX_SAMPLERS];
+ const void *vbuffers;
+};
+
+void
+draw_shader(struct draw_jit_context *context,
+ struct vertex_header *io,
+ unsigned start,
+ unsigned count,
+ unsigned stride)
+{
+ // do a fetch and a run vertex shader
+ for (int i = 0; i < count; ++i) {
+ struct vertex_header *header = &io[i];
+ header->vertex_id = 0xffff;
+ // follows code-genarted fetch/translate section
+ // for each vertex_element ...
+ codegened_translate(header->data[num_element],
+ context->vertex_elements[num_element],
+ context->vertex_buffers,
+ context->vbuffers);
+
+ codegened_vertex_shader(header->data, context->vs_constants);
+ }
+
+ for (int i = 0; i < count; i += context->primitive_size) {
+ struct vertex_header *prim[MAX_PRIMITIVE_SIZE];
+ for (int j = 0; j < context->primitive_size; ++j) {
+ header[j] = &io[i + j];
+ }
+ codegened_geometry_shader(prim, gs_constants);
+ }
+}
+*/
+
+typedef void
+(*draw_jit_vert_func)(struct draw_jit_context *context,
+ struct vertex_header *io,
+ unsigned start,
+ unsigned count,
+ unsigned stride);
+
+struct draw_llvm {
+ struct draw_context *draw;
+
+ struct draw_jit_context jit_context;
+
+ draw_jit_vert_func jit_func;
+
+ LLVMModuleRef module;
+ LLVMExecutionEngineRef engine;
+ LLVMModuleProviderRef provider;
+ LLVMTargetDataRef target;
+ LLVMPassManagerRef pass;
+
+ LLVMTypeRef context_ptr_type;
+ LLVMTypeRef vertex_header_ptr_type;
+};
+
+
+struct draw_llvm *
+draw_llvm_create(struct draw_context *draw);
+
+void
+draw_llvm_destroy(struct draw_llvm *llvm);
+
+void
+draw_llvm_prepare(struct draw_llvm *llvm);
+
+/* generates the draw jit function */
+void
+draw_llvm_generate(struct draw_llvm *llvm);
+
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c
new file mode 100644
index 0000000000..588e97b29c
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c
@@ -0,0 +1,653 @@
+
+
+
+#include "util/u_memory.h"
+#include "pipe/p_state.h"
+#include "translate.h"
+
+
+#define DRAW_DBG 0
+
+typedef void (*fetch_func)(const void *ptr, float *attrib);
+typedef void (*emit_func)(const float *attrib, void *ptr);
+
+
+
+struct translate_generic {
+ struct translate translate;
+
+ struct {
+ enum translate_element_type type;
+
+ fetch_func fetch;
+ unsigned buffer;
+ unsigned input_offset;
+ unsigned instance_divisor;
+
+ emit_func emit;
+ unsigned output_offset;
+
+ char *input_ptr;
+ unsigned input_stride;
+
+ } attrib[PIPE_MAX_ATTRIBS];
+
+ unsigned nr_attrib;
+};
+
+
+static struct translate_generic *translate_generic( struct translate *translate )
+{
+ return (struct translate_generic *)translate;
+}
+
+/**
+ * Fetch a float[4] vertex attribute from memory, doing format/type
+ * conversion as needed.
+ *
+ * This is probably needed/dupliocated elsewhere, eg format
+ * conversion, texture sampling etc.
+ */
+#define ATTRIB( NAME, SZ, TYPE, FROM, TO ) \
+static void \
+fetch_##NAME(const void *ptr, float *attrib) \
+{ \
+ const float defaults[4] = { 0.0f,0.0f,0.0f,1.0f }; \
+ unsigned i; \
+ \
+ for (i = 0; i < SZ; i++) { \
+ attrib[i] = FROM(i); \
+ } \
+ \
+ for (; i < 4; i++) { \
+ attrib[i] = defaults[i]; \
+ } \
+} \
+ \
+static void \
+emit_##NAME(const float *attrib, void *ptr) \
+{ \
+ unsigned i; \
+ TYPE *out = (TYPE *)ptr; \
+ \
+ for (i = 0; i < SZ; i++) { \
+ out[i] = TO(attrib[i]); \
+ } \
+}
+
+{
+
+ return conv = instr(builder, bc, "");
+}
+
+static INLINE LLVMValueRef
+from_64_float(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMDoubleType() , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ return LLVMBuildFPTrunc(builder, l, LLVMFloatType(), "");
+}
+
+static INLINE LLVMValueRef
+from_32_float(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMFloatType() , "");
+ return LLVMBuildLoad(builder, bc, "");
+}
+
+static INLINE LLVMValueRef
+from_8_uscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+ return LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+}
+
+static INLINE LLVMValueRef
+from_16_uscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMIntType(16) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ return LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+}
+
+static INLINE LLVMValueRef
+from_32_uscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMIntType(32) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ return LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+}
+
+static INLINE LLVMValueRef
+from_8_sscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+ return LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+}
+
+static INLINE LLVMValueRef
+from_16_sscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMIntType(16) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ return LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+}
+
+static INLINE LLVMValueRef
+from_32_sscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMIntType(32) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ return LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+}
+
+
+static INLINE LLVMValueRef
+from_8_unorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+ LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(builder, 255.));
+}
+
+static INLINE LLVMValueRef
+from_16_unorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMIntType(16) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(builder, 65535.));
+}
+
+static INLINE LLVMValueRef
+from_32_unorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMIntType(32) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(builder, 4294967295.));
+}
+
+static INLINE LLVMValueRef
+from_8_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+ LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(builder, 127.0));
+}
+
+static INLINE LLVMValueRef
+from_16_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMIntType(16) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(builder, 32767.0f));
+}
+
+static INLINE LLVMValueRef
+from_32_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMIntType(32) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(builder, 2147483647.0));
+}
+
+static INLINE LLVMValueRef
+from_32_fixed(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+ LLVMIntType(32) , "");
+ LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+ LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+
+ return LLVMBuildFDiv(builder, uscaled,
+ LLVMConstReal(builder, 65536.0));
+}
+
+static INLINE LLVMValueRef
+to_64_float(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPExt(builder, l, LLVMDoubleType(), "");
+}
+
+static INLINE LLVMValueRef
+to_32_float(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ return LLVMBuildLoad(builder, fp, "");
+}
+
+atic INLINE LLVMValueRef
+to_8_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToUI(builder, l, LLVMIntType(8), "");
+}
+
+static INLINE LLVMValueRef
+to_16_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToUI(builder, l, LLVMIntType(16), "");
+}
+
+static INLINE LLVMValueRef
+to_32_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToUI(builder, l, LLVMIntType(32), "");
+}
+
+static INLINE LLVMValueRef
+to_8_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToSI(builder, l, LLVMIntType(8), "");
+}
+
+static INLINE LLVMValueRef
+to_16_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToSI(builder, l, LLVMIntType(16), "");
+}
+
+static INLINE LLVMValueRef
+to_32_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ return LLVMBuildFPToSI(builder, l, LLVMIntType(32), "");
+}
+
+static INLINE LLVMValueRef
+to_8_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(8), "");
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(builder, 255.));
+}
+
+static INLINE LLVMValueRef
+to_16_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), "");
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(builder, 65535.));
+}
+
+static INLINE LLVMValueRef
+to_32_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), "");
+
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(builder, 4294967295.));
+}
+
+static INLINE LLVMValueRef
+to_8_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+ LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(8), "");
+ return LLVMBuildFMUL(builder, uscaled,
+ LLVMConstReal(builder, 127.0));
+}
+
+static INLINE LLVMValueRef
+to_16_snorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(16), "");
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(builder, 32767.0f));
+}
+
+static INLINE LLVMValueRef
+to_32_snorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), "");
+
+ return LLVMBuildFMUL(builder, uscaled,
+ LLVMConstReal(builder, 2147483647.0));
+}
+
+static INLINE LLVMValueRef
+to_32_fixed(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+ LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+ LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), "");
+
+ return LLVMBuildFMul(builder, uscaled,
+ LLVMConstReal(builder, 65536.0));
+}
+
+static LLVMValueRef
+fetch(LLVMValueRef ptr, int val_size, int nr_components,
+ LLVMValueRef res)
+{
+ int i;
+ int offset = 0;
+
+ for (i = 0; i < nr_components; ++i) {
+ LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0);
+ LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ //getelementptr i8* ptr, i64 offset
+ LLVMValueRef src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, "");
+ //getelementptr float* res, i64 i
+ LLVMValueRef res_tmp = LLVMBuildGEP(builder, res, &dst_index, 1, "");
+ //bitcast i8* src, to res_type*
+ //load res_type src
+ //convert res_type src to float
+ //store float src, float *dst src
+ offset += val_size;
+ }
+}
+
+
+static void
+fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib)
+{
+ attrib[2] = FROM_8_UNORM(0);
+ attrib[1] = FROM_8_UNORM(1);
+ attrib[0] = FROM_8_UNORM(2);
+ attrib[3] = FROM_8_UNORM(3);
+}
+
+static void
+emit_B8G8R8A8_UNORM( const float *attrib, void *ptr)
+{
+ ubyte *out = (ubyte *)ptr;
+ out[2] = TO_8_UNORM(attrib[0]);
+ out[1] = TO_8_UNORM(attrib[1]);
+ out[0] = TO_8_UNORM(attrib[2]);
+ out[3] = TO_8_UNORM(attrib[3]);
+}
+
+static void
+fetch_NULL( const void *ptr, float *attrib )
+{
+ attrib[0] = 0;
+ attrib[1] = 0;
+ attrib[2] = 0;
+ attrib[3] = 1;
+}
+
+static void
+emit_NULL( const float *attrib, void *ptr )
+{
+ /* do nothing is the only sensible option */
+}
+
+typedef LLVMValueRef (*from_func)(LLVMBuilderRef, LLVMValueRef);
+typedef LLVMValueRef (*to_func)(LLVMBuilderRef, LLVMValueRef);
+
+struct draw_llvm_translate {
+ int format;
+ from_func from;
+ to_func to;
+ LLVMTypeRef type;
+ int num_components;
+} translates[] =
+{
+ {PIPE_FORMAT_R64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 1},
+ {PIPE_FORMAT_R64G64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 2},
+ {PIPE_FORMAT_R64G64B64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 3},
+ {PIPE_FORMAT_R64G64B64A64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 4},
+
+ {PIPE_FORMAT_R32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 1},
+ {PIPE_FORMAT_R32G32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 2},
+ {PIPE_FORMAT_R32G32B32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 3},
+ {PIPE_FORMAT_R32G32B32A32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 4},
+
+ {PIPE_FORMAT_R32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 1},
+ {PIPE_FORMAT_R32G32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 2},
+ {PIPE_FORMAT_R32G32B32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 3},
+ {PIPE_FORMAT_R32G32B32A32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 4},
+
+ {PIPE_FORMAT_R32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 1},
+ {PIPE_FORMAT_R32G32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 2},
+ {PIPE_FORMAT_R32G32B32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 3},
+ {PIPE_FORMAT_R32G32B32A32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 4},
+
+ {PIPE_FORMAT_R32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 1},
+ {PIPE_FORMAT_R32G32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 2},
+ {PIPE_FORMAT_R32G32B32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 3},
+ {PIPE_FORMAT_R32G32B32A32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 4},
+
+ {PIPE_FORMAT_R32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 1},
+ {PIPE_FORMAT_R32G32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 2},
+ {PIPE_FORMAT_R32G32B32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 3},
+ {PIPE_FORMAT_R32G32B32A32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 4},
+
+ {PIPE_FORMAT_R16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 1},
+ {PIPE_FORMAT_R16G16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 2},
+ {PIPE_FORMAT_R16G16B16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 3},
+ {PIPE_FORMAT_R16G16B16A16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 4},
+
+ {PIPE_FORMAT_R16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 1},
+ {PIPE_FORMAT_R16G16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 2},
+ {PIPE_FORMAT_R16G16B16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 3},
+ {PIPE_FORMAT_R16G16B16A16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 4},
+
+ {PIPE_FORMAT_R16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 1},
+ {PIPE_FORMAT_R16G16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 2},
+ {PIPE_FORMAT_R16G16B16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 3},
+ {PIPE_FORMAT_R16G16B16A16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 4},
+
+ {PIPE_FORMAT_R16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 1},
+ {PIPE_FORMAT_R16G16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 2},
+ {PIPE_FORMAT_R16G16B16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 3},
+ {PIPE_FORMAT_R16G16B16A16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 4},
+
+ {PIPE_FORMAT_R8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 1},
+ {PIPE_FORMAT_R8G8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 2},
+ {PIPE_FORMAT_R8G8B8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 3},
+ {PIPE_FORMAT_R8G8B8A8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 4},
+
+ {PIPE_FORMAT_R8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 1},
+ {PIPE_FORMAT_R8G8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 2},
+ {PIPE_FORMAT_R8G8B8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 3},
+ {PIPE_FORMAT_R8G8B8A8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 4},
+
+ {PIPE_FORMAT_R8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 1},
+ {PIPE_FORMAT_R8G8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 2},
+ {PIPE_FORMAT_R8G8B8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 3},
+ {PIPE_FORMAT_R8G8B8A8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 4},
+
+ {PIPE_FORMAT_R8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 1},
+ {PIPE_FORMAT_R8G8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 2},
+ {PIPE_FORMAT_R8G8B8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 3},
+ {PIPE_FORMAT_R8G8B8A8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 4},
+
+ {PIPE_FORMAT_R32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 1},
+ {PIPE_FORMAT_R32G32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 2},
+ {PIPE_FORMAT_R32G32B32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 3},
+ {PIPE_FORMAT_R32G32B32A32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 4},
+
+ {PIPE_FORMAT_A8R8G8B8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 4},
+ {PIPE_FORMAT_B8G8R8A8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(), 4},
+};
+
+/**
+ * Fetch vertex attributes for 'count' vertices.
+ */
+static void PIPE_CDECL generic_run_elts( struct translate *translate,
+ const unsigned *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned nr_attrs = tg->nr_attrib;
+ unsigned attr;
+ unsigned i;
+
+ /* loop over vertex attributes (vertex shader inputs)
+ */
+ for (i = 0; i < count; i++) {
+ unsigned elt = *elts++;
+
+ for (attr = 0; attr < nr_attrs; attr++) {
+ float data[4];
+ const char *src;
+
+ char *dst = (vert +
+ tg->attrib[attr].output_offset);
+
+ if (tg->attrib[attr].instance_divisor) {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride *
+ (instance_id / tg->attrib[attr].instance_divisor);
+ } else {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * elt;
+ }
+
+ tg->attrib[attr].fetch( src, data );
+
+ if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
+ i, elt, attr, data[0], data[1], data[2], data[3]);
+
+ tg->attrib[attr].emit( data, dst );
+ }
+
+ vert += tg->translate.key.output_stride;
+ }
+}
+
+
+
+static void PIPE_CDECL generic_run( struct translate *translate,
+ unsigned start,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned nr_attrs = tg->nr_attrib;
+ unsigned attr;
+ unsigned i;
+
+ /* loop over vertex attributes (vertex shader inputs)
+ */
+ for (i = 0; i < count; i++) {
+ unsigned elt = start + i;
+
+ for (attr = 0; attr < nr_attrs; attr++) {
+ float data[4];
+
+ char *dst = (vert +
+ tg->attrib[attr].output_offset);
+
+ if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+ const char *src;
+
+ if (tg->attrib[attr].instance_divisor) {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride *
+ (instance_id / tg->attrib[attr].instance_divisor);
+ } else {
+ src = tg->attrib[attr].input_ptr +
+ tg->attrib[attr].input_stride * elt;
+ }
+
+ tg->attrib[attr].fetch( src, data );
+ } else {
+ data[0] = (float)instance_id;
+ }
+
+ if (0) debug_printf("vert %d attr %d: %f %f %f %f\n",
+ i, attr, data[0], data[1], data[2], data[3]);
+
+ tg->attrib[attr].emit( data, dst );
+ }
+
+ vert += tg->translate.key.output_stride;
+ }
+}
+
+
+
+static void generic_set_buffer( struct translate *translate,
+ unsigned buf,
+ const void *ptr,
+ unsigned stride )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ unsigned i;
+
+ for (i = 0; i < tg->nr_attrib; i++) {
+ if (tg->attrib[i].buffer == buf) {
+ tg->attrib[i].input_ptr = ((char *)ptr +
+ tg->attrib[i].input_offset);
+ tg->attrib[i].input_stride = stride;
+ }
+ }
+}
+
+
+static void generic_release( struct translate *translate )
+{
+ /* Refcount?
+ */
+ FREE(translate);
+}
+
+struct translate *translate_generic_create( const struct translate_key *key )
+{
+ struct translate_generic *tg = CALLOC_STRUCT(translate_generic);
+ unsigned i;
+
+ if (tg == NULL)
+ return NULL;
+
+ tg->translate.key = *key;
+ tg->translate.release = generic_release;
+ tg->translate.set_buffer = generic_set_buffer;
+ tg->translate.run_elts = generic_run_elts;
+ tg->translate.run = generic_run;
+
+ for (i = 0; i < key->nr_elements; i++) {
+ tg->attrib[i].type = key->element[i].type;
+
+ tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format);
+ tg->attrib[i].buffer = key->element[i].input_buffer;
+ tg->attrib[i].input_offset = key->element[i].input_offset;
+ tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
+
+ tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
+ tg->attrib[i].output_offset = key->element[i].output_offset;
+
+ }
+
+ tg->nr_attrib = key->nr_elements;
+
+
+ return &tg->translate;
+}
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 1e6e01af9e..7e24e5fd6f 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -46,6 +46,10 @@
#include "tgsi/tgsi_scan.h"
+#ifdef DRAW_LLVM
+#include <llvm-c/ExecutionEngine.h>
+#endif
+
struct pipe_context;
struct draw_vertex_shader;
@@ -237,9 +241,16 @@ struct draw_context
unsigned instance_id;
+#ifdef DRAW_LLVM
+ LLVMExecutionEngineRef engine;
+#endif
void *driver_private;
};
+/*******************************************************************************
+ * Draw common initialization code
+ */
+boolean draw_init(struct draw_context *draw);
/*******************************************************************************
* Vertex shader code:
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 341353f628..9b1e319551 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -142,7 +142,9 @@ boolean draw_pt_init( struct draw_context *draw )
if (!draw->pt.middle.fetch_shade_emit)
return FALSE;
- draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
+ draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit_llvm( draw );
+ if (!draw->pt.middle.general)
+ draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
if (!draw->pt.middle.general)
return FALSE;
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index d5e0d92a60..c2797a759e 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -147,6 +147,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);
+struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
new file mode 100644
index 0000000000..ae5956333e
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -0,0 +1,432 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMWare, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+#include "draw/draw_gs.h"
+#include "draw/draw_llvm.h"
+
+#include "translate/translate.h"
+
+
+struct llvm_middle_end {
+ struct draw_pt_middle_end base;
+ struct draw_context *draw;
+
+ struct pt_emit *emit;
+ struct pt_fetch *fetch;
+ struct pt_post_vs *post_vs;
+
+
+ unsigned vertex_data_offset;
+ unsigned vertex_size;
+ unsigned prim;
+ unsigned opt;
+
+ struct draw_llvm *llvm;
+};
+
+
+static void
+llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
+ unsigned prim,
+ unsigned opt,
+ unsigned *max_vertices )
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ struct draw_geometry_shader *gs = draw->gs.geometry_shader;
+ unsigned i;
+ unsigned instance_id_index = ~0;
+
+ /* Add one to num_outputs because the pipeline occasionally tags on
+ * an additional texcoord, eg for AA lines.
+ */
+ unsigned nr = MAX2( vs->info.num_inputs,
+ vs->info.num_outputs + 1 );
+
+ /* Scan for instanceID system value.
+ */
+ for (i = 0; i < vs->info.num_inputs; i++) {
+ if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
+ instance_id_index = i;
+ break;
+ }
+ }
+
+ fpme->prim = prim;
+ fpme->opt = opt;
+
+ /* Always leave room for the vertex header whether we need it or
+ * not. It's hard to get rid of it in particular because of the
+ * viewport code in draw_pt_post_vs.c.
+ */
+ fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
+
+
+
+ draw_pt_fetch_prepare( fpme->fetch,
+ vs->info.num_inputs,
+ fpme->vertex_size,
+ instance_id_index );
+ if (opt & PT_SHADE) {
+ vs->prepare(vs, draw);
+ draw_geometry_shader_prepare(gs, draw);
+ }
+
+
+ /* XXX: it's not really gl rasterization rules we care about here,
+ * but gl vs dx9 clip spaces.
+ */
+ draw_pt_post_vs_prepare( fpme->post_vs,
+ (boolean)draw->bypass_clipping,
+ (boolean)(draw->identity_viewport ||
+ draw->rasterizer->bypass_vs_clip_and_viewport),
+ (boolean)draw->rasterizer->gl_rasterization_rules,
+ (draw->vs.edgeflag_output ? true : false) );
+
+ if (!(opt & PT_PIPELINE)) {
+ draw_pt_emit_prepare( fpme->emit,
+ prim,
+ max_vertices );
+
+ *max_vertices = MAX2( *max_vertices,
+ DRAW_PIPE_MAX_VERTICES );
+ }
+ else {
+ *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ }
+
+ /* return even number */
+ *max_vertices = *max_vertices & ~1;
+
+ draw_llvm_prepare(fpme->llvm);
+}
+
+
+
+static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
+ struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align( fetch_count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts) {
+ /* Not much we can do here - just skip the rendering.
+ */
+ assert(0);
+ return;
+ }
+
+ /* Fetch into our vertex buffer
+ */
+ draw_pt_fetch_run( fpme->fetch,
+ fetch_elts,
+ fetch_count,
+ (char *)pipeline_verts );
+
+ /* Run the shader, note that this overwrites the data[] parts of
+ * the pipeline verts. If there is no shader, eg if
+ * bypass_vs_clip_and_viewport, then the inputs == outputs, and are
+ * already in the correct place.*/
+ if (opt & PT_SHADE)
+ {
+ vshader->run_linear(vshader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ draw->pt.user.vs_constants,
+ fetch_count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+ if (gshader)
+ draw_geometry_shader_run(gshader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ draw->pt.user.gs_constants,
+ fetch_count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+ }
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+ else {
+ draw_pt_emit( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ fetch_count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+
+
+ FREE(pipeline_verts);
+}
+
+
+static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count)
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align( count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts) {
+ /* Not much we can do here - just skip the rendering.
+ */
+ assert(0);
+ return;
+ }
+
+ fpme->llvm->jit_func( &fpme->llvm->jit_context,
+ pipeline_verts,
+ start,
+ count,
+ fpme->vertex_size );
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run_linear( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ count,
+ fpme->vertex_size);
+ }
+ else {
+ draw_pt_emit_linear( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ fpme->vertex_size,
+ count );
+ }
+
+ FREE(pipeline_verts);
+}
+
+
+
+static boolean
+llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
+ unsigned start,
+ unsigned count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *shader = draw->vs.vertex_shader;
+ struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align( count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts)
+ return FALSE;
+
+ /* Fetch into our vertex buffer
+ */
+ draw_pt_fetch_run_linear( fpme->fetch,
+ start,
+ count,
+ (char *)pipeline_verts );
+
+ /* Run the shader, note that this overwrites the data[] parts of
+ * the pipeline verts. If there is no shader, ie if
+ * bypass_vs_clip_and_viewport, then the inputs == outputs, and are
+ * already in the correct place.
+ */
+ if (opt & PT_SHADE)
+ {
+ shader->run_linear(shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ draw->pt.user.vs_constants,
+ count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+
+ if (geometry_shader)
+ draw_geometry_shader_run(geometry_shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ draw->pt.user.gs_constants,
+ count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+ }
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+ else {
+ draw_pt_emit( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+
+ FREE(pipeline_verts);
+ return TRUE;
+}
+
+
+
+static void llvm_middle_end_finish( struct draw_pt_middle_end *middle )
+{
+ /* nothing to do */
+}
+
+static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle )
+{
+ struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+
+ if (fpme->fetch)
+ draw_pt_fetch_destroy( fpme->fetch );
+
+ if (fpme->emit)
+ draw_pt_emit_destroy( fpme->emit );
+
+ if (fpme->post_vs)
+ draw_pt_post_vs_destroy( fpme->post_vs );
+
+ if (fpme->llvm)
+ draw_llvm_destroy( fpme->llvm );
+
+ FREE(middle);
+}
+
+
+struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw )
+{
+ struct llvm_middle_end *fpme = 0;
+
+ if (!draw->engine)
+ return NULL;
+
+ fpme = CALLOC_STRUCT( llvm_middle_end );
+ if (!fpme)
+ goto fail;
+
+ fpme->base.prepare = llvm_middle_end_prepare;
+ fpme->base.run = llvm_middle_end_run;
+ fpme->base.run_linear = llvm_middle_end_linear_run;
+ fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts;
+ fpme->base.finish = llvm_middle_end_finish;
+ fpme->base.destroy = llvm_middle_end_destroy;
+
+ fpme->draw = draw;
+
+ fpme->fetch = draw_pt_fetch_create( draw );
+ if (!fpme->fetch)
+ goto fail;
+
+ fpme->post_vs = draw_pt_post_vs_create( draw );
+ if (!fpme->post_vs)
+ goto fail;
+
+ fpme->emit = draw_pt_emit_create( draw );
+ if (!fpme->emit)
+ goto fail;
+
+ fpme->llvm = draw_llvm_create(draw);
+ if (!fpme->llvm)
+ goto fail;
+
+ return &fpme->base;
+
+ fail:
+ if (fpme)
+ llvm_middle_end_destroy( &fpme->base );
+
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 5f7a645f5d..0c483de407 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -40,7 +40,7 @@
#include "tgsi/tgsi_parse.h"
-#ifdef MESA_LLVM
+#ifdef DRAW_LLVM
struct draw_llvm_vertex_shader {
struct draw_vertex_shader base;
@@ -64,12 +64,8 @@ vs_llvm_run_linear( struct draw_vertex_shader *base,
unsigned input_stride,
unsigned output_stride )
{
- struct draw_llvm_vertex_shader *shader =
- (struct draw_llvm_vertex_shader *)base;
}
-
-
static void
vs_llvm_delete( struct draw_vertex_shader *base )
{
@@ -90,6 +86,7 @@ struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ)
{
+#if 0
struct draw_llvm_vertex_shader *vs;
vs = CALLOC_STRUCT( draw_llvm_vertex_shader );
@@ -113,6 +110,8 @@ draw_create_vs_llvm(struct draw_context *draw,
vs->machine = draw->vs.machine;
return &vs->base;
+#endif
+ return NULL;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 9120226de0..3edc62d0c6 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -40,6 +40,7 @@
#include "lp_context.h"
#include "lp_flush.h"
#include "lp_perf.h"
+#include "lp_screen.h"
#include "lp_state.h"
#include "lp_surface.h"
#include "lp_query.h"
@@ -105,6 +106,7 @@ struct pipe_context *
llvmpipe_create_context( struct pipe_screen *screen, void *priv )
{
struct llvmpipe_context *llvmpipe;
+ struct llvmpipe_screen *llvmscreen = llvmpipe_screen(screen);
llvmpipe = align_malloc(sizeof(struct llvmpipe_context), 16);
if (!llvmpipe)
@@ -174,8 +176,8 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
/*
* Create drawing context and plug our rendering stage into it.
*/
- llvmpipe->draw = draw_create();
- if (!llvmpipe->draw)
+ llvmpipe->draw = draw_create_with_llvm(llvmscreen->engine);
+ if (!llvmpipe->draw)
goto fail;
/* FIXME: devise alternative to draw_texture_samplers */