diff options
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/SConscript | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_context.c | 26 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_context.h | 10 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_gs.c | 12 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.c | 697 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.h | 186 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm_translate.c | 484 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_private.h | 11 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt.c | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt.h | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c | 464 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt_post_vs.c | 5 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_llvm.c | 9 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_flow.c | 29 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_flow.h | 7 |
15 files changed, 1926 insertions, 22 deletions
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index ae749acc87..ca5fab6742 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -213,6 +213,9 @@ if drawllvm: 'gallivm/lp_bld_swizzle.c', 'gallivm/lp_bld_tgsi_soa.c', 'gallivm/lp_bld_type.c', + 'draw/draw_llvm.c', + 'draw/draw_pt_fetch_shade_pipeline_llvm.c', + 'draw/draw_llvm_translate.c' ] gallium = env.ConvenienceLibrary( diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index bb0988543f..b6574a9fea 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -44,6 +44,18 @@ struct draw_context *draw_create( void ) if (draw == NULL) goto fail; + if (!draw_init(draw)) + goto fail; + + return draw; + +fail: + draw_destroy( draw ); + return NULL; +} + +boolean draw_init(struct draw_context *draw) +{ ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); @@ -57,22 +69,18 @@ struct draw_context *draw_create( void ) if (!draw_pipeline_init( draw )) - goto fail; + return FALSE; if (!draw_pt_init( draw )) - goto fail; + return FALSE; if (!draw_vs_init( draw )) - goto fail; + return FALSE; if (!draw_gs_init( draw )) - goto fail; + return FALSE; - return draw; - -fail: - draw_destroy( draw ); - return NULL; + return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index acd81b9712..d42e400318 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -40,6 +40,9 @@ #include "pipe/p_state.h" +#ifdef DRAW_LLVM +#include <llvm-c/ExecutionEngine.h> +#endif struct pipe_context; struct draw_context; @@ -197,6 +200,11 @@ boolean draw_need_pipeline(const struct draw_context *draw, const struct pipe_rasterizer_state *rasterizer, unsigned prim ); - +#ifdef DRAW_LLVM 
+/******************************************************************************* + * LLVM integration + */ +struct draw_context *draw_create_with_llvm(LLVMExecutionEngineRef engine); +#endif #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 7069aa6b18..131deed43e 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -342,10 +342,10 @@ void draw_geometry_shader_delete(struct draw_geometry_shader *shader) void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, struct draw_context *draw) { - if (shader->machine->Tokens != shader->state.tokens) { - tgsi_exec_machine_bind_shader(shader->machine, - shader->state.tokens, - draw->gs.num_samplers, - draw->gs.samplers); - } + if (shader && shader->machine->Tokens != shader->state.tokens) { + tgsi_exec_machine_bind_shader(shader->machine, + shader->state.tokens, + draw->gs.num_samplers, + draw->gs.samplers); + } } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c new file mode 100644 index 0000000000..26f756c6f8 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -0,0 +1,697 @@ +#include "draw_llvm.h" + +#include "draw_context.h" +#include "draw_vs.h" + +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_interp.h" +#include "gallivm/lp_bld_struct.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_tgsi.h" +#include "gallivm/lp_bld_printf.h" + +#include "util/u_cpu_detect.h" +#include "tgsi/tgsi_dump.h" + +#include <llvm-c/Transforms/Scalar.h> + +#define DEBUG_STORE 0 + + +/* generates the draw jit function */ +static void +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); + +static void +init_globals(struct draw_llvm *llvm) +{ + LLVMTypeRef texture_type; + + /* struct draw_jit_texture */ + { + LLVMTypeRef elem_types[4]; + + 
elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + + texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_DATA); + LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, + llvm->target, texture_type); + + LLVMAddTypeName(llvm->module, "texture", texture_type); + } + + + /* struct draw_jit_context */ + { + LLVMTypeRef elem_types[3]; + LLVMTypeRef context_type; + + elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ + elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */ + elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + + context_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, + llvm->target, context_type, 0); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, + llvm->target, context_type, 1); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, + llvm->target, context_type, + DRAW_JIT_CONTEXT_TEXTURES_INDEX); + LP_CHECK_STRUCT_SIZE(struct draw_jit_context, + llvm->target, context_type); + + LLVMAddTypeName(llvm->module, "draw_jit_context", context_type); + + llvm->context_ptr_type = LLVMPointerType(context_type, 0); + } + { + LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0); + llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0); + } +} + +static 
LLVMTypeRef +create_vertex_header(struct draw_llvm *llvm, int data_elems) +{ + /* struct vertex_header */ + LLVMTypeRef elem_types[3]; + LLVMTypeRef vertex_header; + char struct_name[24]; + + snprintf(struct_name, 23, "vertex_header%d", data_elems); + + elem_types[0] = LLVMIntType(32); + elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); + elem_types[2] = LLVMArrayType(elem_types[1], data_elems); + + vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); + + /* these are bit-fields and we can't take address of them + LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_CLIPMASK); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_EDGEFLAG); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_PAD); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_VERTEX_ID); + */ + LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_CLIP); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_DATA); + + LLVMAddTypeName(llvm->module, struct_name, vertex_header); + + return LLVMPointerType(vertex_header, 0); +} + +struct draw_llvm * +draw_llvm_create(struct draw_context *draw) +{ + struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm ); + + util_cpu_detect(); + + llvm->draw = draw; + llvm->engine = draw->engine; + + debug_assert(llvm->engine); + + llvm->module = LLVMModuleCreateWithName("draw_llvm"); + llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module); + + LLVMAddModuleProvider(llvm->engine, llvm->provider); + + llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine); + + llvm->pass = LLVMCreateFunctionPassManager(llvm->provider); + LLVMAddTargetData(llvm->target, llvm->pass); + /* These are the passes currently listed in 
llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + /* TODO: Add more passes */ + LLVMAddConstantPropagationPass(llvm->pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(llvm->pass); + } + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); + LLVMAddGVNPass(llvm->pass); + LLVMAddCFGSimplificationPass(llvm->pass); + + init_globals(llvm); + + +#if 1 + LLVMDumpModule(llvm->module); +#endif + + return llvm; +} + +void +draw_llvm_destroy(struct draw_llvm *llvm) +{ + free(llvm); +} + +struct draw_llvm_variant * +draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) +{ + struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant)); + + draw_llvm_make_variant_key(llvm, &variant->key); + + llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); + + draw_llvm_generate(llvm, variant); + + return variant; +} + + +struct draw_context *draw_create_with_llvm(LLVMExecutionEngineRef engine) +{ + struct draw_context *draw = CALLOC_STRUCT( draw_context ); + if (draw == NULL) + goto fail; + draw->engine = engine; + + if (!draw_init(draw)) + goto fail; + + return draw; + +fail: + draw_destroy( draw ); + return NULL; +} + +static void +generate_vs(struct draw_llvm *llvm, + LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS], + const LLVMValueRef (*inputs)[NUM_CHANNELS], + LLVMValueRef context_ptr) +{ + const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; + struct lp_type vs_type; + LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); + + memset(&vs_type, 0, sizeof vs_type); + vs_type.floating = TRUE; /* floating point values */ + vs_type.sign = TRUE; /* values are signed */ + vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + vs_type.width = 32; /* 32-bit 
float */ + vs_type.length = 4; /* 4 elements per vector */ +#if 0 + num_vs = 4; /* number of vertices per block */ +#endif + + tgsi_dump(tokens, 0); + lp_build_tgsi_soa(builder, + tokens, + vs_type, + NULL /*struct lp_build_mask_context *mask*/, + consts_ptr, + NULL /*pos*/, + inputs, + outputs, + NULL/*sampler*/); +} + +#if DEBUG_STORE +static void print_vectorf(LLVMBuilderRef builder, + LLVMValueRef vec) +{ + LLVMValueRef val[4]; + val[0] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + val[1] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + val[2] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + val[3] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", + val[0], val[1], val[2], val[3]); +} +#endif + +static void +generate_fetch(LLVMBuilderRef builder, + LLVMValueRef vbuffers_ptr, + LLVMValueRef *res, + struct pipe_vertex_element *velem, + struct pipe_vertex_buffer *vbuf, + LLVMValueRef index) +{ + LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); + LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, + &indices, 1, ""); + LLVMValueRef stride = LLVMBuildMul(builder, + LLVMConstInt(LLVMInt32Type(), vbuf->stride, 0), + index, ""); + + vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); + + stride = LLVMBuildAdd(builder, stride, + LLVMConstInt(LLVMInt32Type(), vbuf->buffer_offset, 0), + ""); + stride = LLVMBuildAdd(builder, stride, + LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0), + ""); + + /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/ + vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); + + *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format); +} + +static LLVMValueRef +aos_to_soa(LLVMBuilderRef builder, + LLVMValueRef val0, + 
LLVMValueRef val1, + LLVMValueRef val2, + LLVMValueRef val3, + LLVMValueRef channel) +{ + LLVMValueRef ex, res; + + ex = LLVMBuildExtractElement(builder, val0, + channel, ""); + res = LLVMBuildInsertElement(builder, + LLVMConstNull(LLVMTypeOf(val0)), + ex, + LLVMConstInt(LLVMInt32Type(), 0, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val1, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 1, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val2, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 2, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val3, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 3, 0), + ""); + + return res; +} + +static void +soa_to_aos(LLVMBuilderRef builder, + LLVMValueRef soa[NUM_CHANNELS], + LLVMValueRef aos[NUM_CHANNELS]) +{ + LLVMValueRef comp; + int i = 0; + + debug_assert(NUM_CHANNELS == 4); + + aos[0] = LLVMConstNull(LLVMTypeOf(soa[0])); + aos[1] = aos[2] = aos[3] = aos[0]; + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, ""); + + } +} + +static void +convert_to_soa(LLVMBuilderRef builder, + LLVMValueRef (*aos)[NUM_CHANNELS], + LLVMValueRef (*soa)[NUM_CHANNELS], + int 
num_attribs) +{ + int i; + + debug_assert(NUM_CHANNELS == 4); + + for (i = 0; i < num_attribs; ++i) { + LLVMValueRef val0 = aos[i][0]; + LLVMValueRef val1 = aos[i][1]; + LLVMValueRef val2 = aos[i][2]; + LLVMValueRef val3 = aos[i][3]; + + soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 0, 0)); + soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 1, 0)); + soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 2, 0)); + soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 3, 0)); + } +} + +static void +store_aos(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef index, + LLVMValueRef value) +{ + LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); + LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); + LLVMValueRef indices[3]; + + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = index; + indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); + + /* undefined vertex */ + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), + 0xffff, 0), id_ptr); + +#if DEBUG_STORE + lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); +#endif +#if 0 + /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); + print_vectorf(builder, value);*/ + data_ptr = LLVMBuildBitCast(builder, data_ptr, + LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0), + "datavec"); + data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, ""); + + LLVMBuildStore(builder, value, data_ptr); +#else + { + LLVMValueRef x, y, z, w; + LLVMValueRef idx0, idx1, idx2, idx3; + LLVMValueRef gep0, gep1, gep2, gep3; + data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); + + idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + idx3 = LLVMConstInt(LLVMInt32Type(), 
3, 0); + + x = LLVMBuildExtractElement(builder, value, + idx0, ""); + y = LLVMBuildExtractElement(builder, value, + idx1, ""); + z = LLVMBuildExtractElement(builder, value, + idx2, ""); + w = LLVMBuildExtractElement(builder, value, + idx3, ""); + + gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, ""); + gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, ""); + gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, ""); + gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, ""); + + /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n", + x, gep0, y, gep1, z, gep2, w, gep3);*/ + LLVMBuildStore(builder, x, gep0); + LLVMBuildStore(builder, y, gep1); + LLVMBuildStore(builder, z, gep2); + LLVMBuildStore(builder, w, gep3); + } +#endif +} + +static void +store_aos_array(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef aos[NUM_CHANNELS], + int attrib, + int num_outputs) +{ + LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); + LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; + + debug_assert(NUM_CHANNELS == 4); + + io0_ptr = LLVMBuildGEP(builder, io_ptr, + &ind0, 1, ""); + io1_ptr = LLVMBuildGEP(builder, io_ptr, + &ind1, 1, ""); + io2_ptr = LLVMBuildGEP(builder, io_ptr, + &ind2, 1, ""); + io3_ptr = LLVMBuildGEP(builder, io_ptr, + &ind3, 1, ""); + +#if DEBUG_STORE + lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n", + io_ptr, ind0, ind1, ind2, ind3); +#endif + + store_aos(builder, io0_ptr, attr_index, aos[0]); + store_aos(builder, io1_ptr, attr_index, aos[1]); + store_aos(builder, io2_ptr, attr_index, aos[2]); + store_aos(builder, io3_ptr, attr_index, aos[3]); +} + +static void +convert_to_aos(LLVMBuilderRef builder, + LLVMValueRef io, + LLVMValueRef (*outputs)[NUM_CHANNELS], + 
int num_outputs, + int max_vertices) +{ + unsigned chan, attrib; + +#if DEBUG_STORE + lp_build_printf(builder, " # storing begin\n"); +#endif + for (attrib = 0; attrib < num_outputs; ++attrib) { + LLVMValueRef soa[4]; + LLVMValueRef aos[4]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); + /*lp_build_printf(builder, "output %d : %d ", + LLVMConstInt(LLVMInt32Type(), attrib, 0), + LLVMConstInt(LLVMInt32Type(), chan, 0)); + print_vectorf(builder, out);*/ + soa[chan] = out; + } else + soa[chan] = 0; + } + soa_to_aos(builder, soa, aos); + store_aos_array(builder, + io, + aos, + attrib, + num_outputs); + } +#if DEBUG_STORE + lp_build_printf(builder, " # storing end\n"); +#endif +} + +static void +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) +{ + LLVMTypeRef arg_types[6]; + LLVMTypeRef func_type; + LLVMValueRef context_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef start, end, count, stride, step, io_itr; + LLVMValueRef io_ptr, vbuffers_ptr; + struct draw_context *draw = llvm->draw; + unsigned i, j; + struct lp_build_context bld; + struct lp_build_loop_state lp_loop; + struct lp_type vs_type = lp_type_float_vec(32); + const int max_vertices = 4; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; + + arg_types[0] = llvm->context_ptr_type; /* context */ + arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ + arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ + arg_types[3] = LLVMInt32Type(); /* start */ + arg_types[4] = LLVMInt32Type(); /* count */ + arg_types[5] = LLVMInt32Type(); /* stride */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); + LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + for(i = 
0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(variant->function, 0); + io_ptr = LLVMGetParam(variant->function, 1); + vbuffers_ptr = LLVMGetParam(variant->function, 2); + start = LLVMGetParam(variant->function, 3); + count = LLVMGetParam(variant->function, 4); + stride = LLVMGetParam(variant->function, 5); + + lp_build_name(context_ptr, "context"); + lp_build_name(io_ptr, "io"); + lp_build_name(vbuffers_ptr, "vbuffers"); + lp_build_name(start, "start"); + lp_build_name(count, "count"); + lp_build_name(stride, "stride"); + + /* + * Function body + */ + + block = LLVMAppendBasicBlock(variant->function, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, builder, vs_type); + + end = lp_build_add(&bld, start, count); + + step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + +#if DEBUG_STORE + lp_build_printf(builder, "start = %d, end = %d, step = %d\n", + start, end, step); +#endif + lp_build_loop_begin(builder, start, &lp_loop); + { + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef io; + const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; + + io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); + io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); +#if DEBUG_STORE + lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", + io_itr, io, lp_loop.counter); +#endif + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef true_index = LLVMBuildAdd( + builder, + lp_loop.counter, + LLVMConstInt(LLVMInt32Type(), i, 0), ""); + for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { + struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; + struct pipe_vertex_buffer *vbuf = &draw->pt.vertex_buffer[ + velem->vertex_buffer_index]; + generate_fetch(builder, 
vbuffers_ptr, + &aos_attribs[j][i], velem, vbuf, true_index); + } + } + convert_to_soa(builder, aos_attribs, inputs, + draw->pt.nr_vertex_elements); + + ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; + generate_vs(llvm, + builder, + outputs, + ptr_aos, + context_ptr); + + convert_to_aos(builder, io, outputs, + draw->vs.vertex_shader->info.num_outputs, + max_vertices); + } + lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); + + LLVMBuildRetVoid(builder); + + LLVMDisposeBuilder(builder); + + /* + * Translate the LLVM IR into machine code. + */ + +#ifdef DEBUG + if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { + LLVMDumpValue(variant->function); + assert(0); + } +#endif + + LLVMRunFunctionPassManager(llvm->pass, variant->function); + + if (1) { + LLVMDumpValue(variant->function); + debug_printf("\n"); + } + variant->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); + + if (1) + lp_disassemble(variant->jit_func); +} + +void +draw_llvm_make_variant_key(struct draw_llvm *llvm, + struct draw_llvm_variant_key *key) +{ + key->nr_vertex_buffers = llvm->draw->pt.nr_vertex_buffers; + key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; + + memcpy(key->vertex_buffer, + llvm->draw->pt.vertex_buffer, + sizeof(struct pipe_vertex_buffer) * PIPE_MAX_ATTRIBS); + + memcpy(key->vertex_element, + llvm->draw->pt.vertex_element, + sizeof(struct pipe_vertex_element) * PIPE_MAX_ATTRIBS); + + memcpy(&key->vs, + &llvm->draw->vs.vertex_shader->state, + sizeof(struct pipe_shader_state)); +} diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h new file mode 100644 index 0000000000..774eb16d90 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -0,0 +1,186 @@ +#ifndef DRAW_LLVM_H +#define DRAW_LLVM_H + +#include "draw/draw_private.h" + +#include "pipe/p_context.h" + +#include <llvm-c/Core.h> +#include <llvm-c/Analysis.h> +#include <llvm-c/Target.h> 
+#include <llvm-c/ExecutionEngine.h> + +struct draw_jit_texture +{ + uint32_t width; + uint32_t height; + uint32_t stride; + const void *data; +}; + +enum { + DRAW_JIT_TEXTURE_WIDTH = 0, + DRAW_JIT_TEXTURE_HEIGHT, + DRAW_JIT_TEXTURE_STRIDE, + DRAW_JIT_TEXTURE_DATA +}; + +enum { + DRAW_JIT_VERTEX_VERTEX_ID = 0, + DRAW_JIT_VERTEX_CLIP, + DRAW_JIT_VERTEX_DATA +}; + +/** + * This structure is passed directly to the generated vertex shader. + * + * It contains the derived state. + * + * Changes here must be reflected in the draw_jit_context_* macros. + * Changes to the ordering should be avoided. + * + * Only use types with a clear size and padding here, in particular prefer the + * stdint.h types to the basic integer types. + */ +struct draw_jit_context +{ + const float *vs_constants; + const float *gs_constants; + + + struct draw_jit_texture textures[PIPE_MAX_SAMPLERS]; +}; + + +#define draw_jit_context_vs_constants(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 0, "vs_constants") + +#define draw_jit_context_gs_constants(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 1, "gs_constants") + +#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2 + +#define draw_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures") + + + +#define draw_jit_header_id(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, 0, "id") + +#define draw_jit_header_clip(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 1, "clip") + +#define draw_jit_header_data(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, 2, "data") + +/* we are constructing a function of the form: + +struct vertex_header { + uint32 vertex_id; + + float clip[4]; + float data[][4]; +}; + +struct draw_jit_context +{ + const float *vs_constants; + const float *gs_constants; + + struct draw_jit_texture textures[PIPE_MAX_SAMPLERS]; +}; + +void +draw_shader(struct draw_jit_context *context, + struct vertex_header *io, + const void 
*vbuffers[PIPE_MAX_ATTRIBS], + unsigned start, + unsigned count, + unsigned stride) +{ + // do a fetch and run the vertex shader + for (int i = 0; i < count; ++i) { + struct vertex_header *header = &io[i]; + header->vertex_id = 0xffff; + // follows code-generated fetch/translate section + // for each vertex_element ... + codegened_translate(header->data[num_element], + context->vertex_elements[num_element], + context->vertex_buffers, + context->vbuffers); + + codegened_vertex_shader(header->data, context->vs_constants); + } + + for (int i = 0; i < count; i += context->primitive_size) { + struct vertex_header *prim[MAX_PRIMITIVE_SIZE]; + for (int j = 0; j < context->primitive_size; ++j) { + prim[j] = &io[i + j]; + } + codegened_geometry_shader(prim, context->gs_constants); + } +} +*/ + +typedef void +(*draw_jit_vert_func)(struct draw_jit_context *context, + struct vertex_header *io, + const char *vbuffers[PIPE_MAX_ATTRIBS], + unsigned start, + unsigned count, + unsigned stride); + +struct draw_llvm { + struct draw_context *draw; + + struct draw_jit_context jit_context; + + LLVMModuleRef module; + LLVMExecutionEngineRef engine; + LLVMModuleProviderRef provider; + LLVMTargetDataRef target; + LLVMPassManagerRef pass; + + LLVMTypeRef context_ptr_type; + LLVMTypeRef vertex_header_ptr_type; + LLVMTypeRef buffer_ptr_type; +}; + + +struct draw_llvm_variant_key +{ + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_buffers; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + struct pipe_shader_state vs; +}; + +struct draw_llvm_variant +{ + struct draw_llvm_variant_key key; + LLVMValueRef function; + draw_jit_vert_func jit_func; + + struct draw_llvm_variant *next; +}; + +struct draw_llvm * +draw_llvm_create(struct draw_context *draw); + +void +draw_llvm_destroy(struct draw_llvm *llvm); + +struct draw_llvm_variant * +draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs); + +void 
+draw_llvm_make_variant_key(struct draw_llvm *llvm, + struct draw_llvm_variant_key *key); + +LLVMValueRef +draw_llvm_translate_from(LLVMBuilderRef builder, + LLVMValueRef vbuffer, + enum pipe_format from_format); +#endif diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c new file mode 100644 index 0000000000..b29ebdec3a --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -0,0 +1,484 @@ +#include "draw_private.h" +#include "draw_context.h" + +#include "draw_llvm.h" + +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_interp.h" +#include "gallivm/lp_bld_struct.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_tgsi.h" +#include "gallivm/lp_bld_printf.h" + +#include "util/u_memory.h" +#include "pipe/p_state.h" + + +#define DRAW_DBG 0 + +static LLVMValueRef +from_64_float(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMDoubleType(), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildFPTrunc(builder, l, LLVMFloatType(), ""); +} + +static LLVMValueRef +from_32_float(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMFloatType(), 0) , ""); + return LLVMBuildLoad(builder, bc, ""); +} + +static INLINE LLVMValueRef +from_8_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_16_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_32_uscaled(LLVMBuilderRef 
builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_8_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_16_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_32_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + + +static INLINE LLVMValueRef +from_8_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 255.), ""); +} + +static INLINE LLVMValueRef +from_16_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 65535.), ""); +} + +static INLINE LLVMValueRef +from_32_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + 
LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 4294967295.), ""); +} + +static INLINE LLVMValueRef +from_8_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 127.0), ""); +} + +static INLINE LLVMValueRef +from_16_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 32767.0f), ""); +} + +static INLINE LLVMValueRef +from_32_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 2147483647.0), ""); +} + +static INLINE LLVMValueRef +from_32_fixed(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 65536.0), ""); +} + +static LLVMValueRef +to_64_float(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPExt(builder, l, LLVMDoubleType(), ""); +} + +static LLVMValueRef +to_32_float(LLVMBuilderRef builder, LLVMValueRef fp) +{ + return 
LLVMBuildLoad(builder, fp, "");
+}
+
+/*
+ * to_<bits>_uscaled / to_<bits>_sscaled:
+ * Load the value fp points to (presumably a 32-bit float - confirm against
+ * the callers) and convert it to an unsigned/signed integer of the given
+ * width. fptoui/fptosi truncate toward zero.
+ */
+static INLINE LLVMValueRef
+to_8_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   return LLVMBuildFPToUI(builder, l, LLVMIntType(8), "");
+}
+
+static INLINE LLVMValueRef
+to_16_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   return LLVMBuildFPToUI(builder, l, LLVMIntType(16), "");
+}
+
+static INLINE LLVMValueRef
+to_32_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   return LLVMBuildFPToUI(builder, l, LLVMIntType(32), "");
+}
+
+static INLINE LLVMValueRef
+to_8_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   return LLVMBuildFPToSI(builder, l, LLVMIntType(8), "");
+}
+
+static INLINE LLVMValueRef
+to_16_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   return LLVMBuildFPToSI(builder, l, LLVMIntType(16), "");
+}
+
+static INLINE LLVMValueRef
+to_32_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   return LLVMBuildFPToSI(builder, l, LLVMIntType(32), "");
+}
+
+/*
+ * to_<bits>_unorm / to_<bits>_snorm / to_32_fixed:
+ * Inverse of the matching from_* helpers: scale the loaded float by the
+ * format's maximum value (or by 65536 for 16.16 fixed point) and only then
+ * convert to an integer of the format's width.
+ *
+ * The multiply must come before the conversion: fmul needs floating-point
+ * operands, and converting first would collapse every normalized input to
+ * 0 or 1 before it is scaled. The result is truncated toward zero.
+ * NOTE(review): inputs outside the normalized range are not clamped here;
+ * confirm whether callers guarantee the range.
+ */
+static INLINE LLVMValueRef
+to_8_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   LLVMValueRef scaled = LLVMBuildFMul(builder, l,
+                                       LLVMConstReal(LLVMFloatType(), 255.), "");
+   return LLVMBuildFPToUI(builder, scaled, LLVMIntType(8), "");
+}
+
+static INLINE LLVMValueRef
+to_16_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   LLVMValueRef scaled = LLVMBuildFMul(builder, l,
+                                       LLVMConstReal(LLVMFloatType(), 65535.), "");
+   /* 16-bit result, matching the format width and to_16_uscaled */
+   return LLVMBuildFPToUI(builder, scaled, LLVMIntType(16), "");
+}
+
+static INLINE LLVMValueRef
+to_32_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   /* 4294967295 is not representable in a 32-bit float (it rounds up to
+    * 2^32, which would overflow the conversion for an input of 1.0), so
+    * do the scaling in double precision. */
+   LLVMValueRef wide = LLVMBuildFPExt(builder, l, LLVMDoubleType(), "");
+   LLVMValueRef scaled = LLVMBuildFMul(builder, wide,
+                                       LLVMConstReal(LLVMDoubleType(), 4294967295.), "");
+
+   return LLVMBuildFPToUI(builder, scaled, LLVMIntType(32), "");
+}
+
+static INLINE LLVMValueRef
+to_8_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+   LLVMValueRef scaled = LLVMBuildFMul(builder, l,
+                                       LLVMConstReal(LLVMFloatType(), 127.0), "");
+   return LLVMBuildFPToSI(builder, scaled, LLVMIntType(8), "");
+}
+
+static INLINE LLVMValueRef
+to_16_snorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   LLVMValueRef scaled = LLVMBuildFMul(builder, l,
+                                       LLVMConstReal(LLVMFloatType(), 32767.0f), "");
+   return LLVMBuildFPToSI(builder, scaled, LLVMIntType(16), "");
+}
+
+static INLINE LLVMValueRef
+to_32_snorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   /* 2147483647 rounds up to 2^31 in a 32-bit float; scale in double so
+    * an input of 1.0 stays inside the signed 32-bit range. */
+   LLVMValueRef wide = LLVMBuildFPExt(builder, l, LLVMDoubleType(), "");
+   LLVMValueRef scaled = LLVMBuildFMul(builder, wide,
+                                       LLVMConstReal(LLVMDoubleType(), 2147483647.0), "");
+
+   return LLVMBuildFPToSI(builder, scaled, LLVMIntType(32), "");
+}
+
+static INLINE LLVMValueRef
+to_32_fixed(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   /* 16.16 fixed point: one unit is 1/65536 */
+   LLVMValueRef scaled = LLVMBuildFMul(builder, l,
+                                       LLVMConstReal(LLVMFloatType(), 65536.0), "");
+
+   return LLVMBuildFPToSI(builder, scaled, LLVMIntType(32), "");
+}
+
+/* Signatures shared by every from_* / to_* converter above. */
+typedef LLVMValueRef (*from_func)(LLVMBuilderRef, LLVMValueRef);
+typedef LLVMValueRef (*to_func)(LLVMBuilderRef, LLVMValueRef);
+
+/* The translates[] table below is statically initialized, and a static
+ * initializer cannot call the LLVM type constructors. The table therefore
+ * stores this enum, and ll_type_to_llvm() maps it to an LLVMTypeRef on
+ * demand. */
+enum ll_type {
+   LL_Double,
+   LL_Float,
+   LL_Int32,
+   LL_Int16,
+   LL_Int8
+};
+
+/* Map an ll_type tag to the corresponding LLVM type. */
+static INLINE LLVMTypeRef
+ll_type_to_llvm(enum ll_type type)
+{
+   switch (type) {
+   case LL_Double:
+      return LLVMDoubleType();
+   case LL_Float:
+      return LLVMFloatType();
+   case LL_Int32:
+      return LLVMInt32Type();
+   case LL_Int16:
+      return LLVMIntType(16);
+   case
LL_Int8: + return LLVMIntType(8); + } + return LLVMIntType(8); +} + +static INLINE int +ll_type_size(enum ll_type type) +{ + switch (type) { + case LL_Double: + return 8; + case LL_Float: + return 4; + case LL_Int32: + return 4; + case LL_Int16: + return 2; + case LL_Int8: + return 1; + } + return 1; +} + +struct draw_llvm_translate { + int format; + from_func from; + to_func to; + enum ll_type type; + int num_components; +} translates[] = +{ + {PIPE_FORMAT_R64_FLOAT, from_64_float, to_64_float, LL_Double, 1}, + {PIPE_FORMAT_R64G64_FLOAT, from_64_float, to_64_float, LL_Double, 2}, + {PIPE_FORMAT_R64G64B64_FLOAT, from_64_float, to_64_float, LL_Double, 3}, + {PIPE_FORMAT_R64G64B64A64_FLOAT, from_64_float, to_64_float, LL_Double, 4}, + {PIPE_FORMAT_R32_FLOAT, from_32_float, to_32_float, LL_Float, 1}, + {PIPE_FORMAT_R32G32_FLOAT, from_32_float, to_32_float, LL_Float, 2}, + {PIPE_FORMAT_R32G32B32_FLOAT, from_32_float, to_32_float, LL_Float, 3}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, from_32_float, to_32_float, LL_Float, 4}, + + {PIPE_FORMAT_R32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 4}, + + {PIPE_FORMAT_R32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 4}, + + {PIPE_FORMAT_R32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 4}, + + {PIPE_FORMAT_R32_SSCALED, from_32_sscaled, to_32_sscaled, 
LL_Int32, 1}, + {PIPE_FORMAT_R32G32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 4}, + + {PIPE_FORMAT_R16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 4}, + + {PIPE_FORMAT_R16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 4}, + + {PIPE_FORMAT_R16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 4}, + + {PIPE_FORMAT_R16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 4}, + + {PIPE_FORMAT_R8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 4}, + + {PIPE_FORMAT_R8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_USCALED, 
from_8_uscaled, to_8_uscaled, LL_Int8, 4}, + + {PIPE_FORMAT_R8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 4}, + + {PIPE_FORMAT_R8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 4}, + + {PIPE_FORMAT_R32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 4}, + + {PIPE_FORMAT_A8R8G8B8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 4}, + {PIPE_FORMAT_B8G8R8A8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 4}, +}; + + +static LLVMValueRef +fetch(LLVMBuilderRef builder, + LLVMValueRef ptr, int val_size, int nr_components, + from_func func) +{ + int i; + int offset = 0; + LLVMValueRef res = LLVMConstNull( + LLVMVectorType(LLVMFloatType(), 4)); + LLVMValueRef defaults[4]; + + defaults[0] = LLVMConstReal(LLVMFloatType(), 0); + defaults[1] = LLVMConstReal(LLVMFloatType(), 0); + defaults[2] = LLVMConstReal(LLVMFloatType(), 0); + defaults[3] = LLVMConstReal(LLVMFloatType(), 1); + + for (i = 0; i < nr_components; ++i) { + LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0); + LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef src_tmp; + LLVMValueRef component; + + src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, "src_tmp"); + + /* convert src_tmp to float */ + component = func(builder, src_tmp); + + /* vec.comp = component */ + res = LLVMBuildInsertElement(builder, + res, + component, + dst_index, ""); + offset += 
val_size; + } + for (; i < 4; ++i) { + LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(builder, + res, + defaults[i], + dst_index, ""); + } + return res; +} + + +LLVMValueRef +draw_llvm_translate_from(LLVMBuilderRef builder, + LLVMValueRef vbuffer, + enum pipe_format from_format) +{ + int i; + for (i = 0; i < Elements(translates); ++i) { + if (translates[i].format == from_format) { + /*LLVMTypeRef type = ll_type_to_llvm(translates[i].type);*/ + return fetch(builder, + vbuffer, + ll_type_size(translates[i].type), + translates[i].num_components, + translates[i].from); + } + } + return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); +} diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 1e6e01af9e..7e24e5fd6f 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -46,6 +46,10 @@ #include "tgsi/tgsi_scan.h" +#ifdef DRAW_LLVM +#include <llvm-c/ExecutionEngine.h> +#endif + struct pipe_context; struct draw_vertex_shader; @@ -237,9 +241,16 @@ struct draw_context unsigned instance_id; +#ifdef DRAW_LLVM + LLVMExecutionEngineRef engine; +#endif void *driver_private; }; +/******************************************************************************* + * Draw common initialization code + */ +boolean draw_init(struct draw_context *draw); /******************************************************************************* * Vertex shader code: diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index a8cdc57ad9..43126c6c88 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -140,7 +140,9 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_shade_emit) return FALSE; - draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit_llvm( draw ); + if 
(!draw->pt.middle.general) + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); if (!draw->pt.middle.general) return FALSE; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index d5e0d92a60..c2797a759e 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -147,6 +147,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c new file mode 100644 index 0000000000..d2ed0eb3d1 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -0,0 +1,464 @@ +/************************************************************************** + * + * Copyright 2010 VMWare, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" +#include "draw/draw_gs.h" +#include "draw/draw_llvm.h" + +#include "translate/translate.h" + + +struct llvm_middle_end { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct pt_emit *emit; + struct pt_fetch *fetch; + struct pt_post_vs *post_vs; + + + unsigned vertex_data_offset; + unsigned vertex_size; + unsigned prim; + unsigned opt; + + struct draw_llvm *llvm; + struct draw_llvm_variant *variants; + struct draw_llvm_variant *current_variant; +}; + + +static void +llvm_middle_end_prepare( struct draw_pt_middle_end *middle, + unsigned prim, + unsigned opt, + unsigned *max_vertices ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vs = draw->vs.vertex_shader; + struct draw_geometry_shader *gs = draw->gs.geometry_shader; + struct draw_llvm_variant_key key; + struct draw_llvm_variant *variant = NULL; + unsigned i; + unsigned instance_id_index = ~0; + + /* Add one to num_outputs because the pipeline occasionally tags on + * an additional texcoord, eg for AA lines. + */ + unsigned nr = MAX2( vs->info.num_inputs, + vs->info.num_outputs + 1 ); + + /* Scan for instanceID system value. 
+ */ + for (i = 0; i < vs->info.num_inputs; i++) { + if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + instance_id_index = i; + break; + } + } + + fpme->prim = prim; + fpme->opt = opt; + + /* Always leave room for the vertex header whether we need it or + * not. It's hard to get rid of it in particular because of the + * viewport code in draw_pt_post_vs.c. + */ + fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); + + + + draw_pt_fetch_prepare( fpme->fetch, + vs->info.num_inputs, + fpme->vertex_size, + instance_id_index ); + if (opt & PT_SHADE) { + vs->prepare(vs, draw); + draw_geometry_shader_prepare(gs, draw); + } + + + /* XXX: it's not really gl rasterization rules we care about here, + * but gl vs dx9 clip spaces. + */ + draw_pt_post_vs_prepare( fpme->post_vs, + (boolean)draw->bypass_clipping, + (boolean)(draw->identity_viewport), + (boolean)draw->rasterizer->gl_rasterization_rules, + (draw->vs.edgeflag_output ? true : false) ); + + if (!(opt & PT_PIPELINE)) { + draw_pt_emit_prepare( fpme->emit, + prim, + max_vertices ); + + *max_vertices = MAX2( *max_vertices, + DRAW_PIPE_MAX_VERTICES ); + } + else { + *max_vertices = DRAW_PIPE_MAX_VERTICES; + } + + /* return even number */ + *max_vertices = *max_vertices & ~1; + + draw_llvm_make_variant_key(fpme->llvm, &key); + + variant = fpme->variants; + while(variant) { + if(memcmp(&variant->key, &key, sizeof key) == 0) + break; + + variant = variant->next; + } + + if (!variant) { + variant = draw_llvm_prepare(fpme->llvm, nr); + variant->next = fpme->variants; + fpme->variants = variant; + } + fpme->current_variant = variant; + + /*XXX we only support one constant buffer */ + fpme->llvm->jit_context.vs_constants = + draw->pt.user.vs_constants[0]; + fpme->llvm->jit_context.gs_constants = + draw->pt.user.gs_constants[0]; +} + + + +static void llvm_middle_end_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + 
unsigned draw_count ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vshader = draw->vs.vertex_shader; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align( fetch_count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run( fpme->fetch, + fetch_elts, + fetch_count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, eg if + * bypass_vs_clip_and_viewport, then the inputs == outputs, and are + * already in the correct place.*/ + if (opt & PT_SHADE) + { + vshader->run_linear(vshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.vs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + if (gshader) + draw_geometry_shader_run(gshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.gs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + fetch_count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? 
+ */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + + FREE(pipeline_verts); +} + + +static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + unsigned opt = fpme->opt; + unsigned alloc_count = align( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + +#if 0 + debug_printf("#### Pipeline = %p (data = %p)\n", + pipeline_verts, pipeline_verts->data); +#endif + fpme->current_variant->jit_func( &fpme->llvm->jit_context, + pipeline_verts, + (const char **)draw->pt.user.vbuffer, + start, + count, + fpme->vertex_size ); + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? 
+ */ + if (opt & PT_PIPELINE) { + draw_pipeline_run_linear( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size); + } + else { + draw_pt_emit_linear( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fpme->vertex_size, + count ); + } + + FREE(pipeline_verts); +} + + + +static boolean +llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *shader = draw->vs.vertex_shader; + struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) + return FALSE; + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run_linear( fpme->fetch, + start, + count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, ie if + * bypass_vs_clip_and_viewport, then the inputs == outputs, and are + * already in the correct place. + */ + if (opt & PT_SHADE) + { + shader->run_linear(shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.vs_constants, + count, + fpme->vertex_size, + fpme->vertex_size); + + if (geometry_shader) + draw_geometry_shader_run(geometry_shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.gs_constants, + count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? 
+ */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + FREE(pipeline_verts); + return TRUE; +} + + + +static void llvm_middle_end_finish( struct draw_pt_middle_end *middle ) +{ + /* nothing to do */ +} + +static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + + if (fpme->fetch) + draw_pt_fetch_destroy( fpme->fetch ); + + if (fpme->emit) + draw_pt_emit_destroy( fpme->emit ); + + if (fpme->post_vs) + draw_pt_post_vs_destroy( fpme->post_vs ); + + if (fpme->llvm) + draw_llvm_destroy( fpme->llvm ); + + FREE(middle); +} + + +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw ) +{ + struct llvm_middle_end *fpme = 0; + + if (!draw->engine) + return NULL; + + fpme = CALLOC_STRUCT( llvm_middle_end ); + if (!fpme) + goto fail; + + fpme->base.prepare = llvm_middle_end_prepare; + fpme->base.run = llvm_middle_end_run; + fpme->base.run_linear = llvm_middle_end_linear_run; + fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts; + fpme->base.finish = llvm_middle_end_finish; + fpme->base.destroy = llvm_middle_end_destroy; + + fpme->draw = draw; + + fpme->fetch = draw_pt_fetch_create( draw ); + if (!fpme->fetch) + goto fail; + + fpme->post_vs = draw_pt_post_vs_create( draw ); + if (!fpme->post_vs) + goto fail; + + fpme->emit = draw_pt_emit_create( draw ); + if (!fpme->emit) + goto fail; + + fpme->llvm = draw_llvm_create(draw); + if (!fpme->llvm) + goto fail; + + fpme->variants = NULL; + fpme->current_variant = NULL; + + return &fpme->base; + + fail: + if (fpme) + llvm_middle_end_destroy( &fpme->base ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c 
b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 9728d5c2bd..5525dfc748 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -108,6 +108,11 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, for (j = 0; j < count; j++) { float *position = out->data[pos]; +#if 0 + debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n", + j, out, position, position[0], position[1], position[2], position[3]); +#endif + out->clip[0] = position[0]; out->clip[1] = position[1]; out->clip[2] = position[2]; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 5f7a645f5d..0c483de407 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -40,7 +40,7 @@ #include "tgsi/tgsi_parse.h" -#ifdef MESA_LLVM +#ifdef DRAW_LLVM struct draw_llvm_vertex_shader { struct draw_vertex_shader base; @@ -64,12 +64,8 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, unsigned input_stride, unsigned output_stride ) { - struct draw_llvm_vertex_shader *shader = - (struct draw_llvm_vertex_shader *)base; } - - static void vs_llvm_delete( struct draw_vertex_shader *base ) { @@ -90,6 +86,7 @@ struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ) { +#if 0 struct draw_llvm_vertex_shader *vs; vs = CALLOC_STRUCT( draw_llvm_vertex_shader ); @@ -113,6 +110,8 @@ draw_create_vs_llvm(struct draw_context *draw, vs->machine = draw->vs.machine; return &vs->base; +#endif + return NULL; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index 106fc03e46..e60ab4f6ba 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -570,6 +570,35 @@ lp_build_loop_end(LLVMBuilderRef builder, LLVMPositionBuilderAtEnd(builder, after_block); } +void +lp_build_loop_end_cond(LLVMBuilderRef 
builder, + LLVMValueRef end, + LLVMValueRef step, + int llvm_cond, + struct lp_build_loop_state *state) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + LLVMValueRef next; + LLVMValueRef cond; + LLVMBasicBlockRef after_block; + + if (!step) + step = LLVMConstInt(LLVMTypeOf(end), 1, 0); + + next = LLVMBuildAdd(builder, state->counter, step, ""); + + cond = LLVMBuildICmp(builder, llvm_cond, next, end, ""); + + after_block = LLVMAppendBasicBlock(function, ""); + + LLVMBuildCondBr(builder, cond, after_block, state->block); + + LLVMAddIncoming(state->counter, &next, &block, 1); + + LLVMPositionBuilderAtEnd(builder, after_block); +} + /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index c2b50e1b60..745838570c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -124,6 +124,13 @@ lp_build_loop_end(LLVMBuilderRef builder, LLVMValueRef step, struct lp_build_loop_state *state); +void +lp_build_loop_end_cond(LLVMBuilderRef builder, + LLVMValueRef end, + LLVMValueRef step, + int cond, /* LLVM condition */ + struct lp_build_loop_state *state); + |