diff options
Diffstat (limited to 'src/gallium/auxiliary/draw')
30 files changed, 2178 insertions, 498 deletions
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 18435af848..02abddf149 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -38,6 +38,9 @@ #include "draw_vs.h" #include "draw_gs.h" +#if HAVE_LLVM +#include "gallivm/lp_bld_init.h" +#endif struct draw_context *draw_create( struct pipe_context *pipe ) { @@ -45,6 +48,32 @@ struct draw_context *draw_create( struct pipe_context *pipe ) if (draw == NULL) goto fail; +#if HAVE_LLVM + lp_build_init(); + assert(lp_build_engine); + draw->engine = lp_build_engine; +#endif + + if (!draw_init(draw)) + goto fail; + + draw->pipe = pipe; + + return draw; + +fail: + draw_destroy( draw ); + return NULL; +} + +boolean draw_init(struct draw_context *draw) +{ + /* + * Note that several functions compute the clipmask of the predefined + * formats with hardcoded formulas instead of using these. So modifications + * here must be reflected there too. + */ + ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); @@ -58,35 +87,31 @@ struct draw_context *draw_create( struct pipe_context *pipe ) if (!draw_pipeline_init( draw )) - goto fail; + return FALSE; if (!draw_pt_init( draw )) - goto fail; + return FALSE; if (!draw_vs_init( draw )) - goto fail; + return FALSE; if (!draw_gs_init( draw )) - goto fail; - - draw->pipe = pipe; - - return draw; + return FALSE; -fail: - draw_destroy( draw ); - return NULL; + return TRUE; } void draw_destroy( struct draw_context *draw ) { - struct pipe_context *pipe = draw->pipe; + struct pipe_context *pipe; int i, j; if (!draw) return; + pipe = draw->pipe; + /* free any rasterizer CSOs that we may have created. */ for (i = 0; i < 2; i++) { @@ -280,6 +305,17 @@ draw_wide_point_threshold(struct draw_context *draw, float threshold) /** + * Should the draw module handle point->quad conversion for drawing sprites? + */ +void +draw_wide_point_sprites(struct draw_context *draw, boolean draw_sprite) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->pipeline.wide_point_sprites = draw_sprite; +} + + +/** * Tells the draw module to draw lines with triangles if their width * is greater than this threshold. */ @@ -432,12 +468,14 @@ void draw_set_render( struct draw_context *draw, void draw_set_mapped_element_buffer_range( struct draw_context *draw, unsigned eltSize, + int eltBias, unsigned min_index, unsigned max_index, const void *elements ) { draw->pt.user.elts = elements; draw->pt.user.eltSize = eltSize; + draw->pt.user.eltBias = eltBias; draw->pt.user.min_index = min_index; draw->pt.user.max_index = max_index; } @@ -446,10 +484,12 @@ draw_set_mapped_element_buffer_range( struct draw_context *draw, void draw_set_mapped_element_buffer( struct draw_context *draw, unsigned eltSize, + int eltBias, const void *elements ) { draw->pt.user.elts = elements; draw->pt.user.eltSize = eltSize; + draw->pt.user.eltBias = eltBias; draw->pt.user.min_index = 0; draw->pt.user.max_index = 0xffffffff; } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 3b21a88170..b905c2f2da 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -40,7 +40,6 @@ #include "pipe/p_state.h" - struct pipe_context; struct draw_context; struct draw_stage; @@ -68,6 +67,8 @@ void draw_set_rasterize_stage( struct draw_context *draw, void draw_wide_point_threshold(struct draw_context *draw, float threshold); +void draw_wide_point_sprites(struct draw_context *draw, boolean draw_sprite); + void draw_wide_line_threshold(struct draw_context *draw, float threshold); void draw_enable_line_stipple(struct draw_context *draw, boolean enable); @@ -141,12 +142,14 @@ void draw_set_vertex_elements(struct draw_context *draw, void draw_set_mapped_element_buffer_range( struct draw_context *draw, unsigned eltSize, + int eltBias, unsigned min_index, unsigned max_index, const void *elements ); void draw_set_mapped_element_buffer( struct draw_context *draw, unsigned eltSize, + int eltBias, const void *elements ); void draw_set_mapped_vertex_buffer(struct draw_context *draw, @@ -198,6 +201,4 @@ boolean draw_need_pipeline(const struct draw_context *draw, const struct pipe_rasterizer_state *rasterizer, unsigned prim ); - - #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 7069aa6b18..131deed43e 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -342,10 +342,10 @@ void draw_geometry_shader_delete(struct draw_geometry_shader *shader) void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, struct draw_context *draw) { - if (shader->machine->Tokens != shader->state.tokens) { - tgsi_exec_machine_bind_shader(shader->machine, - shader->state.tokens, - draw->gs.num_samplers, - draw->gs.samplers); - } + if (shader && shader->machine->Tokens != shader->state.tokens) { + tgsi_exec_machine_bind_shader(shader->machine, + shader->state.tokens, + draw->gs.num_samplers, + draw->gs.samplers); + } } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c new file mode 100644 index 0000000000..936753f66b --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -0,0 +1,708 @@ +#include "draw_llvm.h" + +#include "draw_context.h" +#include "draw_vs.h" + +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_struct.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_tgsi.h" +#include "gallivm/lp_bld_printf.h" + +#include "tgsi/tgsi_exec.h" + +#include "util/u_cpu_detect.h" +#include "util/u_string.h" + +#include <llvm-c/Transforms/Scalar.h> + +#define DEBUG_STORE 0 + + +/* generates the draw jit function */ +static void +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); + +static void +init_globals(struct draw_llvm *llvm) +{ + LLVMTypeRef texture_type; + + /* struct draw_jit_texture */ + { + LLVMTypeRef elem_types[4]; + + elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + + texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_DATA); + LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, + llvm->target, texture_type); + + LLVMAddTypeName(llvm->module, "texture", texture_type); + } + + + /* struct draw_jit_context */ + { + LLVMTypeRef elem_types[3]; + LLVMTypeRef context_type; + + elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ + elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ + elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + + context_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, + llvm->target, context_type, 0); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, + llvm->target, context_type, 1); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, + llvm->target, context_type, + DRAW_JIT_CONTEXT_TEXTURES_INDEX); + LP_CHECK_STRUCT_SIZE(struct draw_jit_context, + llvm->target, context_type); + + LLVMAddTypeName(llvm->module, "draw_jit_context", context_type); + + llvm->context_ptr_type = LLVMPointerType(context_type, 0); + } + { + LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0); + llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0); + } + /* struct pipe_vertex_buffer */ + { + LLVMTypeRef elem_types[4]; + LLVMTypeRef vb_type; + + elem_types[0] = LLVMInt32Type(); + elem_types[1] = LLVMInt32Type(); + elem_types[2] = LLVMInt32Type(); + elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */ + + vb_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, + llvm->target, vb_type, 0); + LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, + llvm->target, vb_type, 2); + LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, + llvm->target, vb_type); + + LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type); + + llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); + } +} + +static LLVMTypeRef +create_vertex_header(struct draw_llvm *llvm, int data_elems) +{ + /* struct vertex_header */ + LLVMTypeRef elem_types[3]; + LLVMTypeRef vertex_header; + char struct_name[24]; + + util_snprintf(struct_name, 23, "vertex_header%d", data_elems); + + elem_types[0] = LLVMIntType(32); + elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); + elem_types[2] = LLVMArrayType(elem_types[1], data_elems); + + vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); + + /* these are bit-fields and we can't take address of them + LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_CLIPMASK); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_EDGEFLAG); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_PAD); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_VERTEX_ID); + */ + LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_CLIP); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_DATA); + + LLVMAddTypeName(llvm->module, struct_name, vertex_header); + + return LLVMPointerType(vertex_header, 0); +} + +struct draw_llvm * +draw_llvm_create(struct draw_context *draw) +{ + struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm ); + + util_cpu_detect(); + + llvm->draw = draw; + llvm->engine = draw->engine; + + debug_assert(llvm->engine); + + llvm->module = LLVMModuleCreateWithName("draw_llvm"); + llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module); + + LLVMAddModuleProvider(llvm->engine, llvm->provider); + + llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine); + + llvm->pass = LLVMCreateFunctionPassManager(llvm->provider); + LLVMAddTargetData(llvm->target, llvm->pass); + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + /* TODO: Add more passes */ + LLVMAddConstantPropagationPass(llvm->pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(llvm->pass); + } + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); + LLVMAddGVNPass(llvm->pass); + LLVMAddCFGSimplificationPass(llvm->pass); + + init_globals(llvm); + + +#if 0 + LLVMDumpModule(llvm->module); +#endif + + return llvm; +} + +void +draw_llvm_destroy(struct draw_llvm *llvm) +{ + FREE(llvm); +} + +struct draw_llvm_variant * +draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) +{ + struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant)); + + draw_llvm_make_variant_key(llvm, &variant->key); + + llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); + + draw_llvm_generate(llvm, variant); + + return variant; +} + +static void +generate_vs(struct draw_llvm *llvm, + LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS], + const LLVMValueRef (*inputs)[NUM_CHANNELS], + LLVMValueRef context_ptr) +{ + const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; + struct lp_type vs_type; + LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); + + memset(&vs_type, 0, sizeof vs_type); + vs_type.floating = TRUE; /* floating point values */ + vs_type.sign = TRUE; /* values are signed */ + vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + vs_type.width = 32; /* 32-bit float */ + vs_type.length = 4; /* 4 elements per vector */ +#if 0 + num_vs = 4; /* number of vertices per block */ +#endif + + /*tgsi_dump(tokens, 0);*/ + lp_build_tgsi_soa(builder, + tokens, + vs_type, + NULL /*struct lp_build_mask_context *mask*/, + consts_ptr, + NULL /*pos*/, + inputs, + outputs, + NULL/*sampler*/, + &llvm->draw->vs.vertex_shader->info); +} + +#if DEBUG_STORE +static void print_vectorf(LLVMBuilderRef builder, + LLVMValueRef vec) +{ + LLVMValueRef val[4]; + val[0] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + val[1] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + val[2] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + val[3] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", + val[0], val[1], val[2], val[3]); +} +#endif + +static void +generate_fetch(LLVMBuilderRef builder, + LLVMValueRef vbuffers_ptr, + LLVMValueRef *res, + struct pipe_vertex_element *velem, + LLVMValueRef vbuf, + LLVMValueRef index) +{ + LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); + LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, + &indices, 1, ""); + LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf); + LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf); + LLVMValueRef stride = LLVMBuildMul(builder, + vb_stride, + index, ""); + + vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); + + stride = LLVMBuildAdd(builder, stride, + vb_buffer_offset, + ""); + stride = LLVMBuildAdd(builder, stride, + LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0), + ""); + + /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/ + vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); + + *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format); +} + +static LLVMValueRef +aos_to_soa(LLVMBuilderRef builder, + LLVMValueRef val0, + LLVMValueRef val1, + LLVMValueRef val2, + LLVMValueRef val3, + LLVMValueRef channel) +{ + LLVMValueRef ex, res; + + ex = LLVMBuildExtractElement(builder, val0, + channel, ""); + res = LLVMBuildInsertElement(builder, + LLVMConstNull(LLVMTypeOf(val0)), + ex, + LLVMConstInt(LLVMInt32Type(), 0, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val1, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 1, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val2, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 2, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val3, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 3, 0), + ""); + + return res; +} + +static void +soa_to_aos(LLVMBuilderRef builder, + LLVMValueRef soa[NUM_CHANNELS], + LLVMValueRef aos[NUM_CHANNELS]) +{ + LLVMValueRef comp; + int i = 0; + + debug_assert(NUM_CHANNELS == 4); + + aos[0] = LLVMConstNull(LLVMTypeOf(soa[0])); + aos[1] = aos[2] = aos[3] = aos[0]; + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, ""); + + } +} + +static void +convert_to_soa(LLVMBuilderRef builder, + LLVMValueRef (*aos)[NUM_CHANNELS], + LLVMValueRef (*soa)[NUM_CHANNELS], + int num_attribs) +{ + int i; + + debug_assert(NUM_CHANNELS == 4); + + for (i = 0; i < num_attribs; ++i) { + LLVMValueRef val0 = aos[i][0]; + LLVMValueRef val1 = aos[i][1]; + LLVMValueRef val2 = aos[i][2]; + LLVMValueRef val3 = aos[i][3]; + + soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 0, 0)); + soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 1, 0)); + soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 2, 0)); + soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 3, 0)); + } +} + +static void +store_aos(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef index, + LLVMValueRef value) +{ + LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); + LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); + LLVMValueRef indices[3]; + + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = index; + indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); + + /* undefined vertex */ + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), + 0xffff, 0), id_ptr); + +#if DEBUG_STORE + lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); +#endif +#if 0 + /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); + print_vectorf(builder, value);*/ + data_ptr = LLVMBuildBitCast(builder, data_ptr, + LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0), + "datavec"); + data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, ""); + + LLVMBuildStore(builder, value, data_ptr); +#else + { + LLVMValueRef x, y, z, w; + LLVMValueRef idx0, idx1, idx2, idx3; + LLVMValueRef gep0, gep1, gep2, gep3; + data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); + + idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + + x = LLVMBuildExtractElement(builder, value, + idx0, ""); + y = LLVMBuildExtractElement(builder, value, + idx1, ""); + z = LLVMBuildExtractElement(builder, value, + idx2, ""); + w = LLVMBuildExtractElement(builder, value, + idx3, ""); + + gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, ""); + gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, ""); + gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, ""); + gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, ""); + + /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n", + x, gep0, y, gep1, z, gep2, w, gep3);*/ + LLVMBuildStore(builder, x, gep0); + LLVMBuildStore(builder, y, gep1); + LLVMBuildStore(builder, z, gep2); + LLVMBuildStore(builder, w, gep3); + } +#endif +} + +static void +store_aos_array(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef aos[NUM_CHANNELS], + int attrib, + int num_outputs) +{ + LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); + LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; + + debug_assert(NUM_CHANNELS == 4); + + io0_ptr = LLVMBuildGEP(builder, io_ptr, + &ind0, 1, ""); + io1_ptr = LLVMBuildGEP(builder, io_ptr, + &ind1, 1, ""); + io2_ptr = LLVMBuildGEP(builder, io_ptr, + &ind2, 1, ""); + io3_ptr = LLVMBuildGEP(builder, io_ptr, + &ind3, 1, ""); + +#if DEBUG_STORE + lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n", + io_ptr, ind0, ind1, ind2, ind3); +#endif + + store_aos(builder, io0_ptr, attr_index, aos[0]); + store_aos(builder, io1_ptr, attr_index, aos[1]); + store_aos(builder, io2_ptr, attr_index, aos[2]); + store_aos(builder, io3_ptr, attr_index, aos[3]); +} + +static void +convert_to_aos(LLVMBuilderRef builder, + LLVMValueRef io, + LLVMValueRef (*outputs)[NUM_CHANNELS], + int num_outputs, + int max_vertices) +{ + unsigned chan, attrib; + +#if DEBUG_STORE + lp_build_printf(builder, " # storing begin\n"); +#endif + for (attrib = 0; attrib < num_outputs; ++attrib) { + LLVMValueRef soa[4]; + LLVMValueRef aos[4]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); + /*lp_build_printf(builder, "output %d : %d ", + LLVMConstInt(LLVMInt32Type(), attrib, 0), + LLVMConstInt(LLVMInt32Type(), chan, 0)); + print_vectorf(builder, out);*/ + soa[chan] = out; + } else + soa[chan] = 0; + } + soa_to_aos(builder, soa, aos); + store_aos_array(builder, + io, + aos, + attrib, + num_outputs); + } +#if DEBUG_STORE + lp_build_printf(builder, " # storing end\n"); +#endif +} + +static void +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) +{ + LLVMTypeRef arg_types[7]; + LLVMTypeRef func_type; + LLVMValueRef context_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef start, end, count, stride, step, io_itr; + LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + struct draw_context *draw = llvm->draw; + unsigned i, j; + struct lp_build_context bld; + struct lp_build_loop_state lp_loop; + struct lp_type vs_type = lp_type_float_vec(32); + const int max_vertices = 4; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; + + arg_types[0] = llvm->context_ptr_type; /* context */ + arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ + arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ + arg_types[3] = LLVMInt32Type(); /* start */ + arg_types[4] = LLVMInt32Type(); /* count */ + arg_types[5] = LLVMInt32Type(); /* stride */ + arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); + LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(variant->function, 0); + io_ptr = LLVMGetParam(variant->function, 1); + vbuffers_ptr = LLVMGetParam(variant->function, 2); + start = LLVMGetParam(variant->function, 3); + count = LLVMGetParam(variant->function, 4); + stride = LLVMGetParam(variant->function, 5); + vb_ptr = LLVMGetParam(variant->function, 6); + + lp_build_name(context_ptr, "context"); + lp_build_name(io_ptr, "io"); + lp_build_name(vbuffers_ptr, "vbuffers"); + lp_build_name(start, "start"); + lp_build_name(count, "count"); + lp_build_name(stride, "stride"); + lp_build_name(vb_ptr, "vb"); + + /* + * Function body + */ + + block = LLVMAppendBasicBlock(variant->function, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, builder, vs_type); + + end = lp_build_add(&bld, start, count); + + step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + +#if DEBUG_STORE + lp_build_printf(builder, "start = %d, end = %d, step = %d\n", + start, end, step); +#endif + lp_build_loop_begin(builder, start, &lp_loop); + { + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; + LLVMValueRef io; + const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; + + io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); + io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); +#if DEBUG_STORE + lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", + io_itr, io, lp_loop.counter); +#endif + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef true_index = LLVMBuildAdd( + builder, + lp_loop.counter, + LLVMConstInt(LLVMInt32Type(), i, 0), ""); + for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { + struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; + LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), + velem->vertex_buffer_index, + 0); + LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, + &vb_index, 1, ""); + generate_fetch(builder, vbuffers_ptr, + &aos_attribs[j][i], velem, vb, true_index); + } + } + convert_to_soa(builder, aos_attribs, inputs, + draw->pt.nr_vertex_elements); + + ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; + generate_vs(llvm, + builder, + outputs, + ptr_aos, + context_ptr); + + convert_to_aos(builder, io, outputs, + draw->vs.vertex_shader->info.num_outputs, + max_vertices); + } + lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); + + LLVMBuildRetVoid(builder); + + LLVMDisposeBuilder(builder); + + /* + * Translate the LLVM IR into machine code. + */ +#ifdef DEBUG + if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { + LLVMDumpValue(variant->function); + assert(0); + } +#endif + + LLVMRunFunctionPassManager(llvm->pass, variant->function); + + if (0) { + LLVMDumpValue(variant->function); + debug_printf("\n"); + } + variant->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); + + if (0) + lp_disassemble(variant->jit_func); +} + +void +draw_llvm_make_variant_key(struct draw_llvm *llvm, + struct draw_llvm_variant_key *key) +{ + memset(key, 0, sizeof(struct draw_llvm_variant_key)); + + key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; + + memcpy(key->vertex_element, + llvm->draw->pt.vertex_element, + sizeof(struct pipe_vertex_element) * key->nr_vertex_elements); + + memcpy(&key->vs, + &llvm->draw->vs.vertex_shader->state, + sizeof(struct pipe_shader_state)); +} diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h new file mode 100644 index 0000000000..28b9044a81 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -0,0 +1,144 @@ +#ifndef HAVE_LLVM_H +#define HAVE_LLVM_H + +#include "draw/draw_private.h" + +#include "pipe/p_context.h" + +#include <llvm-c/Core.h> +#include <llvm-c/Analysis.h> +#include <llvm-c/Target.h> +#include <llvm-c/ExecutionEngine.h> + +struct draw_jit_texture +{ + uint32_t width; + uint32_t height; + uint32_t stride; + const void *data; +}; + +enum { + DRAW_JIT_TEXTURE_WIDTH = 0, + DRAW_JIT_TEXTURE_HEIGHT, + DRAW_JIT_TEXTURE_STRIDE, + DRAW_JIT_TEXTURE_DATA +}; + +enum { + DRAW_JIT_VERTEX_VERTEX_ID = 0, + DRAW_JIT_VERTEX_CLIP, + DRAW_JIT_VERTEX_DATA +}; + +/** + * This structure is passed directly to the generated vertex shader. + * + * It contains the derived state. + * + * Changes here must be reflected in the draw_jit_context_* macros. + * Changes to the ordering should be avoided. + * + * Only use types with a clear size and padding here, in particular prefer the + * stdint.h types to the basic integer types. + */ +struct draw_jit_context +{ + const float *vs_constants; + const float *gs_constants; + + + struct draw_jit_texture textures[PIPE_MAX_SAMPLERS]; +}; + + +#define draw_jit_context_vs_constants(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 0, "vs_constants") + +#define draw_jit_context_gs_constants(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 1, "gs_constants") + +#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2 + +#define draw_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures") + + + +#define draw_jit_header_id(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, 0, "id") + +#define draw_jit_header_clip(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 1, "clip") + +#define draw_jit_header_data(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, 2, "data") + + +#define draw_jit_vbuffer_stride(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 0, "stride") + +#define draw_jit_vbuffer_offset(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 2, "buffer_offset") + + +typedef void +(*draw_jit_vert_func)(struct draw_jit_context *context, + struct vertex_header *io, + const char *vbuffers[PIPE_MAX_ATTRIBS], + unsigned start, + unsigned count, + unsigned stride, + struct pipe_vertex_buffer *vertex_buffers); + +struct draw_llvm { + struct draw_context *draw; + + struct draw_jit_context jit_context; + + LLVMModuleRef module; + LLVMExecutionEngineRef engine; + LLVMModuleProviderRef provider; + LLVMTargetDataRef target; + LLVMPassManagerRef pass; + + LLVMTypeRef context_ptr_type; + LLVMTypeRef vertex_header_ptr_type; + LLVMTypeRef buffer_ptr_type; + LLVMTypeRef vb_ptr_type; +}; + +struct draw_llvm_variant_key +{ + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + struct pipe_shader_state vs; +}; + +struct draw_llvm_variant +{ + struct draw_llvm_variant_key key; + LLVMValueRef function; + draw_jit_vert_func jit_func; + + struct draw_llvm_variant *next; +}; + +struct draw_llvm * +draw_llvm_create(struct draw_context *draw); + +void +draw_llvm_destroy(struct draw_llvm *llvm); + +struct draw_llvm_variant * +draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs); + +void +draw_llvm_make_variant_key(struct draw_llvm *llvm, + struct draw_llvm_variant_key *key); + +LLVMValueRef +draw_llvm_translate_from(LLVMBuilderRef builder, + LLVMValueRef vbuffer, + enum pipe_format from_format); +#endif diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c new file mode 100644 index 0000000000..d7da7ed357 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -0,0 +1,497 @@ +#include "draw_private.h" +#include "draw_context.h" + +#include "draw_llvm.h" + +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_struct.h" +#include "gallivm/lp_bld_format.h" +#include "gallivm/lp_bld_debug.h" + +#include "util/u_memory.h" +#include "util/u_format.h" +#include "pipe/p_state.h" + + +#define DRAW_DBG 0 + +static LLVMValueRef +from_64_float(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMDoubleType(), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildFPTrunc(builder, l, LLVMFloatType(), ""); +} + +static LLVMValueRef +from_32_float(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMFloatType(), 0) , ""); + return LLVMBuildLoad(builder, bc, ""); +} + +static INLINE LLVMValueRef +from_8_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_16_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_32_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_8_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_16_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_32_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + + +static INLINE LLVMValueRef +from_8_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 255.), ""); +} + +static INLINE LLVMValueRef +from_16_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 65535.), ""); +} + +static INLINE LLVMValueRef +from_32_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 4294967295.), ""); +} + +static INLINE LLVMValueRef +from_8_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 127.0), ""); +} + +static INLINE LLVMValueRef +from_16_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 32767.0f), ""); +} + +static INLINE LLVMValueRef +from_32_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 2147483647.0), ""); +} + +static INLINE LLVMValueRef +from_32_fixed(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 65536.0), ""); +} + +static LLVMValueRef +to_64_float(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPExt(builder, l, LLVMDoubleType(), ""); +} + +static LLVMValueRef +to_32_float(LLVMBuilderRef builder, LLVMValueRef fp) +{ + return LLVMBuildLoad(builder, fp, ""); +} + +static INLINE LLVMValueRef +to_8_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToUI(builder, l, LLVMIntType(8), ""); +} + +static INLINE LLVMValueRef +to_16_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToUI(builder, l, LLVMIntType(16), ""); +} + +static INLINE LLVMValueRef +to_32_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); +} + +static INLINE LLVMValueRef +to_8_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToSI(builder, l, LLVMIntType(8), ""); +} + +static INLINE LLVMValueRef +to_16_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToSI(builder, l, LLVMIntType(16), ""); +} + +static INLINE LLVMValueRef +to_32_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); +} + +static INLINE LLVMValueRef +to_8_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(8), ""); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 255.), ""); +} + +static INLINE LLVMValueRef +to_16_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 65535.), ""); +} + +static INLINE LLVMValueRef +to_32_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); + + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 4294967295.), ""); +} + +static INLINE LLVMValueRef +to_8_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(8), ""); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 127.0), ""); +} + +static INLINE LLVMValueRef +to_16_snorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(16), ""); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 32767.0f), ""); +} + +static INLINE LLVMValueRef +to_32_snorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); + + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 2147483647.0), ""); +} + +static INLINE LLVMValueRef +to_32_fixed(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); + + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 65536.0), ""); +} + +typedef LLVMValueRef (*from_func)(LLVMBuilderRef, LLVMValueRef); +typedef LLVMValueRef (*to_func)(LLVMBuilderRef, LLVMValueRef); + +/* so that underneath can avoid function calls which are prohibited + * for static initialization we need this conversion */ +enum ll_type { + LL_Double, + LL_Float, + LL_Int32, + LL_Int16, + LL_Int8 +}; + +static INLINE LLVMTypeRef +ll_type_to_llvm(enum ll_type type) +{ + switch (type) { + case LL_Double: + return LLVMDoubleType(); + case LL_Float: + return LLVMFloatType(); + case LL_Int32: + return LLVMInt32Type(); + case LL_Int16: + return LLVMIntType(16); + case LL_Int8: + return LLVMIntType(8); + } + return LLVMIntType(8); +} + +static INLINE int +ll_type_size(enum ll_type type) +{ + switch (type) { + case LL_Double: + return 8; + case LL_Float: + return 4; + case LL_Int32: + return 4; + case LL_Int16: + return 2; + case LL_Int8: + return 1; + } + return 1; +} + +struct draw_llvm_translate { + int format; + from_func from; + to_func to; + enum ll_type type; + int num_components; +} translates[] = +{ + {PIPE_FORMAT_R64_FLOAT, from_64_float, to_64_float, LL_Double, 1}, + {PIPE_FORMAT_R64G64_FLOAT, from_64_float, to_64_float, LL_Double, 2}, + {PIPE_FORMAT_R64G64B64_FLOAT, from_64_float, to_64_float, LL_Double, 3}, + {PIPE_FORMAT_R64G64B64A64_FLOAT, from_64_float, to_64_float, LL_Double, 4}, + {PIPE_FORMAT_R32_FLOAT, from_32_float, to_32_float, LL_Float, 1}, + {PIPE_FORMAT_R32G32_FLOAT, from_32_float, to_32_float, LL_Float, 2}, + {PIPE_FORMAT_R32G32B32_FLOAT, from_32_float, to_32_float, LL_Float, 3}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, from_32_float, to_32_float, LL_Float, 4}, + + {PIPE_FORMAT_R32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 4}, + + {PIPE_FORMAT_R32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 4}, + + {PIPE_FORMAT_R32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 4}, + + {PIPE_FORMAT_R32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 4}, + + {PIPE_FORMAT_R16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 4}, + + {PIPE_FORMAT_R16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 4}, + + {PIPE_FORMAT_R16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 4}, + + {PIPE_FORMAT_R16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 4}, + + {PIPE_FORMAT_R8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 4}, + + {PIPE_FORMAT_R8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 4}, + + {PIPE_FORMAT_R8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 4}, + + {PIPE_FORMAT_R8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 4}, + + {PIPE_FORMAT_R32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 4}, +}; + + +static LLVMValueRef +fetch(LLVMBuilderRef builder, + LLVMValueRef ptr, int val_size, int nr_components, + from_func func) +{ + int i; + int offset = 0; + LLVMValueRef res = LLVMConstNull( + LLVMVectorType(LLVMFloatType(), 4)); + LLVMValueRef defaults[4]; + + defaults[0] = LLVMConstReal(LLVMFloatType(), 0); + defaults[1] = LLVMConstReal(LLVMFloatType(), 0); + defaults[2] = LLVMConstReal(LLVMFloatType(), 0); + defaults[3] = LLVMConstReal(LLVMFloatType(), 1); + + for (i = 0; i < nr_components; ++i) { + LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0); + LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef src_tmp; + LLVMValueRef component; + + src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, "src_tmp"); + + /* convert src_tmp to float */ + component = func(builder, src_tmp); + + /* vec.comp = component */ + res = LLVMBuildInsertElement(builder, + res, + component, + dst_index, ""); + offset += val_size; + } + for (; i < 4; ++i) { + LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(builder, + res, + defaults[i], + dst_index, ""); + } + return res; +} + + +LLVMValueRef +draw_llvm_translate_from(LLVMBuilderRef builder, + LLVMValueRef vbuffer, + enum pipe_format from_format) +{ + const struct util_format_description *format_desc; + LLVMValueRef zero; + int i; + + /* + * The above can only cope with straight arrays: no bitfields, + * swizzles, or half floats. + */ + + for (i = 0; i < Elements(translates); ++i) { + if (translates[i].format == from_format) { + /*LLVMTypeRef type = ll_type_to_llvm(translates[i].type);*/ + return fetch(builder, + vbuffer, + ll_type_size(translates[i].type), + translates[i].num_components, + translates[i].from); + } + } + + + /* + * This doesn't handle anything bigger than 32bits, or half floats + * yet. + * + * TODO: unify all this code into lp_build_fetch_rgba_aos(). + */ + + format_desc = util_format_description(from_format); + zero = LLVMConstNull(LLVMInt32Type()); + return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero); +} diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 83dc1a35f4..64c3502508 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -66,6 +66,7 @@ boolean draw_pipeline_init( struct draw_context *draw ) /* these defaults are oriented toward the needs of softpipe */ draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */ draw->pipeline.wide_line_threshold = 1.0; + draw->pipeline.wide_point_sprites = FALSE; draw->pipeline.line_stipple = TRUE; draw->pipeline.point_sprite = TRUE; @@ -225,7 +226,7 @@ static void do_triangle( struct draw_context *draw, /** - * Code to run the pipeline on a fairly arbitary collection of vertices. + * Code to run the pipeline on a fairly arbitrary collection of vertices. * For drawing indexed primitives. * * Vertex headers must be pre-initialized with the diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 72d7480efb..4faf0a779c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -40,6 +40,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_sampler.h" #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" @@ -87,9 +88,10 @@ struct aaline_stage uint pos_slot; void *sampler_cso; - struct pipe_texture *texture; + struct pipe_resource *texture; + struct pipe_sampler_view *sampler_view; uint num_samplers; - uint num_textures; + uint num_sampler_views; /* @@ -98,7 +100,7 @@ struct aaline_stage struct aaline_fragment_shader *fs; struct { void *sampler[PIPE_MAX_SAMPLERS]; - struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; } state; /* @@ -111,8 +113,10 @@ struct aaline_stage void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **); - void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, - struct pipe_texture **); + + void (*driver_set_sampler_views)(struct pipe_context *, + unsigned, + struct pipe_sampler_view **); }; @@ -392,7 +396,8 @@ aaline_create_texture(struct aaline_stage *aaline) { struct pipe_context *pipe = aaline->stage.draw->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_texture texTemp; + struct pipe_resource texTemp; + struct pipe_sampler_view viewTempl; uint level; memset(&texTemp, 0, sizeof(texTemp)); @@ -402,28 +407,46 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.width0 = 1 << MAX_TEXTURE_LEVEL; texTemp.height0 = 1 << MAX_TEXTURE_LEVEL; texTemp.depth0 = 1; + texTemp.bind = PIPE_BIND_SAMPLER_VIEW; - aaline->texture = screen->texture_create(screen, &texTemp); + aaline->texture = screen->resource_create(screen, &texTemp); if (!aaline->texture) return FALSE; + u_sampler_view_default_template(&viewTempl, + aaline->texture, + aaline->texture->format); + aaline->sampler_view = pipe->create_sampler_view(pipe, + aaline->texture, + &viewTempl); + if (!aaline->sampler_view) { + return FALSE; + } + /* Fill in mipmap images. * Basically each level is solid opaque, except for the outermost * texels which are zero. Special case the 1x1 and 2x2 levels. */ for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { struct pipe_transfer *transfer; + struct pipe_box box; const uint size = u_minify(aaline->texture->width0, level); ubyte *data; uint i, j; assert(aaline->texture->width0 == aaline->texture->height0); + u_box_origin_2d( size, size, &box ); + /* This texture is new, no need to flush. */ - transfer = screen->get_tex_transfer(screen, aaline->texture, 0, level, 0, - PIPE_TRANSFER_WRITE, 0, 0, size, size); - data = screen->transfer_map(screen, transfer); + transfer = pipe->get_transfer(pipe, + aaline->texture, + u_subresource(0, level), + PIPE_TRANSFER_WRITE, + &box); + + data = pipe->transfer_map(pipe, transfer); if (data == NULL) return FALSE; @@ -447,8 +470,8 @@ aaline_create_texture(struct aaline_stage *aaline) } /* unmap */ - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->transfer_destroy(pipe, transfer); } return TRUE; } @@ -671,16 +694,16 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ - num_samplers = MAX2(aaline->num_textures, aaline->num_samplers); + num_samplers = MAX2(aaline->num_sampler_views, aaline->num_samplers); num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1); aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso; - pipe_texture_reference(&aaline->state.texture[aaline->fs->sampler_unit], - aaline->texture); + pipe_sampler_view_reference(&aaline->state.sampler_views[aaline->fs->sampler_unit], + aaline->sampler_view); draw->suspend_flushing = TRUE; aaline->driver_bind_sampler_states(pipe, num_samplers, aaline->state.sampler); - aaline->driver_set_sampler_textures(pipe, num_samplers, aaline->state.texture); + aaline->driver_set_sampler_views(pipe, num_samplers, aaline->state.sampler_views); /* Disable triangle culling, stippling, unfilled mode etc. */ r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade); @@ -709,8 +732,9 @@ aaline_flush(struct draw_stage *stage, unsigned flags) aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); aaline->driver_bind_sampler_states(pipe, aaline->num_samplers, aaline->state.sampler); - aaline->driver_set_sampler_textures(pipe, aaline->num_textures, - aaline->state.texture); + aaline->driver_set_sampler_views(pipe, + aaline->num_sampler_views, + aaline->state.sampler_views); /* restore original rasterizer state */ if (draw->rast_handle) { @@ -738,14 +762,18 @@ aaline_destroy(struct draw_stage *stage) uint i; for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - pipe_texture_reference(&aaline->state.texture[i], NULL); + pipe_sampler_view_reference(&aaline->state.sampler_views[i], NULL); } if (aaline->sampler_cso) pipe->delete_sampler_state(pipe, aaline->sampler_cso); if (aaline->texture) - pipe_texture_reference(&aaline->texture, NULL); + pipe_resource_reference(&aaline->texture, NULL); + + if (aaline->sampler_view) { + pipe_sampler_view_reference(&aaline->sampler_view, NULL); + } draw_free_temp_verts( stage ); @@ -859,23 +887,24 @@ aaline_bind_sampler_states(struct pipe_context *pipe, static void -aaline_set_sampler_textures(struct pipe_context *pipe, - unsigned num, struct pipe_texture **texture) +aaline_set_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); uint i; /* save current */ for (i = 0; i < num; i++) { - pipe_texture_reference(&aaline->state.texture[i], texture[i]); + pipe_sampler_view_reference(&aaline->state.sampler_views[i], views[i]); } for ( ; i < PIPE_MAX_SAMPLERS; i++) { - pipe_texture_reference(&aaline->state.texture[i], NULL); + pipe_sampler_view_reference(&aaline->state.sampler_views[i], NULL); } - aaline->num_textures = num; + aaline->num_sampler_views = num; /* pass-through */ - aaline->driver_set_sampler_textures(pipe, num, texture); + aaline->driver_set_sampler_views(pipe, num, views); } @@ -911,7 +940,7 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) aaline->driver_delete_fs_state = pipe->delete_fs_state; aaline->driver_bind_sampler_states = pipe->bind_fragment_sampler_states; - aaline->driver_set_sampler_textures = pipe->set_fragment_sampler_textures; + aaline->driver_set_sampler_views = pipe->set_fragment_sampler_views; /* override the driver's functions */ pipe->create_fs_state = aaline_create_fs_state; @@ -919,7 +948,7 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) pipe->delete_fs_state = aaline_delete_fs_state; pipe->bind_fragment_sampler_states = aaline_bind_sampler_states; - pipe->set_fragment_sampler_textures = aaline_set_sampler_textures; + pipe->set_fragment_sampler_views = aaline_set_sampler_views; /* Install once everything is known to be OK: */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index d0d99aa331..ef30db094f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -42,6 +42,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_sampler.h" #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" @@ -74,9 +75,10 @@ struct pstip_stage struct draw_stage stage; void *sampler_cso; - struct pipe_texture *texture; + struct pipe_resource *texture; + struct pipe_sampler_view *sampler_view; uint num_samplers; - uint num_textures; + uint num_sampler_views; /* * Currently bound state @@ -84,7 +86,7 @@ struct pstip_stage struct pstip_fragment_shader *fs; struct { void *samplers[PIPE_MAX_SAMPLERS]; - struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; const struct pipe_poly_stipple *stipple; } state; @@ -98,8 +100,9 @@ struct pstip_stage void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **); - void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, - struct pipe_texture **); + void (*driver_set_sampler_views)(struct pipe_context *, + unsigned, + struct pipe_sampler_view **); void (*driver_set_polygon_stipple)(struct pipe_context *, const struct pipe_poly_stipple *); @@ -374,19 +377,21 @@ pstip_update_texture(struct pstip_stage *pstip) { static const uint bit31 = 1 << 31; struct pipe_context *pipe = pstip->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *transfer; const uint *stipple = pstip->state.stipple->stipple; uint i, j; ubyte *data; /* XXX: want to avoid flushing just because we use stipple: + * + * Flush should no longer be necessary if driver is properly + * interleaving drawing and transfers on a given context: */ pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL ); - transfer = screen->get_tex_transfer(screen, pstip->texture, 0, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, 32, 32); - data = screen->transfer_map(screen, transfer); + transfer = pipe_get_transfer(pipe, pstip->texture, 0, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, 32, 32); + data = pipe->transfer_map(pipe, transfer); /* * Load alpha texture. @@ -408,8 +413,8 @@ pstip_update_texture(struct pstip_stage *pstip) } /* unmap */ - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->transfer_destroy(pipe, transfer); } @@ -421,7 +426,8 @@ pstip_create_texture(struct pstip_stage *pstip) { struct pipe_context *pipe = pstip->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_texture texTemp; + struct pipe_resource texTemp; + struct pipe_sampler_view viewTempl; memset(&texTemp, 0, sizeof(texTemp)); texTemp.target = PIPE_TEXTURE_2D; @@ -430,11 +436,22 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.width0 = 32; texTemp.height0 = 32; texTemp.depth0 = 1; + texTemp.bind = PIPE_BIND_SAMPLER_VIEW; - pstip->texture = screen->texture_create(screen, &texTemp); + pstip->texture = screen->resource_create(screen, &texTemp); if (pstip->texture == NULL) return FALSE; + u_sampler_view_default_template(&viewTempl, + pstip->texture, + pstip->texture->format); + pstip->sampler_view = pipe->create_sampler_view(pipe, + pstip->texture, + &viewTempl); + if (!pstip->sampler_view) { + return FALSE; + } + return TRUE; } @@ -513,19 +530,19 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ - num_samplers = MAX2(pstip->num_textures, pstip->num_samplers); + num_samplers = MAX2(pstip->num_sampler_views, pstip->num_samplers); num_samplers = MAX2(num_samplers, pstip->fs->sampler_unit + 1); /* plug in our sampler, texture */ pstip->state.samplers[pstip->fs->sampler_unit] = pstip->sampler_cso; - pipe_texture_reference(&pstip->state.textures[pstip->fs->sampler_unit], - pstip->texture); + pipe_sampler_view_reference(&pstip->state.sampler_views[pstip->fs->sampler_unit], + pstip->sampler_view); assert(num_samplers <= PIPE_MAX_SAMPLERS); draw->suspend_flushing = TRUE; pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); - pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); + pstip->driver_set_sampler_views(pipe, num_samplers, pstip->state.sampler_views); draw->suspend_flushing = FALSE; /* now really draw first triangle */ @@ -549,8 +566,9 @@ pstip_flush(struct draw_stage *stage, unsigned flags) pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, pstip->state.samplers); - pstip->driver_set_sampler_textures(pipe, pstip->num_textures, - pstip->state.textures); + pstip->driver_set_sampler_views(pipe, + pstip->num_sampler_views, + pstip->state.sampler_views); draw->suspend_flushing = FALSE; } @@ -569,12 +587,16 @@ pstip_destroy(struct draw_stage *stage) uint i; for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - pipe_texture_reference(&pstip->state.textures[i], NULL); + pipe_sampler_view_reference(&pstip->state.sampler_views[i], NULL); } pstip->pipe->delete_sampler_state(pstip->pipe, pstip->sampler_cso); - pipe_texture_reference(&pstip->texture, NULL); + pipe_resource_reference(&pstip->texture, NULL); + + if (pstip->sampler_view) { + pipe_sampler_view_reference(&pstip->sampler_view, NULL); + } draw_free_temp_verts( stage ); FREE( stage ); @@ -680,24 +702,25 @@ pstip_bind_sampler_states(struct pipe_context *pipe, static void -pstip_set_sampler_textures(struct pipe_context *pipe, - unsigned num, struct pipe_texture **texture) +pstip_set_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); uint i; /* save current */ for (i = 0; i < num; i++) { - pipe_texture_reference(&pstip->state.textures[i], texture[i]); + pipe_sampler_view_reference(&pstip->state.sampler_views[i], views[i]); } for (; i < PIPE_MAX_SAMPLERS; i++) { - pipe_texture_reference(&pstip->state.textures[i], NULL); + pipe_sampler_view_reference(&pstip->state.sampler_views[i], NULL); } - pstip->num_textures = num; + pstip->num_sampler_views = num; /* pass-through */ - pstip->driver_set_sampler_textures(pstip->pipe, num, texture); + pstip->driver_set_sampler_views(pstip->pipe, num, views); } @@ -754,7 +777,7 @@ draw_install_pstipple_stage(struct draw_context *draw, pstip->driver_delete_fs_state = pipe->delete_fs_state; pstip->driver_bind_sampler_states = pipe->bind_fragment_sampler_states; - pstip->driver_set_sampler_textures = pipe->set_fragment_sampler_textures; + pstip->driver_set_sampler_views = pipe->set_fragment_sampler_views; pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; /* override the driver's functions */ @@ -763,7 +786,7 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->delete_fs_state = pstip_delete_fs_state; pipe->bind_fragment_sampler_states = pstip_bind_sampler_states; - pipe->set_fragment_sampler_textures = pstip_set_sampler_textures; + pipe->set_fragment_sampler_views = pstip_set_sampler_views; pipe->set_polygon_stipple = pstip_set_polygon_stipple; return TRUE; diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index 153097e543..2a50af7a41 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -100,6 +100,11 @@ draw_need_pipeline(const struct draw_context *draw, if (rasterizer->point_size > draw->pipeline.wide_point_threshold) return TRUE; + /* sprite points */ + if (rasterizer->point_quad_rasterization + && draw->pipeline.wide_point_sprites) + return TRUE; + /* AA points */ if (rasterizer->point_smooth && draw->pipeline.aapoint) return TRUE; @@ -154,6 +159,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) boolean need_det = FALSE; boolean precalc_flat = FALSE; boolean wide_lines, wide_points; + const struct pipe_rasterizer_state *rast = draw->rasterizer; /* Set the validate's next stage to the rasterize stage, so that it * can be found later if needed for flushing. @@ -161,15 +167,17 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) stage->next = next; /* drawing wide lines? */ - wide_lines = (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold - && !draw->rasterizer->line_smooth); + wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold + && !rast->line_smooth); /* drawing large points? */ - if (draw->rasterizer->sprite_coord_enable && draw->pipeline.point_sprite) + if (rast->sprite_coord_enable && draw->pipeline.point_sprite) wide_points = TRUE; - else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + else if (rast->point_smooth && draw->pipeline.aapoint) wide_points = FALSE; - else if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) + else if (rast->point_size > draw->pipeline.wide_point_threshold) + wide_points = TRUE; + else if (rast->point_quad_rasterization && draw->pipeline.wide_point_sprites) wide_points = TRUE; else wide_points = FALSE; @@ -181,12 +189,12 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) * shorter pipelines for lines & points. */ - if (draw->rasterizer->line_smooth && draw->pipeline.aaline) { + if (rast->line_smooth && draw->pipeline.aaline) { draw->pipeline.aaline->next = next; next = draw->pipeline.aaline; } - if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) { + if (rast->point_smooth && draw->pipeline.aapoint) { draw->pipeline.aapoint->next = next; next = draw->pipeline.aapoint; } @@ -197,44 +205,44 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) precalc_flat = TRUE; } - if (wide_points || draw->rasterizer->sprite_coord_enable) { + if (wide_points || rast->sprite_coord_enable) { draw->pipeline.wide_point->next = next; next = draw->pipeline.wide_point; } - if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) { + if (rast->line_stipple_enable && draw->pipeline.line_stipple) { draw->pipeline.stipple->next = next; next = draw->pipeline.stipple; precalc_flat = TRUE; /* only needed for lines really */ } - if (draw->rasterizer->poly_stipple_enable + if (rast->poly_stipple_enable && draw->pipeline.pstipple) { draw->pipeline.pstipple->next = next; next = draw->pipeline.pstipple; } - if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { + if (rast->fill_cw != PIPE_POLYGON_MODE_FILL || + rast->fill_ccw != PIPE_POLYGON_MODE_FILL) { draw->pipeline.unfilled->next = next; next = draw->pipeline.unfilled; precalc_flat = TRUE; /* only needed for triangles really */ need_det = TRUE; } - if (draw->rasterizer->flatshade && precalc_flat) { + if (rast->flatshade && precalc_flat) { draw->pipeline.flatshade->next = next; next = draw->pipeline.flatshade; } - if (draw->rasterizer->offset_cw || - draw->rasterizer->offset_ccw) { + if (rast->offset_cw || + rast->offset_ccw) { draw->pipeline.offset->next = next; next = draw->pipeline.offset; need_det = TRUE; } - if (draw->rasterizer->light_twoside) { + if (rast->light_twoside) { draw->pipeline.twoside->next = next; next = draw->pipeline.twoside; need_det = TRUE; @@ -247,7 +255,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) * to less work emitting vertices, smaller vertex buffers, etc. * It's difficult to say whether this will be true in general. */ - if (need_det || draw->rasterizer->cull_mode) { + if (need_det || rast->cull_mode) { draw->pipeline.cull->next = next; next = draw->pipeline.cull; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 2709957961..ee2b811603 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -235,41 +235,18 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) for (i = 0; i < vbuf->vinfo->num_attribs; i++) { unsigned emit_sz = 0; unsigned src_buffer = 0; - unsigned output_format; + enum pipe_format output_format; unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) ); - switch (vbuf->vinfo->attrib[i].emit) { - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); + output_format = draw_translate_vinfo_format(vbuf->vinfo->attrib[i].emit); + emit_sz = draw_translate_vinfo_size(vbuf->vinfo->attrib[i].emit); + + /* doesn't handle EMIT_OMIT */ + assert(emit_sz != 0); + + if (vbuf->vinfo->attrib[i].emit == EMIT_1F_PSIZE) { src_buffer = 1; src_offset = 0; - break; - case EMIT_4UB: - output_format = PIPE_FORMAT_A8R8G8B8_UNORM; - emit_sz = 4 * sizeof(ubyte); - break; - default: - assert(0); - output_format = PIPE_FORMAT_NONE; - emit_sz = 0; - break; } hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index d6d7513e2a..a86fe19586 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -114,7 +114,10 @@ static void set_texcoords(const struct widepoint_stage *wide, /* put gl_PointCoord into the extra vertex slot */ uint slot = wide->stage.draw->extra_shader_outputs.slot; v->data[slot][0] = tc[0]; - v->data[slot][1] = tc[1]; + if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) + v->data[slot][1] = 1.0f - tc[1]; + else + v->data[slot][1] = tc[1]; v->data[slot][2] = 0.0F; v->data[slot][3] = 1.0F; } @@ -129,10 +132,9 @@ static void set_texcoords(const struct widepoint_stage *wide, static void widepoint_point( struct draw_stage *stage, struct prim_header *header ) { - /* XXX should take point_quad_rasterization into account? */ const struct widepoint_stage *wide = widepoint_stage(stage); const unsigned pos = draw_current_shader_position_output(stage->draw); - const boolean sprite = (boolean) stage->draw->rasterizer->sprite_coord_enable; + const boolean sprite = (boolean) stage->draw->rasterizer->point_quad_rasterization; float half_size; float left_adj, right_adj, bot_adj, top_adj; @@ -218,11 +220,11 @@ static void widepoint_first_point( struct draw_stage *stage, const struct pipe_rasterizer_state *rast = draw->rasterizer; void *r; - wide->half_point_size = 0.5f * draw->rasterizer->point_size; + wide->half_point_size = 0.5f * rast->point_size; wide->xbias = 0.0; wide->ybias = 0.0; - if (draw->rasterizer->gl_rasterization_rules) { + if (rast->gl_rasterization_rules) { wide->xbias = 0.125; } @@ -233,23 +235,23 @@ static void widepoint_first_point( struct draw_stage *stage, draw->suspend_flushing = FALSE; /* XXX we won't know the real size if it's computed by the vertex shader! */ - if ((draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) || - (draw->rasterizer->sprite_coord_enable && draw->pipeline.point_sprite)) { + if ((rast->point_size > draw->pipeline.wide_point_threshold) || + (rast->point_quad_rasterization && draw->pipeline.point_sprite)) { stage->point = widepoint_point; } else { stage->point = draw_pipe_passthrough_point; } - if (draw->rasterizer->sprite_coord_enable) { + if (rast->point_quad_rasterization) { /* find vertex shader texcoord outputs */ const struct draw_vertex_shader *vs = draw->vs.vertex_shader; uint i, j = 0; - wide->texcoord_mode = draw->rasterizer->sprite_coord_mode; + wide->texcoord_mode = rast->sprite_coord_mode; for (i = 0; i < vs->info.num_outputs; i++) { if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { wide->texcoord_slot[j] = i; - wide->texcoord_enable[j] = (draw->rasterizer->sprite_coord_enable >> j) & 1; + wide->texcoord_enable[j] = (rast->sprite_coord_enable >> j) & 1; j++; } } @@ -269,7 +271,7 @@ static void widepoint_first_point( struct draw_stage *stage, } wide->psize_slot = -1; - if (draw->rasterizer->point_size_per_vertex) { + if (rast->point_size_per_vertex) { /* find PSIZ vertex output */ const struct draw_vertex_shader *vs = draw->vs.vertex_shader; uint i; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 3fe3dc519d..4bb3282f62 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -46,6 +46,10 @@ #include "tgsi/tgsi_scan.h" +#ifdef HAVE_LLVM +#include <llvm-c/ExecutionEngine.h> +#endif + struct pipe_context; struct draw_vertex_shader; @@ -107,6 +111,7 @@ struct draw_context float wide_point_threshold; /**< convert pnts to tris if larger than this */ float wide_line_threshold; /**< convert lines to tris if wider than this */ + boolean wide_point_sprites; /**< convert points to tris for sprite mode */ boolean line_stipple; /**< do line stipple? */ boolean point_sprite; /**< convert points to quads for sprites? */ @@ -146,6 +151,7 @@ struct draw_context const void *elts; /** bytes per index (0, 1, 2 or 4) */ unsigned eltSize; + int eltBias; unsigned min_index; unsigned max_index; @@ -245,9 +251,17 @@ struct draw_context unsigned instance_id; +#ifdef HAVE_LLVM + LLVMExecutionEngineRef engine; + boolean use_llvm; +#endif void *driver_private; }; +/******************************************************************************* + * Draw common initialization code + */ +boolean draw_init(struct draw_context *draw); /******************************************************************************* * Vertex shader code: diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 6d90a6c42f..b5876bb1bd 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -111,6 +111,7 @@ draw_pt_arrays(struct draw_context *draw, frontend->run(frontend, draw_pt_elt_func(draw), draw_pt_elt_ptr(draw, start), + draw->pt.user.eltBias, count); frontend->finish( frontend ); @@ -140,7 +141,16 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_shade_emit) return FALSE; - draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); +#if HAVE_LLVM + draw->use_llvm = debug_get_bool_option("DRAW_USE_LLVM", TRUE); + if (draw->use_llvm) + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit_llvm( draw ); +#else + draw->pt.middle.general = NULL; +#endif + + if (!draw->pt.middle.general) + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); if (!draw->pt.middle.general) return FALSE; @@ -215,8 +225,11 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) break; default: assert(0); + return; } - debug_printf("Element[%u + %u] -> Vertex %u:\n", start, i, ii); + ii += draw->pt.user.eltBias; + debug_printf("Element[%u + %u] + %i -> Vertex %u:\n", start, i, + draw->pt.user.eltBias, ii); } else { /* non-indexed arrays */ @@ -307,9 +320,8 @@ draw_arrays_instanced(struct draw_context *draw, tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { - debug_printf(" format=%s comps=%u\n", - util_format_name(draw->pt.vertex_element[i].src_format), - draw->pt.vertex_element[i].nr_components); + debug_printf(" format=%s\n", + util_format_name(draw->pt.vertex_element[i].src_format)); } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index d5e0d92a60..3e3ea320cc 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -67,6 +67,7 @@ struct draw_pt_front_end { void (*run)( struct draw_pt_front_end *, pt_elt_func elt_func, const void *elt_ptr, + int elt_bias, unsigned count ); void (*finish)( struct draw_pt_front_end * ); @@ -147,6 +148,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h index 4ca5b52020..3c44f7c11e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_decompose.h +++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h @@ -105,40 +105,20 @@ static void FUNC( ARGS, case PIPE_PRIM_QUADS: - if (flatfirst) { - for (i = 0; i+3 < count; i += 4) { - QUAD( (i + 1), - (i + 2), - (i + 3), - (i + 0) ); - } - } - else { - for (i = 0; i+3 < count; i += 4) { - QUAD( (i + 0), - (i + 1), - (i + 2), - (i + 3)); - } + for (i = 0; i+3 < count; i += 4) { + QUAD( (i + 0), + (i + 1), + (i + 2), + (i + 3)); } break; case PIPE_PRIM_QUAD_STRIP: - if (flatfirst) { - for (i = 0; i+3 < count; i += 2) { - QUAD( (i + 1), - (i + 3), - (i + 2), - (i + 0) ); - } - } - else { - for (i = 0; i+3 < count; i += 2) { - QUAD( (i + 2), - (i + 0), - (i + 1), - (i + 3)); - } + for (i = 0; i+3 < count; i += 2) { + QUAD( (i + 2), + (i + 0), + (i + 1), + (i + 3)); } break; diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index ae357b5122..a7917f54b0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -86,40 +86,15 @@ void draw_pt_emit_prepare( struct pt_emit *emit, unsigned output_format; unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) ); + output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit); - - switch (vinfo->attrib[i].emit) { - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); + /* doesn't handle EMIT_OMIT */ + assert(emit_sz != 0); + + if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) { src_buffer = 1; src_offset = 0; - break; - case EMIT_4UB: - output_format = PIPE_FORMAT_A8R8G8B8_UNORM; - emit_sz = 4 * sizeof(ubyte); - break; - default: - assert(0); - output_format = PIPE_FORMAT_NONE; - emit_sz = 0; - break; } hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 2a604470e9..1994ddf2bc 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -129,41 +129,16 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned input_offset = src->src_offset; unsigned output_format; - switch (vinfo->attrib[i].emit) { - case EMIT_4UB: - output_format = PIPE_FORMAT_R8G8B8A8_UNORM; - emit_sz = 4 * sizeof(unsigned char); - break; - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: + output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit); + + if (vinfo->attrib[i].emit == EMIT_OMIT) + continue; + + if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) { input_format = PIPE_FORMAT_R32_FLOAT; input_buffer = draw->pt.nr_vertex_buffers; input_offset = 0; - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_OMIT: - continue; - default: - assert(0); - output_format = PIPE_FORMAT_NONE; - emit_sz = 0; - continue; } key.element[i].type = TRANSLATE_ELEMENT_NORMAL; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 1aecb51077..389e2b105e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -130,31 +130,10 @@ static void fse_prepare( struct draw_pt_middle_end *middle, unsigned dst_offset = 0; for (i = 0; i < vinfo->num_attribs; i++) { - unsigned emit_sz = 0; - - switch (vinfo->attrib[i].emit) { - case EMIT_4F: - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - emit_sz = 1 * sizeof(float); - break; - case EMIT_4UB: - emit_sz = 4 * sizeof(ubyte); - break; - default: - assert(0); - break; - } + unsigned emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit); + + /* doesn't handle EMIT_OMIT */ + assert(emit_sz != 0); /* The elements in the key correspond to vertex shader output * numbers, not to positions in the hw vertex description -- diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c new file mode 100644 index 0000000000..f71271bd91 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -0,0 +1,440 @@ +/************************************************************************** + * + * Copyright 2010 VMWare, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" +#include "draw/draw_gs.h" +#include "draw/draw_llvm.h" + + +struct llvm_middle_end { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct pt_emit *emit; + struct pt_fetch *fetch; + struct pt_post_vs *post_vs; + + + unsigned vertex_data_offset; + unsigned vertex_size; + unsigned prim; + unsigned opt; + + struct draw_llvm *llvm; + struct draw_llvm_variant *variants; + struct draw_llvm_variant *current_variant; + int nr_variants; +}; + + +static void +llvm_middle_end_prepare( struct draw_pt_middle_end *middle, + unsigned prim, + unsigned opt, + unsigned *max_vertices ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vs = draw->vs.vertex_shader; + struct draw_geometry_shader *gs = draw->gs.geometry_shader; + struct draw_llvm_variant_key key; + struct draw_llvm_variant *variant = NULL; + unsigned i; + unsigned instance_id_index = ~0; + + /* Add one to num_outputs because the pipeline occasionally tags on + * an additional texcoord, eg for AA lines. + */ + unsigned nr = MAX2( vs->info.num_inputs, + vs->info.num_outputs + 1 ); + + /* Scan for instanceID system value. + */ + for (i = 0; i < vs->info.num_inputs; i++) { + if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + instance_id_index = i; + break; + } + } + + fpme->prim = prim; + fpme->opt = opt; + + /* Always leave room for the vertex header whether we need it or + * not. It's hard to get rid of it in particular because of the + * viewport code in draw_pt_post_vs.c. + */ + fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); + + + + draw_pt_fetch_prepare( fpme->fetch, + vs->info.num_inputs, + fpme->vertex_size, + instance_id_index ); + if (opt & PT_SHADE) { + vs->prepare(vs, draw); + draw_geometry_shader_prepare(gs, draw); + } + + + /* XXX: it's not really gl rasterization rules we care about here, + * but gl vs dx9 clip spaces. + */ + draw_pt_post_vs_prepare( fpme->post_vs, + (boolean)draw->bypass_clipping, + (boolean)(draw->identity_viewport), + (boolean)draw->rasterizer->gl_rasterization_rules, + (draw->vs.edgeflag_output ? true : false) ); + + if (!(opt & PT_PIPELINE)) { + draw_pt_emit_prepare( fpme->emit, + prim, + max_vertices ); + + *max_vertices = MAX2( *max_vertices, + DRAW_PIPE_MAX_VERTICES ); + } + else { + *max_vertices = DRAW_PIPE_MAX_VERTICES; + } + + /* return even number */ + *max_vertices = *max_vertices & ~1; + + draw_llvm_make_variant_key(fpme->llvm, &key); + + variant = fpme->variants; + while(variant) { + if(memcmp(&variant->key, &key, sizeof key) == 0) + break; + + variant = variant->next; + } + + if (!variant) { + variant = draw_llvm_prepare(fpme->llvm, nr); + variant->next = fpme->variants; + fpme->variants = variant; + ++fpme->nr_variants; + } + fpme->current_variant = variant; + + /*XXX we only support one constant buffer */ + fpme->llvm->jit_context.vs_constants = + draw->pt.user.vs_constants[0]; + fpme->llvm->jit_context.gs_constants = + draw->pt.user.gs_constants[0]; +} + + + +static void llvm_middle_end_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vshader = draw->vs.vertex_shader; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align( fetch_count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run( fpme->fetch, + fetch_elts, + fetch_count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, eg if + * bypass_vs_clip_and_viewport, then the inputs == outputs, and are + * already in the correct place.*/ + if (opt & PT_SHADE) + { + vshader->run_linear(vshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.vs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + if (gshader) + draw_geometry_shader_run(gshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.gs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + fetch_count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + + FREE(pipeline_verts); +} + + +static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + unsigned opt = fpme->opt; + unsigned alloc_count = align( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + +#if 0 + debug_printf("#### Pipeline = %p (data = %p)\n", + pipeline_verts, pipeline_verts->data); +#endif + fpme->current_variant->jit_func( &fpme->llvm->jit_context, + pipeline_verts, + (const char **)draw->pt.user.vbuffer, + start, + count, + fpme->vertex_size, + draw->pt.vertex_buffer ); + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run_linear( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size); + } + else { + draw_pt_emit_linear( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fpme->vertex_size, + count ); + } + + FREE(pipeline_verts); +} + + + +static boolean +llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + unsigned opt = fpme->opt; + unsigned alloc_count = align( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) + return FALSE; + + fpme->current_variant->jit_func( &fpme->llvm->jit_context, + pipeline_verts, + (const char **)draw->pt.user.vbuffer, + start, + count, + fpme->vertex_size, + draw->pt.vertex_buffer ); + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + FREE(pipeline_verts); + return TRUE; +} + + + +static void llvm_middle_end_finish( struct draw_pt_middle_end *middle ) +{ + /* nothing to do */ +} + +static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + + if (fpme->fetch) + draw_pt_fetch_destroy( fpme->fetch ); + + if (fpme->emit) + draw_pt_emit_destroy( fpme->emit ); + + if (fpme->post_vs) + draw_pt_post_vs_destroy( fpme->post_vs ); + + if (fpme->llvm) + draw_llvm_destroy( fpme->llvm ); + + FREE(middle); +} + + +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw ) +{ + struct llvm_middle_end *fpme = 0; + + if (!draw->engine) + return NULL; + + fpme = CALLOC_STRUCT( llvm_middle_end ); + if (!fpme) + goto fail; + + fpme->base.prepare = llvm_middle_end_prepare; + fpme->base.run = llvm_middle_end_run; + fpme->base.run_linear = llvm_middle_end_linear_run; + fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts; + fpme->base.finish = llvm_middle_end_finish; + fpme->base.destroy = llvm_middle_end_destroy; + + fpme->draw = draw; + + fpme->fetch = draw_pt_fetch_create( draw ); + if (!fpme->fetch) + goto fail; + + fpme->post_vs = draw_pt_post_vs_create( draw ); + if (!fpme->post_vs) + goto fail; + + fpme->emit = draw_pt_emit_create( draw ); + if (!fpme->emit) + goto fail; + + fpme->llvm = draw_llvm_create(draw); + if (!fpme->llvm) + goto fail; + + fpme->variants = NULL; + fpme->current_variant = NULL; + fpme->nr_variants = 0; + + return &fpme->base; + + fail: + if (fpme) + llvm_middle_end_destroy( &fpme->base ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 9728d5c2bd..5525dfc748 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -108,6 +108,11 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, for (j = 0; j < count; j++) { float *position = out->data[pos]; +#if 0 + debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n", + j, out, position, position[0], position[1], position[2], position[3]); +#endif + out->clip[0] = position[0]; out->clip[1] = position[1]; out->clip[2] = position[2]; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index f0aec5feba..a292346be9 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -6,6 +6,7 @@ static unsigned trim( unsigned count, unsigned first, unsigned incr ) static void FUNC(struct draw_pt_front_end *frontend, pt_elt_func get_elt, const void *elts, + int elt_bias, unsigned count) { struct varray_frontend *varray = (struct varray_frontend *)frontend; @@ -14,6 +15,8 @@ static void FUNC(struct draw_pt_front_end *frontend, unsigned j; unsigned first, incr; + assert(elt_bias == 0); + draw_pt_split_prim(varray->input_prim, &first, &incr); /* Sanitize primitive length: diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 757c487454..37ffbac4f9 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -329,6 +329,7 @@ static INLINE void vcache_check_run( struct draw_pt_front_end *frontend, pt_elt_func get_elt, const void *elts, + int elt_bias, unsigned draw_count ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; @@ -346,7 +347,7 @@ vcache_check_run( struct draw_pt_front_end *frontend, vcache->fetch_max, draw_count); - if (max_index >= DRAW_PIPE_MAX_VERTICES || + if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES || fetch_count >= UNDEFINED_VERTEX_ID || fetch_count > draw_count) { if (0) debug_printf("fail\n"); @@ -362,8 +363,11 @@ vcache_check_run( struct draw_pt_front_end *frontend, } + assert((elt_bias >= 0 && min_index + elt_bias >= min_index) || + (elt_bias < 0 && min_index + elt_bias < min_index)); + if (min_index == 0 && - index_size == 2) + index_size == 2) { transformed_elts = (const ushort *)elts; } @@ -433,7 +437,7 @@ vcache_check_run( struct draw_pt_front_end *frontend, if (fetch_count < UNDEFINED_VERTEX_ID) ok = vcache->middle->run_linear_elts( vcache->middle, - min_index, /* start */ + min_index + elt_bias, /* start */ fetch_count, transformed_elts, draw_count ); @@ -447,7 +451,7 @@ vcache_check_run( struct draw_pt_front_end *frontend, fetch_count, draw_count); fail: - vcache_run( frontend, get_elt, elts, draw_count ); + vcache_run( frontend, get_elt, elts, elt_bias, draw_count ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index 62822a3d56..f7a63de3ba 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -3,6 +3,7 @@ static void FUNC( struct draw_pt_front_end *frontend, pt_elt_func get_elt, const void *elts, + int elt_bias, unsigned count ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; @@ -20,7 +21,7 @@ static void FUNC( struct draw_pt_front_end *frontend, case PIPE_PRIM_POINTS: for (i = 0; i < count; i ++) { POINT( vcache, - get_elt(elts, i + 0) ); + get_elt(elts, i + 0) + elt_bias ); } break; @@ -28,8 +29,8 @@ static void FUNC( struct draw_pt_front_end *frontend, for (i = 0; i+1 < count; i += 2) { LINE( vcache, DRAW_PIPE_RESET_STIPPLE, - get_elt(elts, i + 0), - get_elt(elts, i + 1)); + get_elt(elts, i + 0) + elt_bias, + get_elt(elts, i + 1) + elt_bias); } break; @@ -40,14 +41,14 @@ static void FUNC( struct draw_pt_front_end *frontend, for (i = 1; i < count; i++, flags = 0) { LINE( vcache, flags, - get_elt(elts, i - 1), - get_elt(elts, i )); + get_elt(elts, i - 1) + elt_bias, + get_elt(elts, i ) + elt_bias); } LINE( vcache, flags, - get_elt(elts, i - 1), - get_elt(elts, 0 )); + get_elt(elts, i - 1) + elt_bias, + get_elt(elts, 0 ) + elt_bias); } break; @@ -56,8 +57,8 @@ static void FUNC( struct draw_pt_front_end *frontend, for (i = 1; i < count; i++, flags = 0) { LINE( vcache, flags, - get_elt(elts, i - 1), - get_elt(elts, i )); + get_elt(elts, i - 1) + elt_bias, + get_elt(elts, i ) + elt_bias); } break; @@ -65,9 +66,9 @@ static void FUNC( struct draw_pt_front_end *frontend, for (i = 0; i+2 < count; i += 3) { TRIANGLE( vcache, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); + get_elt(elts, i + 0) + elt_bias, + get_elt(elts, i + 1) + elt_bias, + get_elt(elts, i + 2 ) + elt_bias); } break; @@ -76,18 +77,18 @@ static void FUNC( struct draw_pt_front_end *frontend, for (i = 0; i+2 < count; i++) { TRIANGLE( vcache, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0), - get_elt(elts, i + 1 + (i&1)), - get_elt(elts, i + 2 - (i&1))); + get_elt(elts, i + 0) + elt_bias, + get_elt(elts, i + 1 + (i&1)) + elt_bias, + get_elt(elts, i + 2 - (i&1)) + elt_bias); } } else { for (i = 0; i+2 < count; i++) { TRIANGLE( vcache, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0 + (i&1)), - get_elt(elts, i + 1 - (i&1)), - get_elt(elts, i + 2 )); + get_elt(elts, i + 0 + (i&1)) + elt_bias, + get_elt(elts, i + 1 - (i&1)) + elt_bias, + get_elt(elts, i + 2 ) + elt_bias); } } break; @@ -98,18 +99,18 @@ static void FUNC( struct draw_pt_front_end *frontend, for (i = 0; i+2 < count; i++) { TRIANGLE( vcache, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0 )); + get_elt(elts, i + 1) + elt_bias, + get_elt(elts, i + 2) + elt_bias, + get_elt(elts, 0 ) + elt_bias); } } else { for (i = 0; i+2 < count; i++) { TRIANGLE( vcache, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); + get_elt(elts, 0) + elt_bias, + get_elt(elts, i + 1) + elt_bias, + get_elt(elts, i + 2 ) + elt_bias); } } } @@ -118,39 +119,21 @@ static void FUNC( struct draw_pt_front_end *frontend, case PIPE_PRIM_QUADS: for (i = 0; i+3 < count; i += 4) { - if (flatfirst) { - QUAD( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, i + 3) ); - } - else { - QUAD( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, i + 3) ); - } + QUAD( vcache, + get_elt(elts, i + 0) + elt_bias, + get_elt(elts, i + 1) + elt_bias, + get_elt(elts, i + 2) + elt_bias, + get_elt(elts, i + 3) + elt_bias ); } break; case PIPE_PRIM_QUAD_STRIP: for (i = 0; i+3 < count; i += 2) { - if (flatfirst) { - QUAD( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 3), - get_elt(elts, i + 2) ); - } - else { - QUAD( vcache, - get_elt(elts, i + 2), - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 3) ); - } + QUAD( vcache, + get_elt(elts, i + 2) + elt_bias, + get_elt(elts, i + 0) + elt_bias, + get_elt(elts, i + 1) + elt_bias, + get_elt(elts, i + 3) + elt_bias ); } break; @@ -183,16 +166,16 @@ static void FUNC( struct draw_pt_front_end *frontend, if (flatfirst) { TRIANGLE( vcache, flags, - get_elt(elts, 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2) ); + get_elt(elts, 0) + elt_bias, + get_elt(elts, i + 1) + elt_bias, + get_elt(elts, i + 2) + elt_bias ); } else { TRIANGLE( vcache, flags, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0)); + get_elt(elts, i + 1) + elt_bias, + get_elt(elts, i + 2) + elt_bias, + get_elt(elts, 0) + elt_bias); } } } diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 3214213e44..a4f5e882c0 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -48,30 +48,12 @@ draw_compute_vertex_size(struct vertex_info *vinfo) uint i; vinfo->size = 0; - for (i = 0; i < vinfo->num_attribs; i++) { - switch (vinfo->attrib[i].emit) { - case EMIT_OMIT: - break; - case EMIT_4UB: - /* fall-through */ - case EMIT_1F_PSIZE: - /* fall-through */ - case EMIT_1F: - vinfo->size += 1; - break; - case EMIT_2F: - vinfo->size += 2; - break; - case EMIT_3F: - vinfo->size += 3; - break; - case EMIT_4F: - vinfo->size += 4; - break; - default: - assert(0); - } - } + for (i = 0; i < vinfo->num_attribs; i++) + vinfo->size += draw_translate_vinfo_size(vinfo->attrib[i].emit); + + assert(vinfo->size % 4 == 0); + /* in dwords */ + vinfo->size /= 4; } @@ -120,6 +102,13 @@ draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) debug_printf("%u ", *data++); debug_printf("%u ", *data++); break; + case EMIT_4UB_BGRA: + debug_printf("EMIT_4UB_BGRA:\t"); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + break; default: assert(0); } diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 8c3c7befbc..3af31ffe12 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -54,7 +54,8 @@ enum attrib_emit { EMIT_2F, EMIT_3F, EMIT_4F, - EMIT_4UB /**< XXX may need variations for RGBA vs BGRA, etc */ + EMIT_4UB, /**< is RGBA like the rest */ + EMIT_4UB_BGRA }; @@ -141,9 +142,11 @@ void draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data); -static INLINE unsigned draw_translate_vinfo_format(unsigned format ) +static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit) { - switch (format) { + switch (emit) { + case EMIT_OMIT: + return PIPE_FORMAT_NONE; case EMIT_1F: case EMIT_1F_PSIZE: return PIPE_FORMAT_R32_FLOAT; @@ -155,10 +158,36 @@ static INLINE unsigned draw_translate_vinfo_format(unsigned format ) return PIPE_FORMAT_R32G32B32A32_FLOAT; case EMIT_4UB: return PIPE_FORMAT_R8G8B8A8_UNORM; + case EMIT_4UB_BGRA: + return PIPE_FORMAT_B8G8R8A8_UNORM; default: + assert(!"unexpected format"); return PIPE_FORMAT_NONE; } } +static INLINE enum attrib_emit draw_translate_vinfo_size(enum attrib_emit emit) +{ + switch (emit) { + case EMIT_OMIT: + return 0; + case EMIT_1F: + case EMIT_1F_PSIZE: + return 1 * sizeof(float); + case EMIT_2F: + return 2 * sizeof(float); + case EMIT_3F: + return 3 * sizeof(float); + case EMIT_4F: + return 4 * sizeof(float); + case EMIT_4UB: + return 4 * sizeof(unsigned char); + case EMIT_4UB_BGRA: + return 4 * sizeof(unsigned char); + default: + assert(!"unexpected format"); + return 0; + } +} #endif /* DRAW_VERTEX_H */ diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 9085838022..cfd5154024 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -91,14 +91,11 @@ draw_create_vertex_shader(struct draw_context *draw, tgsi_dump(shader->tokens, 0); } - vs = draw_create_vs_llvm( draw, shader ); + vs = draw_create_vs_sse( draw, shader ); if (!vs) { - vs = draw_create_vs_sse( draw, shader ); + vs = draw_create_vs_ppc( draw, shader ); if (!vs) { - vs = draw_create_vs_ppc( draw, shader ); - if (!vs) { - vs = draw_create_vs_exec( draw, shader ); - } + vs = draw_create_vs_exec( draw, shader ); } } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index d095c9bad1..f49332352b 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -163,10 +163,6 @@ struct draw_vertex_shader * draw_create_vs_ppc(struct draw_context *draw, const struct pipe_shader_state *templ); -struct draw_vertex_shader * -draw_create_vs_llvm(struct draw_context *draw, - const struct pipe_shader_state *templ); - struct draw_vs_varient_key; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index ece1ddde0c..8f8bbe7cb8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -401,13 +401,11 @@ static boolean emit_output( struct aos_compilation *cp, emit_store_R32G32B32A32(cp, ptr, dataXMM); break; case EMIT_4UB: - if (1) { - emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); - emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); - } - else { - emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); - } + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + break; + case EMIT_4UB_BGRA: + emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); break; default: AOS_ERROR(cp, "unhandled output format"); diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c deleted file mode 100644 index 5f7a645f5d..0000000000 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ /dev/null @@ -1,131 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin - * Keith Whitwell <keith@tungstengraphics.com> - * Brian Paul - */ - -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "draw_private.h" -#include "draw_context.h" -#include "draw_vs.h" - -#include "tgsi/tgsi_parse.h" - -#ifdef MESA_LLVM - -struct draw_llvm_vertex_shader { - struct draw_vertex_shader base; - struct tgsi_exec_machine *machine; -}; - - -static void -vs_llvm_prepare( struct draw_vertex_shader *base, - struct draw_context *draw ) -{ -} - - -static void -vs_llvm_run_linear( struct draw_vertex_shader *base, - const float (*input)[4], - float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], - unsigned count, - unsigned input_stride, - unsigned output_stride ) -{ - struct draw_llvm_vertex_shader *shader = - (struct draw_llvm_vertex_shader *)base; -} - - - -static void -vs_llvm_delete( struct draw_vertex_shader *base ) -{ - struct draw_llvm_vertex_shader *shader = - (struct draw_llvm_vertex_shader *)base; - - /* Do something to free compiled shader: - */ - - FREE( (void*) shader->base.state.tokens ); - FREE( shader ); -} - - - - -struct draw_vertex_shader * -draw_create_vs_llvm(struct draw_context *draw, - const struct pipe_shader_state *templ) -{ - struct draw_llvm_vertex_shader *vs; - - vs = CALLOC_STRUCT( draw_llvm_vertex_shader ); - if (vs == NULL) - return NULL; - - /* we make a private copy of the tokens */ - vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); - if (!vs->base.state.tokens) { - FREE(vs); - return NULL; - } - - tgsi_scan_shader(vs->base.state.tokens, &vs->base.info); - - vs->base.draw = draw; - vs->base.prepare = vs_llvm_prepare; - vs->base.create_varient = draw_vs_varient_generic; - vs->base.run_linear = vs_llvm_run_linear; - vs->base.delete = vs_llvm_delete; - vs->machine = draw->vs.machine; - - return &vs->base; -} - - - - - -#else - -struct draw_vertex_shader * -draw_create_vs_llvm(struct draw_context *draw, - const struct pipe_shader_state *shader) -{ - return NULL; -} - -#endif |