diff options
author | Eric Anholt <eric@anholt.net> | 2010-07-26 17:47:59 -0700 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2010-07-26 17:53:27 -0700 |
commit | afe125e0a18ac3886c45c7e6b02b122fb2d327b5 (patch) | |
tree | 78621707e71154c0b388b0baacffc26432b7e992 /src/gallium/auxiliary | |
parent | d64343f1ae84979bd154475badf11af8a9bfc2eb (diff) | |
parent | 5403ca79b225605c79f49866a6497c97da53be3b (diff) |
Merge remote branch 'origin/master' into glsl2
This pulls in multiple i965 driver fixes which will help ensure better
testing coverage during development, and also gets past the conflicts
of the src/mesa/shader -> src/mesa/program move.
Conflicts:
src/mesa/Makefile
src/mesa/main/shaderapi.c
src/mesa/main/shaderobj.h
Diffstat (limited to 'src/gallium/auxiliary')
71 files changed, 3801 insertions, 938 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 7c8db19f5c..dcebab7c0f 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -124,6 +124,7 @@ C_SOURCES = \ util/u_linear.c \ util/u_network.c \ util/u_math.c \ + util/u_mempool.c \ util/u_mm.c \ util/u_rect.c \ util/u_ringbuffer.c \ @@ -154,6 +155,8 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_flow.c \ gallivm/lp_bld_format_aos.c \ gallivm/lp_bld_format_soa.c \ + gallivm/lp_bld_format_yuv.c \ + gallivm/lp_bld_gather.c \ gallivm/lp_bld_init.c \ gallivm/lp_bld_intr.c \ gallivm/lp_bld_logic.c \ @@ -167,8 +170,10 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_tgsi_soa.c \ gallivm/lp_bld_type.c \ draw/draw_llvm.c \ + draw/draw_vs_llvm.c \ draw/draw_pt_fetch_shade_pipeline_llvm.c \ - draw/draw_llvm_translate.c + draw/draw_llvm_translate.c \ + draw/draw_llvm_sample.c GALLIVM_CPP_SOURCES = \ gallivm/lp_bld_misc.cpp diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 6242ab0c59..72a16617db 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -32,8 +32,8 @@ env.CodeGenerate( env.CodeGenerate( target = 'util/u_format_table.c', - script = 'util/u_format_table.py', - source = ['util/u_format.csv'], + script = '#src/gallium/auxiliary/util/u_format_table.py', + source = ['#src/gallium/auxiliary/util/u_format.csv'], command = 'python $SCRIPT $SOURCE > $TARGET' ) @@ -45,7 +45,7 @@ env.CodeGenerate( ) env.Depends('util/u_format_table.c', [ - 'util/u_format_parse.py', + '#src/gallium/auxiliary/util/u_format_parse.py', 'util/u_format_pack.py', ]) @@ -172,6 +172,7 @@ source = [ 'util/u_keymap.c', 'util/u_network.c', 'util/u_math.c', + 'util/u_mempool.c', 'util/u_mm.c', 'util/u_rect.c', 'util/u_resource.c', @@ -203,6 +204,8 @@ if env['llvm']: 'gallivm/lp_bld_flow.c', 'gallivm/lp_bld_format_aos.c', 'gallivm/lp_bld_format_soa.c', + 'gallivm/lp_bld_format_yuv.c', + 'gallivm/lp_bld_gather.c', 'gallivm/lp_bld_intr.c', 'gallivm/lp_bld_logic.c', 'gallivm/lp_bld_init.c', @@ -218,7 +221,9 @@ if env['llvm']: 'gallivm/lp_bld_type.c', 'draw/draw_llvm.c', 'draw/draw_pt_fetch_shade_pipeline_llvm.c', - 'draw/draw_llvm_translate.c' + 'draw/draw_llvm_translate.c', + 'draw/draw_vs_llvm.c', + 'draw/draw_llvm_sample.c' ] gallium = env.ConvenienceLibrary( diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 20a8612dca..58b022d531 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -289,6 +289,9 @@ void cso_release_all( struct cso_context *ctx ) ctx->pipe->bind_fs_state( ctx->pipe, NULL ); ctx->pipe->bind_vs_state( ctx->pipe, NULL ); ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL ); + ctx->pipe->set_fragment_sampler_views(ctx->pipe, 0, NULL); + if (ctx->pipe->set_vertex_sampler_views) + ctx->pipe->set_vertex_sampler_views(ctx->pipe, 0, NULL); } for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { @@ -1029,6 +1032,7 @@ static INLINE void clip_state_cpy(struct pipe_clip_state *dst, const struct pipe_clip_state *src) { + dst->depth_clamp = src->depth_clamp; dst->nr = src->nr; if (src->nr) { memcpy(dst->ucp, src->ucp, src->nr * sizeof(src->ucp[0])); @@ -1039,6 +1043,9 @@ static INLINE int clip_state_cmp(const struct pipe_clip_state *a, const struct pipe_clip_state *b) { + if (a->depth_clamp != b->depth_clamp) { + return 1; + } if (a->nr != b->nr) { return 1; } diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index dab95e5051..c127f74188 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -40,6 +40,7 @@ #if HAVE_LLVM #include "gallivm/lp_bld_init.h" +#include "draw_llvm.h" #endif struct draw_context *draw_create( struct pipe_context *pipe ) @@ -52,6 +53,7 @@ struct draw_context *draw_create( struct pipe_context *pipe ) lp_build_init(); assert(lp_build_engine); draw->engine = lp_build_engine; + draw->llvm = draw_llvm_create(draw); #endif if (!draw_init(draw)) @@ -132,6 +134,9 @@ void draw_destroy( struct draw_context *draw ) draw_pt_destroy( draw ); draw_vs_destroy( draw ); draw_gs_destroy( draw ); +#ifdef HAVE_LLVM + draw_llvm_destroy( draw->llvm ); +#endif FREE( draw ); } @@ -211,6 +216,7 @@ void draw_set_clip_state( struct draw_context *draw, assert(clip->nr <= PIPE_MAX_CLIP_PLANES); memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0])); draw->nr_planes = 6 + clip->nr; + draw->depth_clamp = clip->depth_clamp; } @@ -601,3 +607,54 @@ draw_set_so_state(struct draw_context *draw, state, sizeof(struct pipe_stream_output_state)); } + +void +draw_set_sampler_views(struct draw_context *draw, + struct pipe_sampler_view **views, + unsigned num) +{ + unsigned i; + + debug_assert(num <= PIPE_MAX_VERTEX_SAMPLERS); + + for (i = 0; i < num; ++i) + draw->sampler_views[i] = views[i]; + for (i = num; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + draw->sampler_views[i] = NULL; + + draw->num_sampler_views = num; +} + +void +draw_set_samplers(struct draw_context *draw, + struct pipe_sampler_state **samplers, + unsigned num) +{ + unsigned i; + + debug_assert(num <= PIPE_MAX_VERTEX_SAMPLERS); + + for (i = 0; i < num; ++i) + draw->samplers[i] = samplers[i]; + for (i = num; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + draw->samplers[i] = NULL; + + draw->num_samplers = num; +} + +void +draw_set_mapped_texture(struct draw_context *draw, + unsigned sampler_idx, + uint32_t width, uint32_t height, uint32_t depth, + uint32_t last_level, + uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], + uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], + const void *data[DRAW_MAX_TEXTURE_LEVELS]) +{ +#ifdef HAVE_LLVM + draw_llvm_set_mapped_texture(draw, + sampler_idx, + width, height, depth, last_level, + row_stride, img_stride, data); +#endif +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index c0122f2aca..116716af6f 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -47,11 +47,14 @@ struct draw_vertex_shader; struct draw_geometry_shader; struct tgsi_sampler; +#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ struct draw_context *draw_create( struct pipe_context *pipe ); void draw_destroy( struct draw_context *draw ); +void draw_flush(struct draw_context *draw); + void draw_set_viewport_state( struct draw_context *draw, const struct pipe_viewport_state *viewport ); @@ -101,6 +104,23 @@ draw_texture_samplers(struct draw_context *draw, uint num_samplers, struct tgsi_sampler **samplers); +void +draw_set_sampler_views(struct draw_context *draw, + struct pipe_sampler_view **views, + unsigned num); +void +draw_set_samplers(struct draw_context *draw, + struct pipe_sampler_state **samplers, + unsigned num); + +void +draw_set_mapped_texture(struct draw_context *draw, + unsigned sampler_idx, + uint32_t width, uint32_t height, uint32_t depth, + uint32_t last_level, + uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], + uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], + const void *data[DRAW_MAX_TEXTURE_LEVELS]); /* @@ -173,7 +193,7 @@ draw_set_so_state(struct draw_context *draw, /*********************************************************************** - * draw_prim.c + * draw_pt.c */ void draw_arrays(struct draw_context *draw, unsigned prim, @@ -187,8 +207,6 @@ draw_arrays_instanced(struct draw_context *draw, unsigned startInstance, unsigned instanceCount); -void draw_flush(struct draw_context *draw); - /******************************************************************************* * Driver backend interface diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 9117c1303d..19f96c37ab 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include "draw_llvm.h" #include "draw_context.h" @@ -15,14 +42,13 @@ #include "tgsi/tgsi_dump.h" #include "util/u_cpu_detect.h" -#include "util/u_string.h" #include "util/u_pointer.h" +#include "util/u_string.h" #include <llvm-c/Transforms/Scalar.h> #define DEBUG_STORE 0 - /* generates the draw jit function */ static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); @@ -36,12 +62,19 @@ init_globals(struct draw_llvm *llvm) /* struct draw_jit_texture */ { - LLVMTypeRef elem_types[4]; + LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS]; elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = + LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = + LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + elem_types[DRAW_JIT_TEXTURE_DATA] = + LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), + DRAW_MAX_TEXTURE_LEVELS); texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -51,9 +84,18 @@ init_globals(struct draw_llvm *llvm) LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, llvm->target, texture_type, DRAW_JIT_TEXTURE_HEIGHT); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride, + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_DEPTH); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_LAST_LEVEL); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride, llvm->target, texture_type, - DRAW_JIT_TEXTURE_STRIDE); + DRAW_JIT_TEXTURE_ROW_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_IMG_STRIDE); LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, llvm->target, texture_type, DRAW_JIT_TEXTURE_DATA); @@ -71,7 +113,8 @@ init_globals(struct draw_llvm *llvm) elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ - elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + elem_types[2] = LLVMArrayType(texture_type, + PIPE_MAX_VERTEX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -81,7 +124,7 @@ init_globals(struct draw_llvm *llvm) llvm->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, llvm->target, context_type, - DRAW_JIT_CONTEXT_TEXTURES_INDEX); + DRAW_JIT_CTX_TEXTURES); LP_CHECK_STRUCT_SIZE(struct draw_jit_context, llvm->target, context_type); @@ -195,9 +238,22 @@ draw_llvm_create(struct draw_context *draw) /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ /* TODO: Add more passes */ + LLVMAddCFGSimplificationPass(llvm->pass); - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); - LLVMAddConstantPropagationPass(llvm->pass); + + if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) { + /* For LLVM >= 2.7 and 32-bit build, use this order of passes to + * avoid generating bad code. + * Test with piglit glsl-vs-sqrt-zero test. + */ + LLVMAddConstantPropagationPass(llvm->pass); + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); + } + else { + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); + LLVMAddConstantPropagationPass(llvm->pass); + } + if(util_cpu_caps.has_sse4_1) { /* FIXME: There is a bug in this pass, whereby the combination of fptosi * and sitofp (necessary for trunc/floor/ceil/round implementation) @@ -219,6 +275,9 @@ draw_llvm_create(struct draw_context *draw) LLVMDumpModule(llvm->module); } + llvm->nr_variants = 0; + make_empty_list(&llvm->vs_variants_list); + return llvm; } @@ -231,9 +290,13 @@ draw_llvm_destroy(struct draw_llvm *llvm) } struct draw_llvm_variant * -draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) +draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs) { struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant)); + struct llvm_vertex_shader *shader = + llvm_vertex_shader(llvm->draw->vs.vertex_shader); + + variant->llvm = llvm; draw_llvm_make_variant_key(llvm, &variant->key); @@ -242,6 +305,12 @@ draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) draw_llvm_generate(llvm, variant); draw_llvm_generate_elts(llvm, variant); + variant->shader = shader; + variant->list_item_global.base = variant; + variant->list_item_local.base = variant; + /*variant->no = */shader->variants_created++; + variant->list_item_global.base = variant; + return variant; } @@ -250,11 +319,13 @@ generate_vs(struct draw_llvm *llvm, LLVMBuilderRef builder, LLVMValueRef (*outputs)[NUM_CHANNELS], const LLVMValueRef (*inputs)[NUM_CHANNELS], - LLVMValueRef context_ptr) + LLVMValueRef context_ptr, + struct lp_build_sampler_soa *draw_sampler) { const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; struct lp_type vs_type; LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); + struct lp_build_sampler_soa *sampler = 0; memset(&vs_type, 0, sizeof vs_type); vs_type.floating = TRUE; /* floating point values */ @@ -270,6 +341,10 @@ generate_vs(struct draw_llvm *llvm, tgsi_dump(tokens, 0); } + if (llvm->draw->num_sampler_views && + llvm->draw->num_samplers) + sampler = draw_sampler; + lp_build_tgsi_soa(builder, tokens, vs_type, @@ -278,7 +353,7 @@ generate_vs(struct draw_llvm *llvm, NULL /*pos*/, inputs, outputs, - NULL/*sampler*/, + sampler, &llvm->draw->vs.vertex_shader->info); } @@ -306,7 +381,8 @@ generate_fetch(LLVMBuilderRef builder, LLVMValueRef *res, struct pipe_vertex_element *velem, LLVMValueRef vbuf, - LLVMValueRef index) + LLVMValueRef index, + LLVMValueRef instance_id) { LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, @@ -317,8 +393,15 @@ generate_fetch(LLVMBuilderRef builder, LLVMValueRef cond; LLVMValueRef stride; - cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); + if (velem->instance_divisor) { + /* array index = instance_id / instance_divisor */ + index = LLVMBuildUDiv(builder, instance_id, + LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0), + "instance_divisor"); + } + /* limit index to min(inex, vb_max_index) */ + cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); stride = LLVMBuildMul(builder, vb_stride, index, ""); @@ -586,13 +669,14 @@ convert_to_aos(LLVMBuilderRef builder, static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { - LLVMTypeRef arg_types[7]; + LLVMTypeRef arg_types[8]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef start, end, count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + LLVMValueRef instance_id; struct draw_context *draw = llvm->draw; unsigned i, j; struct lp_build_context bld; @@ -601,6 +685,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) const int max_vertices = 4; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; void *code; + struct lp_build_sampler_soa *sampler = 0; arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ @@ -609,6 +694,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) arg_types[4] = LLVMInt32Type(); /* count */ arg_types[5] = LLVMInt32Type(); /* stride */ arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + arg_types[7] = LLVMInt32Type(); /* instance_id */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); @@ -625,6 +711,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) count = LLVMGetParam(variant->function, 4); stride = LLVMGetParam(variant->function, 5); vb_ptr = LLVMGetParam(variant->function, 6); + instance_id = LLVMGetParam(variant->function, 7); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); @@ -633,6 +720,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_build_name(count, "count"); lp_build_name(stride, "stride"); lp_build_name(vb_ptr, "vb"); + lp_build_name(instance_id, "instance_id"); /* * Function body @@ -648,6 +736,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + /* code generated texture sampling */ + sampler = draw_llvm_sampler_soa_create(variant->key.sampler, + context_ptr); + #if DEBUG_STORE lp_build_printf(builder, "start = %d, end = %d, step = %d\n", start, end, step); @@ -678,7 +770,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); generate_fetch(builder, vbuffers_ptr, - &aos_attribs[j][i], velem, vb, true_index); + &aos_attribs[j][i], velem, vb, true_index, + instance_id); } } convert_to_soa(builder, aos_attribs, inputs, @@ -689,7 +782,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) builder, outputs, ptr_aos, - context_ptr); + context_ptr, + sampler); convert_to_aos(builder, io, outputs, draw->vs.vertex_shader->info.num_outputs, @@ -697,6 +791,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) } lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); + sampler->destroy(sampler); + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -730,13 +826,14 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) static void draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { - LLVMTypeRef arg_types[7]; + LLVMTypeRef arg_types[8]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + LLVMValueRef instance_id; struct draw_context *draw = llvm->draw; unsigned i, j; struct lp_build_context bld; @@ -747,6 +844,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef fetch_max; void *code; + struct lp_build_sampler_soa *sampler = 0; arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ @@ -755,14 +853,17 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian arg_types[4] = LLVMInt32Type(); /* fetch_count */ arg_types[5] = LLVMInt32Type(); /* stride */ arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + arg_types[7] = LLVMInt32Type(); /* instance_id */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); - variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); + variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", + func_type); LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute); + LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), + LLVMNoAliasAttribute); context_ptr = LLVMGetParam(variant->function_elts, 0); io_ptr = LLVMGetParam(variant->function_elts, 1); @@ -771,6 +872,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian fetch_count = LLVMGetParam(variant->function_elts, 4); stride = LLVMGetParam(variant->function_elts, 5); vb_ptr = LLVMGetParam(variant->function_elts, 6); + instance_id = LLVMGetParam(variant->function_elts, 7); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); @@ -779,6 +881,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian lp_build_name(fetch_count, "fetch_count"); lp_build_name(stride, "stride"); lp_build_name(vb_ptr, "vb"); + lp_build_name(instance_id, "instance_id"); /* * Function body @@ -793,6 +896,10 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + /* code generated texture sampling */ + sampler = draw_llvm_sampler_soa_create(variant->key.sampler, + context_ptr); + fetch_max = LLVMBuildSub(builder, fetch_count, LLVMConstInt(LLVMInt32Type(), 1, 0), "fetch_max"); @@ -833,7 +940,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); generate_fetch(builder, vbuffers_ptr, - &aos_attribs[j][i], velem, vb, true_index); + &aos_attribs[j][i], velem, vb, true_index, + instance_id); } } convert_to_soa(builder, aos_attribs, inputs, @@ -844,7 +952,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian builder, outputs, ptr_aos, - context_ptr); + context_ptr, + sampler); convert_to_aos(builder, io, outputs, draw->vs.vertex_shader->info.num_outputs, @@ -852,6 +961,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian } lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); + sampler->destroy(sampler); + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -885,6 +996,8 @@ void draw_llvm_make_variant_key(struct draw_llvm *llvm, struct draw_llvm_variant_key *key) { + unsigned i; + memset(key, 0, sizeof(struct draw_llvm_variant_key)); key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; @@ -896,4 +1009,72 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, memcpy(&key->vs, &llvm->draw->vs.vertex_shader->state, sizeof(struct pipe_shader_state)); + + /* if the driver implemented the sampling hooks then + * setup our sampling state */ + if (llvm->draw->num_sampler_views && llvm->draw->num_samplers) { + for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) { + struct draw_vertex_shader *shader = llvm->draw->vs.vertex_shader; + if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) + lp_sampler_static_state(&key->sampler[i], + llvm->draw->sampler_views[i], + llvm->draw->samplers[i]); + } + } +} + +void +draw_llvm_set_mapped_texture(struct draw_context *draw, + unsigned sampler_idx, + uint32_t width, uint32_t height, uint32_t depth, + uint32_t last_level, + uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], + uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], + const void *data[DRAW_MAX_TEXTURE_LEVELS]) +{ + unsigned j; + struct draw_jit_texture *jit_tex; + + assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS); + + + jit_tex = &draw->llvm->jit_context.textures[sampler_idx]; + + jit_tex->width = width; + jit_tex->height = height; + jit_tex->depth = depth; + jit_tex->last_level = last_level; + + for (j = 0; j <= last_level; j++) { + jit_tex->data[j] = data[j]; + jit_tex->row_stride[j] = row_stride[j]; + jit_tex->img_stride[j] = img_stride[j]; + } +} + +void +draw_llvm_destroy_variant(struct draw_llvm_variant *variant) +{ + struct draw_llvm *llvm = variant->llvm; + struct draw_context *draw = llvm->draw; + + if (variant->function_elts) { + if (variant->function_elts) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function_elts); + LLVMDeleteFunction(variant->function_elts); + } + + if (variant->function) { + if (variant->function) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + + remove_from_list(&variant->list_item_local); + variant->shader->variants_cached--; + remove_from_list(&variant->list_item_global); + llvm->nr_variants--; + FREE(variant); } diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 58fee7f9d6..4addb47d2d 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -1,28 +1,71 @@ -#ifndef HAVE_LLVM_H -#define HAVE_LLVM_H +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef DRAW_LLVM_H +#define DRAW_LLVM_H #include "draw/draw_private.h" +#include "draw/draw_vs.h" +#include "gallivm/lp_bld_sample.h" + #include "pipe/p_context.h" +#include "util/u_simple_list.h" #include <llvm-c/Core.h> #include <llvm-c/Analysis.h> #include <llvm-c/Target.h> #include <llvm-c/ExecutionEngine.h> +#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ + +struct draw_llvm; +struct llvm_vertex_shader; + struct draw_jit_texture { uint32_t width; uint32_t height; - uint32_t stride; - const void *data; + uint32_t depth; + uint32_t last_level; + uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; + const void *data[DRAW_MAX_TEXTURE_LEVELS]; }; enum { DRAW_JIT_TEXTURE_WIDTH = 0, DRAW_JIT_TEXTURE_HEIGHT, - DRAW_JIT_TEXTURE_STRIDE, - DRAW_JIT_TEXTURE_DATA + DRAW_JIT_TEXTURE_DEPTH, + DRAW_JIT_TEXTURE_LAST_LEVEL, + DRAW_JIT_TEXTURE_ROW_STRIDE, + DRAW_JIT_TEXTURE_IMG_STRIDE, + DRAW_JIT_TEXTURE_DATA, + DRAW_JIT_TEXTURE_NUM_FIELDS /* number of fields above */ }; enum { @@ -48,7 +91,7 @@ struct draw_jit_context const float *gs_constants; - struct draw_jit_texture textures[PIPE_MAX_SAMPLERS]; + struct draw_jit_texture textures[PIPE_MAX_VERTEX_SAMPLERS]; }; @@ -58,10 +101,10 @@ struct draw_jit_context #define draw_jit_context_gs_constants(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 1, "gs_constants") -#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2 +#define DRAW_JIT_CTX_TEXTURES 2 #define draw_jit_context_textures(_builder, _ptr) \ - lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures") + lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CTX_TEXTURES, "textures") @@ -92,7 +135,8 @@ typedef void unsigned start, unsigned count, unsigned stride, - struct pipe_vertex_buffer *vertex_buffers); + struct pipe_vertex_buffer *vertex_buffers, + unsigned instance_id); typedef void @@ -102,13 +146,54 @@ typedef void const unsigned *fetch_elts, unsigned fetch_count, unsigned stride, - struct pipe_vertex_buffer *vertex_buffers); + struct pipe_vertex_buffer *vertex_buffers, + unsigned instance_id); + +struct draw_llvm_variant_key +{ + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + struct pipe_shader_state vs; + struct lp_sampler_static_state sampler[PIPE_MAX_VERTEX_SAMPLERS]; +}; + +struct draw_llvm_variant_list_item +{ + struct draw_llvm_variant *base; + struct draw_llvm_variant_list_item *next, *prev; +}; + +struct draw_llvm_variant +{ + struct draw_llvm_variant_key key; + LLVMValueRef function; + LLVMValueRef function_elts; + draw_jit_vert_func jit_func; + draw_jit_vert_func_elts jit_func_elts; + + struct llvm_vertex_shader *shader; + + struct draw_llvm *llvm; + struct draw_llvm_variant_list_item list_item_global; + struct draw_llvm_variant_list_item list_item_local; +}; + +struct llvm_vertex_shader { + struct draw_vertex_shader base; + + struct draw_llvm_variant_list_item variants; + unsigned variants_created; + unsigned variants_cached; +}; struct draw_llvm { struct draw_context *draw; struct draw_jit_context jit_context; + struct draw_llvm_variant_list_item vs_variants_list; + int nr_variants; + LLVMModuleRef module; LLVMExecutionEngineRef engine; LLVMModuleProviderRef provider; @@ -121,23 +206,12 @@ struct draw_llvm { LLVMTypeRef vb_ptr_type; }; -struct draw_llvm_variant_key +static INLINE struct llvm_vertex_shader * +llvm_vertex_shader(struct draw_vertex_shader *vs) { - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - unsigned nr_vertex_elements; - struct pipe_shader_state vs; -}; + return (struct llvm_vertex_shader *)vs; +} -struct draw_llvm_variant -{ - struct draw_llvm_variant_key key; - LLVMValueRef function; - LLVMValueRef function_elts; - draw_jit_vert_func jit_func; - draw_jit_vert_func_elts jit_func_elts; - - struct draw_llvm_variant *next; -}; struct draw_llvm * draw_llvm_create(struct draw_context *draw); @@ -146,7 +220,10 @@ void draw_llvm_destroy(struct draw_llvm *llvm); struct draw_llvm_variant * -draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs); +draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs); + +void +draw_llvm_destroy_variant(struct draw_llvm_variant *variant); void draw_llvm_make_variant_key(struct draw_llvm *llvm, @@ -156,4 +233,18 @@ LLVMValueRef draw_llvm_translate_from(LLVMBuilderRef builder, LLVMValueRef vbuffer, enum pipe_format from_format); + +struct lp_build_sampler_soa * +draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, + LLVMValueRef context_ptr); + +void +draw_llvm_set_mapped_texture(struct draw_context *draw, + unsigned sampler_idx, + uint32_t width, uint32_t height, uint32_t depth, + uint32_t last_level, + uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], + uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], + const void *data[DRAW_MAX_TEXTURE_LEVELS]); + #endif diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c b/src/gallium/auxiliary/draw/draw_llvm_sample.c new file mode 100644 index 0000000000..e9811010db --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c @@ -0,0 +1,215 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling code generation + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_sample.h" +#include "gallivm/lp_bld_tgsi.h" + + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_pointer.h" +#include "util/u_string.h" + +#include "draw_llvm.h" + + +/** + * This provides the bridge between the sampler state store in + * lp_jit_context and lp_jit_texture and the sampler code + * generator. It provides the texture layout information required by + * the texture sampler code generator in terms of the state stored in + * lp_jit_context and lp_jit_texture in runtime. + */ +struct draw_llvm_sampler_dynamic_state +{ + struct lp_sampler_dynamic_state base; + + const struct lp_sampler_static_state *static_state; + + LLVMValueRef context_ptr; +}; + + +/** + * This is the bridge between our sampler and the TGSI translator. + */ +struct draw_llvm_sampler_soa +{ + struct lp_build_sampler_soa base; + + struct draw_llvm_sampler_dynamic_state dynamic_state; +}; + + +/** + * Fetch the specified member of the lp_jit_texture structure. + * \param emit_load if TRUE, emit the LLVM load instruction to actually + * fetch the field's value. Otherwise, just emit the + * GEP code to address the field. + * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +draw_llvm_texture_member(const struct lp_sampler_dynamic_state *base, + LLVMBuilderRef builder, + unsigned unit, + unsigned member_index, + const char *member_name, + boolean emit_load) +{ + struct draw_llvm_sampler_dynamic_state *state = + (struct draw_llvm_sampler_dynamic_state *)base; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + debug_assert(unit < PIPE_MAX_VERTEX_SAMPLERS); + + /* context[0] */ + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + /* context[0].textures */ + indices[1] = LLVMConstInt(LLVMInt32Type(), DRAW_JIT_CTX_TEXTURES, 0); + /* context[0].textures[unit] */ + indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0); + /* context[0].textures[unit].member */ + indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0); + + ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); + + if (emit_load) + res = LLVMBuildLoad(builder, ptr, ""); + else + res = ptr; + + lp_build_name(res, "context.texture%u.%s", unit, member_name); + + return res; +} + + +/** + * Helper macro to instantiate the functions that generate the code to + * fetch the members of lp_jit_texture to fulfill the sampler code + * generator requests. + * + * This complexity is the price we have to pay to keep the texture + * sampler code generator a reusable module without dependencies to + * llvmpipe internals. + */ +#define DRAW_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \ + static LLVMValueRef \ + draw_llvm_texture_##_name( const struct lp_sampler_dynamic_state *base, \ + LLVMBuilderRef builder, \ + unsigned unit) \ + { \ + return draw_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \ + } + + +DRAW_LLVM_TEXTURE_MEMBER(width, DRAW_JIT_TEXTURE_WIDTH, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(height, DRAW_JIT_TEXTURE_HEIGHT, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(depth, DRAW_JIT_TEXTURE_DEPTH, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(last_level, DRAW_JIT_TEXTURE_LAST_LEVEL, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(row_stride, DRAW_JIT_TEXTURE_ROW_STRIDE, FALSE) +DRAW_LLVM_TEXTURE_MEMBER(img_stride, DRAW_JIT_TEXTURE_IMG_STRIDE, FALSE) +DRAW_LLVM_TEXTURE_MEMBER(data_ptr, DRAW_JIT_TEXTURE_DATA, FALSE) + + +static void +draw_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +/** + * Fetch filtered values from texture. + * The 'texel' parameter returns four vectors corresponding to R, G, B, A. + */ +static void +draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, + LLVMBuilderRef builder, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ + LLVMValueRef *texel) +{ + struct draw_llvm_sampler_soa *sampler = (struct draw_llvm_sampler_soa *)base; + + assert(unit < PIPE_MAX_VERTEX_SAMPLERS); + + lp_build_sample_soa(builder, + &sampler->dynamic_state.static_state[unit], + &sampler->dynamic_state.base, + type, + unit, + num_coords, coords, + ddx, ddy, + lod_bias, explicit_lod, + texel); +} + + +struct lp_build_sampler_soa * +draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, + LLVMValueRef context_ptr) +{ + struct draw_llvm_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(draw_llvm_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = draw_llvm_sampler_soa_destroy; + sampler->base.emit_fetch_texel = draw_llvm_sampler_soa_emit_fetch_texel; + sampler->dynamic_state.base.width = draw_llvm_texture_width; + sampler->dynamic_state.base.height = draw_llvm_texture_height; + sampler->dynamic_state.base.depth = draw_llvm_texture_depth; + sampler->dynamic_state.base.last_level = draw_llvm_texture_last_level; + sampler->dynamic_state.base.row_stride = draw_llvm_texture_row_stride; + sampler->dynamic_state.base.img_stride = draw_llvm_texture_img_stride; + sampler->dynamic_state.base.data_ptr = draw_llvm_texture_data_ptr; + sampler->dynamic_state.static_state = static_state; + sampler->dynamic_state.context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c index d7da7ed357..6ebe1f7de4 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_translate.c +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -7,6 +7,7 @@ #include "gallivm/lp_bld_struct.h" #include "gallivm/lp_bld_format.h" #include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" #include "util/u_memory.h" #include "util/u_format.h" @@ -466,6 +467,7 @@ draw_llvm_translate_from(LLVMBuilderRef builder, const struct util_format_description *format_desc; LLVMValueRef zero; int i; + struct lp_type type = lp_float32_vec4_type(); /* * The above can only cope with straight arrays: no bitfields, @@ -493,5 +495,5 @@ draw_llvm_translate_from(LLVMBuilderRef builder, format_desc = util_format_description(from_format); zero = LLVMConstNull(LLVMInt32Type()); - return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero); + return lp_build_fetch_rgba_aos(builder, format_desc, type, vbuffer, zero, zero, zero); } diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 83556f10a8..8cd75ecf9a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -177,15 +177,15 @@ static void do_triangle( struct draw_context *draw, ( DRAW_PIPE_RESET_STIPPLE | \ DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i1], \ - verts + stride * elts[i2]); \ + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_1 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i2], \ - verts + stride * elts[i3]) + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) /* emit last quad vertex as last vertex in triangles */ #define QUAD_LAST_PV(i0,i1,i2,i3) \ @@ -193,15 +193,15 @@ static void do_triangle( struct draw_context *draw, ( DRAW_PIPE_RESET_STIPPLE | \ DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i1], \ - verts + stride * elts[i3]); \ + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * elts[i1], \ - verts + stride * elts[i2], \ - verts + stride * elts[i3]) + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ @@ -218,7 +218,7 @@ static void do_triangle( struct draw_context *draw, #define POINT(i0) \ do_point( draw, \ - verts + stride * elts[i0] ) + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) ) #define FUNC pipe_run #define ARGS \ @@ -260,7 +260,7 @@ void draw_pipeline_run( struct draw_context *draw, const struct draw_prim_info *prim_info) { unsigned i, start; - + draw->pipeline.verts = (char *)vert_info->verts; draw->pipeline.vertex_stride = vert_info->stride; draw->pipeline.vertex_count = vert_info->count; @@ -296,14 +296,14 @@ void draw_pipeline_run( struct draw_context *draw, DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2)); \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_1 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2), \ - verts + stride * (i3)) + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) /* emit last quad vertex as last vertex in triangles */ #define QUAD_LAST_PV(i0,i1,i2,i3) \ @@ -312,31 +312,31 @@ void draw_pipeline_run( struct draw_context *draw, DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i3)); \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2), \ - verts + stride * (i3)) + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ flags, /* flags */ \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2)) + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)) #define LINE(flags,i0,i1) \ do_line( draw, \ flags, \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1)) + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK)) #define POINT(i0) \ do_point( draw, \ - verts + stride * i0 ) + verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) ) #define FUNC pipe_run_linear #define ARGS \ diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index debd17fd74..c0135f5bb7 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -425,7 +425,8 @@ aaline_create_texture(struct aaline_stage *aaline) /* Fill in mipmap images. * Basically each level is solid opaque, except for the outermost - * texels which are zero. Special case the 1x1 and 2x2 levels. + * texels which are zero. Special case the 1x1 and 2x2 levels + * (though, those levels shouldn't be used - see the max_lod setting). */ for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { struct pipe_transfer *transfer; @@ -497,7 +498,8 @@ aaline_create_sampler(struct aaline_stage *aaline) sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; sampler.normalized_coords = 1; sampler.min_lod = 0.0f; - sampler.max_lod = MAX_TEXTURE_LEVEL; + /* avoid using the 1x1 and 2x2 mipmap levels */ + sampler.max_lod = MAX_TEXTURE_LEVEL - 2; aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); if (aaline->sampler_cso == NULL) @@ -669,8 +671,8 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) assert(draw->rasterizer->line_smooth); - if (draw->rasterizer->line_width <= 3.0) - aaline->half_line_width = 1.5f; + if (draw->rasterizer->line_width <= 2.2) + aaline->half_line_width = 1.1f; else aaline->half_line_width = 0.5f * draw->rasterizer->line_width; diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 122b1c7968..1cf6ee7a7f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -262,6 +262,7 @@ do_clip_tri( struct draw_stage *stage, clipmask &= ~(1<<plane_idx); + assert(n < MAX_CLIPPED_VERTICES); inlist[n] = inlist[0]; /* prevent rotation of vertices */ for (i = 1; i <= n; i++) { @@ -270,11 +271,17 @@ do_clip_tri( struct draw_stage *stage, float dp = dot4( vert->clip, plane ); if (!IS_NEGATIVE(dp_prev)) { + assert(outcount < MAX_CLIPPED_VERTICES); outlist[outcount++] = vert_prev; } if (DIFFERENT_SIGNS(dp, dp_prev)) { - struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++]; + struct vertex_header *new_vert; + + assert(tmpnr < MAX_CLIPPED_VERTICES+1); + new_vert = clipper->stage.tmp[tmpnr++]; + + assert(outcount < MAX_CLIPPED_VERTICES); outlist[outcount++] = new_vert; if (IS_NEGATIVE(dp)) { @@ -317,12 +324,14 @@ do_clip_tri( struct draw_stage *stage, if (clipper->flat) { if (stage->draw->rasterizer->flatshade_first) { if (inlist[0] != header->v[0]) { + assert(tmpnr < MAX_CLIPPED_VERTICES + 1); inlist[0] = dup_vert(stage, inlist[0], tmpnr++); copy_colors(stage, inlist[0], header->v[0]); } } else { if (inlist[0] != header->v[2]) { + assert(tmpnr < MAX_CLIPPED_VERTICES + 1); inlist[0] = dup_vert(stage, inlist[0], tmpnr++); copy_colors(stage, inlist[0], header->v[2]); } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index fff960c7eb..ed9a53e154 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -363,8 +363,12 @@ generate_pstip_fs(struct pstip_stage *pstip) assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS); pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); - + FREE((void *)pstip_fs.tokens); + + if (!pstip->fs->pstip_fs) + return FALSE; + return TRUE; } @@ -603,13 +607,16 @@ pstip_destroy(struct draw_stage *stage) } +/** Create a new polygon stipple drawing stage object */ static struct pstip_stage * -draw_pstip_stage(struct draw_context *draw) +draw_pstip_stage(struct draw_context *draw, struct pipe_context *pipe) { struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); if (pstip == NULL) goto fail; + pstip->pipe = pipe; + pstip->stage.draw = draw; pstip->stage.name = "pstip"; pstip->stage.next = NULL; @@ -765,14 +772,12 @@ draw_install_pstipple_stage(struct draw_context *draw, /* * Create / install pgon stipple drawing / prim stage */ - pstip = draw_pstip_stage( draw ); + pstip = draw_pstip_stage( draw, pipe ); if (pstip == NULL) goto fail; draw->pipeline.pstipple = &pstip->stage; - pstip->pipe = pipe; - /* create special texture, sampler state */ if (!pstip_create_texture(pstip)) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 3e6e538995..ee2945c7c9 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -226,6 +226,7 @@ static void widepoint_first_point( struct draw_stage *stage, if (rast->gl_rasterization_rules) { wide->xbias = 0.125; + wide->ybias = -0.125; } /* Disable triangle culling, stippling, unfilled mode etc. */ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4584033bc2..058aeedc17 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -48,6 +48,7 @@ #ifdef HAVE_LLVM #include <llvm-c/ExecutionEngine.h> +struct draw_llvm; #endif @@ -81,6 +82,9 @@ struct vertex_header { #define UNDEFINED_VERTEX_ID 0xffff +/* maximum number of shader variants we can cache */ +#define DRAW_MAX_SHADER_VARIANTS 1024 + /** * Private context for the drawing module. */ @@ -245,6 +249,7 @@ struct draw_context */ float plane[12][4]; unsigned nr_planes; + boolean depth_clamp; /* If a prim stage introduces new vertex attributes, they'll be stored here */ @@ -259,9 +264,15 @@ struct draw_context unsigned instance_id; #ifdef HAVE_LLVM + struct draw_llvm *llvm; LLVMExecutionEngineRef engine; #endif + struct pipe_sampler_view *sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned num_sampler_views; + const struct pipe_sampler_state *samplers[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned num_samplers; + void *driver_private; }; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 02c97fec81..92d4113b4c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -34,9 +34,11 @@ #include "draw/draw_gs.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" +#include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" #include "util/u_math.h" #include "util/u_prim.h" +#include "util/u_format.h" DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE) @@ -69,7 +71,6 @@ draw_pt_arrays(struct draw_context *draw, struct draw_pt_front_end *frontend = NULL; struct draw_pt_middle_end *middle = NULL; unsigned opt = 0; - unsigned out_prim = prim; /* Sanitize primitive length: */ @@ -80,18 +81,19 @@ draw_pt_arrays(struct draw_context *draw, if (count < first) return TRUE; } - if (draw->gs.geometry_shader) { - out_prim = draw->gs.geometry_shader->output_primitive; - } if (!draw->force_passthrough) { + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + if (!draw->render) { opt |= PT_PIPELINE; } if (draw_need_pipeline(draw, draw->rasterizer, - out_prim)) { + gs_out_prim)) { opt |= PT_PIPELINE; } @@ -102,7 +104,7 @@ draw_pt_arrays(struct draw_context *draw, opt |= PT_SHADE; } - if (draw->pt.middle.llvm && !draw->gs.geometry_shader) { + if (draw->pt.middle.llvm) { middle = draw->pt.middle.llvm; } else { if (opt == 0) @@ -122,7 +124,7 @@ draw_pt_arrays(struct draw_context *draw, frontend = draw->pt.front.varray; } - frontend->prepare( frontend, prim, out_prim, middle, opt ); + frontend->prepare( frontend, prim, middle, opt ); frontend->run(frontend, draw_pt_elt_func(draw), @@ -265,31 +267,38 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) case PIPE_FORMAT_R32_FLOAT: { float *v = (float *) ptr; - debug_printf("%f @ %p\n", v[0], (void *) v); + debug_printf("R %f @ %p\n", v[0], (void *) v); } break; case PIPE_FORMAT_R32G32_FLOAT: { float *v = (float *) ptr; - debug_printf("%f %f @ %p\n", v[0], v[1], (void *) v); + debug_printf("RG %f %f @ %p\n", v[0], v[1], (void *) v); } break; case PIPE_FORMAT_R32G32B32_FLOAT: { float *v = (float *) ptr; - debug_printf("%f %f %f @ %p\n", v[0], v[1], v[2], (void *) v); + debug_printf("RGB %f %f %f @ %p\n", v[0], v[1], v[2], (void *) v); } break; case PIPE_FORMAT_R32G32B32A32_FLOAT: { float *v = (float *) ptr; - debug_printf("%f %f %f %f @ %p\n", v[0], v[1], v[2], v[3], + debug_printf("RGBA %f %f %f %f @ %p\n", v[0], v[1], v[2], v[3], (void *) v); } break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + ubyte *u = (ubyte *) ptr; + debug_printf("BGRA %d %d %d %d @ %p\n", u[0], u[1], u[2], u[3], + (void *) u); + } + break; default: - debug_printf("other format (fix me)\n"); - ; + debug_printf("other format %s (fix me)\n", + util_format_name(draw->pt.vertex_element[j].src_format)); } } } @@ -297,11 +306,8 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) /** - * Draw vertex arrays - * This is the main entrypoint into the drawing module. - * \param prim one of PIPE_PRIM_x - * \param start index of first vertex to draw - * \param count number of vertices to draw + * Non-instanced drawing. + * \sa draw_arrays_instanced */ void draw_arrays(struct draw_context *draw, unsigned prim, @@ -310,6 +316,20 @@ draw_arrays(struct draw_context *draw, unsigned prim, draw_arrays_instanced(draw, prim, start, count, 0, 1); } + +/** + * Draw vertex arrays. + * This is the main entrypoint into the drawing module. + * If drawing an indexed primitive, the draw_set_mapped_element_buffer_range() + * function should have already been called to specify the element/index buffer + * information. + * + * \param prim one of PIPE_PRIM_x + * \param start index of first vertex to draw + * \param count number of vertices to draw + * \param startInstance number for the first primitive instance (usually 0). + * \param instanceCount number of instances to draw (1=non-instanced) + */ void draw_arrays_instanced(struct draw_context *draw, unsigned mode, @@ -329,26 +349,30 @@ draw_arrays_instanced(struct draw_context *draw, if (0) draw_print_arrays(draw, mode, start, MIN2(count, 20)); -#if 0 - { - int i; + if (0) { + unsigned int i; debug_printf("draw_arrays(mode=%u start=%u count=%u):\n", mode, start, count); tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { - debug_printf(" format=%s\n", + debug_printf(" %u: src_offset=%u inst_div=%u vbuf=%u format=%s\n", + i, + draw->pt.vertex_element[i].src_offset, + draw->pt.vertex_element[i].instance_divisor, + draw->pt.vertex_element[i].vertex_buffer_index, util_format_name(draw->pt.vertex_element[i].src_format)); } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { - debug_printf(" stride=%u offset=%u ptr=%p\n", + debug_printf(" %u: stride=%u maxindex=%u offset=%u ptr=%p\n", + i, draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_buffer[i].max_index, draw->pt.vertex_buffer[i].buffer_offset, draw->pt.user.vbuffer[i]); } } -#endif for (instance = 0; instance < instanceCount; instance++) { draw->instance_id = instance + startInstance; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index b6741ca83c..44356fba4c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -62,8 +62,7 @@ struct draw_vertex_info; */ struct draw_pt_front_end { void (*prepare)( struct draw_pt_front_end *, - unsigned input_prim, - unsigned output_prim, + unsigned prim, struct draw_pt_middle_end *, unsigned opt ); @@ -87,8 +86,7 @@ struct draw_pt_front_end { */ struct draw_pt_middle_end { void (*prepare)( struct draw_pt_middle_end *, - unsigned input_prim, - unsigned output_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index bf799db352..ae12ee24bd 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -68,31 +68,12 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, fetch->vertex_size = vertex_size; - /* Always emit/leave space for a vertex header. - * - * It's worth considering whether the vertex headers should contain - * a pointer to the 'data', rather than having it inline. - * Something to look at after we've fully switched over to the pt - * paths. + /* Leave the clipmask/edgeflags/pad/vertex_id untouched */ - { - /* Need to set header->vertex_id = 0xffff somehow. - */ - key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; - key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; - key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; - key.element[nr].input_offset = 0; - key.element[nr].instance_divisor = 0; - key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; - key.element[nr].output_offset = dst_offset; - dst_offset += 1 * sizeof(float); - nr++; - - - /* Just leave the clip[] array untouched. - */ - dst_offset += 4 * sizeof(float); - } + dst_offset += 1 * sizeof(float); + /* Just leave the clip[] array untouched. + */ + dst_offset += 4 * sizeof(float); if (instance_id_index != ~0) { num_extra_inputs++; @@ -131,26 +112,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, key.nr_elements = nr; key.output_stride = vertex_size; - if (!fetch->translate || translate_key_compare(&fetch->translate->key, &key) != 0) { translate_key_sanitize(&key); fetch->translate = translate_cache_find(fetch->cache, &key); - - { - static struct vertex_header vh = { 0, - 1, - 0, - UNDEFINED_VERTEX_ID, - { .0f, .0f, .0f, .0f } }; - - fetch->translate->set_buffer(fetch->translate, - draw->pt.nr_vertex_buffers, - &vh, - 0, - ~0); - } } } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index c629d55563..5c8af17c8e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -36,6 +36,7 @@ #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "draw/draw_gs.h" #include "translate/translate.h" #include "translate/translate_cache.h" @@ -90,7 +91,6 @@ struct fetch_emit_middle_end { static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned prim, - unsigned out_prim, unsigned opt, unsigned *max_vertices ) { @@ -101,9 +101,14 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, boolean ok; struct translate_key key; + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + + ok = draw->render->set_primitive( draw->render, - out_prim ); + gs_out_prim ); if (!ok) { assert(0); return; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 5483a25f1d..b8270280b6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -68,8 +68,7 @@ struct fetch_shade_emit { static void fse_prepare( struct draw_pt_middle_end *middle, - unsigned in_prim, - unsigned out_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ) { @@ -80,9 +79,12 @@ static void fse_prepare( struct draw_pt_middle_end *middle, unsigned i; unsigned nr_vbs = 0; + /* Can't support geometry shader on this path. + */ + assert(!draw->gs.geometry_shader); if (!draw->render->set_primitive( draw->render, - out_prim )) { + prim )) { assert(0); return; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 43b08a030c..121dfc414a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -48,13 +48,11 @@ struct fetch_pipeline_middle_end { unsigned vertex_data_offset; unsigned vertex_size; unsigned input_prim; - unsigned output_prim; unsigned opt; }; static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - unsigned in_prim, - unsigned out_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ) { @@ -64,6 +62,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned i; unsigned instance_id_index = ~0; + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. */ @@ -79,8 +81,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, } } - fpme->input_prim = in_prim; - fpme->output_prim = out_prim; + fpme->input_prim = prim; fpme->opt = opt; /* Always leave room for the vertex header whether we need it or @@ -102,13 +103,13 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, (boolean)draw->bypass_clipping, (boolean)draw->identity_viewport, (boolean)draw->rasterizer->gl_rasterization_rules, - (draw->vs.edgeflag_output ? true : false) ); + (draw->vs.edgeflag_output ? TRUE : FALSE) ); draw_pt_so_emit_prepare( fpme->so_emit ); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, - out_prim, + gs_out_prim, max_vertices ); *max_vertices = MAX2( *max_vertices, @@ -183,7 +184,7 @@ static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, output_verts->count = input_verts->count; output_verts->verts = (struct vertex_header *)MALLOC(output_verts->vertex_size * - output_verts->count); + align(output_verts->count, 4)); vshader->run_linear(vshader, (const float (*)[4])input_verts->verts->data, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 7d2de58e73..bc074df8c2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -49,48 +49,50 @@ struct llvm_middle_end { unsigned vertex_data_offset; unsigned vertex_size; unsigned input_prim; - unsigned output_prim; unsigned opt; struct draw_llvm *llvm; - struct draw_llvm_variant *variants; struct draw_llvm_variant *current_variant; - int nr_variants; }; static void llvm_middle_end_prepare( struct draw_pt_middle_end *middle, unsigned in_prim, - unsigned out_prim, unsigned opt, unsigned *max_vertices ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vs = draw->vs.vertex_shader; + struct llvm_vertex_shader *shader = + llvm_vertex_shader(draw->vs.vertex_shader); struct draw_llvm_variant_key key; struct draw_llvm_variant *variant = NULL; + struct draw_llvm_variant_list_item *li; unsigned i; unsigned instance_id_index = ~0; + + unsigned out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + in_prim); + /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. */ - unsigned nr = MAX2( vs->info.num_inputs, - vs->info.num_outputs + 1 ); + unsigned nr = MAX2( shader->base.info.num_inputs, + shader->base.info.num_outputs + 1 ); /* Scan for instanceID system value. */ - for (i = 0; i < vs->info.num_inputs; i++) { - if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + for (i = 0; i < shader->base.info.num_inputs; i++) { + if (shader->base.info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { instance_id_index = i; break; } } fpme->input_prim = in_prim; - fpme->output_prim = out_prim; fpme->opt = opt; /* Always leave room for the vertex header whether we need it or @@ -107,7 +109,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, (boolean)draw->bypass_clipping, (boolean)(draw->identity_viewport), (boolean)draw->rasterizer->gl_rasterization_rules, - (draw->vs.edgeflag_output ? true : false) ); + (draw->vs.edgeflag_output ? TRUE : FALSE) ); draw_pt_so_emit_prepare( fpme->so_emit ); @@ -128,20 +130,41 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, draw_llvm_make_variant_key(fpme->llvm, &key); - variant = fpme->variants; - while(variant) { - if(memcmp(&variant->key, &key, sizeof key) == 0) + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + if(memcmp(&li->base->key, &key, sizeof key) == 0) { + variant = li->base; break; + } + li = next_elem(li); + } - variant = variant->next; + if (variant) { + move_to_head(&fpme->llvm->vs_variants_list, &variant->list_item_global); } + else { + unsigned i; + if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) { + /* + * XXX: should we flush here ? + */ + for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) { + struct draw_llvm_variant_list_item *item = + last_elem(&fpme->llvm->vs_variants_list); + draw_llvm_destroy_variant(item->base); + } + } + + variant = draw_llvm_create_variant(fpme->llvm, nr); - if (!variant) { - variant = draw_llvm_prepare(fpme->llvm, nr); - variant->next = fpme->variants; - fpme->variants = variant; - ++fpme->nr_variants; + if (variant) { + insert_at_head(&shader->variants, &variant->list_item_local); + insert_at_head(&fpme->llvm->vs_variants_list, &variant->list_item_global); + fpme->llvm->nr_variants++; + shader->variants_cached++; + } } + fpme->current_variant = variant; /*XXX we only support one constant buffer */ @@ -210,7 +233,8 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, fetch_info->start, fetch_info->count, fpme->vertex_size, - draw->pt.vertex_buffer ); + draw->pt.vertex_buffer, + draw->instance_id); else fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, llvm_vert_info.verts, @@ -218,7 +242,8 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, fetch_info->elts, fetch_info->count, fpme->vertex_size, - draw->pt.vertex_buffer); + draw->pt.vertex_buffer, + draw->instance_id); /* Finished with fetch and vs: */ @@ -356,31 +381,7 @@ static void llvm_middle_end_finish( struct draw_pt_middle_end *middle ) static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; - struct draw_context *draw = fpme->draw; - struct draw_llvm_variant *variant = NULL; - - variant = fpme->variants; - while(variant) { - struct draw_llvm_variant *next = variant->next; - if (variant->function_elts) { - if (variant->function_elts) - LLVMFreeMachineCodeForFunction(draw->engine, - variant->function_elts); - LLVMDeleteFunction(variant->function_elts); - } - - if (variant->function) { - if (variant->function) - LLVMFreeMachineCodeForFunction(draw->engine, - variant->function); - LLVMDeleteFunction(variant->function); - } - - FREE(variant); - - variant = next; - } if (fpme->fetch) draw_pt_fetch_destroy( fpme->fetch ); @@ -393,14 +394,12 @@ static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) if (fpme->post_vs) draw_pt_post_vs_destroy( fpme->post_vs ); - if (fpme->llvm) - draw_llvm_destroy( fpme->llvm ); - FREE(middle); } -struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw ) +struct draw_pt_middle_end * +draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw) { struct llvm_middle_end *fpme = 0; @@ -436,13 +435,11 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_cont if (!fpme->so_emit) goto fail; - fpme->llvm = draw_llvm_create(draw); + fpme->llvm = draw->llvm; if (!fpme->llvm) goto fail; - fpme->variants = NULL; fpme->current_variant = NULL; - fpme->nr_variants = 0; return &fpme->base; diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 112be50f9a..308f927b77 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -38,7 +38,14 @@ struct pt_post_vs { struct draw_vertex_info *info ); }; - +static INLINE void +initialize_vertex_header(struct vertex_header *header) +{ + header->clipmask = 0; + header->edgeflag = 1; + header->pad = 0; + header->vertex_id = UNDEFINED_VERTEX_ID; +} static INLINE float dot4(const float *a, const float *b) @@ -49,10 +56,9 @@ dot4(const float *a, const float *b) a[3]*b[3]); } - - static INLINE unsigned -compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) +compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr, + boolean clip_depth) { unsigned mask = 0x0; unsigned i; @@ -69,8 +75,10 @@ compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) if ( clip[0] + clip[3] < 0) mask |= (1<<1); if (-clip[1] + clip[3] < 0) mask |= (1<<2); if ( clip[1] + clip[3] < 0) mask |= (1<<3); - if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */ - if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */ + if (clip_depth) { + if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */ + if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */ + } /* Followed by any remaining ones: */ @@ -103,6 +111,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, for (j = 0; j < info->count; j++) { float *position = out->data[pos]; + initialize_vertex_header(out); #if 0 debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n", j, out, position, position[0], position[1], position[2], position[3]); @@ -114,9 +123,11 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, out->clip[3] = position[3]; out->vertex_id = 0xffff; + /* Disable depth clipping if depth clamping is enabled. */ out->clipmask = compute_clipmask_gl(out->clip, pvs->draw->plane, - pvs->draw->nr_planes); + pvs->draw->nr_planes, + !pvs->draw->depth_clamp); clipped += out->clipmask; if (out->clipmask == 0) @@ -192,6 +203,7 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, for (j = 0; j < info->count; j++) { float *position = out->data[pos]; + initialize_vertex_header(out); /* Viewport mapping only, no cliptest/rhw divide */ position[0] = position[0] * scale[0] + trans[0]; @@ -211,7 +223,16 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, static boolean post_vs_none( struct pt_post_vs *pvs, struct draw_vertex_info *info ) { + struct vertex_header *out = info->verts; + unsigned j; + if (0) debug_printf("%s\n", __FUNCTION__); + /* just initialize the vertex_id in all headers */ + for (j = 0; j < info->count; j++) { + initialize_vertex_header(out); + + out = (struct vertex_header *)((char *)out + info->stride); + } return FALSE; } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 5ea833032f..cd7bb7bf25 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -120,24 +120,27 @@ static void varray_fan_segment(struct varray_frontend *varray, #define FUNC varray_run #include "draw_pt_varray_tmp_linear.h" -static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { +static unsigned decompose_prim[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY + 1] = { PIPE_PRIM_POINTS, PIPE_PRIM_LINES, PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */ PIPE_PRIM_LINE_STRIP, PIPE_PRIM_TRIANGLES, PIPE_PRIM_TRIANGLE_STRIP, - PIPE_PRIM_TRIANGLE_FAN, + PIPE_PRIM_TRIANGLE_FAN, PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP, - PIPE_PRIM_POLYGON + PIPE_PRIM_POLYGON, + PIPE_PRIM_LINES_ADJACENCY, + PIPE_PRIM_LINE_STRIP_ADJACENCY, + PIPE_PRIM_TRIANGLES_ADJACENCY, + PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY }; static void varray_prepare(struct draw_pt_front_end *frontend, unsigned in_prim, - unsigned out_prim, struct draw_pt_middle_end *middle, unsigned opt) { @@ -146,11 +149,13 @@ static void varray_prepare(struct draw_pt_front_end *frontend, varray->base.run = varray_run; varray->input_prim = in_prim; - varray->output_prim = decompose_prim[out_prim]; + assert(in_prim < Elements(decompose_prim)); + varray->output_prim = decompose_prim[in_prim]; varray->middle = middle; - middle->prepare(middle, varray->input_prim, - varray->output_prim, opt, &varray->driver_fetch_max ); + middle->prepare(middle, + varray->output_prim, + opt, &varray->driver_fetch_max ); /* check that the max is even */ assert((varray->driver_fetch_max & 1) == 0); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 914c87a9dc..8ef94c3163 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -41,6 +41,7 @@ #define FETCH_MAX 256 #define DRAW_MAX (16*1024) + struct vcache_frontend { struct draw_pt_front_end base; struct draw_context *draw; @@ -64,13 +65,13 @@ struct vcache_frontend { unsigned opt; }; + static INLINE void vcache_flush( struct vcache_frontend *vcache ) { if (vcache->middle_prim != vcache->output_prim) { vcache->middle_prim = vcache->output_prim; vcache->middle->prepare( vcache->middle, - vcache->input_prim, vcache->middle_prim, vcache->opt, &vcache->fetch_max ); @@ -89,12 +90,12 @@ vcache_flush( struct vcache_frontend *vcache ) vcache->draw_count = 0; } + static INLINE void vcache_check_flush( struct vcache_frontend *vcache ) { - if ( vcache->draw_count + 6 >= DRAW_MAX || - vcache->fetch_count + 4 >= FETCH_MAX ) - { + if (vcache->draw_count + 6 >= DRAW_MAX || + vcache->fetch_count + 4 >= FETCH_MAX) { vcache_flush( vcache ); } } @@ -146,6 +147,7 @@ vcache_triangle_flags( struct vcache_frontend *vcache, vcache_check_flush(vcache); } + static INLINE void vcache_line( struct vcache_frontend *vcache, unsigned i0, @@ -177,6 +179,7 @@ vcache_point( struct vcache_frontend *vcache, vcache_check_flush(vcache); } + static INLINE void vcache_quad( struct vcache_frontend *vcache, unsigned i0, @@ -196,6 +199,7 @@ vcache_quad( struct vcache_frontend *vcache, } } + static INLINE void vcache_ef_quad( struct vcache_frontend *vcache, unsigned i0, @@ -231,6 +235,7 @@ vcache_ef_quad( struct vcache_frontend *vcache, } } + /* At least for now, we're back to using a template include file for * this. The two paths aren't too different though - it may be * possible to reunify them. @@ -256,23 +261,23 @@ rebase_uint_elts( const unsigned *src, ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i] + delta); } + static INLINE void rebase_ushort_elts( const ushort *src, unsigned count, int delta, - ushort *dest ) + ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i] + delta); } + static INLINE void rebase_ubyte_elts( const ubyte *src, unsigned count, @@ -280,42 +285,39 @@ rebase_ubyte_elts( const ubyte *src, ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i] + delta); } - static INLINE void translate_uint_elts( const unsigned *src, unsigned count, ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i]); } + static INLINE void translate_ushort_elts( const ushort *src, unsigned count, ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i]); } + static INLINE void translate_ubyte_elts( const ubyte *src, unsigned count, ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i]); } @@ -336,6 +338,7 @@ format_from_get_elt( pt_elt_func get_elt ) } #endif + static INLINE void vcache_check_run( struct draw_pt_front_end *frontend, pt_elt_func get_elt, @@ -345,18 +348,46 @@ vcache_check_run( struct draw_pt_front_end *frontend, { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; struct draw_context *draw = vcache->draw; - unsigned min_index = draw->pt.user.min_index; - unsigned max_index = draw->pt.user.max_index; - unsigned index_size = draw->pt.user.eltSize; - unsigned fetch_count = max_index + 1 - min_index; + const unsigned min_index = draw->pt.user.min_index; + const unsigned max_index = draw->pt.user.max_index; + const unsigned index_size = draw->pt.user.eltSize; + unsigned fetch_count; const ushort *transformed_elts; ushort *storage = NULL; boolean ok = FALSE; + /* debug: verify indexes are in range [min_index, max_index] */ + if (0) { + unsigned i; + for (i = 0; i < draw_count; i++) { + if (index_size == 1) { + assert( ((const ubyte *) elts)[i] >= min_index); + assert( ((const ubyte *) elts)[i] <= max_index); + } + else if (index_size == 2) { + assert( ((const ushort *) elts)[i] >= min_index); + assert( ((const ushort *) elts)[i] <= max_index); + } + else { + assert(index_size == 4); + assert( ((const uint *) elts)[i] >= min_index); + assert( ((const uint *) elts)[i] <= max_index); + } + } + } + + /* Note: max_index is frequently 0xffffffff so we have to be sure + * that any arithmetic involving max_index doesn't overflow! + */ + if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES) + goto fail; - if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, - vcache->fetch_max, - draw_count); + fetch_count = max_index + 1 - min_index; + + if (0) + debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, + vcache->fetch_max, + draw_count); if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES || fetch_count >= UNDEFINED_VERTEX_ID || @@ -368,23 +399,19 @@ vcache_check_run( struct draw_pt_front_end *frontend, if (vcache->middle_prim != vcache->input_prim) { vcache->middle_prim = vcache->input_prim; vcache->middle->prepare( vcache->middle, - vcache->input_prim, vcache->middle_prim, vcache->opt, &vcache->fetch_max ); } - assert((elt_bias >= 0 && min_index + elt_bias >= min_index) || (elt_bias < 0 && min_index + elt_bias < min_index)); if (min_index == 0 && - index_size == 2) - { + index_size == 2) { transformed_elts = (const ushort *)elts; } - else - { + else { storage = MALLOC( draw_count * sizeof(ushort) ); if (!storage) goto fail; @@ -419,23 +446,23 @@ vcache_check_run( struct draw_pt_front_end *frontend, switch(index_size) { case 1: rebase_ubyte_elts( (const ubyte *)elts, - draw_count, - 0 - (int)min_index, - storage ); + draw_count, + 0 - (int)min_index, + storage ); break; case 2: rebase_ushort_elts( (const ushort *)elts, - draw_count, - 0 - (int)min_index, - storage ); + draw_count, + 0 - (int)min_index, + storage ); break; case 4: rebase_uint_elts( (const uint *)elts, - draw_count, - 0 - (int)min_index, - storage ); + draw_count, + 0 - (int)min_index, + storage ); break; default: @@ -462,7 +489,7 @@ vcache_check_run( struct draw_pt_front_end *frontend, debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n", fetch_count, draw_count); - fail: +fail: vcache_run( frontend, get_elt, elts, elt_bias, draw_count ); } @@ -472,23 +499,26 @@ vcache_check_run( struct draw_pt_front_end *frontend, static void vcache_prepare( struct draw_pt_front_end *frontend, unsigned in_prim, - unsigned out_prim, struct draw_pt_middle_end *middle, unsigned opt ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - if (opt & PT_PIPELINE) - { + if (opt & PT_PIPELINE) { vcache->base.run = vcache_run_extras; } - else - { + else { vcache->base.run = vcache_check_run; } + /* VCache will always emit the reduced version of its input + * primitive, ie STRIP/FANS become TRIS, etc. + * + * This is not to be confused with what the GS might be up to, + * which is a separate issue. + */ vcache->input_prim = in_prim; - vcache->output_prim = u_reduced_prim(out_prim); + vcache->output_prim = u_reduced_prim(in_prim); vcache->middle = middle; vcache->opt = opt; @@ -496,12 +526,13 @@ vcache_prepare( struct draw_pt_front_end *frontend, /* Have to run prepare here, but try and guess a good prim for * doing so: */ - vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim; - middle->prepare( middle, vcache->input_prim, - vcache->middle_prim, opt, &vcache->fetch_max ); -} - + vcache->middle_prim = (opt & PT_PIPELINE) + ? vcache->output_prim : vcache->input_prim; + middle->prepare( middle, + vcache->middle_prim, + opt, &vcache->fetch_max ); +} static void @@ -512,6 +543,7 @@ vcache_finish( struct draw_pt_front_end *frontend ) vcache->middle = NULL; } + static void vcache_destroy( struct draw_pt_front_end *frontend ) { diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index b9db886a24..57ea63fc06 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -98,6 +98,11 @@ draw_create_vertex_shader(struct draw_context *draw, vs = draw_create_vs_ppc( draw, shader ); #endif } +#if HAVE_LLVM + else { + vs = draw_create_vs_llvm(draw, shader); + } +#endif if (!vs) { vs = draw_create_vs_exec( draw, shader ); diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 6c7e94db43..a731994523 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -165,7 +165,6 @@ draw_create_vs_ppc(struct draw_context *draw, const struct pipe_shader_state *templ); - struct draw_vs_varient_key; struct draw_vertex_shader; @@ -173,6 +172,11 @@ struct draw_vs_varient * draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, const struct draw_vs_varient_key *key ); +#if HAVE_LLVM +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *state); +#endif /******************************************************************************** diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c new file mode 100644 index 0000000000..6c13df7913 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -0,0 +1,120 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vs.h" +#include "draw_llvm.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" + +static void +vs_llvm_prepare(struct draw_vertex_shader *shader, + struct draw_context *draw) +{ + /*struct llvm_vertex_shader *evs = llvm_vertex_shader(shader);*/ +} + +static void +vs_llvm_run_linear( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + /* we should never get here since the entire pipeline is + * generated in draw_pt_fetch_shade_pipeline_llvm.c */ + debug_assert(0); +} + + +static void +vs_llvm_delete( struct draw_vertex_shader *dvs ) +{ + struct llvm_vertex_shader *shader = llvm_vertex_shader(dvs); + struct pipe_fence_handle *fence = NULL; + struct draw_llvm_variant_list_item *li; + struct pipe_context *pipe = dvs->draw->pipe; + + /* + * XXX: This might be not neccessary at all. + */ + pipe->flush(pipe, 0, &fence); + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + + + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + struct draw_llvm_variant_list_item *next = next_elem(li); + draw_llvm_destroy_variant(li->base); + li = next; + } + + assert(shader->variants_cached == 0); + FREE((void*) dvs->state.tokens); + FREE( dvs ); +} + + +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *state) +{ + struct llvm_vertex_shader *vs = CALLOC_STRUCT( llvm_vertex_shader ); + + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(state->tokens); + if (!vs->base.state.tokens) { + FREE(vs); + return NULL; + } + + tgsi_scan_shader(state->tokens, &vs->base.info); + + vs->base.draw = draw; + vs->base.prepare = vs_llvm_prepare; + vs->base.run_linear = vs_llvm_run_linear; + vs->base.delete = vs_llvm_delete; + vs->base.create_varient = draw_vs_create_varient_generic; + + make_empty_list(&vs->variants); + + return &vs->base; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index d926b2de18..f5f2623e46 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -56,7 +56,6 @@ #include "lp_bld_intr.h" #include "lp_bld_logic.h" #include "lp_bld_pack.h" -#include "lp_bld_debug.h" #include "lp_bld_arit.h" @@ -847,6 +846,11 @@ lp_build_round_sse41(struct lp_build_context *bld, } +/** + * Return the integer part of a float (vector) value. The returned value is + * a float (vector). + * Ex: trunc(-1.5) = 1.0 + */ LLVMValueRef lp_build_trunc(struct lp_build_context *bld, LLVMValueRef a) @@ -869,6 +873,12 @@ lp_build_trunc(struct lp_build_context *bld, } +/** + * Return float (vector) rounded to nearest integer (vector). The returned + * value is a float (vector). + * Ex: round(0.9) = 1.0 + * Ex: round(-1.5) = -2.0 + */ LLVMValueRef lp_build_round(struct lp_build_context *bld, LLVMValueRef a) @@ -890,6 +900,11 @@ lp_build_round(struct lp_build_context *bld, } +/** + * Return floor of float (vector), result is a float (vector) + * Ex: floor(1.1) = 1.0 + * Ex: floor(-1.1) = -2.0 + */ LLVMValueRef lp_build_floor(struct lp_build_context *bld, LLVMValueRef a) @@ -911,6 +926,11 @@ lp_build_floor(struct lp_build_context *bld, } +/** + * Return ceiling of float (vector), returning float (vector). + * Ex: ceil( 1.1) = 2.0 + * Ex: ceil(-1.1) = -1.0 + */ LLVMValueRef lp_build_ceil(struct lp_build_context *bld, LLVMValueRef a) @@ -933,7 +953,7 @@ lp_build_ceil(struct lp_build_context *bld, /** - * Return fractional part of 'a' computed as a - floor(f) + * Return fractional part of 'a' computed as a - floor(a) * Typically used in texture coord arithmetic. */ LLVMValueRef @@ -946,8 +966,9 @@ lp_build_fract(struct lp_build_context *bld, /** - * Convert to integer, through whichever rounding method that's fastest, - * typically truncating toward zero. + * Return the integer part of a float (vector) value. The returned value is + * an integer (vector). + * Ex: itrunc(-1.5) = 1 */ LLVMValueRef lp_build_itrunc(struct lp_build_context *bld, @@ -964,7 +985,10 @@ lp_build_itrunc(struct lp_build_context *bld, /** - * Convert float[] to int[] with round(). + * Return float (vector) rounded to nearest integer (vector). The returned + * value is an integer (vector). + * Ex: iround(0.9) = 1 + * Ex: iround(-1.5) = -2 */ LLVMValueRef lp_build_iround(struct lp_build_context *bld, @@ -1007,7 +1031,9 @@ lp_build_iround(struct lp_build_context *bld, /** - * Convert float[] to int[] with floor(). + * Return floor of float (vector), result is an int (vector) + * Ex: ifloor(1.1) = 1.0 + * Ex: ifloor(-1.1) = -2.0 */ LLVMValueRef lp_build_ifloor(struct lp_build_context *bld, @@ -1034,29 +1060,31 @@ lp_build_ifloor(struct lp_build_context *bld, /* sign = a < 0 ? ~0 : 0 */ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), ""); - lp_build_name(sign, "floor.sign"); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign"); /* offset = -0.99999(9)f */ - offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); + offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); offset = LLVMConstBitCast(offset, int_vec_type); - /* offset = a < 0 ? -0.99999(9)f : 0.0f */ + /* offset = a < 0 ? offset : 0.0f */ offset = LLVMBuildAnd(bld->builder, offset, sign, ""); - offset = LLVMBuildBitCast(bld->builder, offset, vec_type, ""); - lp_build_name(offset, "floor.offset"); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); - res = LLVMBuildAdd(bld->builder, a, offset, ""); - lp_build_name(res, "floor.res"); + res = LLVMBuildAdd(bld->builder, a, offset, "ifloor.res"); } - res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); - lp_build_name(res, "floor"); + /* round to nearest (toward zero) */ + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "ifloor.res"); return res; } +/** + * Return ceiling of float (vector), returning int (vector). + * Ex: iceil( 1.1) = 2 + * Ex: iceil(-1.1) = -1 + */ LLVMValueRef lp_build_iceil(struct lp_build_context *bld, LLVMValueRef a) @@ -1072,12 +1100,31 @@ lp_build_iceil(struct lp_build_context *bld, res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); } else { - /* TODO: mimic lp_build_ifloor() here */ - assert(0); - res = bld->undef; + LLVMTypeRef vec_type = lp_build_vec_type(type); + unsigned mantissa = lp_mantissa(type); + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef offset; + + /* sign = a < 0 ? 0 : ~0 */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign"); + sign = LLVMBuildNot(bld->builder, sign, "iceil.not"); + + /* offset = 0.99999(9)f */ + offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); + offset = LLVMConstBitCast(offset, int_vec_type); + + /* offset = a < 0 ? 0.0 : offset */ + offset = LLVMBuildAnd(bld->builder, offset, sign, ""); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); + + res = LLVMBuildAdd(bld->builder, a, offset, "iceil.res"); } - res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + /* round to nearest (toward zero) */ + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "iceil.res"); return res; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index d46b9f882b..7ee8fff140 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -107,4 +107,12 @@ lp_build_const_mask_aos(struct lp_type type, const boolean cond[4]); +static INLINE LLVMValueRef +lp_build_const_int32(int i) +{ + return LLVMConstInt(LLVMInt32Type(), i, 0); +} + + + #endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 3f7f2ebde9..77012f1fac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -83,6 +83,9 @@ * * Although the result values can be scaled to an arbitrary bit width specified * by dst_width, the actual result type will have the same width. + * + * Ex: src = { float, float, float, float } + * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1]. */ LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, @@ -152,6 +155,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, /** * Inverse of lp_build_clamped_float_to_unsigned_norm above. + * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1] + * return {float, float, float, float} with values in range [0, 1]. */ LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, @@ -219,18 +224,19 @@ lp_build_conv(LLVMBuilderRef builder, unsigned num_tmps; unsigned i; - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - /* We must not loose or gain channels. Only precision */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); assert(src_type.length <= LP_MAX_VECTOR_LENGTH); assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); + assert(num_srcs <= LP_MAX_VECTOR_LENGTH); + assert(num_dsts <= LP_MAX_VECTOR_LENGTH); tmp_type = src_type; - for(i = 0; i < num_srcs; ++i) + for(i = 0; i < num_srcs; ++i) { + assert(lp_check_value(src_type, src[i])); tmp[i] = src[i]; + } num_tmps = num_srcs; /* @@ -326,30 +332,25 @@ lp_build_conv(LLVMBuilderRef builder, /* * Truncate or expand bit width + * + * No data conversion should happen here, although the sign bits are + * crucial to avoid bad clamping. */ - assert(!tmp_type.floating || tmp_type.width == dst_type.width); + { + struct lp_type new_type; - if(tmp_type.width > dst_type.width) { - assert(num_dsts == 1); - tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); - tmp_type.width = dst_type.width; - tmp_type.length = dst_type.length; - num_tmps = 1; - } + new_type = tmp_type; + new_type.sign = dst_type.sign; + new_type.width = dst_type.width; + new_type.length = dst_type.length; + + lp_build_resize(builder, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts); - if(tmp_type.width < dst_type.width) { - assert(num_tmps == 1); - lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); - tmp_type.width = dst_type.width; - tmp_type.length = dst_type.length; + tmp_type = new_type; num_tmps = num_dsts; } - assert(tmp_type.width == dst_type.width); - assert(tmp_type.length == dst_type.length); - assert(num_tmps == num_dsts); - /* * Scale to the widest range */ @@ -406,8 +407,10 @@ lp_build_conv(LLVMBuilderRef builder, } } - for(i = 0; i < num_dsts; ++i) + for(i = 0; i < num_dsts; ++i) { dst[i] = tmp[i]; + assert(lp_check_value(dst_type, dst[i])); + } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 5f5036e7bd..60e22d727a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -48,9 +48,9 @@ struct lp_build_context; */ LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - LLVMValueRef packed); +lp_build_format_swizzle_aos(const struct util_format_description *desc, + struct lp_build_context *bld, + LLVMValueRef unswizzled); LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, @@ -60,7 +60,9 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, LLVMValueRef lp_build_fetch_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *format_desc, - LLVMValueRef ptr, + struct lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offset, LLVMValueRef i, LLVMValueRef j); @@ -72,7 +74,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, void lp_build_format_swizzle_soa(const struct util_format_description *format_desc, struct lp_build_context *bld, - const LLVMValueRef *unswizzled, + const LLVMValueRef unswizzled[4], LLVMValueRef swizzled_out[4]); void @@ -82,6 +84,11 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, LLVMValueRef packed, LLVMValueRef rgba_out[4]); +void +lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, + struct lp_type dst_type, + LLVMValueRef packed, + LLVMValueRef *rgba); void lp_build_fetch_rgba_soa(LLVMBuilderRef builder, @@ -93,5 +100,18 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, LLVMValueRef j, LLVMValueRef rgba_out[4]); +/* + * YUV + */ + + +LLVMValueRef +lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j); #endif /* !LP_BLD_FORMAT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 87e3e72a6e..0f01fc1d75 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -38,33 +38,122 @@ #include "util/u_math.h" #include "util/u_string.h" +#include "lp_bld_arit.h" #include "lp_bld_init.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_gather.h" #include "lp_bld_format.h" /** + * Basic swizzling. Rearrange the order of the unswizzled array elements + * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported + * too. + * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}. + */ +LLVMValueRef +lp_build_format_swizzle_aos(const struct util_format_description *desc, + struct lp_build_context *bld, + LLVMValueRef unswizzled) +{ + unsigned char swizzles[4]; + unsigned chan; + + assert(bld->type.length % 4 == 0); + + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle; + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + /* + * For ZS formats do RGBA = ZZZ1 + */ + if (chan == 3) { + swizzle = UTIL_FORMAT_SWIZZLE_1; + } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { + swizzle = UTIL_FORMAT_SWIZZLE_0; + } else { + swizzle = desc->swizzle[0]; + } + } else { + swizzle = desc->swizzle[chan]; + } + swizzles[chan] = swizzle; + } + + return lp_build_swizzle_aos(bld, unswizzled, swizzles); +} + + +/** + * Whether the format matches the vector type, apart of swizzles. + */ +static INLINE boolean +format_matches_type(const struct util_format_description *desc, + struct lp_type type) +{ + enum util_format_type chan_type; + unsigned chan; + + assert(type.length % 4 == 0); + + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB || + desc->block.width != 1 || + desc->block.height != 1) { + return FALSE; + } + + if (type.floating) { + chan_type = UTIL_FORMAT_TYPE_FLOAT; + } else if (type.fixed) { + chan_type = UTIL_FORMAT_TYPE_FIXED; + } else if (type.sign) { + chan_type = UTIL_FORMAT_TYPE_SIGNED; + } else { + chan_type = UTIL_FORMAT_TYPE_UNSIGNED; + } + + for (chan = 0; chan < desc->nr_channels; ++chan) { + if (desc->channel[chan].size != type.width) { + return FALSE; + } + + if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) { + if (desc->channel[chan].type != chan_type || + desc->channel[chan].normalized != type.norm) { + return FALSE; + } + } + } + + return TRUE; +} + + +/** * Unpack a single pixel into its RGBA components. * * @param desc the pixel format for the packed pixel value * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM * - * @return RGBA in a 4 floats vector. + * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector. */ -LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - LLVMValueRef packed) +static INLINE LLVMValueRef +lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef packed) { LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; LLVMValueRef masks[4]; LLVMValueRef scales[4]; - LLVMValueRef swizzles[4]; - LLVMValueRef aux[4]; + boolean normalized; - int empty_channel; boolean needs_uitofp; unsigned shift; unsigned i; @@ -77,8 +166,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, /* Do the intermediate integer computations with 32bit integers since it * matches floating point size */ - if (desc->block.bits < 32) - packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); + assert (LLVMTypeOf(packed) == LLVMInt32Type()); /* Broadcast the packed value to all four channels * before: packed = BGRA @@ -98,7 +186,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, /* Initialize vector constants */ normalized = FALSE; needs_uitofp = FALSE; - empty_channel = -1; shift = 0; /* Loop over 4 color components */ @@ -109,7 +196,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, shifts[i] = LLVMGetUndef(LLVMInt32Type()); masks[i] = LLVMConstNull(LLVMInt32Type()); scales[i] = LLVMConstNull(LLVMFloatType()); - empty_channel = i; } else { unsigned long long mask = (1ULL << bits) - 1; @@ -158,52 +244,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, else scaled = casted; - for (i = 0; i < 4; ++i) - aux[i] = LLVMGetUndef(LLVMFloatType()); - - /* Build swizzles vector to put components into R,G,B,A order */ - for (i = 0; i < 4; ++i) { - enum util_format_swizzle swizzle; - - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - /* - * For ZS formats do RGBA = ZZZ1 - */ - if (i == 3) { - swizzle = UTIL_FORMAT_SWIZZLE_1; - } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { - swizzle = UTIL_FORMAT_SWIZZLE_0; - } else { - swizzle = desc->swizzle[0]; - } - } else { - swizzle = desc->swizzle[i]; - } - - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_X: - case UTIL_FORMAT_SWIZZLE_Y: - case UTIL_FORMAT_SWIZZLE_Z: - case UTIL_FORMAT_SWIZZLE_W: - swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0); - break; - case UTIL_FORMAT_SWIZZLE_0: - assert(empty_channel >= 0); - swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0); - break; - case UTIL_FORMAT_SWIZZLE_1: - swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0); - aux[0] = LLVMConstReal(LLVMFloatType(), 1.0); - break; - case UTIL_FORMAT_SWIZZLE_NONE: - swizzles[i] = LLVMGetUndef(LLVMFloatType()); - assert(0); - break; - } - } - - return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), - LLVMConstVector(swizzles, 4), ""); + return scaled; } @@ -310,22 +351,65 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, } + + /** * Fetch a pixel into a 4 float AoS. * * \param format_desc describes format of the image we're fetching from * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. For non-compressed formats - * these will always be (0,). - * \return valueRef with the float[4] RGBA pixel + * these will always be (0, 0). + * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef lp_build_fetch_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *format_desc, - LLVMValueRef ptr, + struct lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offset, LLVMValueRef i, LLVMValueRef j) { + unsigned num_pixels = type.length / 4; + struct lp_build_context bld; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + assert(type.length % 4 == 0); + + lp_build_context_init(&bld, builder, type); + + /* + * Trivial case + * + * The format matches the type (apart of a swizzle) so no need for + * scaling or converting. + */ + + if (format_matches_type(format_desc, type) && + format_desc->block.bits <= type.width * 4 && + util_is_pot(format_desc->block.bits)) { + LLVMValueRef packed; + + /* + * The format matches the type (apart of a swizzle) so no need for + * scaling or converting. + */ + + packed = lp_build_gather(builder, type.length/4, + format_desc->block.bits, type.width*4, + base_ptr, offset); + + assert(format_desc->block.bits <= type.width * type.length); + + packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(type), ""); + + return lp_build_format_swizzle_aos(format_desc, &bld, packed); + } + + /* + * Bit arithmetic + */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || @@ -337,21 +421,77 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, format_desc->is_bitmask && !format_desc->is_mixed && (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || - format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) - { - LLVMValueRef packed; + format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { + + LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; + LLVMValueRef res; + unsigned k; + + /* + * Unpack a pixel at a time into a <4 x float> RGBA vector + */ + + for (k = 0; k < num_pixels; ++k) { + LLVMValueRef packed; + + packed = lp_build_gather_elem(builder, num_pixels, + format_desc->block.bits, 32, + base_ptr, offset, k); - ptr = LLVMBuildBitCast(builder, ptr, - LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) , - ""); + tmps[k] = lp_build_unpack_arith_rgba_aos(builder, format_desc, + packed); + } + + /* + * Type conversion. + * + * TODO: We could avoid floating conversion for integer to + * integer conversions. + */ - packed = LLVMBuildLoad(builder, ptr, "packed"); + lp_build_conv(builder, + lp_float32_vec4_type(), + type, + tmps, num_pixels, &res, 1); - return lp_build_unpack_rgba_aos(builder, format_desc, packed); + return lp_build_format_swizzle_aos(format_desc, &bld, res); } - else if (format_desc->fetch_rgba_float) { + + /* + * YUV / subsampled formats + */ + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { + struct lp_type tmp_type; + LLVMValueRef tmp; + + memset(&tmp_type, 0, sizeof tmp_type); + tmp_type.width = 8; + tmp_type.length = num_pixels * 4; + tmp_type.norm = TRUE; + + tmp = lp_build_fetch_subsampled_rgba_aos(builder, + format_desc, + num_pixels, + base_ptr, + offset, + i, j); + + lp_build_conv(builder, + tmp_type, type, + &tmp, 1, &tmp, 1); + + return tmp; + } + + /* + * Fallback to util_format_description::fetch_rgba_8unorm(). + */ + + if (format_desc->fetch_rgba_8unorm && + !type.floating && type.width == 8 && !type.sign && type.norm) { /* - * Fallback to calling util_format_description::fetch_rgba_float. + * Fallback to calling util_format_description::fetch_rgba_8unorm. * * This is definitely not the most efficient way of fetching pixels, as * we miss the opportunity to do vectorization, but this it is a @@ -361,9 +501,113 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); char name[256]; + LLVMTypeRef i8t = LLVMInt8Type(); + LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); + LLVMTypeRef i32t = LLVMInt32Type(); LLVMValueRef function; + LLVMValueRef tmp_ptr; LLVMValueRef tmp; - LLVMValueRef args[4]; + LLVMValueRef res; + unsigned k; + + util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm", + format_desc->short_name); + + /* + * Declare and bind format_desc->fetch_rgba_8unorm(). + */ + + function = LLVMGetNamedFunction(module, name); + if (!function) { + LLVMTypeRef ret_type; + LLVMTypeRef arg_types[4]; + LLVMTypeRef function_type; + + ret_type = LLVMVoidType(); + arg_types[0] = pi8t; + arg_types[1] = pi8t; + arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); + function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); + function = LLVMAddFunction(module, name, function_type); + + LLVMSetFunctionCallConv(function, LLVMCCallConv); + LLVMSetLinkage(function, LLVMExternalLinkage); + + assert(LLVMIsDeclaration(function)); + + LLVMAddGlobalMapping(lp_build_engine, function, + func_to_pointer((func_pointer)format_desc->fetch_rgba_8unorm)); + } + + tmp_ptr = lp_build_alloca(builder, i32t, ""); + + res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels)); + + /* + * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result + * in the SoA vectors. + */ + + for (k = 0; k < num_pixels; ++k) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); + LLVMValueRef args[4]; + + args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); + args[1] = lp_build_gather_elem_ptr(builder, num_pixels, + base_ptr, offset, k); + + if (num_pixels == 1) { + args[2] = i; + args[3] = j; + } + else { + args[2] = LLVMBuildExtractElement(builder, i, index, ""); + args[3] = LLVMBuildExtractElement(builder, j, index, ""); + } + + LLVMBuildCall(builder, function, args, Elements(args), ""); + + tmp = LLVMBuildLoad(builder, tmp_ptr, ""); + + if (num_pixels == 1) { + res = tmp; + } + else { + res = LLVMBuildInsertElement(builder, res, tmp, index, ""); + } + } + + /* Bitcast from <n x i32> to <4n x i8> */ + res = LLVMBuildBitCast(builder, res, bld.vec_type, ""); + + return res; + } + + + /* + * Fallback to util_format_description::fetch_rgba_float(). + */ + + if (format_desc->fetch_rgba_float) { + /* + * Fallback to calling util_format_description::fetch_rgba_float. + * + * This is definitely not the most efficient way of fetching pixels, as + * we miss the opportunity to do vectorization, but this it is a + * convenient for formats or scenarios for which there was no opportunity + * or incentive to optimize. + */ + + LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); + char name[256]; + LLVMTypeRef f32t = LLVMFloatType(); + LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); + LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); + LLVMValueRef function; + LLVMValueRef tmp_ptr; + LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; + LLVMValueRef res; + unsigned k; util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name); @@ -379,7 +623,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMTypeRef function_type; ret_type = LLVMVoidType(); - arg_types[0] = LLVMPointerType(LLVMFloatType(), 0); + arg_types[0] = pf32t; arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); @@ -394,25 +638,43 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, func_to_pointer((func_pointer)format_desc->fetch_rgba_float)); } - tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); + tmp_ptr = lp_build_alloca(builder, f32x4t, ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result * in the SoA vectors. */ - args[0] = LLVMBuildBitCast(builder, tmp, - LLVMPointerType(LLVMFloatType(), 0), ""); - args[1] = ptr; - args[2] = i; - args[3] = j; + for (k = 0; k < num_pixels; ++k) { + LLVMValueRef args[4]; - LLVMBuildCall(builder, function, args, Elements(args), ""); + args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); + args[1] = lp_build_gather_elem_ptr(builder, num_pixels, + base_ptr, offset, k); - return LLVMBuildLoad(builder, tmp, ""); - } - else { - assert(0); - return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); + if (num_pixels == 1) { + args[2] = i; + args[3] = j; + } + else { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); + args[2] = LLVMBuildExtractElement(builder, i, index, ""); + args[3] = LLVMBuildExtractElement(builder, j, index, ""); + } + + LLVMBuildCall(builder, function, args, Elements(args), ""); + + tmps[k] = LLVMBuildLoad(builder, tmp_ptr, ""); + } + + lp_build_conv(builder, + lp_float32_vec4_type(), + type, + tmps, num_pixels, &res, 1); + + return res; } + + assert(0); + return lp_build_undef(type); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index e1b94adc85..9f405921b0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -36,7 +36,7 @@ #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_swizzle.h" -#include "lp_bld_sample.h" /* for lp_build_gather */ +#include "lp_bld_gather.h" #include "lp_bld_format.h" @@ -251,6 +251,41 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, } +void +lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, + struct lp_type dst_type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff); + unsigned chan; + + packed = LLVMBuildBitCast(builder, packed, + lp_build_int_vec_type(dst_type), ""); + + /* Decode the input vector components */ + for (chan = 0; chan < 4; ++chan) { + unsigned start = chan*8; + unsigned stop = start + 8; + LLVMValueRef input; + + input = packed; + + if (start) + input = LLVMBuildLShr(builder, input, + lp_build_const_int_vec(dst_type, start), ""); + + if (stop < 32) + input = LLVMBuildAnd(builder, input, mask, ""); + + input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); + + rgba[chan] = input; + } +} + + + /** * Fetch a texels from a texture, returning them in SoA layout. * @@ -311,20 +346,49 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, format_desc, type, packed, rgba_out); + return; } - else { - /* - * Fallback to calling lp_build_fetch_rgba_aos for each pixel. - * - * This is not the most efficient way of fetching pixels, as we - * miss some opportunities to do vectorization, but this is - * convenient for formats or scenarios for which there was no - * opportunity or incentive to optimize. - */ + /* + * Try calling lp_build_fetch_rgba_aos for all pixels. + */ + + if (util_format_fits_8unorm(format_desc) && + type.floating && type.width == 32 && type.length == 4) { + struct lp_type tmp_type; + LLVMValueRef tmp; + + memset(&tmp_type, 0, sizeof tmp_type); + tmp_type.width = 8; + tmp_type.length = type.length * 4; + tmp_type.norm = TRUE; + + tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type, + base_ptr, offset, i, j); + + lp_build_rgba8_to_f32_soa(builder, + type, + tmp, + rgba_out); + + return; + } + + /* + * Fallback to calling lp_build_fetch_rgba_aos for each pixel. + * + * This is not the most efficient way of fetching pixels, as we + * miss some opportunities to do vectorization, but this is + * convenient for formats or scenarios for which there was no + * opportunity or incentive to optimize. + */ + + { unsigned k, chan; + struct lp_type tmp_type; - assert(type.floating); + tmp_type = type; + tmp_type.length = 4; for (chan = 0; chan < 4; ++chan) { rgba_out[chan] = lp_build_undef(type); @@ -334,18 +398,17 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, for(k = 0; k < type.length; ++k) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); LLVMValueRef offset_elem; - LLVMValueRef ptr; LLVMValueRef i_elem, j_elem; LLVMValueRef tmp; offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); - ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, ""); i_elem = LLVMBuildExtractElement(builder, i, index, ""); j_elem = LLVMBuildExtractElement(builder, j, index, ""); /* Get a single float[4]={R,G,B,A} pixel */ - tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, + tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type, + base_ptr, offset_elem, i_elem, j_elem); /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c new file mode 100644 index 0000000000..0a5038bc98 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c @@ -0,0 +1,399 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +/** + * @file + * YUV pixel format manipulation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "util/u_format.h" + +#include "lp_bld_arit.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_gather.h" +#include "lp_bld_format.h" + + +/** + * Extract Y, U, V channels from packed UYVY. + * @param packed is a <n x i32> vector with the packed UYVY blocks + * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) + */ +static void +uyvy_to_yuv_soa(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i, + LLVMValueRef *y, + LLVMValueRef *u, + LLVMValueRef *v) +{ + struct lp_type type; + LLVMValueRef shift, mask; + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + assert(lp_check_value(type, packed)); + assert(lp_check_value(type, i)); + + /* + * y = (uyvy >> 16*i) & 0xff + * u = (uyvy ) & 0xff + * v = (uyvy >> 16 ) & 0xff + */ + + shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); + shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), ""); + *y = LLVMBuildLShr(builder, packed, shift, ""); + *u = packed; + *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), ""); + + mask = lp_build_const_int_vec(type, 0xff); + + *y = LLVMBuildAnd(builder, *y, mask, "y"); + *u = LLVMBuildAnd(builder, *u, mask, "u"); + *v = LLVMBuildAnd(builder, *v, mask, "v"); +} + + +/** + * Extract Y, U, V channels from packed YUYV. + * @param packed is a <n x i32> vector with the packed YUYV blocks + * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) + */ +static void +yuyv_to_yuv_soa(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i, + LLVMValueRef *y, + LLVMValueRef *u, + LLVMValueRef *v) +{ + struct lp_type type; + LLVMValueRef shift, mask; + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + assert(lp_check_value(type, packed)); + assert(lp_check_value(type, i)); + + /* + * y = (yuyv >> 16*i) & 0xff + * u = (yuyv >> 8 ) & 0xff + * v = (yuyv >> 24 ) & 0xff + */ + + shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); + *y = LLVMBuildLShr(builder, packed, shift, ""); + *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), ""); + *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), ""); + + mask = lp_build_const_int_vec(type, 0xff); + + *y = LLVMBuildAnd(builder, *y, mask, "y"); + *u = LLVMBuildAnd(builder, *u, mask, "u"); + *v = LLVMBuildAnd(builder, *v, mask, "v"); +} + + +static INLINE void +yuv_to_rgb_soa(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, + LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) +{ + struct lp_type type; + struct lp_build_context bld; + + LLVMValueRef c0; + LLVMValueRef c8; + LLVMValueRef c16; + LLVMValueRef c128; + LLVMValueRef c255; + + LLVMValueRef cy; + LLVMValueRef cug; + LLVMValueRef cub; + LLVMValueRef cvr; + LLVMValueRef cvg; + + memset(&type, 0, sizeof type); + type.sign = TRUE; + type.width = 32; + type.length = n; + + lp_build_context_init(&bld, builder, type); + + assert(lp_check_value(type, y)); + assert(lp_check_value(type, u)); + assert(lp_check_value(type, v)); + + /* + * Constants + */ + + c0 = lp_build_const_int_vec(type, 0); + c8 = lp_build_const_int_vec(type, 8); + c16 = lp_build_const_int_vec(type, 16); + c128 = lp_build_const_int_vec(type, 128); + c255 = lp_build_const_int_vec(type, 255); + + cy = lp_build_const_int_vec(type, 298); + cug = lp_build_const_int_vec(type, -100); + cub = lp_build_const_int_vec(type, 516); + cvr = lp_build_const_int_vec(type, 409); + cvg = lp_build_const_int_vec(type, -208); + + /* + * y -= 16; + * u -= 128; + * v -= 128; + */ + + y = LLVMBuildSub(builder, y, c16, ""); + u = LLVMBuildSub(builder, u, c128, ""); + v = LLVMBuildSub(builder, v, c128, ""); + + /* + * r = 298 * _y + 409 * _v + 128; + * g = 298 * _y - 100 * _u - 208 * _v + 128; + * b = 298 * _y + 516 * _u + 128; + */ + + y = LLVMBuildMul(builder, y, cy, ""); + y = LLVMBuildAdd(builder, y, c128, ""); + + *r = LLVMBuildMul(builder, v, cvr, ""); + *g = LLVMBuildAdd(builder, + LLVMBuildMul(builder, u, cug, ""), + LLVMBuildMul(builder, v, cvg, ""), + ""); + *b = LLVMBuildMul(builder, u, cub, ""); + + *r = LLVMBuildAdd(builder, *r, y, ""); + *g = LLVMBuildAdd(builder, *g, y, ""); + *b = LLVMBuildAdd(builder, *b, y, ""); + + /* + * r >>= 8; + * g >>= 8; + * b >>= 8; + */ + + *r = LLVMBuildAShr(builder, *r, c8, "r"); + *g = LLVMBuildAShr(builder, *g, c8, "g"); + *b = LLVMBuildAShr(builder, *b, c8, "b"); + + /* + * Clamp + */ + + *r = lp_build_clamp(&bld, *r, c0, c255); + *g = lp_build_clamp(&bld, *g, c0, c255); + *b = lp_build_clamp(&bld, *b, c0, c255); +} + + +static LLVMValueRef +rgb_to_rgba_aos(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef r, LLVMValueRef g, LLVMValueRef b) +{ + struct lp_type type; + LLVMValueRef a; + LLVMValueRef rgba; + + memset(&type, 0, sizeof type); + type.sign = TRUE; + type.width = 32; + type.length = n; + + assert(lp_check_value(type, r)); + assert(lp_check_value(type, g)); + assert(lp_check_value(type, b)); + + /* + * Make a 4 x unorm8 vector + */ + + r = r; + g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), ""); + b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), ""); + a = lp_build_const_int_vec(type, 0xff000000); + + rgba = r; + rgba = LLVMBuildOr(builder, rgba, g, ""); + rgba = LLVMBuildOr(builder, rgba, b, ""); + rgba = LLVMBuildOr(builder, rgba, a, ""); + + rgba = LLVMBuildBitCast(builder, rgba, + LLVMVectorType(LLVMInt8Type(), 4*n), ""); + + return rgba; +} + + +/** + * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS + */ +static LLVMValueRef +uyvy_to_rgba_aos(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i) +{ + LLVMValueRef y, u, v; + LLVMValueRef r, g, b; + LLVMValueRef rgba; + + uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v); + yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b); + rgba = rgb_to_rgba_aos(builder, n, r, g, b); + + return rgba; +} + + +/** + * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS + */ +static LLVMValueRef +yuyv_to_rgba_aos(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i) +{ + LLVMValueRef y, u, v; + LLVMValueRef r, g, b; + LLVMValueRef rgba; + + yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v); + yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b); + rgba = rgb_to_rgba_aos(builder, n, r, g, b); + + return rgba; +} + + +/** + * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS + */ +static LLVMValueRef +rgbg_to_rgba_aos(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i) +{ + LLVMValueRef r, g, b; + LLVMValueRef rgba; + + uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b); + rgba = rgb_to_rgba_aos(builder, n, r, g, b); + + return rgba; +} + + +/** + * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS + */ +static LLVMValueRef +grgb_to_rgba_aos(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i) +{ + LLVMValueRef r, g, b; + LLVMValueRef rgba; + + yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b); + rgba = rgb_to_rgba_aos(builder, n, r, g, b); + + return rgba; +} + + +/** + * @param n is the number of pixels processed + * @param packed is a <n x i32> vector with the packed YUYV blocks + * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) + * @return a <4*n x i8> vector with the pixel RGBA values in AoS + */ +LLVMValueRef +lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j) +{ + LLVMValueRef packed; + LLVMValueRef rgba; + + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED); + assert(format_desc->block.bits == 32); + assert(format_desc->block.width == 2); + assert(format_desc->block.height == 1); + + packed = lp_build_gather(builder, n, 32, 32, base_ptr, offset); + + (void)j; + + switch (format_desc->format) { + case PIPE_FORMAT_UYVY: + rgba = uyvy_to_rgba_aos(builder, n, packed, i); + break; + case PIPE_FORMAT_YUYV: + rgba = yuyv_to_rgba_aos(builder, n, packed, i); + break; + case PIPE_FORMAT_R8G8_B8G8_UNORM: + rgba = rgbg_to_rgba_aos(builder, n, packed, i); + break; + case PIPE_FORMAT_G8R8_G8B8_UNORM: + rgba = grgb_to_rgba_aos(builder, n, packed, i); + break; + default: + assert(0); + rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n)); + break; + } + + return rgba; +} + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c new file mode 100644 index 0000000000..d60472e065 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c @@ -0,0 +1,148 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "util/u_debug.h" +#include "lp_bld_debug.h" +#include "lp_bld_const.h" +#include "lp_bld_format.h" +#include "lp_bld_gather.h" + + +/** + * Get the pointer to one element from scatter positions in memory. + * + * @sa lp_build_gather() + */ +LLVMValueRef +lp_build_gather_elem_ptr(LLVMBuilderRef builder, + unsigned length, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + unsigned i) +{ + LLVMValueRef offset; + LLVMValueRef ptr; + + assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0)); + + if (length == 1) { + assert(i == 0); + offset = offsets; + } else { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + offset = LLVMBuildExtractElement(builder, offsets, index, ""); + } + + ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, ""); + + return ptr; +} + + +/** + * Gather one element from scatter positions in memory. + * + * @sa lp_build_gather() + */ +LLVMValueRef +lp_build_gather_elem(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + unsigned i) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMValueRef ptr; + LLVMValueRef res; + + assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0)); + + ptr = lp_build_gather_elem_ptr(builder, length, base_ptr, offsets, i); + ptr = LLVMBuildBitCast(builder, ptr, src_ptr_type, ""); + res = LLVMBuildLoad(builder, ptr, ""); + + assert(src_width <= dst_width); + if (src_width > dst_width) + res = LLVMBuildTrunc(builder, res, dst_elem_type, ""); + if (src_width < dst_width) + res = LLVMBuildZExt(builder, res, dst_elem_type, ""); + + return res; +} + + +/** + * Gather elements from scatter positions in memory into a single vector. + * Use for fetching texels from a texture. + * For SSE, typical values are length=4, src_width=32, dst_width=32. + * + * @param length length of the offsets + * @param src_width src element width in bits + * @param dst_width result element width in bits (src will be expanded to fit) + * @param base_ptr base pointer, should be a i8 pointer type. + * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMValueRef res; + + if (length == 1) { + /* Scalar */ + return lp_build_gather_elem(builder, length, + src_width, dst_width, + base_ptr, offsets, 0); + } else { + /* Vector */ + + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for (i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem; + elem = lp_build_gather_elem(builder, length, + src_width, dst_width, + base_ptr, offsets, i); + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + } + + return res; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/src/gallium/auxiliary/gallivm/lp_bld_gather.h new file mode 100644 index 0000000000..131af8ea07 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.h @@ -0,0 +1,61 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef LP_BLD_GATHER_H_ +#define LP_BLD_GATHER_H_ + + +#include "gallivm/lp_bld.h" + + +LLVMValueRef +lp_build_gather_elem_ptr(LLVMBuilderRef builder, + unsigned length, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + unsigned i); + +LLVMValueRef +lp_build_gather_elem(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + unsigned i); + +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +#endif /* LP_BLD_GATHER_H_ */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 44cfdc4d3f..69353dea09 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -32,6 +32,8 @@ #include "lp_bld_debug.h" #include "lp_bld_init.h" +#include <llvm-c/Transforms/Scalar.h> + #ifdef DEBUG unsigned gallivm_debug = 0; @@ -50,6 +52,7 @@ LLVMModuleRef lp_build_module = NULL; LLVMExecutionEngineRef lp_build_engine = NULL; LLVMModuleProviderRef lp_build_provider = NULL; LLVMTargetDataRef lp_build_target = NULL; +LLVMPassManagerRef lp_build_pass = NULL; /* @@ -127,6 +130,33 @@ lp_build_init(void) if (!lp_build_target) lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine); + if (!lp_build_pass) { + lp_build_pass = LLVMCreateFunctionPassManager(lp_build_provider); + LLVMAddTargetData(lp_build_target, lp_build_pass); + + if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + /* TODO: Add more passes */ + LLVMAddCFGSimplificationPass(lp_build_pass); + LLVMAddPromoteMemoryToRegisterPass(lp_build_pass); + LLVMAddConstantPropagationPass(lp_build_pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(lp_build_pass); + } + LLVMAddGVNPass(lp_build_pass); + } else { + /* We need at least this pass to prevent the backends to fail in + * unexpected ways. + */ + LLVMAddPromoteMemoryToRegisterPass(lp_build_pass); + } + } + util_cpu_detect(); #if 0 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index 0ec2afcd1b..a32ced9b4c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -38,6 +38,7 @@ extern LLVMModuleRef lp_build_module; extern LLVMExecutionEngineRef lp_build_engine; extern LLVMModuleProviderRef lp_build_provider; extern LLVMTargetDataRef lp_build_target; +extern LLVMPassManagerRef lp_build_pass; void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index d13fa1a5d0..39854e43b1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -34,6 +34,7 @@ #include "util/u_cpu_detect.h" +#include "util/u_memory.h" #include "util/u_debug.h" #include "lp_bld_type.h" @@ -187,12 +188,10 @@ lp_build_compare(LLVMBuilderRef builder, return lp_build_undef(type); } - /* There are no signed byte and unsigned word/dword comparison - * instructions. So flip the sign bit so that the results match. + /* There are no unsigned comparison instructions. So flip the sign bit + * so that the results match. */ - if(table[func].gt && - ((type.width == 8 && type.sign) || - (type.width != 8 && !type.sign))) { + if (table[func].gt && !type.sign) { LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); a = LLVMBuildXor(builder, a, msb, ""); b = LLVMBuildXor(builder, b, msb, ""); @@ -384,6 +383,46 @@ lp_build_select(struct lp_build_context *bld, mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); res = LLVMBuildSelect(bld->builder, mask, a, b, ""); } + else if (util_cpu_caps.has_sse4_1 && + type.width * type.length == 128 && + !LLVMIsConstant(a) && + !LLVMIsConstant(b) && + !LLVMIsConstant(mask)) { + const char *intrinsic; + LLVMTypeRef arg_type; + LLVMValueRef args[3]; + + if (type.width == 64) { + intrinsic = "llvm.x86.sse41.blendvpd"; + arg_type = LLVMVectorType(LLVMDoubleType(), 2); + } else if (type.width == 32) { + intrinsic = "llvm.x86.sse41.blendvps"; + arg_type = LLVMVectorType(LLVMFloatType(), 4); + } else { + intrinsic = "llvm.x86.sse41.pblendvb"; + arg_type = LLVMVectorType(LLVMInt8Type(), 16); + } + + if (arg_type != bld->int_vec_type) { + mask = LLVMBuildBitCast(bld->builder, mask, arg_type, ""); + } + + if (arg_type != bld->vec_type) { + a = LLVMBuildBitCast(bld->builder, a, arg_type, ""); + b = LLVMBuildBitCast(bld->builder, b, arg_type, ""); + } + + args[0] = b; + args[1] = a; + args[2] = mask; + + res = lp_build_intrinsic(bld->builder, intrinsic, + arg_type, args, Elements(args)); + + if (arg_type != bld->vec_type) { + res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); + } + } else { if(type.floating) { LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 186f8849b8..7748f8f099 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -427,3 +427,123 @@ lp_build_pack(LLVMBuilderRef builder, return tmp[0]; } + + +/** + * Truncate or expand the bitwidth. + * + * NOTE: Getting the right sign flags is crucial here, as we employ some + * intrinsics that do saturation. + */ +void +lp_build_resize(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + const LLVMValueRef *src, unsigned num_srcs, + LLVMValueRef *dst, unsigned num_dsts) +{ + LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + /* + * We don't support float <-> int conversion here. That must be done + * before/after calling this function. + */ + assert(src_type.floating == dst_type.floating); + + /* + * We don't support double <-> float conversion yet, although it could be + * added with little effort. + */ + assert((!src_type.floating && !dst_type.floating) || + src_type.width == dst_type.width); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length * num_srcs == dst_type.length * num_dsts); + + /* We don't support M:N conversion, only 1:N, M:1, or 1:1 */ + assert(num_srcs == 1 || num_dsts == 1); + + assert(src_type.length <= LP_MAX_VECTOR_LENGTH); + assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); + assert(num_srcs <= LP_MAX_VECTOR_LENGTH); + assert(num_dsts <= LP_MAX_VECTOR_LENGTH); + + if (src_type.width > dst_type.width) { + /* + * Truncate bit width. + */ + + assert(num_dsts == 1); + + if (src_type.width * src_type.length == dst_type.width * dst_type.length) { + /* + * Register width remains constant -- use vector packing intrinsics + */ + + tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); + } + else { + /* + * Do it element-wise. + */ + + assert(src_type.length == dst_type.length); + tmp[0] = lp_build_undef(dst_type); + for (i = 0; i < dst_type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, ""); + val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), ""); + tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, ""); + } + } + } + else if (src_type.width < dst_type.width) { + /* + * Expand bit width. + */ + + assert(num_srcs == 1); + + if (src_type.width * src_type.length == dst_type.width * dst_type.length) { + /* + * Register width remains constant -- use vector unpack intrinsics + */ + lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts); + } + else { + /* + * Do it element-wise. + */ + + assert(src_type.length == dst_type.length); + tmp[0] = lp_build_undef(dst_type); + for (i = 0; i < dst_type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, ""); + + if (src_type.sign && dst_type.sign) { + val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), ""); + } else { + val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), ""); + } + tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, ""); + } + } + } + else { + /* + * No-op + */ + + assert(num_srcs == 1); + assert(num_dsts == 1); + + tmp[0] = src[0]; + } + + for(i = 0; i < num_dsts; ++i) + dst[i] = tmp[i]; +} + + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index 41adeed220..e470082b97 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -92,4 +92,12 @@ lp_build_pack(LLVMBuilderRef builder, const LLVMValueRef *src, unsigned num_srcs); +void +lp_build_resize(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + const LLVMValueRef *src, unsigned num_srcs, + LLVMValueRef *dst, unsigned num_dsts); + + #endif /* !LP_BLD_PACK_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index 38fd5a39ef..ca36046d22 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -61,8 +61,8 @@ LLVMValueRef lp_build_ddx(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef a_left = lp_build_swizzle1_aos(bld, a, swizzle_left); - LLVMValueRef a_right = lp_build_swizzle1_aos(bld, a, swizzle_right); + LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left); + LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right); return lp_build_sub(bld, a_right, a_left); } @@ -71,8 +71,8 @@ LLVMValueRef lp_build_ddy(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef a_top = lp_build_swizzle1_aos(bld, a, swizzle_top); - LLVMValueRef a_bottom = lp_build_swizzle1_aos(bld, a, swizzle_bottom); + LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top); + LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom); return lp_build_sub(bld, a_bottom, a_top); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 946c23e317..0fd014ab9b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -40,7 +40,6 @@ #include "lp_bld_const.h" #include "lp_bld_arit.h" #include "lp_bld_type.h" -#include "lp_bld_format.h" #include "lp_bld_sample.h" @@ -125,73 +124,53 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, /** - * Gather elements from scatter positions in memory into a single vector. - * Use for fetching texels from a texture. - * For SSE, typical values are length=4, src_width=32, dst_width=32. - * - * @param length length of the offsets - * @param src_width src element width in bits - * @param dst_width result element width in bits (src will be expanded to fit) - * @param base_ptr base pointer, should be a i8 pointer type. - * @param offsets vector with offsets - */ -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets) -{ - LLVMTypeRef src_type = LLVMIntType(src_width); - LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); - LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); - LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); - LLVMValueRef res; - unsigned i; - - res = LLVMGetUndef(dst_vec_type); - for(i = 0; i < length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef elem_offset; - LLVMValueRef elem_ptr; - LLVMValueRef elem; - - elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); - elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); - elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); - elem = LLVMBuildLoad(builder, elem_ptr, ""); - - assert(src_width <= dst_width); - if(src_width > dst_width) - elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); - if(src_width < dst_width) - elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); - - res = LLVMBuildInsertElement(builder, res, elem, index, ""); - } - - return res; -} - - -/** * Compute the offset of a pixel block. * - * x, y, z, y_stride, z_stride are vectors, and they refer to pixel blocks, as - * per format description, and not individual pixels. + * x, y, z, y_stride, z_stride are vectors, and they refer to pixels. + * + * Returns the relative offset and i,j sub-block coordinates */ -LLVMValueRef +void lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, LLVMValueRef z, LLVMValueRef y_stride, - LLVMValueRef z_stride) + LLVMValueRef z_stride, + LLVMValueRef *out_offset, + LLVMValueRef *out_i, + LLVMValueRef *out_j) { LLVMValueRef x_stride; LLVMValueRef offset; + LLVMValueRef i; + LLVMValueRef j; + + /* + * Describe the coordinates in terms of pixel blocks. + * + * TODO: pixel blocks are power of two. LLVM should convert rem/div to + * bit arithmetic. Verify this. + */ + + if (format_desc->block.width == 1) { + i = bld->zero; + } + else { + LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width); + i = LLVMBuildURem(bld->builder, x, block_width, ""); + x = LLVMBuildUDiv(bld->builder, x, block_width, ""); + } + + if (format_desc->block.height == 1) { + j = bld->zero; + } + else { + LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height); + j = LLVMBuildURem(bld->builder, y, block_height, ""); + y = LLVMBuildUDiv(bld->builder, y, block_height, ""); + } x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8); offset = lp_build_mul(bld, x, x_stride); @@ -206,5 +185,7 @@ lp_build_sample_offset(struct lp_build_context *bld, offset = lp_build_add(bld, offset, z_offset); } - return offset; + *out_offset = offset; + *out_i = i; + *out_j = j; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 51e98ab2f9..5b8f478094 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -146,23 +146,17 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_sampler_state *sampler); -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets); - - -LLVMValueRef +void lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, LLVMValueRef z, LLVMValueRef y_stride, - LLVMValueRef z_stride); + LLVMValueRef z_stride, + LLVMValueRef *out_offset, + LLVMValueRef *out_i, + LLVMValueRef *out_j); void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 84c04fe272..1a20d74cac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -50,8 +50,10 @@ #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" #include "lp_bld_flow.h" +#include "lp_bld_gather.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" +#include "lp_bld_quad.h" /** @@ -264,35 +266,11 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, } } - /* - * Describe the coordinates in terms of pixel blocks. - * - * TODO: pixel blocks are power of two. LLVM should convert rem/div to - * bit arithmetic. Verify this. - */ - - if (bld->format_desc->block.width == 1) { - i = bld->uint_coord_bld.zero; - } - else { - LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width); - i = LLVMBuildURem(bld->builder, x, block_width, ""); - x = LLVMBuildUDiv(bld->builder, x, block_width, ""); - } - - if (bld->format_desc->block.height == 1) { - j = bld->uint_coord_bld.zero; - } - else { - LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height); - j = LLVMBuildURem(bld->builder, y, block_height, ""); - y = LLVMBuildUDiv(bld->builder, y, block_height, ""); - } - /* convert x,y,z coords to linear offset from start of texture, in bytes */ - offset = lp_build_sample_offset(&bld->uint_coord_bld, - bld->format_desc, - x, y, z, y_stride, z_stride); + lp_build_sample_offset(&bld->uint_coord_bld, + bld->format_desc, + x, y, z, y_stride, z_stride, + &offset, &i, &j); if (use_border) { /* If we can sample the border color, it means that texcoords may @@ -344,6 +322,9 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, } +/** + * Fetch the texels as <4n x i8> in AoS form. + */ static LLVMValueRef lp_build_sample_packed(struct lp_build_sample_context *bld, LLVMValueRef x, @@ -351,25 +332,46 @@ lp_build_sample_packed(struct lp_build_sample_context *bld, LLVMValueRef y_stride, LLVMValueRef data_array) { - LLVMValueRef offset; + LLVMValueRef offset, i, j; LLVMValueRef data_ptr; + LLVMValueRef res; - offset = lp_build_sample_offset(&bld->uint_coord_bld, - bld->format_desc, - x, y, NULL, y_stride, NULL); - - assert(bld->format_desc->block.width == 1); - assert(bld->format_desc->block.height == 1); - assert(bld->format_desc->block.bits <= bld->texel_type.width); + /* convert x,y,z coords to linear offset from start of texture, in bytes */ + lp_build_sample_offset(&bld->uint_coord_bld, + bld->format_desc, + x, y, NULL, y_stride, NULL, + &offset, &i, &j); /* get pointer to mipmap level 0 data */ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); - return lp_build_gather(bld->builder, - bld->texel_type.length, - bld->format_desc->block.bits, - bld->texel_type.width, - data_ptr, offset); + if (util_format_is_rgba8_variant(bld->format_desc)) { + /* Just fetch the data directly without swizzling */ + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); + + res = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); + } + else { + struct lp_type type; + + assert(bld->texel_type.width == 32); + + memset(&type, 0, sizeof type); + type.width = 8; + type.length = bld->texel_type.length*4; + type.norm = TRUE; + + res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type, + data_ptr, offset, i, j); + } + + return res; } @@ -817,9 +819,8 @@ lp_build_minify(struct lp_build_sample_context *bld, /** * Generate code to compute texture level of detail (lambda). - * \param s vector of texcoord s values - * \param t vector of texcoord t values - * \param r vector of texcoord r values + * \param ddx partial derivatives of (s, t, r, q) with respect to X + * \param ddy partial derivatives of (s, t, r, q) with respect to Y * \param lod_bias optional float vector with the shader lod bias * \param explicit_lod optional float vector with the explicit lod * \param width scalar int texture width @@ -831,11 +832,8 @@ lp_build_minify(struct lp_build_sample_context *bld, */ static LLVMValueRef lp_build_lod_selector(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - const LLVMValueRef *ddx, - const LLVMValueRef *ddy, + const LLVMValueRef ddx[4], + const LLVMValueRef ddy[4], LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ LLVMValueRef width, @@ -870,14 +868,6 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL; LLVMValueRef rho; - /* - * dsdx = abs(s[1] - s[0]); - * dsdy = abs(s[2] - s[0]); - * dtdx = abs(t[1] - t[0]); - * dtdy = abs(t[2] - t[0]); - * drdx = abs(r[1] - r[0]); - * drdy = abs(r[2] - r[0]); - */ dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); dsdx = lp_build_abs(float_bld, dsdx); dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); @@ -1287,7 +1277,7 @@ lp_build_cube_face(struct lp_build_sample_context *bld, /** - * Generate code to do cube face selection and per-face texcoords. + * Generate code to do cube face selection and compute per-face texcoords. */ static void lp_build_cube_lookup(struct lp_build_sample_context *bld, @@ -1411,7 +1401,6 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, lp_build_endif(&if_ctx2); lp_build_flow_scope_end(flow_ctx2); lp_build_flow_destroy(flow_ctx2); - *face_s = face_s2; *face_t = face_t2; *face = face2; @@ -1457,13 +1446,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, int chan; if (img_filter == PIPE_TEX_FILTER_NEAREST) { + /* sample the first mipmap level */ lp_build_sample_image_nearest(bld, width0_vec, height0_vec, depth0_vec, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, colors0); if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level, and interp */ + /* sample the second mipmap level */ lp_build_sample_image_nearest(bld, width1_vec, height1_vec, depth1_vec, row_stride1_vec, img_stride1_vec, @@ -1473,13 +1463,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); + /* sample the first mipmap level */ lp_build_sample_image_linear(bld, width0_vec, height0_vec, depth0_vec, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, colors0); if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level, and interp */ + /* sample the second mipmap level */ lp_build_sample_image_linear(bld, width1_vec, height1_vec, depth1_vec, row_stride1_vec, img_stride1_vec, @@ -1542,6 +1533,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; LLVMValueRef data_ptr0, data_ptr1 = NULL; + LLVMValueRef face_ddx[4], face_ddy[4]; /* printf("%s mip %d min %d mag %d\n", __FUNCTION__, @@ -1549,6 +1541,30 @@ lp_build_sample_general(struct lp_build_sample_context *bld, */ /* + * Choose cube face, recompute texcoords and derivatives for the chosen face. + */ + if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + LLVMValueRef face, face_s, face_t; + lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); + s = face_s; /* vec */ + t = face_t; /* vec */ + /* use 'r' to indicate cube face */ + r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ + + /* recompute ddx, ddy using the new (s,t) face texcoords */ + face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); + face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); + face_ddx[2] = NULL; + face_ddx[3] = NULL; + face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); + face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); + face_ddy[2] = NULL; + face_ddy[3] = NULL; + ddx = face_ddx; + ddy = face_ddy; + } + + /* * Compute the level of detail (float). */ if (min_filter != mag_filter || @@ -1556,7 +1572,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy, + lod = lp_build_lod_selector(bld, ddx, ddy, lod_bias, explicit_lod, width, height, depth); } @@ -1566,9 +1582,20 @@ lp_build_sample_general(struct lp_build_sample_context *bld, */ if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* always use mip level 0 */ - ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + /* XXX this is a work-around for an apparent bug in LLVM 2.7. + * We should be able to set ilevel0 = const(0) but that causes + * bad x86 code to be emitted. + */ + lod = lp_build_const_elem(bld->coord_bld.type, 0.0); + lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + } + else { + ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + } } else { + assert(lod); if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); } @@ -1623,18 +1650,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld, } /* - * Choose cube face, recompute per-face texcoords. - */ - if (bld->static_state->target == PIPE_TEXTURE_CUBE) { - LLVMValueRef face, face_s, face_t; - lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); - s = face_s; /* vec */ - t = face_t; /* vec */ - /* use 'r' to indicate cube face */ - r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ - } - - /* * Get pointer(s) to image data for mipmap level(s). */ data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); @@ -1712,36 +1727,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld, static void -lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, - struct lp_type dst_type, - LLVMValueRef packed, - LLVMValueRef *rgba) -{ - LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff); - unsigned chan; - - /* Decode the input vector components */ - for (chan = 0; chan < 4; ++chan) { - unsigned start = chan*8; - unsigned stop = start + 8; - LLVMValueRef input; - - input = packed; - - if(start) - input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), ""); - - if(stop < 32) - input = LLVMBuildAnd(builder, input, mask, ""); - - input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); - - rgba[chan] = input; - } -} - - -static void lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, @@ -1935,15 +1920,20 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, * Convert to SoA and swizzle. */ - packed = LLVMBuildBitCast(builder, packed, i32_vec_type, ""); - lp_build_rgba8_to_f32_soa(bld->builder, bld->texel_type, packed, unswizzled); - lp_build_format_swizzle_soa(bld->format_desc, - &bld->texel_bld, - unswizzled, texel_out); + if (util_format_is_rgba8_variant(bld->format_desc)) { + lp_build_format_swizzle_soa(bld->format_desc, + &bld->texel_bld, + unswizzled, texel_out); + } else { + texel_out[0] = unswizzled[0]; + texel_out[1] = unswizzled[1]; + texel_out[2] = unswizzled[2]; + texel_out[3] = unswizzled[3]; + } apply_sampler_swizzle(bld, texel_out); } @@ -2007,6 +1997,8 @@ lp_build_sample_nop(struct lp_build_sample_context *bld, * 'texel' will return a vector of four LLVMValueRefs corresponding to * R, G, B, A. * \param type vector float type to use for coords, etc. + * \param ddx partial derivatives of (s,t,r,q) with respect to x + * \param ddy partial derivatives of (s,t,r,q) with respect to y */ void lp_build_sample_soa(LLVMBuilderRef builder, @@ -2016,8 +2008,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, - const LLVMValueRef *ddx, - const LLVMValueRef *ddy, + const LLVMValueRef ddx[4], + const LLVMValueRef ddy[4], LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ LLVMValueRef texel_out[4]) @@ -2079,7 +2071,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, /* For debug: no-op texture sampling */ lp_build_sample_nop(&bld, texel_out); } - else if (util_format_is_rgba8_variant(bld.format_desc) && + else if (util_format_fits_8unorm(bld.format_desc) && + bld.format_desc->nr_channels > 1 && static_state->target == PIPE_TEXTURE_2D && static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 3c8a7bc09e..20cf96ca66 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -110,7 +110,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing * using shuffles here actually causes worst results. More investigation is * needed. */ - if (n <= 4) { + if (type.width >= 16) { /* * Shuffle. */ @@ -132,7 +132,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, * YY00 YY00 .... YY00 * YYYY YYYY .... YYYY <= output */ - struct lp_type type4 = type; + struct lp_type type4; const char shifts[4][2] = { { 1, 2}, {-1, 2}, @@ -147,6 +147,13 @@ lp_build_broadcast_aos(struct lp_build_context *bld, a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), ""); + /* + * Build a type where each element is an integer that cover the four + * channels. + */ + + type4 = type; + type4.floating = FALSE; type4.width *= 4; type4.length /= 4; @@ -176,80 +183,170 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef -lp_build_swizzle1_aos(struct lp_build_context *bld, - LLVMValueRef a, - const unsigned char swizzle[4]) +lp_build_swizzle_aos(struct lp_build_context *bld, + LLVMValueRef a, + const unsigned char swizzles[4]) { - const unsigned n = bld->type.length; + const struct lp_type type = bld->type; + const unsigned n = type.length; unsigned i, j; - if(a == bld->undef || a == bld->zero || a == bld->one) + if (swizzles[0] == PIPE_SWIZZLE_RED && + swizzles[1] == PIPE_SWIZZLE_GREEN && + swizzles[2] == PIPE_SWIZZLE_BLUE && + swizzles[3] == PIPE_SWIZZLE_ALPHA) { return a; + } - if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3]) - return lp_build_broadcast_aos(bld, a, swizzle[0]); + if (swizzles[0] == swizzles[1] && + swizzles[1] == swizzles[2] && + swizzles[2] == swizzles[3]) { + switch (swizzles[0]) { + case PIPE_SWIZZLE_RED: + case PIPE_SWIZZLE_GREEN: + case PIPE_SWIZZLE_BLUE: + case PIPE_SWIZZLE_ALPHA: + return lp_build_broadcast_aos(bld, a, swizzles[0]); + case PIPE_SWIZZLE_ZERO: + return bld->zero; + case PIPE_SWIZZLE_ONE: + return bld->one; + default: + assert(0); + return bld->undef; + } + } - { + if (type.width >= 16) { /* * Shuffle. */ - LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type)); + LLVMTypeRef i32t = LLVMInt32Type(); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef aux[LP_MAX_VECTOR_LENGTH]; + + memset(aux, 0, sizeof aux); + + for(j = 0; j < n; j += 4) { + for(i = 0; i < 4; ++i) { + unsigned shuffle; + switch (swizzles[i]) { + default: + assert(0); + /* fall through */ + case PIPE_SWIZZLE_RED: + case PIPE_SWIZZLE_GREEN: + case PIPE_SWIZZLE_BLUE: + case PIPE_SWIZZLE_ALPHA: + shuffle = j + swizzles[i]; + break; + case PIPE_SWIZZLE_ZERO: + shuffle = type.length + 0; + if (!aux[0]) { + aux[0] = lp_build_const_elem(type, 0.0); + } + break; + case PIPE_SWIZZLE_ONE: + shuffle = type.length + 1; + if (!aux[1]) { + aux[1] = lp_build_const_elem(type, 1.0); + } + break; + } + shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); + } + } - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0); - - return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); - } -} - + for (i = 0; i < n; ++i) { + if (!aux[i]) { + aux[i] = undef; + } + } -LLVMValueRef -lp_build_swizzle2_aos(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b, - const unsigned char swizzle[4]) -{ - const unsigned n = bld->type.length; - unsigned i, j; + return LLVMBuildShuffleVector(bld->builder, a, + LLVMConstVector(aux, n), + LLVMConstVector(shuffles, n), ""); + } else { + /* + * Bit mask and shifts. + * + * For example, this will convert BGRA to RGBA by doing + * + * rgba = (bgra & 0x00ff0000) >> 16 + * | (bgra & 0xff00ff00) + * | (bgra & 0x000000ff) << 16 + * + * This is necessary not only for faster cause, but because X86 backend + * will refuse shuffles of <4 x i8> vectors + */ + LLVMValueRef res; + struct lp_type type4; + boolean cond[4]; + unsigned chan; + int shift; - if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4) - return lp_build_swizzle1_aos(bld, a, swizzle); + /* + * Start with a mixture of 1 and 0. + */ + for (chan = 0; chan < 4; ++chan) { + cond[chan] = swizzles[chan] == PIPE_SWIZZLE_ONE ? TRUE : FALSE; + } + res = lp_build_select_aos(bld, bld->one, bld->zero, cond); - if(a == b) { - unsigned char swizzle1[4]; - swizzle1[0] = swizzle[0] % 4; - swizzle1[1] = swizzle[1] % 4; - swizzle1[2] = swizzle[2] % 4; - swizzle1[3] = swizzle[3] % 4; - return lp_build_swizzle1_aos(bld, a, swizzle1); - } + /* + * Build a type where each element is an integer that cover the four + * channels. + */ + type4 = type; + type4.floating = FALSE; + type4.width *= 4; + type4.length /= 4; - if(swizzle[0] % 4 == 0 && - swizzle[1] % 4 == 1 && - swizzle[2] % 4 == 2 && - swizzle[3] % 4 == 3) { - boolean cond[4]; - cond[0] = swizzle[0] / 4; - cond[1] = swizzle[1] / 4; - cond[2] = swizzle[2] / 4; - cond[3] = swizzle[3] / 4; - return lp_build_select_aos(bld, a, b, cond); - } + a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), ""); + res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), ""); - { /* - * Shuffle. + * Mask and shift the channels, trying to group as many channels in the + * same shift as possible */ - LLVMTypeRef elem_type = LLVMInt32Type(); - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0); + for (shift = -3; shift <= 3; ++shift) { + unsigned long long mask = 0; + + assert(type4.width <= sizeof(mask)*8); + + for (chan = 0; chan < 4; ++chan) { + /* FIXME: big endian */ + if (swizzles[chan] < 4 && + chan - swizzles[chan] == shift) { + mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width); + } + } + + if (mask) { + LLVMValueRef masked; + LLVMValueRef shifted; + + if (0) + debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask); + + masked = LLVMBuildAnd(bld->builder, a, + lp_build_const_int_vec(type4, mask), ""); + if (shift > 0) { + shifted = LLVMBuildShl(bld->builder, masked, + lp_build_const_int_vec(type4, shift*type.width), ""); + } else if (shift < 0) { + shifted = LLVMBuildLShr(bld->builder, masked, + lp_build_const_int_vec(type4, -shift*type.width), ""); + } else { + shifted = masked; + } + + res = LLVMBuildOr(bld->builder, res, shifted, ""); + } + } - return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); + return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), ""); } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index 4f4fa777c9..315e1bcb54 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -68,24 +68,12 @@ lp_build_broadcast_aos(struct lp_build_context *bld, /** * Swizzle a vector consisting of an array of XYZW structs. * - * @param swizzle is the in [0,4[ range. + * @param swizzles is the in [0,4[ range. */ LLVMValueRef -lp_build_swizzle1_aos(struct lp_build_context *bld, - LLVMValueRef a, - const unsigned char swizzle[4]); - - -/** - * Swizzle two vector consisting of an array of XYZW structs. - * - * @param swizzle is the in [0,8[ range. Values in [4,8[ range refer to b. - */ -LLVMValueRef -lp_build_swizzle2_aos(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b, - const unsigned char swizzle[4]); +lp_build_swizzle_aos(struct lp_build_context *bld, + LLVMValueRef a, + const unsigned char swizzles[4]); LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index dec7556138..21236839fb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -49,6 +49,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_arit.h" +#include "lp_bld_gather.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" @@ -132,10 +133,14 @@ struct lp_build_tgsi_soa_context LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS]; - /* we allocate an array of temps if we have indirect - * addressing and then the temps above is unused */ + /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is + * set in the indirect_files field. + * The temps[] array above is unused then. + */ LLVMValueRef temps_array; - boolean has_indirect_addressing; + + /** bitmask indicating which register files are accessed indirectly */ + unsigned indirect_files; struct lp_build_mask_context *mask; struct lp_exec_mask exec_mask; @@ -404,25 +409,92 @@ static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) lp_exec_mask_update(mask); } + +/** + * Return pointer to a temporary register channel (src or dest). + * Note that indirect addressing cannot be handled here. + * \param index which temporary register + * \param chan which channel of the temp register. + */ static LLVMValueRef get_temp_ptr(struct lp_build_tgsi_soa_context *bld, unsigned index, - unsigned chan, - boolean is_indirect, - LLVMValueRef addr) + unsigned chan) { assert(chan < 4); - if (!bld->has_indirect_addressing) { - return bld->temps[index][chan]; - } else { - LLVMValueRef lindex = - LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0); - if (is_indirect) - lindex = lp_build_add(&bld->base, lindex, addr); + if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { + LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); } + else { + return bld->temps[index][chan]; + } } + +/** + * Gather vector. + * XXX the lp_build_gather() function should be capable of doing this + * with a little work. + */ +static LLVMValueRef +build_gather(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef base_ptr, + LLVMValueRef indexes) +{ + LLVMValueRef res = bld->base.undef; + unsigned i; + + /* + * Loop over elements of index_vec, load scalar value, insert it into 'res'. + */ + for (i = 0; i < bld->base.type.length; i++) { + LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder, + indexes, ii, ""); + LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr, + &index, 1, ""); + LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + + res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, ""); + } + + return res; +} + + +/** + * Read the current value of the ADDR register, convert the floats to + * ints, multiply by four and return the vector of offsets. + * The offsets will be used to index into the constant buffer or + * temporary register file. + */ +static LLVMValueRef +get_indirect_offsets(struct lp_build_tgsi_soa_context *bld, + const struct tgsi_src_register *indirect_reg) +{ + /* always use X component of address register */ + const int x = indirect_reg->SwizzleX; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); + uint swizzle = tgsi_util_get_src_register_swizzle(indirect_reg, x); + LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4); + LLVMValueRef addr_vec; + + addr_vec = LLVMBuildLoad(bld->base.builder, + bld->addr[indirect_reg->Index][swizzle], + "load addr reg"); + + /* for indexing we want integers */ + addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec, + int_vec_type, ""); + + /* addr_vec = addr_vec * 4 */ + addr_vec = lp_build_mul(&bld->base, addr_vec, vec4); + + return addr_vec; +} + + /** * Register fetch. */ @@ -430,14 +502,14 @@ static LLVMValueRef emit_fetch( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, - unsigned index, + unsigned src_op, const unsigned chan_index ) { - const struct tgsi_full_src_register *reg = &inst->Src[index]; + const struct tgsi_full_src_register *reg = &inst->Src[src_op]; const unsigned swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); LLVMValueRef res; - LLVMValueRef addr = NULL; + LLVMValueRef addr_vec = NULL; if (swizzle > 3) { assert(0 && "invalid swizzle in emit_fetch()"); @@ -445,32 +517,33 @@ emit_fetch( } if (reg->Register.Indirect) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); - unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); - addr = LLVMBuildLoad(bld->base.builder, - bld->addr[reg->Indirect.Index][swizzle], - ""); - /* for indexing we want integers */ - addr = LLVMBuildFPToSI(bld->base.builder, addr, - int_vec_type, ""); - addr = LLVMBuildExtractElement(bld->base.builder, - addr, LLVMConstInt(LLVMInt32Type(), 0, 0), - ""); - addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + assert(bld->indirect_files); + addr_vec = get_indirect_offsets(bld, ®->Indirect); } switch (reg->Register.File) { case TGSI_FILE_CONSTANT: - { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), - reg->Register.Index*4 + swizzle, 0); + if (reg->Register.Indirect) { + LLVMValueRef index_vec; /* index into the const buffer */ + + assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT)); + + /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */ + index_vec = lp_build_const_int_vec(bld->int_bld.type, + reg->Register.Index * 4 + swizzle); + + /* index_vec = index_vec + addr_vec */ + index_vec = lp_build_add(&bld->base, index_vec, addr_vec); + + /* Gather values from the constant buffer */ + res = build_gather(bld, bld->consts_ptr, index_vec); + } + else { + LLVMValueRef index; /* index into the const buffer */ LLVMValueRef scalar, scalar_ptr; - if (reg->Register.Indirect) { - /*lp_build_printf(bld->base.builder, - "\taddr = %d\n", addr);*/ - index = lp_build_add(&bld->base, index, addr); - } + index = lp_build_const_int32(reg->Register.Index*4 + swizzle); + scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); @@ -490,13 +563,38 @@ emit_fetch( break; case TGSI_FILE_TEMPORARY: - { - LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, - swizzle, - reg->Register.Indirect, - addr); + if (reg->Register.Indirect) { + LLVMValueRef vec_len = + lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length); + LLVMValueRef index_vec; /* index into the const buffer */ + LLVMValueRef temps_array; + LLVMTypeRef float4_ptr_type; + + assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)); + + /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */ + index_vec = lp_build_const_int_vec(bld->int_bld.type, + reg->Register.Index * 4 + swizzle); + + /* index_vec += addr_vec */ + index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec); + + /* index_vec *= vector_length */ + index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len); + + /* cast temps_array pointer to float* */ + float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0); + temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array, + float4_ptr_type, ""); + + /* Gather values from the temporary register array */ + res = build_gather(bld, temps_array, index_vec); + } + else { + LLVMValueRef temp_ptr; + temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle); res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); - if(!res) + if (!res) return bld->base.undef; } break; @@ -660,8 +758,12 @@ emit_store( } if (reg->Register.Indirect) { + /* XXX use get_indirect_offsets() here eventually */ LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); + + assert(bld->indirect_files); + addr = LLVMBuildLoad(bld->base.builder, bld->addr[reg->Indirect.Index][swizzle], ""); @@ -680,14 +782,18 @@ emit_store( bld->outputs[reg->Register.Index][chan_index]); break; - case TGSI_FILE_TEMPORARY: { - LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, - chan_index, - reg->Register.Indirect, - addr); - lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); + case TGSI_FILE_TEMPORARY: + if (reg->Register.Indirect) { + /* XXX not done yet */ + debug_printf("WARNING: LLVM scatter store of temp regs" + " not implemented\n"); + } + else { + LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, + chan_index); + lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); + } break; - } case TGSI_FILE_ADDRESS: lp_exec_mask_store(&bld->exec_mask, pred, value, @@ -905,7 +1011,7 @@ emit_declaration( switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: assert(idx < LP_MAX_TGSI_TEMPS); - if (bld->has_indirect_addressing) { + if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), last*4 + 4, 0); bld->temps_array = lp_build_array_alloca(bld->base.builder, @@ -1929,8 +2035,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.outputs = outputs; bld.consts_ptr = consts_ptr; bld.sampler = sampler; - bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 || - info->opcode_count[TGSI_OPCODE_ARL] > 0; + bld.indirect_files = info->indirect_files; bld.instructions = (struct tgsi_full_instruction *) MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); bld.max_instructions = LP_MAX_INSTRUCTIONS; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index df77ef2155..3ffe916f8e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -316,6 +316,54 @@ LLVMTypeRef lp_build_int32_vec4_type(void); +static INLINE struct lp_type +lp_float32_vec4_type(void) +{ + struct lp_type type; + + memset(&type, 0, sizeof(type)); + type.floating = TRUE; + type.sign = TRUE; + type.norm = FALSE; + type.width = 32; + type.length = 4; + + return type; +} + + +static INLINE struct lp_type +lp_int32_vec4_type(void) +{ + struct lp_type type; + + memset(&type, 0, sizeof(type)); + type.floating = FALSE; + type.sign = TRUE; + type.norm = FALSE; + type.width = 32; + type.length = 4; + + return type; +} + + +static INLINE struct lp_type +lp_unorm8_vec4_type(void) +{ + struct lp_type type; + + memset(&type, 0, sizeof(type)); + type.floating = FALSE; + type.sign = FALSE; + type.norm = TRUE; + type.width = 8; + type.length = 4; + + return type; +} + + struct lp_type lp_uint_type(struct lp_type type); diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h index 0238308d20..a084310d4f 100644 --- a/src/gallium/auxiliary/os/os_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -45,7 +45,6 @@ #include <pthread.h> /* POSIX threads headers */ #include <stdio.h> /* for perror() */ -#define PIPE_THREAD_HAVE_CONDVAR /* pipe_thread */ @@ -168,19 +167,59 @@ typedef CRITICAL_SECTION pipe_mutex; #define pipe_mutex_unlock(mutex) \ LeaveCriticalSection(&mutex) +/* TODO: Need a macro to declare "I don't care about WinXP compatibilty" */ +#if 0 && defined (_WIN32_WINNT) && (_WIN32_WINNT >= 0x0600) +/* CONDITION_VARIABLE is only available on newer versions of Windows + * (Server 2008/Vista or later). + * http://msdn.microsoft.com/en-us/library/ms682052(VS.85).aspx + * + * pipe_condvar + */ +typedef CONDITION_VARIABLE pipe_condvar; + +#define pipe_static_condvar(cond) \ + /*static*/ pipe_condvar cond = CONDITION_VARIABLE_INIT + +#define pipe_condvar_init(cond) \ + InitializeConditionVariable(&(cond)) + +#define pipe_condvar_destroy(cond) \ + (void) cond /* nothing to do */ + +#define pipe_condvar_wait(cond, mutex) \ + SleepConditionVariableCS(&(cond), &(mutex), INFINITE) + +#define pipe_condvar_signal(cond) \ + WakeConditionVariable(&(cond)) + +#define pipe_condvar_broadcast(cond) \ + WakeAllConditionVariable(&(cond)) + +#else /* need compatibility with pre-Vista Win32 */ /* pipe_condvar (XXX FIX THIS) + * See http://www.cs.wustl.edu/~schmidt/win32-cv-1.html + * for potential pitfalls in implementation. */ -typedef unsigned pipe_condvar; +typedef DWORD pipe_condvar; + +#define pipe_static_condvar(cond) \ + /*static*/ pipe_condvar cond = 1 #define pipe_condvar_init(cond) \ - (void) cond + (void) (cond = 1) #define pipe_condvar_destroy(cond) \ (void) cond +/* Poor man's pthread_cond_wait(): + Just release the mutex and sleep for one millisecond. + The caller's while() loop does all the work. */ #define pipe_condvar_wait(cond, mutex) \ - (void) cond; (void) mutex + do { pipe_mutex_unlock(mutex); \ + Sleep(cond); \ + pipe_mutex_lock(mutex); \ + } while (0) #define pipe_condvar_signal(cond) \ (void) cond @@ -188,9 +227,12 @@ typedef unsigned pipe_condvar; #define pipe_condvar_broadcast(cond) \ (void) cond +#endif /* pre-Vista win32 */ #else +#include "os/os_time.h" + /** Dummy definitions */ typedef unsigned pipe_thread; @@ -214,7 +256,6 @@ static INLINE int pipe_thread_destroy( pipe_thread thread ) } typedef unsigned pipe_mutex; -typedef unsigned pipe_condvar; #define pipe_static_mutex(mutex) \ static pipe_mutex mutex = 0 @@ -231,17 +272,25 @@ typedef unsigned pipe_condvar; #define pipe_mutex_unlock(mutex) \ (void) mutex +typedef int64_t pipe_condvar; + #define pipe_static_condvar(condvar) \ - static unsigned condvar = 0 + static pipe_condvar condvar = 1000 #define pipe_condvar_init(condvar) \ - (void) condvar + (void) (condvar = 1000) #define pipe_condvar_destroy(condvar) \ (void) condvar +/* Poor man's pthread_cond_wait(): + Just release the mutex and sleep for one millisecond. + The caller's while() loop does all the work. */ #define pipe_condvar_wait(condvar, mutex) \ - (void) condvar + do { pipe_mutex_unlock(mutex); \ + os_time_sleep(condvar); \ + pipe_mutex_lock(mutex); \ + } while (0) #define pipe_condvar_signal(condvar) \ (void) condvar @@ -277,27 +326,7 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier) } -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) - -/* XXX FIX THIS */ -typedef unsigned pipe_barrier; - -static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) -{ - /* XXX we could implement barriers with a mutex and condition var */ -} - -static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) -{ -} - -static INLINE void pipe_barrier_wait(pipe_barrier *barrier) -{ - assert(0); -} - - -#else +#else /* If the OS doesn't have its own, implement barriers using a mutex and a condvar */ typedef struct { unsigned count; diff --git a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h new file mode 100644 index 0000000000..0433da6141 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h @@ -0,0 +1,44 @@ + +#ifndef INLINE_DEBUG_HELPER_H +#define INLINE_DEBUG_HELPER_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" + + +/* Helper function to wrap a screen with + * one or more debug driver: rbug, trace. + */ + +#ifdef GALLIUM_TRACE +#include "trace/tr_public.h" +#endif + +#ifdef GALLIUM_RBUG +#include "rbug/rbug_public.h" +#endif + +#ifdef GALLIUM_GALAHAD +#include "galahad/glhd_public.h" +#endif + +static INLINE struct pipe_screen * +debug_screen_wrap(struct pipe_screen *screen) +{ + +#if defined(GALLIUM_RBUG) + screen = rbug_screen_create(screen); +#endif + +#if defined(GALLIUM_TRACE) + screen = trace_screen_create(screen); +#endif + +#if defined(GALLIUM_GALAHAD) + screen = galahad_screen_create(screen); +#endif + + return screen; +} + +#endif diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h new file mode 100644 index 0000000000..036c1ee48a --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -0,0 +1,63 @@ + +#ifndef INLINE_SW_HELPER_H +#define INLINE_SW_HELPER_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "state_tracker/sw_winsys.h" + + +/* Helper function to choose and instantiate one of the software rasterizers: + * cell, llvmpipe, softpipe. + */ + +#ifdef GALLIUM_SOFTPIPE +#include "softpipe/sp_public.h" +#endif + +#ifdef GALLIUM_LLVMPIPE +#include "llvmpipe/lp_public.h" +#endif + +#ifdef GALLIUM_CELL +#include "cell/ppu/cell_public.h" +#endif + +static INLINE struct pipe_screen * +sw_screen_create(struct sw_winsys *winsys) +{ + const char *default_driver; + const char *driver; + struct pipe_screen *screen = NULL; + +#if defined(GALLIUM_CELL) + default_driver = "cell"; +#elif defined(GALLIUM_LLVMPIPE) + default_driver = "llvmpipe"; +#elif defined(GALLIUM_SOFTPIPE) + default_driver = "softpipe"; +#else + default_driver = ""; +#endif + + driver = debug_get_option("GALLIUM_DRIVER", default_driver); + +#if defined(GALLIUM_CELL) + if (screen == NULL && strcmp(driver, "cell") == 0) + screen = cell_create_screen(winsys); +#endif + +#if defined(GALLIUM_LLVMPIPE) + if (screen == NULL && strcmp(driver, "llvmpipe") == 0) + screen = llvmpipe_create_screen(winsys); +#endif + +#if defined(GALLIUM_SOFTPIPE) + if (screen == NULL) + screen = softpipe_create_screen(winsys); +#endif + + return screen; +} + +#endif diff --git a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h new file mode 100644 index 0000000000..0b4e740403 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h @@ -0,0 +1,34 @@ + +#ifndef INLINE_WRAPPER_SW_HELPER_H +#define INLINE_WRAPPER_SW_HELPER_H + +#include "target-helpers/inline_sw_helper.h" +#include "sw/wrapper/wrapper_sw_winsys.h" + +/** + * Try to wrap a hw screen with a software screen. + * On failure will return given screen. + */ +static INLINE struct pipe_screen * +sw_screen_wrap(struct pipe_screen *screen) +{ + struct sw_winsys *sws; + struct pipe_screen *sw_screen; + + sws = wrapper_sw_winsys_warp_pipe_screen(screen); + if (!sws) + goto err; + + sw_screen = sw_screen_create(sws); + if (sw_screen == screen) + goto err_winsys; + + return sw_screen; + +err_winsys: + sws->destroy(sws); +err: + return screen; +} + +#endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 9fcc28f4c9..f71ffb7030 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -176,7 +176,11 @@ static const char *primitive_names[] = "TRIANGLE_FAN", "QUADS", "QUAD_STRIP", - "POLYGON" + "POLYGON", + "LINES_ADJACENCY", + "LINE_STRIP_ADJACENCY", + "TRIANGLES_ADJACENCY", + "TRIANGLE_STRIP_ADJACENCY" }; static const char *fs_coord_origin_names[] = diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index ced9c94f46..90198a4f60 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -109,6 +109,19 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_usage_mask[ind] |= usage_mask; } } + + /* check for indirect register reads */ + if (src->Register.Indirect) { + info->indirect_files |= (1 << src->Register.File); + } + } + + /* check for indirect register writes */ + for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *dst = &fullinst->Dst[i]; + if (dst->Register.Indirect) { + info->indirect_files |= (1 << dst->Register.File); + } } info->num_instructions++; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index e75280336f..f8aa90cf06 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -63,6 +63,12 @@ struct tgsi_shader_info boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KIL or KILP instruction used? */ + /** + * Bitmask indicating which register files are accessed with + * indirect addressing. The bits are (1 << TGSI_FILE_x), etc. + */ + unsigned indirect_files; + struct { unsigned name; unsigned data[8]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 55fccba4d8..b01d2ff468 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -58,7 +58,7 @@ static boolean is_digit_alpha_underscore( const char *cur ) static char uprcase( char c ) { if (c >= 'a' && c <= 'z') - return c += 'A' - 'a'; + return c + 'A' - 'a'; return c; } @@ -732,7 +732,7 @@ parse_optional_swizzle( else if (uprcase( *cur ) == 'W') swizzle[i] = TGSI_SWIZZLE_W; else { - report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); + report_error( ctx, "Expected register swizzle component `x', `y', `z' or `w'" ); return FALSE; } cur++; diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index dfe2101c2e..0d94aaae95 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -52,9 +52,8 @@ struct blitter_context_priv { - struct blitter_context blitter; + struct blitter_context base; - struct pipe_context *pipe; /**< pipe context */ struct pipe_resource *vbuf; /**< quad */ float vertices[4][2][4]; /**< {pos, color} or {pos, texcoord} */ @@ -97,15 +96,25 @@ struct blitter_context_priv /* Rasterizer state. */ void *rs_state; - struct pipe_sampler_view *sampler_view; - /* Viewport state. */ struct pipe_viewport_state viewport; /* Clip state. */ struct pipe_clip_state clip; + + /* Destination surface dimensions. */ + unsigned dst_width; + unsigned dst_height; }; +static void blitter_draw_rectangle(struct blitter_context *blitter, + unsigned x, unsigned y, + unsigned width, unsigned height, + float depth, + enum blitter_attrib_type type, + const float attrib[4]); + + struct blitter_context *util_blitter_create(struct pipe_context *pipe) { struct blitter_context_priv *ctx; @@ -120,19 +129,20 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) if (!ctx) return NULL; - ctx->pipe = pipe; + ctx->base.pipe = pipe; + ctx->base.draw_rectangle = blitter_draw_rectangle; /* init state objects for them to be considered invalid */ - ctx->blitter.saved_blend_state = INVALID_PTR; - ctx->blitter.saved_dsa_state = INVALID_PTR; - ctx->blitter.saved_rs_state = INVALID_PTR; - ctx->blitter.saved_fs = INVALID_PTR; - ctx->blitter.saved_vs = INVALID_PTR; - ctx->blitter.saved_velem_state = INVALID_PTR; - ctx->blitter.saved_fb_state.nr_cbufs = ~0; - ctx->blitter.saved_num_sampler_views = ~0; - ctx->blitter.saved_num_sampler_states = ~0; - ctx->blitter.saved_num_vertex_buffers = ~0; + ctx->base.saved_blend_state = INVALID_PTR; + ctx->base.saved_dsa_state = INVALID_PTR; + ctx->base.saved_rs_state = INVALID_PTR; + ctx->base.saved_fs = INVALID_PTR; + ctx->base.saved_vs = INVALID_PTR; + ctx->base.saved_velem_state = INVALID_PTR; + ctx->base.saved_fb_state.nr_cbufs = ~0; + ctx->base.saved_num_sampler_views = ~0; + ctx->base.saved_num_sampler_states = ~0; + ctx->base.saved_num_vertex_buffers = ~0; /* blend state objects */ memset(&blend, 0, sizeof(blend)); @@ -219,17 +229,17 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->vertices[i][0][3] = 1; /*v.w*/ /* create the vertex buffer */ - ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, + ctx->vbuf = pipe_buffer_create(ctx->base.pipe->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(ctx->vertices)); - return &ctx->blitter; + return &ctx->base; } void util_blitter_destroy(struct blitter_context *blitter) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = blitter->pipe; int i; pipe->delete_blend_state(pipe, ctx->blend_write_color); @@ -260,10 +270,6 @@ void util_blitter_destroy(struct blitter_context *blitter) if (ctx->sampler_state[i]) pipe->delete_sampler_state(pipe, ctx->sampler_state[i]); - if (ctx->sampler_view) { - pipe_sampler_view_reference(&ctx->sampler_view, NULL); - } - pipe_resource_reference(&ctx->vbuf, NULL); FREE(ctx); } @@ -271,112 +277,117 @@ void util_blitter_destroy(struct blitter_context *blitter) static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) { /* make sure these CSOs have been saved */ - assert(ctx->blitter.saved_blend_state != INVALID_PTR && - ctx->blitter.saved_dsa_state != INVALID_PTR && - ctx->blitter.saved_rs_state != INVALID_PTR && - ctx->blitter.saved_fs != INVALID_PTR && - ctx->blitter.saved_vs != INVALID_PTR && - ctx->blitter.saved_velem_state != INVALID_PTR); + assert(ctx->base.saved_blend_state != INVALID_PTR && + ctx->base.saved_dsa_state != INVALID_PTR && + ctx->base.saved_rs_state != INVALID_PTR && + ctx->base.saved_fs != INVALID_PTR && + ctx->base.saved_vs != INVALID_PTR && + ctx->base.saved_velem_state != INVALID_PTR); } static void blitter_restore_CSOs(struct blitter_context_priv *ctx) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; unsigned i; /* restore the state objects which are always required to be saved */ - pipe->bind_blend_state(pipe, ctx->blitter.saved_blend_state); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->blitter.saved_dsa_state); - pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state); - pipe->bind_fs_state(pipe, ctx->blitter.saved_fs); - pipe->bind_vs_state(pipe, ctx->blitter.saved_vs); - pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state); + pipe->bind_blend_state(pipe, ctx->base.saved_blend_state); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state); + pipe->bind_rasterizer_state(pipe, ctx->base.saved_rs_state); + pipe->bind_fs_state(pipe, ctx->base.saved_fs); + pipe->bind_vs_state(pipe, ctx->base.saved_vs); + pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state); - ctx->blitter.saved_blend_state = INVALID_PTR; - ctx->blitter.saved_dsa_state = INVALID_PTR; - ctx->blitter.saved_rs_state = INVALID_PTR; - ctx->blitter.saved_fs = INVALID_PTR; - ctx->blitter.saved_vs = INVALID_PTR; - ctx->blitter.saved_velem_state = INVALID_PTR; + ctx->base.saved_blend_state = INVALID_PTR; + ctx->base.saved_dsa_state = INVALID_PTR; + ctx->base.saved_rs_state = INVALID_PTR; + ctx->base.saved_fs = INVALID_PTR; + ctx->base.saved_vs = INVALID_PTR; + ctx->base.saved_velem_state = INVALID_PTR; - pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref); + pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref); - pipe->set_viewport_state(pipe, &ctx->blitter.saved_viewport); - pipe->set_clip_state(pipe, &ctx->blitter.saved_clip); + pipe->set_viewport_state(pipe, &ctx->base.saved_viewport); + pipe->set_clip_state(pipe, &ctx->base.saved_clip); /* restore the state objects which are required to be saved before copy/fill */ - if (ctx->blitter.saved_fb_state.nr_cbufs != ~0) { - pipe->set_framebuffer_state(pipe, &ctx->blitter.saved_fb_state); - ctx->blitter.saved_fb_state.nr_cbufs = ~0; + if (ctx->base.saved_fb_state.nr_cbufs != ~0) { + pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state); + util_assign_framebuffer_state(&ctx->base.saved_fb_state, NULL); + ctx->base.saved_fb_state.nr_cbufs = ~0; } - if (ctx->blitter.saved_num_sampler_states != ~0) { + if (ctx->base.saved_num_sampler_states != ~0) { pipe->bind_fragment_sampler_states(pipe, - ctx->blitter.saved_num_sampler_states, - ctx->blitter.saved_sampler_states); - ctx->blitter.saved_num_sampler_states = ~0; + ctx->base.saved_num_sampler_states, + ctx->base.saved_sampler_states); + ctx->base.saved_num_sampler_states = ~0; } - if (ctx->blitter.saved_num_sampler_views != ~0) { + if (ctx->base.saved_num_sampler_views != ~0) { pipe->set_fragment_sampler_views(pipe, - ctx->blitter.saved_num_sampler_views, - ctx->blitter.saved_sampler_views); - ctx->blitter.saved_num_sampler_views = ~0; + ctx->base.saved_num_sampler_views, + ctx->base.saved_sampler_views); + + for (i = 0; i < ctx->base.saved_num_sampler_views; i++) + pipe_sampler_view_reference(&ctx->base.saved_sampler_views[i], + NULL); + + ctx->base.saved_num_sampler_views = ~0; } - if (ctx->blitter.saved_num_vertex_buffers != ~0) { + if (ctx->base.saved_num_vertex_buffers != ~0) { pipe->set_vertex_buffers(pipe, - ctx->blitter.saved_num_vertex_buffers, - ctx->blitter.saved_vertex_buffers); + ctx->base.saved_num_vertex_buffers, + ctx->base.saved_vertex_buffers); - for (i = 0; i < ctx->blitter.saved_num_vertex_buffers; i++) { - if (ctx->blitter.saved_vertex_buffers[i].buffer) { - pipe_resource_reference(&ctx->blitter.saved_vertex_buffers[i].buffer, + for (i = 0; i < ctx->base.saved_num_vertex_buffers; i++) { + if (ctx->base.saved_vertex_buffers[i].buffer) { + pipe_resource_reference(&ctx->base.saved_vertex_buffers[i].buffer, NULL); } } - ctx->blitter.saved_num_vertex_buffers = ~0; + ctx->base.saved_num_vertex_buffers = ~0; } } static void blitter_set_rectangle(struct blitter_context_priv *ctx, unsigned x1, unsigned y1, unsigned x2, unsigned y2, - unsigned width, unsigned height, float depth) { int i; /* set vertex positions */ - ctx->vertices[0][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v0.x*/ - ctx->vertices[0][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v0.y*/ + ctx->vertices[0][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v0.x*/ + ctx->vertices[0][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v0.y*/ - ctx->vertices[1][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v1.x*/ - ctx->vertices[1][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v1.y*/ + ctx->vertices[1][0][0] = (float)x2 / ctx->dst_width * 2.0f - 1.0f; /*v1.x*/ + ctx->vertices[1][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v1.y*/ - ctx->vertices[2][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v2.x*/ - ctx->vertices[2][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v2.y*/ + ctx->vertices[2][0][0] = (float)x2 / ctx->dst_width * 2.0f - 1.0f; /*v2.x*/ + ctx->vertices[2][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v2.y*/ - ctx->vertices[3][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v3.x*/ - ctx->vertices[3][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v3.y*/ + ctx->vertices[3][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v3.x*/ + ctx->vertices[3][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v3.y*/ for (i = 0; i < 4; i++) ctx->vertices[i][0][2] = depth; /*z*/ /* viewport */ - ctx->viewport.scale[0] = 0.5f * width; - ctx->viewport.scale[1] = 0.5f * height; + ctx->viewport.scale[0] = 0.5f * ctx->dst_width; + ctx->viewport.scale[1] = 0.5f * ctx->dst_height; ctx->viewport.scale[2] = 1.0f; ctx->viewport.scale[3] = 1.0f; - ctx->viewport.translate[0] = 0.5f * width; - ctx->viewport.translate[1] = 0.5f * height; + ctx->viewport.translate[0] = 0.5f * ctx->dst_width; + ctx->viewport.translate[1] = 0.5f * ctx->dst_height; ctx->viewport.translate[2] = 0.0f; ctx->viewport.translate[3] = 0.0f; - ctx->pipe->set_viewport_state(ctx->pipe, &ctx->viewport); + ctx->base.pipe->set_viewport_state(ctx->base.pipe, &ctx->viewport); /* clip */ - ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip); + ctx->base.pipe->set_clip_state(ctx->base.pipe, &ctx->clip); } static void blitter_set_clear_color(struct blitter_context_priv *ctx, @@ -401,29 +412,45 @@ static void blitter_set_clear_color(struct blitter_context_priv *ctx, } } +static void get_normalized_texcoords(struct pipe_resource *src, + struct pipe_subresource subsrc, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2, + float out[4]) +{ + out[0] = x1 / (float)u_minify(src->width0, subsrc.level); + out[1] = y1 / (float)u_minify(src->height0, subsrc.level); + out[2] = x2 / (float)u_minify(src->width0, subsrc.level); + out[3] = y2 / (float)u_minify(src->height0, subsrc.level); +} + +static void set_texcoords_in_vertices(const float coord[4], + float *out, unsigned stride) +{ + out[0] = coord[0]; /*t0.s*/ + out[1] = coord[1]; /*t0.t*/ + out += stride; + out[0] = coord[2]; /*t1.s*/ + out[1] = coord[1]; /*t1.t*/ + out += stride; + out[0] = coord[2]; /*t2.s*/ + out[1] = coord[3]; /*t2.t*/ + out += stride; + out[0] = coord[0]; /*t3.s*/ + out[1] = coord[3]; /*t3.t*/ +} + static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx, struct pipe_resource *src, struct pipe_subresource subsrc, unsigned x1, unsigned y1, unsigned x2, unsigned y2) { - int i; - float s1 = x1 / (float)u_minify(src->width0, subsrc.level); - float t1 = y1 / (float)u_minify(src->height0, subsrc.level); - float s2 = x2 / (float)u_minify(src->width0, subsrc.level); - float t2 = y2 / (float)u_minify(src->height0, subsrc.level); - - ctx->vertices[0][1][0] = s1; /*t0.s*/ - ctx->vertices[0][1][1] = t1; /*t0.t*/ - - ctx->vertices[1][1][0] = s2; /*t1.s*/ - ctx->vertices[1][1][1] = t1; /*t1.t*/ - - ctx->vertices[2][1][0] = s2; /*t2.s*/ - ctx->vertices[2][1][1] = t2; /*t2.t*/ + unsigned i; + float coord[4]; - ctx->vertices[3][1][0] = s1; /*t3.s*/ - ctx->vertices[3][1][1] = t2; /*t3.t*/ + get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord); + set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8); for (i = 0; i < 4; i++) { ctx->vertices[i][1][2] = 0; /*r*/ @@ -454,20 +481,11 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, unsigned x2, unsigned y2) { int i; - float s1 = x1 / (float)u_minify(src->width0, subsrc.level); - float t1 = y1 / (float)u_minify(src->height0, subsrc.level); - float s2 = x2 / (float)u_minify(src->width0, subsrc.level); - float t2 = y2 / (float)u_minify(src->height0, subsrc.level); + float coord[4]; float st[4][2]; - st[0][0] = s1; - st[0][1] = t1; - st[1][0] = s2; - st[1][1] = t1; - st[2][0] = s2; - st[2][1] = t2; - st[3][0] = s1; - st[3][1] = t2; + get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord); + set_texcoords_in_vertices(coord, &st[0][0], 2); util_map_texcoords2d_onto_cubemap(subsrc.face, /* pointer, stride in floats */ @@ -478,9 +496,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, ctx->vertices[i][1][3] = 1; /*q*/ } +static void blitter_set_dst_dimensions(struct blitter_context_priv *ctx, + unsigned width, unsigned height) +{ + ctx->dst_width = width; + ctx->dst_height = height; +} + static void blitter_draw_quad(struct blitter_context_priv *ctx) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; /* write vertices and draw them */ pipe_buffer_write(pipe, ctx->vbuf, @@ -495,7 +520,7 @@ static INLINE void **blitter_get_sampler_state(struct blitter_context_priv *ctx, int miplevel) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state; assert(miplevel < PIPE_MAX_TEXTURE_LEVELS); @@ -518,7 +543,7 @@ void **blitter_get_sampler_state(struct blitter_context_priv *ctx, static INLINE void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); @@ -531,7 +556,7 @@ void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs) /** Convert PIPE_TEXTURE_x to TGSI_TEXTURE_x */ static unsigned -pipe_tex_to_tgsi_tex(unsigned pipe_tex_target) +pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target) { switch (pipe_tex_target) { case PIPE_TEXTURE_1D: @@ -553,7 +578,7 @@ static INLINE void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, unsigned tex_target) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; assert(tex_target < PIPE_MAX_TEXTURE_TYPES); @@ -572,7 +597,7 @@ static INLINE void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx, unsigned tex_target) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; assert(tex_target < PIPE_MAX_TEXTURE_TYPES); @@ -588,6 +613,31 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx, return ctx->fs_texfetch_depth[tex_target]; } +static void blitter_draw_rectangle(struct blitter_context *blitter, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2, + float depth, + enum blitter_attrib_type type, + const float attrib[4]) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + + switch (type) { + case UTIL_BLITTER_ATTRIB_COLOR: + blitter_set_clear_color(ctx, attrib); + break; + + case UTIL_BLITTER_ATTRIB_TEXCOORD: + set_texcoords_in_vertices(attrib, &ctx->vertices[0][1][0], 8); + break; + + default:; + } + + blitter_set_rectangle(ctx, x1, y1, x2, y2, depth); + blitter_draw_quad(ctx); +} + void util_blitter_clear(struct blitter_context *blitter, unsigned width, unsigned height, unsigned num_cbufs, @@ -596,7 +646,7 @@ void util_blitter_clear(struct blitter_context *blitter, double depth, unsigned stencil) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_stencil_ref sr = { { 0 } }; assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); @@ -630,9 +680,9 @@ void util_blitter_clear(struct blitter_context *blitter, pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs)); pipe->bind_vs_state(pipe, ctx->vs_col); - blitter_set_clear_color(ctx, rgba); - blitter_set_rectangle(ctx, 0, 0, width, height, width, height, depth); - blitter_draw_quad(ctx); + blitter_set_dst_dimensions(ctx, width, height); + blitter->draw_rectangle(blitter, 0, 0, width, height, depth, + UTIL_BLITTER_ATTRIB_COLOR, rgba); blitter_restore_CSOs(ctx); } @@ -654,7 +704,7 @@ void util_blitter_copy_region(struct blitter_context *blitter, boolean ignore_stencil) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_screen *screen = pipe->screen; struct pipe_surface *dstsurf; struct pipe_framebuffer_state fb_state; @@ -736,11 +786,6 @@ void util_blitter_copy_region(struct blitter_context *blitter, u_sampler_view_default_template(&viewTempl, src, src->format); view = pipe->create_sampler_view(pipe, src, &viewTempl); - if (ctx->sampler_view) { - pipe_sampler_view_reference(&ctx->sampler_view, NULL); - } - ctx->sampler_view = view; - /* Set rasterizer state, shaders, and textures. */ pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_vs_state(pipe, ctx->vs_tex); @@ -750,32 +795,49 @@ void util_blitter_copy_region(struct blitter_context *blitter, pipe->set_fragment_sampler_views(pipe, 1, &view); pipe->set_framebuffer_state(pipe, &fb_state); - /* Set texture coordinates. */ + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + switch (src->target) { + /* Draw the quad with the draw_rectangle callback. */ case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: - blitter_set_texcoords_2d(ctx, src, subsrc, - srcx, srcy, srcx+width, srcy+height); + { + /* Set texture coordinates. */ + float coord[4]; + get_normalized_texcoords(src, subsrc, srcx, srcy, + srcx+width, srcy+height, coord); + + /* Draw. */ + blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0, + UTIL_BLITTER_ATTRIB_TEXCOORD, coord); + } break; + + /* Draw the quad with the generic codepath. */ case PIPE_TEXTURE_3D: - blitter_set_texcoords_3d(ctx, src, subsrc, srcz, - srcx, srcy, srcx+width, srcy+height); - break; case PIPE_TEXTURE_CUBE: - blitter_set_texcoords_cube(ctx, src, subsrc, - srcx, srcy, srcx+width, srcy+height); + /* Set texture coordinates. */ + if (src->target == PIPE_TEXTURE_3D) + blitter_set_texcoords_3d(ctx, src, subsrc, srcz, + srcx, srcy, srcx+width, srcy+height); + else + blitter_set_texcoords_cube(ctx, src, subsrc, + srcx, srcy, srcx+width, srcy+height); + + /* Draw. */ + blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); + blitter_draw_quad(ctx); break; + default: assert(0); return; } - blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, - dstsurf->width, dstsurf->height, 0); - blitter_draw_quad(ctx); blitter_restore_CSOs(ctx); pipe_surface_reference(&dstsurf, NULL); + pipe_sampler_view_reference(&view, NULL); } /* Clear a region of a color surface to a constant value. */ @@ -786,7 +848,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, unsigned width, unsigned height) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_framebuffer_state fb_state; assert(dstsurf->texture); @@ -813,9 +875,9 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, fb_state.zsbuf = 0; pipe->set_framebuffer_state(pipe, &fb_state); - blitter_set_clear_color(ctx, rgba); - blitter_set_rectangle(ctx, 0, 0, width, height, dstsurf->width, dstsurf->height, 0); - blitter_draw_quad(ctx); + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0, + UTIL_BLITTER_ATTRIB_COLOR, rgba); blitter_restore_CSOs(ctx); } @@ -829,7 +891,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned width, unsigned height) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_framebuffer_state fb_state; struct pipe_stencil_ref sr = { { 0 } }; @@ -873,7 +935,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, fb_state.zsbuf = dstsurf; pipe->set_framebuffer_state(pipe, &fb_state); - blitter_set_rectangle(ctx, 0, 0, width, height, dstsurf->width, dstsurf->height, depth); - blitter_draw_quad(ctx); + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, depth, + UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 22849280ab..ba3f92eca8 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -39,9 +39,48 @@ extern "C" { struct pipe_context; +enum blitter_attrib_type { + UTIL_BLITTER_ATTRIB_NONE, + UTIL_BLITTER_ATTRIB_COLOR, + UTIL_BLITTER_ATTRIB_TEXCOORD +}; + struct blitter_context { + /** + * Draw a rectangle. + * + * \param x1 An X coordinate of the top-left corner. + * \param y1 A Y coordinate of the top-left corner. + * \param x2 An X coordinate of the bottom-right corner. + * \param y2 A Y coordinate of the bottom-right corner. + * \param depth A depth which the rectangle is rendered at. + * + * \param type Semantics of the attributes "attrib". + * If type is UTIL_BLITTER_ATTRIB_NONE, ignore them. + * If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes + * make up a constant RGBA color, and should go to the COLOR0 + * varying slot of a fragment shader. + * If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {a1, a2} and + * {a3, a4} specify top-left and bottom-right texture + * coordinates of the rectangle, respectively, and should go + * to the GENERIC0 varying slot of a fragment shader. + * + * \param attrib See type. + * + * \note A driver may optionally override this callback to implement + * a specialized hardware path for drawing a rectangle, e.g. using + * a rectangular point sprite. + */ + void (*draw_rectangle)(struct blitter_context *blitter, + unsigned x1, unsigned y1, unsigned x2, unsigned y2, + float depth, + enum blitter_attrib_type type, + const float attrib[4]); + /* Private members, really. */ + struct pipe_context *pipe; /**< pipe context */ + void *saved_blend_state; /**< blend state */ void *saved_dsa_state; /**< depth stencil alpha state */ void *saved_velem_state; /**< vertex elements state */ @@ -73,6 +112,15 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe); */ void util_blitter_destroy(struct blitter_context *blitter); +/** + * Return the pipe context associated with a blitter context. + */ +static INLINE +struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter) +{ + return blitter->pipe; +} + /* * These CSOs must be saved before any of the following functions is called: * - blend state @@ -208,11 +256,45 @@ void util_blitter_save_vertex_shader(struct blitter_context *blitter, blitter->saved_vs = vs; } +/* XXX This should probably be moved elsewhere. */ +static INLINE +void util_assign_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + unsigned i; + + if (src) { + /* Reference all surfaces. */ + for (i = 0; i < src->nr_cbufs; i++) { + pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); + } + for (; i < dst->nr_cbufs; i++) { + pipe_surface_reference(&dst->cbufs[i], NULL); + } + + pipe_surface_reference(&dst->zsbuf, src->zsbuf); + + dst->nr_cbufs = src->nr_cbufs; + dst->width = src->width; + dst->height = src->height; + } else { + /* Set all surfaces to NULL. */ + for (i = 0; i < dst->nr_cbufs; i++) { + pipe_surface_reference(&dst->cbufs[i], NULL); + } + + pipe_surface_reference(&dst->zsbuf, NULL); + + dst->nr_cbufs = 0; + } +} + static INLINE void util_blitter_save_framebuffer(struct blitter_context *blitter, - struct pipe_framebuffer_state *state) + const struct pipe_framebuffer_state *state) { - blitter->saved_fb_state = *state; + blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */ + util_assign_framebuffer_state(&blitter->saved_fb_state, state); } static INLINE @@ -247,12 +329,13 @@ util_blitter_save_fragment_sampler_views(struct blitter_context *blitter, int num_views, struct pipe_sampler_view **views) { + unsigned i; assert(num_views <= Elements(blitter->saved_sampler_views)); blitter->saved_num_sampler_views = num_views; - memcpy(blitter->saved_sampler_views, - views, - num_views * sizeof(struct pipe_sampler_view *)); + for (i = 0; i < num_views; i++) + pipe_sampler_view_reference(&blitter->saved_sampler_views[i], + views[i]); } static INLINE void diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c index 294ee37033..94d5bd3027 100644 --- a/src/gallium/auxiliary/util/u_caps.c +++ b/src/gallium/auxiliary/util/u_caps.c @@ -186,6 +186,22 @@ static unsigned caps_opengl_2_1[] = { /* OpenGL 3.0 */ /* UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), */ +/* Shader Model 3 */ +static unsigned caps_sm3[] = { + UTIL_CHECK_INT(MAX_FS_INSTRUCTIONS, 512), + UTIL_CHECK_INT(MAX_FS_INPUTS, 10), + UTIL_CHECK_INT(MAX_FS_TEMPS, 32), + UTIL_CHECK_INT(MAX_FS_ADDRS, 1), + UTIL_CHECK_INT(MAX_FS_CONSTS, 224), + + UTIL_CHECK_INT(MAX_VS_INSTRUCTIONS, 512), + UTIL_CHECK_INT(MAX_VS_INPUTS, 16), + UTIL_CHECK_INT(MAX_VS_TEMPS, 32), + UTIL_CHECK_INT(MAX_VS_ADDRS, 2), + UTIL_CHECK_INT(MAX_VS_CONSTS, 256), + + UTIL_CHECK_TERMINATE +}; /** * Demo function which checks against theoretical caps needed for different APIs. @@ -203,6 +219,7 @@ void util_caps_demo_print(struct pipe_screen *screen) {"DX 11", caps_dx_11}, {"OpenGL 2.1", caps_opengl_2_1}, /* {"OpenGL 3.0", caps_opengl_3_0},*/ + {"SM3", caps_sm3}, {NULL, NULL} }; int i, out = 0; diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index a08241971c..23d33af4e4 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -38,7 +38,7 @@ #include "u_cpu_detect.h" #if defined(PIPE_ARCH_PPC) -#if defined(PIPE_OS_DARWIN) +#if defined(PIPE_OS_APPLE) #include <sys/sysctl.h> #else #include <signal.h> @@ -132,7 +132,7 @@ win32_sig_handler_sse(EXCEPTION_POINTERS* ep) #endif /* PIPE_ARCH_X86 */ -#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_DARWIN) +#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE) static jmp_buf __lv_powerpc_jmpbuf; static volatile sig_atomic_t __lv_powerpc_canjump = 0; @@ -153,7 +153,7 @@ sigill_handler(int sig) static void check_os_altivec_support(void) { -#if defined(PIPE_OS_DARWIN) +#if defined(PIPE_OS_APPLE) int sels[2] = {CTL_HW, HW_VECTORUNIT}; int has_vu = 0; int len = sizeof (has_vu); @@ -166,8 +166,8 @@ check_os_altivec_support(void) util_cpu_caps.has_altivec = 1; } } -#else /* !PIPE_OS_DARWIN */ - /* no Darwin, do it the brute-force way */ +#else /* !PIPE_OS_APPLE */ + /* not on Apple/Darwin, do it the brute-force way */ /* this is borrowed from the libmpeg2 library */ signal(SIGILL, sigill_handler); if (setjmp(__lv_powerpc_jmpbuf)) { @@ -184,7 +184,7 @@ check_os_altivec_support(void) signal(SIGILL, SIG_DFL); util_cpu_caps.has_altivec = 1; } -#endif /* PIPE_OS_DARWIN */ +#endif /* !PIPE_OS_APPLE */ } #endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 5e373ff24c..ad162558bc 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -190,11 +190,11 @@ debug_get_flags_option(const char *name, result = dfault; else if (!util_strcmp(str, "help")) { result = dfault; - debug_printf("%s: help for %s:\n", __FUNCTION__, name); + _debug_printf("%s: help for %s:\n", __FUNCTION__, name); for (; flags->name; ++flags) namealign = MAX2(namealign, strlen(flags->name)); for (flags = orig; flags->name; ++flags) - debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name, + _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name, (int)sizeof(unsigned long)*CHAR_BIT/4, flags->value, flags->desc ? " " : "", flags->desc ? flags->desc : ""); } diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h index 53bb1342dd..42adb1f069 100644 --- a/src/gallium/auxiliary/util/u_double_list.h +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -98,5 +98,20 @@ struct list_head #define LIST_IS_EMPTY(__list) \ ((__list)->next == (__list)) - +#ifndef container_of +#define container_of(ptr, sample, member) \ + (void *)((char *)(ptr) \ + - ((char *)&(sample)->member - (char *)(sample))) +#endif + +#define LIST_FOR_EACH_ENTRY(pos, head, member) \ + for (pos = container_of((head)->next, pos, member); \ + &pos->member != (head); \ + pos = container_of(pos->member.next, pos, member)) + +#define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \ + for (pos = container_of((head)->next, pos, member), \ + storage = container_of(pos->member.next, pos, member); \ + &pos->member != (head); \ + pos = storage, storage = container_of(storage->member.next, storage, member)) #endif /*_U_DOUBLE_LIST_H_*/ diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c index 3168a1fab4..43d09f1960 100644 --- a/src/gallium/auxiliary/util/u_format.c +++ b/src/gallium/auxiliary/util/u_format.c @@ -120,7 +120,7 @@ util_format_write_4ub(enum pipe_format format, const uint8_t *src, unsigned src_ } -static INLINE boolean +boolean util_format_fits_8unorm(const struct util_format_description *format_desc) { unsigned chan; diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index fd95bea1a7..38254b1096 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -213,6 +213,16 @@ struct util_format_description unsigned width, unsigned height); /** + * Fetch a single pixel (i, j) from a block. + * + * XXX: Only defined for a very few select formats. + */ + void + (*fetch_rgba_8unorm)(uint8_t *dst, + const uint8_t *src, + unsigned i, unsigned j); + + /** * Unpack pixel blocks to R32G32B32A32_FLOAT. * Note: strides are in bytes. * @@ -663,6 +673,9 @@ util_format_write_4ub(enum pipe_format format, * Generic format conversion; */ +boolean +util_format_fits_8unorm(const struct util_format_description *format_desc); + void util_format_translate(enum pipe_format dst_format, void *dst, unsigned dst_stride, diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index ae9a598197..f0b407b8b8 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -132,12 +132,17 @@ def write_format_table(formats): if format.colorspace != ZS: print " &util_format_%s_unpack_rgba_8unorm," % format.short_name() print " &util_format_%s_pack_rgba_8unorm," % format.short_name() + if format.layout == 's3tc': + print " &util_format_%s_fetch_rgba_8unorm," % format.short_name() + else: + print " NULL, /* fetch_rgba_8unorm */" print " &util_format_%s_unpack_rgba_float," % format.short_name() print " &util_format_%s_pack_rgba_float," % format.short_name() print " &util_format_%s_fetch_rgba_float," % format.short_name() else: print " NULL, /* unpack_rgba_8unorm */" print " NULL, /* pack_rgba_8unorm */" + print " NULL, /* fetch_rgba_8unorm */" print " NULL, /* unpack_rgba_float */" print " NULL, /* pack_rgba_float */" print " NULL, /* fetch_rgba_float */" diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 6370e77986..fe19466436 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -567,12 +567,26 @@ util_bswap16(uint16_t n) #define MAX3( A, B, C ) MAX2( MAX2( A, B ), C ) +/** + * Align a value, only works pot alignemnts. + */ static INLINE int align(int value, int alignment) { return (value + alignment - 1) & ~(alignment - 1); } +/** + * Works like align but on npot alignments. + */ +static INLINE size_t +util_align_npot(size_t value, size_t alignment) +{ + if (value % alignment) + return value + (alignment - (value % alignment)); + return value; +} + static INLINE unsigned u_minify(unsigned value, unsigned levels) { diff --git a/src/gallium/auxiliary/util/u_mempool.c b/src/gallium/auxiliary/util/u_mempool.c new file mode 100644 index 0000000000..1f336b39a1 --- /dev/null +++ b/src/gallium/auxiliary/util/u_mempool.c @@ -0,0 +1,169 @@ +/* + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "util/u_mempool.h" + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" + +#include <stdio.h> + +#define UTIL_MEMPOOL_MAGIC 0xcafe4321 + +/* The block is either allocated memory or free space. */ +struct util_mempool_block { + /* The header. */ + /* The first next free block. */ + struct util_mempool_block *next_free; + + intptr_t magic; + + /* Memory after the last member is dedicated to the block itself. + * The allocated size is always larger than this structure. */ +}; + +static struct util_mempool_block * +util_mempool_get_block(struct util_mempool *pool, + struct util_mempool_page *page, unsigned index) +{ + return (struct util_mempool_block*) + ((uint8_t*)page + sizeof(struct util_mempool_page) + + (pool->block_size * index)); +} + +static void util_mempool_add_new_page(struct util_mempool *pool) +{ + struct util_mempool_page *page; + struct util_mempool_block *block; + int i; + + page = MALLOC(pool->page_size); + insert_at_tail(&pool->list, page); + + /* Mark all blocks as free. */ + for (i = 0; i < pool->num_blocks-1; i++) { + block = util_mempool_get_block(pool, page, i); + block->next_free = util_mempool_get_block(pool, page, i+1); + block->magic = UTIL_MEMPOOL_MAGIC; + } + + block = util_mempool_get_block(pool, page, pool->num_blocks-1); + block->next_free = pool->first_free; + block->magic = UTIL_MEMPOOL_MAGIC; + pool->first_free = util_mempool_get_block(pool, page, 0); + pool->num_pages++; + +#if 0 + fprintf(stderr, "New page! Num of pages: %i\n", pool->num_pages); +#endif +} + +static void *util_mempool_malloc_st(struct util_mempool *pool) +{ + struct util_mempool_block *block; + + if (!pool->first_free) + util_mempool_add_new_page(pool); + + block = pool->first_free; + assert(block->magic == UTIL_MEMPOOL_MAGIC); + pool->first_free = block->next_free; + + return (uint8_t*)block + sizeof(struct util_mempool_block); +} + +static void util_mempool_free_st(struct util_mempool *pool, void *ptr) +{ + struct util_mempool_block *block = + (struct util_mempool_block*) + ((uint8_t*)ptr - sizeof(struct util_mempool_block)); + + assert(block->magic == UTIL_MEMPOOL_MAGIC); + block->next_free = pool->first_free; + pool->first_free = block; +} + +static void *util_mempool_malloc_mt(struct util_mempool *pool) +{ + void *mem; + + pipe_mutex_lock(pool->mutex); + mem = util_mempool_malloc_st(pool); + pipe_mutex_unlock(pool->mutex); + return mem; +} + +static void util_mempool_free_mt(struct util_mempool *pool, void *ptr) +{ + pipe_mutex_lock(pool->mutex); + util_mempool_free_st(pool, ptr); + pipe_mutex_unlock(pool->mutex); +} + +void util_mempool_set_thread_safety(struct util_mempool *pool, + enum util_mempool_threading threading) +{ + pool->threading = threading; + + if (threading) { + pool->malloc = util_mempool_malloc_mt; + pool->free = util_mempool_free_mt; + } else { + pool->malloc = util_mempool_malloc_st; + pool->free = util_mempool_free_st; + } +} + +void util_mempool_create(struct util_mempool *pool, + unsigned item_size, + unsigned num_blocks, + enum util_mempool_threading threading) +{ + item_size = align(item_size, sizeof(intptr_t)); + + pool->num_pages = 0; + pool->num_blocks = num_blocks; + pool->block_size = sizeof(struct util_mempool_block) + item_size; + pool->block_size = align(pool->block_size, sizeof(intptr_t)); + pool->page_size = sizeof(struct util_mempool_page) + + num_blocks * pool->block_size; + pool->first_free = NULL; + + make_empty_list(&pool->list); + + pipe_mutex_init(pool->mutex); + + util_mempool_set_thread_safety(pool, threading); +} + +void util_mempool_destroy(struct util_mempool *pool) +{ + struct util_mempool_page *page, *temp; + + foreach_s(page, temp, &pool->list) { + remove_from_list(page); + FREE(page); + } + + pipe_mutex_destroy(pool->mutex); +} diff --git a/src/gallium/auxiliary/util/u_mempool.h b/src/gallium/auxiliary/util/u_mempool.h new file mode 100644 index 0000000000..a5b5d6a9b7 --- /dev/null +++ b/src/gallium/auxiliary/util/u_mempool.h @@ -0,0 +1,87 @@ +/* + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * @file + * Simple memory pool for equally sized memory allocations. + * util_mempool_malloc and util_mempool_free are in O(1). + * + * Good for allocations which have very low lifetime and are allocated + * and freed very often. Use a profiler first! + * + * Candidates: get_transfer, user_buffer_create + * + * @author Marek Olšák + */ + +#ifndef U_MEMPOOL_H +#define U_MEMPOOL_H + +#include "os/os_thread.h" + +enum util_mempool_threading { + UTIL_MEMPOOL_SINGLETHREADED = FALSE, + UTIL_MEMPOOL_MULTITHREADED = TRUE +}; + +/* The page is an array of blocks (allocations). */ +struct util_mempool_page { + /* The header (linked-list pointers). */ + struct util_mempool_page *prev, *next; + + /* Memory after the last member is dedicated to the page itself. + * The allocated size is always larger than this structure. */ +}; + +struct util_mempool { + /* Public members. */ + void *(*malloc)(struct util_mempool *pool); + void (*free)(struct util_mempool *pool, void *ptr); + + /* Private members. */ + struct util_mempool_block *first_free; + + struct util_mempool_page list; + + unsigned block_size; + unsigned page_size; + unsigned num_blocks; + unsigned num_pages; + enum util_mempool_threading threading; + + pipe_mutex mutex; +}; + +void util_mempool_create(struct util_mempool *pool, + unsigned item_size, + unsigned num_blocks, + enum util_mempool_threading threading); + +void util_mempool_destroy(struct util_mempool *pool); + +void util_mempool_set_thread_safety(struct util_mempool *pool, + enum util_mempool_threading threading); + +#define util_mempool_malloc(pool) (pool)->malloc(pool) +#define util_mempool_free(pool, ptr) (pool)->free(pool, ptr) + +#endif diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index 87ee0e4768..77f2c5fc7d 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include <winsock2.h> # include <windows.h> -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_CYGWIN) # include <sys/socket.h> # include <netinet/in.h> # include <unistd.h> |