diff options
author | Eric Anholt <eric@anholt.net> | 2010-07-26 17:47:59 -0700 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2010-07-26 17:53:27 -0700 |
commit | afe125e0a18ac3886c45c7e6b02b122fb2d327b5 (patch) | |
tree | 78621707e71154c0b388b0baacffc26432b7e992 /src/gallium | |
parent | d64343f1ae84979bd154475badf11af8a9bfc2eb (diff) | |
parent | 5403ca79b225605c79f49866a6497c97da53be3b (diff) | |
Merge remote branch 'origin/master' into glsl2
This pulls in multiple i965 driver fixes which will help ensure better
testing coverage during development, and also gets past the conflicts
of the src/mesa/shader -> src/mesa/program move.
Conflicts:
src/mesa/Makefile
src/mesa/main/shaderapi.c
src/mesa/main/shaderobj.h
Diffstat (limited to 'src/gallium')
417 files changed, 16450 insertions, 9781 deletions
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 1ba0724949..bff399ec64 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -23,6 +23,10 @@ INCLUDES = \ -I$(TOP)/src/gallium/drivers \ $(LIBRARY_INCLUDES) +ifeq ($(MESA_LLVM),1) +LIBRARY_DEFINES += $(LLVM_CFLAGS) +endif + ##### TARGETS ##### @@ -45,7 +49,7 @@ tags: # Remove .o and backup files clean: - rm -f $(OBJECTS) $(GENERATED_SOURCES) $(PROGS) lib$(LIBNAME).a depend depend.bak + rm -f $(OBJECTS) $(GENERATED_SOURCES) $(PROGS) lib$(LIBNAME).a depend depend.bak $(CLEAN_EXTRA) # Dummy target install: diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 7c8db19f5c..dcebab7c0f 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -124,6 +124,7 @@ C_SOURCES = \ util/u_linear.c \ util/u_network.c \ util/u_math.c \ + util/u_mempool.c \ util/u_mm.c \ util/u_rect.c \ util/u_ringbuffer.c \ @@ -154,6 +155,8 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_flow.c \ gallivm/lp_bld_format_aos.c \ gallivm/lp_bld_format_soa.c \ + gallivm/lp_bld_format_yuv.c \ + gallivm/lp_bld_gather.c \ gallivm/lp_bld_init.c \ gallivm/lp_bld_intr.c \ gallivm/lp_bld_logic.c \ @@ -167,8 +170,10 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_tgsi_soa.c \ gallivm/lp_bld_type.c \ draw/draw_llvm.c \ + draw/draw_vs_llvm.c \ draw/draw_pt_fetch_shade_pipeline_llvm.c \ - draw/draw_llvm_translate.c + draw/draw_llvm_translate.c \ + draw/draw_llvm_sample.c GALLIVM_CPP_SOURCES = \ gallivm/lp_bld_misc.cpp diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 6242ab0c59..72a16617db 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -32,8 +32,8 @@ env.CodeGenerate( env.CodeGenerate( target = 'util/u_format_table.c', - script = 'util/u_format_table.py', - source = ['util/u_format.csv'], + script = '#src/gallium/auxiliary/util/u_format_table.py', + source = 
['#src/gallium/auxiliary/util/u_format.csv'], command = 'python $SCRIPT $SOURCE > $TARGET' ) @@ -45,7 +45,7 @@ env.CodeGenerate( ) env.Depends('util/u_format_table.c', [ - 'util/u_format_parse.py', + '#src/gallium/auxiliary/util/u_format_parse.py', 'util/u_format_pack.py', ]) @@ -172,6 +172,7 @@ source = [ 'util/u_keymap.c', 'util/u_network.c', 'util/u_math.c', + 'util/u_mempool.c', 'util/u_mm.c', 'util/u_rect.c', 'util/u_resource.c', @@ -203,6 +204,8 @@ if env['llvm']: 'gallivm/lp_bld_flow.c', 'gallivm/lp_bld_format_aos.c', 'gallivm/lp_bld_format_soa.c', + 'gallivm/lp_bld_format_yuv.c', + 'gallivm/lp_bld_gather.c', 'gallivm/lp_bld_intr.c', 'gallivm/lp_bld_logic.c', 'gallivm/lp_bld_init.c', @@ -218,7 +221,9 @@ if env['llvm']: 'gallivm/lp_bld_type.c', 'draw/draw_llvm.c', 'draw/draw_pt_fetch_shade_pipeline_llvm.c', - 'draw/draw_llvm_translate.c' + 'draw/draw_llvm_translate.c', + 'draw/draw_vs_llvm.c', + 'draw/draw_llvm_sample.c' ] gallium = env.ConvenienceLibrary( diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 20a8612dca..58b022d531 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -289,6 +289,9 @@ void cso_release_all( struct cso_context *ctx ) ctx->pipe->bind_fs_state( ctx->pipe, NULL ); ctx->pipe->bind_vs_state( ctx->pipe, NULL ); ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL ); + ctx->pipe->set_fragment_sampler_views(ctx->pipe, 0, NULL); + if (ctx->pipe->set_vertex_sampler_views) + ctx->pipe->set_vertex_sampler_views(ctx->pipe, 0, NULL); } for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { @@ -1029,6 +1032,7 @@ static INLINE void clip_state_cpy(struct pipe_clip_state *dst, const struct pipe_clip_state *src) { + dst->depth_clamp = src->depth_clamp; dst->nr = src->nr; if (src->nr) { memcpy(dst->ucp, src->ucp, src->nr * sizeof(src->ucp[0])); @@ -1039,6 +1043,9 @@ static INLINE int clip_state_cmp(const struct pipe_clip_state *a, const 
struct pipe_clip_state *b) { + if (a->depth_clamp != b->depth_clamp) { + return 1; + } if (a->nr != b->nr) { return 1; } diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index dab95e5051..c127f74188 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -40,6 +40,7 @@ #if HAVE_LLVM #include "gallivm/lp_bld_init.h" +#include "draw_llvm.h" #endif struct draw_context *draw_create( struct pipe_context *pipe ) @@ -52,6 +53,7 @@ struct draw_context *draw_create( struct pipe_context *pipe ) lp_build_init(); assert(lp_build_engine); draw->engine = lp_build_engine; + draw->llvm = draw_llvm_create(draw); #endif if (!draw_init(draw)) @@ -132,6 +134,9 @@ void draw_destroy( struct draw_context *draw ) draw_pt_destroy( draw ); draw_vs_destroy( draw ); draw_gs_destroy( draw ); +#ifdef HAVE_LLVM + draw_llvm_destroy( draw->llvm ); +#endif FREE( draw ); } @@ -211,6 +216,7 @@ void draw_set_clip_state( struct draw_context *draw, assert(clip->nr <= PIPE_MAX_CLIP_PLANES); memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0])); draw->nr_planes = 6 + clip->nr; + draw->depth_clamp = clip->depth_clamp; } @@ -601,3 +607,54 @@ draw_set_so_state(struct draw_context *draw, state, sizeof(struct pipe_stream_output_state)); } + +void +draw_set_sampler_views(struct draw_context *draw, + struct pipe_sampler_view **views, + unsigned num) +{ + unsigned i; + + debug_assert(num <= PIPE_MAX_VERTEX_SAMPLERS); + + for (i = 0; i < num; ++i) + draw->sampler_views[i] = views[i]; + for (i = num; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + draw->sampler_views[i] = NULL; + + draw->num_sampler_views = num; +} + +void +draw_set_samplers(struct draw_context *draw, + struct pipe_sampler_state **samplers, + unsigned num) +{ + unsigned i; + + debug_assert(num <= PIPE_MAX_VERTEX_SAMPLERS); + + for (i = 0; i < num; ++i) + draw->samplers[i] = samplers[i]; + for (i = num; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + 
draw->samplers[i] = NULL; + + draw->num_samplers = num; +} + +void +draw_set_mapped_texture(struct draw_context *draw, + unsigned sampler_idx, + uint32_t width, uint32_t height, uint32_t depth, + uint32_t last_level, + uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], + uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], + const void *data[DRAW_MAX_TEXTURE_LEVELS]) +{ +#ifdef HAVE_LLVM + draw_llvm_set_mapped_texture(draw, + sampler_idx, + width, height, depth, last_level, + row_stride, img_stride, data); +#endif +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index c0122f2aca..116716af6f 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -47,11 +47,14 @@ struct draw_vertex_shader; struct draw_geometry_shader; struct tgsi_sampler; +#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ struct draw_context *draw_create( struct pipe_context *pipe ); void draw_destroy( struct draw_context *draw ); +void draw_flush(struct draw_context *draw); + void draw_set_viewport_state( struct draw_context *draw, const struct pipe_viewport_state *viewport ); @@ -101,6 +104,23 @@ draw_texture_samplers(struct draw_context *draw, uint num_samplers, struct tgsi_sampler **samplers); +void +draw_set_sampler_views(struct draw_context *draw, + struct pipe_sampler_view **views, + unsigned num); +void +draw_set_samplers(struct draw_context *draw, + struct pipe_sampler_state **samplers, + unsigned num); + +void +draw_set_mapped_texture(struct draw_context *draw, + unsigned sampler_idx, + uint32_t width, uint32_t height, uint32_t depth, + uint32_t last_level, + uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], + uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], + const void *data[DRAW_MAX_TEXTURE_LEVELS]); /* @@ -173,7 +193,7 @@ draw_set_so_state(struct draw_context *draw, /*********************************************************************** - * draw_prim.c + * draw_pt.c */ void draw_arrays(struct 
draw_context *draw, unsigned prim, @@ -187,8 +207,6 @@ draw_arrays_instanced(struct draw_context *draw, unsigned startInstance, unsigned instanceCount); -void draw_flush(struct draw_context *draw); - /******************************************************************************* * Driver backend interface diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 9117c1303d..19f96c37ab 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + #include "draw_llvm.h" #include "draw_context.h" @@ -15,14 +42,13 @@ #include "tgsi/tgsi_dump.h" #include "util/u_cpu_detect.h" -#include "util/u_string.h" #include "util/u_pointer.h" +#include "util/u_string.h" #include <llvm-c/Transforms/Scalar.h> #define DEBUG_STORE 0 - /* generates the draw jit function */ static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); @@ -36,12 +62,19 @@ init_globals(struct draw_llvm *llvm) /* struct draw_jit_texture */ { - LLVMTypeRef elem_types[4]; + LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS]; elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = + LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = + LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + elem_types[DRAW_JIT_TEXTURE_DATA] = + LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), + DRAW_MAX_TEXTURE_LEVELS); texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -51,9 +84,18 @@ init_globals(struct draw_llvm *llvm) LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, llvm->target, texture_type, DRAW_JIT_TEXTURE_HEIGHT); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride, + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_DEPTH); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_LAST_LEVEL); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride, llvm->target, texture_type, - DRAW_JIT_TEXTURE_STRIDE); + 
DRAW_JIT_TEXTURE_ROW_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_IMG_STRIDE); LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, llvm->target, texture_type, DRAW_JIT_TEXTURE_DATA); @@ -71,7 +113,8 @@ init_globals(struct draw_llvm *llvm) elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ - elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + elem_types[2] = LLVMArrayType(texture_type, + PIPE_MAX_VERTEX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -81,7 +124,7 @@ init_globals(struct draw_llvm *llvm) llvm->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, llvm->target, context_type, - DRAW_JIT_CONTEXT_TEXTURES_INDEX); + DRAW_JIT_CTX_TEXTURES); LP_CHECK_STRUCT_SIZE(struct draw_jit_context, llvm->target, context_type); @@ -195,9 +238,22 @@ draw_llvm_create(struct draw_context *draw) /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ /* TODO: Add more passes */ + LLVMAddCFGSimplificationPass(llvm->pass); - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); - LLVMAddConstantPropagationPass(llvm->pass); + + if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) { + /* For LLVM >= 2.7 and 32-bit build, use this order of passes to + * avoid generating bad code. + * Test with piglit glsl-vs-sqrt-zero test. 
+ */ + LLVMAddConstantPropagationPass(llvm->pass); + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); + } + else { + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); + LLVMAddConstantPropagationPass(llvm->pass); + } + if(util_cpu_caps.has_sse4_1) { /* FIXME: There is a bug in this pass, whereby the combination of fptosi * and sitofp (necessary for trunc/floor/ceil/round implementation) @@ -219,6 +275,9 @@ draw_llvm_create(struct draw_context *draw) LLVMDumpModule(llvm->module); } + llvm->nr_variants = 0; + make_empty_list(&llvm->vs_variants_list); + return llvm; } @@ -231,9 +290,13 @@ draw_llvm_destroy(struct draw_llvm *llvm) } struct draw_llvm_variant * -draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) +draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs) { struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant)); + struct llvm_vertex_shader *shader = + llvm_vertex_shader(llvm->draw->vs.vertex_shader); + + variant->llvm = llvm; draw_llvm_make_variant_key(llvm, &variant->key); @@ -242,6 +305,12 @@ draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) draw_llvm_generate(llvm, variant); draw_llvm_generate_elts(llvm, variant); + variant->shader = shader; + variant->list_item_global.base = variant; + variant->list_item_local.base = variant; + /*variant->no = */shader->variants_created++; + variant->list_item_global.base = variant; + return variant; } @@ -250,11 +319,13 @@ generate_vs(struct draw_llvm *llvm, LLVMBuilderRef builder, LLVMValueRef (*outputs)[NUM_CHANNELS], const LLVMValueRef (*inputs)[NUM_CHANNELS], - LLVMValueRef context_ptr) + LLVMValueRef context_ptr, + struct lp_build_sampler_soa *draw_sampler) { const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; struct lp_type vs_type; LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); + struct lp_build_sampler_soa *sampler = 0; memset(&vs_type, 0, sizeof vs_type); vs_type.floating = TRUE; /* floating point values */ @@ 
-270,6 +341,10 @@ generate_vs(struct draw_llvm *llvm, tgsi_dump(tokens, 0); } + if (llvm->draw->num_sampler_views && + llvm->draw->num_samplers) + sampler = draw_sampler; + lp_build_tgsi_soa(builder, tokens, vs_type, @@ -278,7 +353,7 @@ generate_vs(struct draw_llvm *llvm, NULL /*pos*/, inputs, outputs, - NULL/*sampler*/, + sampler, &llvm->draw->vs.vertex_shader->info); } @@ -306,7 +381,8 @@ generate_fetch(LLVMBuilderRef builder, LLVMValueRef *res, struct pipe_vertex_element *velem, LLVMValueRef vbuf, - LLVMValueRef index) + LLVMValueRef index, + LLVMValueRef instance_id) { LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, @@ -317,8 +393,15 @@ generate_fetch(LLVMBuilderRef builder, LLVMValueRef cond; LLVMValueRef stride; - cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); + if (velem->instance_divisor) { + /* array index = instance_id / instance_divisor */ + index = LLVMBuildUDiv(builder, instance_id, + LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0), + "instance_divisor"); + } + /* limit index to min(index, vb_max_index) */ + cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); stride = LLVMBuildMul(builder, vb_stride, index, ""); @@ -586,13 +669,14 @@ convert_to_aos(LLVMBuilderRef builder, static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { - LLVMTypeRef arg_types[7]; + LLVMTypeRef arg_types[8]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef start, end, count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + LLVMValueRef instance_id; struct draw_context *draw = llvm->draw; unsigned i, j; struct lp_build_context bld; @@ -601,6 +685,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) const int max_vertices = 4; 
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; void *code; + struct lp_build_sampler_soa *sampler = 0; arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ @@ -609,6 +694,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) arg_types[4] = LLVMInt32Type(); /* count */ arg_types[5] = LLVMInt32Type(); /* stride */ arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + arg_types[7] = LLVMInt32Type(); /* instance_id */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); @@ -625,6 +711,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) count = LLVMGetParam(variant->function, 4); stride = LLVMGetParam(variant->function, 5); vb_ptr = LLVMGetParam(variant->function, 6); + instance_id = LLVMGetParam(variant->function, 7); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); @@ -633,6 +720,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_build_name(count, "count"); lp_build_name(stride, "stride"); lp_build_name(vb_ptr, "vb"); + lp_build_name(instance_id, "instance_id"); /* * Function body @@ -648,6 +736,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + /* code generated texture sampling */ + sampler = draw_llvm_sampler_soa_create(variant->key.sampler, + context_ptr); + #if DEBUG_STORE lp_build_printf(builder, "start = %d, end = %d, step = %d\n", start, end, step); @@ -678,7 +770,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); generate_fetch(builder, vbuffers_ptr, - &aos_attribs[j][i], velem, vb, true_index); + &aos_attribs[j][i], velem, vb, true_index, + instance_id); } } convert_to_soa(builder, aos_attribs, inputs, @@ -689,7 +782,8 @@ draw_llvm_generate(struct 
draw_llvm *llvm, struct draw_llvm_variant *variant) builder, outputs, ptr_aos, - context_ptr); + context_ptr, + sampler); convert_to_aos(builder, io, outputs, draw->vs.vertex_shader->info.num_outputs, @@ -697,6 +791,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) } lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); + sampler->destroy(sampler); + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -730,13 +826,14 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) static void draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { - LLVMTypeRef arg_types[7]; + LLVMTypeRef arg_types[8]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + LLVMValueRef instance_id; struct draw_context *draw = llvm->draw; unsigned i, j; struct lp_build_context bld; @@ -747,6 +844,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef fetch_max; void *code; + struct lp_build_sampler_soa *sampler = 0; arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ @@ -755,14 +853,17 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian arg_types[4] = LLVMInt32Type(); /* fetch_count */ arg_types[5] = LLVMInt32Type(); /* stride */ arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + arg_types[7] = LLVMInt32Type(); /* instance_id */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); - variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); + variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", + func_type); 
LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute); + LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), + LLVMNoAliasAttribute); context_ptr = LLVMGetParam(variant->function_elts, 0); io_ptr = LLVMGetParam(variant->function_elts, 1); @@ -771,6 +872,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian fetch_count = LLVMGetParam(variant->function_elts, 4); stride = LLVMGetParam(variant->function_elts, 5); vb_ptr = LLVMGetParam(variant->function_elts, 6); + instance_id = LLVMGetParam(variant->function_elts, 7); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); @@ -779,6 +881,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian lp_build_name(fetch_count, "fetch_count"); lp_build_name(stride, "stride"); lp_build_name(vb_ptr, "vb"); + lp_build_name(instance_id, "instance_id"); /* * Function body @@ -793,6 +896,10 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + /* code generated texture sampling */ + sampler = draw_llvm_sampler_soa_create(variant->key.sampler, + context_ptr); + fetch_max = LLVMBuildSub(builder, fetch_count, LLVMConstInt(LLVMInt32Type(), 1, 0), "fetch_max"); @@ -833,7 +940,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); generate_fetch(builder, vbuffers_ptr, - &aos_attribs[j][i], velem, vb, true_index); + &aos_attribs[j][i], velem, vb, true_index, + instance_id); } } convert_to_soa(builder, aos_attribs, inputs, @@ -844,7 +952,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian builder, outputs, ptr_aos, - context_ptr); + context_ptr, + sampler); 
convert_to_aos(builder, io, outputs, draw->vs.vertex_shader->info.num_outputs, @@ -852,6 +961,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian } lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); + sampler->destroy(sampler); + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -885,6 +996,8 @@ void draw_llvm_make_variant_key(struct draw_llvm *llvm, struct draw_llvm_variant_key *key) { + unsigned i; + memset(key, 0, sizeof(struct draw_llvm_variant_key)); key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; @@ -896,4 +1009,72 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, memcpy(&key->vs, &llvm->draw->vs.vertex_shader->state, sizeof(struct pipe_shader_state)); + + /* if the driver implemented the sampling hooks then + * setup our sampling state */ + if (llvm->draw->num_sampler_views && llvm->draw->num_samplers) { + for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) { + struct draw_vertex_shader *shader = llvm->draw->vs.vertex_shader; + if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) + lp_sampler_static_state(&key->sampler[i], + llvm->draw->sampler_views[i], + llvm->draw->samplers[i]); + } + } +} + +void +draw_llvm_set_mapped_texture(struct draw_context *draw, + unsigned sampler_idx, + uint32_t width, uint32_t height, uint32_t depth, + uint32_t last_level, + uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], + uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], + const void *data[DRAW_MAX_TEXTURE_LEVELS]) +{ + unsigned j; + struct draw_jit_texture *jit_tex; + + assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS); + + + jit_tex = &draw->llvm->jit_context.textures[sampler_idx]; + + jit_tex->width = width; + jit_tex->height = height; + jit_tex->depth = depth; + jit_tex->last_level = last_level; + + for (j = 0; j <= last_level; j++) { + jit_tex->data[j] = data[j]; + jit_tex->row_stride[j] = row_stride[j]; + jit_tex->img_stride[j] = img_stride[j]; + } +} + +void +draw_llvm_destroy_variant(struct 
draw_llvm_variant *variant) +{ + struct draw_llvm *llvm = variant->llvm; + struct draw_context *draw = llvm->draw; + + if (variant->function_elts) { + if (variant->function_elts) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function_elts); + LLVMDeleteFunction(variant->function_elts); + } + + if (variant->function) { + if (variant->function) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + + remove_from_list(&variant->list_item_local); + variant->shader->variants_cached--; + remove_from_list(&variant->list_item_global); + llvm->nr_variants--; + FREE(variant); } diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 58fee7f9d6..4addb47d2d 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -1,28 +1,71 @@ -#ifndef HAVE_LLVM_H -#define HAVE_LLVM_H +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef DRAW_LLVM_H +#define DRAW_LLVM_H #include "draw/draw_private.h" +#include "draw/draw_vs.h" +#include "gallivm/lp_bld_sample.h" + #include "pipe/p_context.h" +#include "util/u_simple_list.h" #include <llvm-c/Core.h> #include <llvm-c/Analysis.h> #include <llvm-c/Target.h> #include <llvm-c/ExecutionEngine.h> +#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ + +struct draw_llvm; +struct llvm_vertex_shader; + struct draw_jit_texture { uint32_t width; uint32_t height; - uint32_t stride; - const void *data; + uint32_t depth; + uint32_t last_level; + uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; + const void *data[DRAW_MAX_TEXTURE_LEVELS]; }; enum { DRAW_JIT_TEXTURE_WIDTH = 0, DRAW_JIT_TEXTURE_HEIGHT, - DRAW_JIT_TEXTURE_STRIDE, - DRAW_JIT_TEXTURE_DATA + DRAW_JIT_TEXTURE_DEPTH, + DRAW_JIT_TEXTURE_LAST_LEVEL, + DRAW_JIT_TEXTURE_ROW_STRIDE, + DRAW_JIT_TEXTURE_IMG_STRIDE, + DRAW_JIT_TEXTURE_DATA, + DRAW_JIT_TEXTURE_NUM_FIELDS /* number of fields above */ }; enum { @@ -48,7 +91,7 @@ struct draw_jit_context const float *gs_constants; - struct draw_jit_texture textures[PIPE_MAX_SAMPLERS]; + struct draw_jit_texture textures[PIPE_MAX_VERTEX_SAMPLERS]; }; @@ -58,10 +101,10 @@ struct draw_jit_context #define draw_jit_context_gs_constants(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 1, "gs_constants") -#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2 +#define DRAW_JIT_CTX_TEXTURES 2 #define draw_jit_context_textures(_builder, _ptr) \ - lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures") + lp_build_struct_get_ptr(_builder, _ptr, 
DRAW_JIT_CTX_TEXTURES, "textures") @@ -92,7 +135,8 @@ typedef void unsigned start, unsigned count, unsigned stride, - struct pipe_vertex_buffer *vertex_buffers); + struct pipe_vertex_buffer *vertex_buffers, + unsigned instance_id); typedef void @@ -102,13 +146,54 @@ typedef void const unsigned *fetch_elts, unsigned fetch_count, unsigned stride, - struct pipe_vertex_buffer *vertex_buffers); + struct pipe_vertex_buffer *vertex_buffers, + unsigned instance_id); + +struct draw_llvm_variant_key +{ + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + struct pipe_shader_state vs; + struct lp_sampler_static_state sampler[PIPE_MAX_VERTEX_SAMPLERS]; +}; + +struct draw_llvm_variant_list_item +{ + struct draw_llvm_variant *base; + struct draw_llvm_variant_list_item *next, *prev; +}; + +struct draw_llvm_variant +{ + struct draw_llvm_variant_key key; + LLVMValueRef function; + LLVMValueRef function_elts; + draw_jit_vert_func jit_func; + draw_jit_vert_func_elts jit_func_elts; + + struct llvm_vertex_shader *shader; + + struct draw_llvm *llvm; + struct draw_llvm_variant_list_item list_item_global; + struct draw_llvm_variant_list_item list_item_local; +}; + +struct llvm_vertex_shader { + struct draw_vertex_shader base; + + struct draw_llvm_variant_list_item variants; + unsigned variants_created; + unsigned variants_cached; +}; struct draw_llvm { struct draw_context *draw; struct draw_jit_context jit_context; + struct draw_llvm_variant_list_item vs_variants_list; + int nr_variants; + LLVMModuleRef module; LLVMExecutionEngineRef engine; LLVMModuleProviderRef provider; @@ -121,23 +206,12 @@ struct draw_llvm { LLVMTypeRef vb_ptr_type; }; -struct draw_llvm_variant_key +static INLINE struct llvm_vertex_shader * +llvm_vertex_shader(struct draw_vertex_shader *vs) { - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - unsigned nr_vertex_elements; - struct pipe_shader_state vs; -}; + return (struct llvm_vertex_shader *)vs; +} -struct 
draw_llvm_variant -{ - struct draw_llvm_variant_key key; - LLVMValueRef function; - LLVMValueRef function_elts; - draw_jit_vert_func jit_func; - draw_jit_vert_func_elts jit_func_elts; - - struct draw_llvm_variant *next; -}; struct draw_llvm * draw_llvm_create(struct draw_context *draw); @@ -146,7 +220,10 @@ void draw_llvm_destroy(struct draw_llvm *llvm); struct draw_llvm_variant * -draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs); +draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs); + +void +draw_llvm_destroy_variant(struct draw_llvm_variant *variant); void draw_llvm_make_variant_key(struct draw_llvm *llvm, @@ -156,4 +233,18 @@ LLVMValueRef draw_llvm_translate_from(LLVMBuilderRef builder, LLVMValueRef vbuffer, enum pipe_format from_format); + +struct lp_build_sampler_soa * +draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, + LLVMValueRef context_ptr); + +void +draw_llvm_set_mapped_texture(struct draw_context *draw, + unsigned sampler_idx, + uint32_t width, uint32_t height, uint32_t depth, + uint32_t last_level, + uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], + uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], + const void *data[DRAW_MAX_TEXTURE_LEVELS]); + #endif diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c b/src/gallium/auxiliary/draw/draw_llvm_sample.c new file mode 100644 index 0000000000..e9811010db --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c @@ -0,0 +1,215 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling code generation + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_sample.h" +#include "gallivm/lp_bld_tgsi.h" + + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_pointer.h" +#include "util/u_string.h" + +#include "draw_llvm.h" + + +/** + * This provides the bridge between the sampler state store in + * lp_jit_context and lp_jit_texture and the sampler code + * generator. It provides the texture layout information required by + * the texture sampler code generator in terms of the state stored in + * lp_jit_context and lp_jit_texture in runtime. 
+ */ +struct draw_llvm_sampler_dynamic_state +{ + struct lp_sampler_dynamic_state base; + + const struct lp_sampler_static_state *static_state; + + LLVMValueRef context_ptr; +}; + + +/** + * This is the bridge between our sampler and the TGSI translator. + */ +struct draw_llvm_sampler_soa +{ + struct lp_build_sampler_soa base; + + struct draw_llvm_sampler_dynamic_state dynamic_state; +}; + + +/** + * Fetch the specified member of the lp_jit_texture structure. + * \param emit_load if TRUE, emit the LLVM load instruction to actually + * fetch the field's value. Otherwise, just emit the + * GEP code to address the field. + * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +draw_llvm_texture_member(const struct lp_sampler_dynamic_state *base, + LLVMBuilderRef builder, + unsigned unit, + unsigned member_index, + const char *member_name, + boolean emit_load) +{ + struct draw_llvm_sampler_dynamic_state *state = + (struct draw_llvm_sampler_dynamic_state *)base; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + debug_assert(unit < PIPE_MAX_VERTEX_SAMPLERS); + + /* context[0] */ + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + /* context[0].textures */ + indices[1] = LLVMConstInt(LLVMInt32Type(), DRAW_JIT_CTX_TEXTURES, 0); + /* context[0].textures[unit] */ + indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0); + /* context[0].textures[unit].member */ + indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0); + + ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); + + if (emit_load) + res = LLVMBuildLoad(builder, ptr, ""); + else + res = ptr; + + lp_build_name(res, "context.texture%u.%s", unit, member_name); + + return res; +} + + +/** + * Helper macro to instantiate the functions that generate the code to + * fetch the members of lp_jit_texture to fulfill the sampler code + * generator requests. 
+ * + * This complexity is the price we have to pay to keep the texture + * sampler code generator a reusable module without dependencies to + * llvmpipe internals. + */ +#define DRAW_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \ + static LLVMValueRef \ + draw_llvm_texture_##_name( const struct lp_sampler_dynamic_state *base, \ + LLVMBuilderRef builder, \ + unsigned unit) \ + { \ + return draw_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \ + } + + +DRAW_LLVM_TEXTURE_MEMBER(width, DRAW_JIT_TEXTURE_WIDTH, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(height, DRAW_JIT_TEXTURE_HEIGHT, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(depth, DRAW_JIT_TEXTURE_DEPTH, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(last_level, DRAW_JIT_TEXTURE_LAST_LEVEL, TRUE) +DRAW_LLVM_TEXTURE_MEMBER(row_stride, DRAW_JIT_TEXTURE_ROW_STRIDE, FALSE) +DRAW_LLVM_TEXTURE_MEMBER(img_stride, DRAW_JIT_TEXTURE_IMG_STRIDE, FALSE) +DRAW_LLVM_TEXTURE_MEMBER(data_ptr, DRAW_JIT_TEXTURE_DATA, FALSE) + + +static void +draw_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +/** + * Fetch filtered values from texture. + * The 'texel' parameter returns four vectors corresponding to R, G, B, A. 
+ */ +static void +draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, + LLVMBuilderRef builder, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ + LLVMValueRef *texel) +{ + struct draw_llvm_sampler_soa *sampler = (struct draw_llvm_sampler_soa *)base; + + assert(unit < PIPE_MAX_VERTEX_SAMPLERS); + + lp_build_sample_soa(builder, + &sampler->dynamic_state.static_state[unit], + &sampler->dynamic_state.base, + type, + unit, + num_coords, coords, + ddx, ddy, + lod_bias, explicit_lod, + texel); +} + + +struct lp_build_sampler_soa * +draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, + LLVMValueRef context_ptr) +{ + struct draw_llvm_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(draw_llvm_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = draw_llvm_sampler_soa_destroy; + sampler->base.emit_fetch_texel = draw_llvm_sampler_soa_emit_fetch_texel; + sampler->dynamic_state.base.width = draw_llvm_texture_width; + sampler->dynamic_state.base.height = draw_llvm_texture_height; + sampler->dynamic_state.base.depth = draw_llvm_texture_depth; + sampler->dynamic_state.base.last_level = draw_llvm_texture_last_level; + sampler->dynamic_state.base.row_stride = draw_llvm_texture_row_stride; + sampler->dynamic_state.base.img_stride = draw_llvm_texture_img_stride; + sampler->dynamic_state.base.data_ptr = draw_llvm_texture_data_ptr; + sampler->dynamic_state.static_state = static_state; + sampler->dynamic_state.context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c index d7da7ed357..6ebe1f7de4 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_translate.c +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -7,6 
+7,7 @@ #include "gallivm/lp_bld_struct.h" #include "gallivm/lp_bld_format.h" #include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" #include "util/u_memory.h" #include "util/u_format.h" @@ -466,6 +467,7 @@ draw_llvm_translate_from(LLVMBuilderRef builder, const struct util_format_description *format_desc; LLVMValueRef zero; int i; + struct lp_type type = lp_float32_vec4_type(); /* * The above can only cope with straight arrays: no bitfields, @@ -493,5 +495,5 @@ draw_llvm_translate_from(LLVMBuilderRef builder, format_desc = util_format_description(from_format); zero = LLVMConstNull(LLVMInt32Type()); - return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero); + return lp_build_fetch_rgba_aos(builder, format_desc, type, vbuffer, zero, zero, zero); } diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 83556f10a8..8cd75ecf9a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -177,15 +177,15 @@ static void do_triangle( struct draw_context *draw, ( DRAW_PIPE_RESET_STIPPLE | \ DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i1], \ - verts + stride * elts[i2]); \ + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_1 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i2], \ - verts + stride * elts[i3]) + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) /* emit last quad vertex as last vertex in triangles */ #define QUAD_LAST_PV(i0,i1,i2,i3) \ @@ -193,15 +193,15 @@ static void do_triangle( struct draw_context *draw, ( DRAW_PIPE_RESET_STIPPLE | \ DRAW_PIPE_EDGE_FLAG_0 | \ 
DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i1], \ - verts + stride * elts[i3]); \ + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * elts[i1], \ - verts + stride * elts[i2], \ - verts + stride * elts[i3]) + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ @@ -218,7 +218,7 @@ static void do_triangle( struct draw_context *draw, #define POINT(i0) \ do_point( draw, \ - verts + stride * elts[i0] ) + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) ) #define FUNC pipe_run #define ARGS \ @@ -260,7 +260,7 @@ void draw_pipeline_run( struct draw_context *draw, const struct draw_prim_info *prim_info) { unsigned i, start; - + draw->pipeline.verts = (char *)vert_info->verts; draw->pipeline.vertex_stride = vert_info->stride; draw->pipeline.vertex_count = vert_info->count; @@ -296,14 +296,14 @@ void draw_pipeline_run( struct draw_context *draw, DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2)); \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_1 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2), \ - verts + stride * (i3)) + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) /* emit last quad vertex as last vertex in triangles */ #define QUAD_LAST_PV(i0,i1,i2,i3) \ @@ -312,31 +312,31 @@ void draw_pipeline_run( struct draw_context *draw, DRAW_PIPE_EDGE_FLAG_0 | 
\ DRAW_PIPE_EDGE_FLAG_2 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i3)); \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2), \ - verts + stride * (i3)) + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ flags, /* flags */ \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2)) + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)) #define LINE(flags,i0,i1) \ do_line( draw, \ flags, \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1)) + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK)) #define POINT(i0) \ do_point( draw, \ - verts + stride * i0 ) + verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) ) #define FUNC pipe_run_linear #define ARGS \ diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index debd17fd74..c0135f5bb7 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -425,7 +425,8 @@ aaline_create_texture(struct aaline_stage *aaline) /* Fill in mipmap images. * Basically each level is solid opaque, except for the outermost - * texels which are zero. Special case the 1x1 and 2x2 levels. + * texels which are zero. Special case the 1x1 and 2x2 levels + * (though, those levels shouldn't be used - see the max_lod setting). 
*/ for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { struct pipe_transfer *transfer; @@ -497,7 +498,8 @@ aaline_create_sampler(struct aaline_stage *aaline) sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; sampler.normalized_coords = 1; sampler.min_lod = 0.0f; - sampler.max_lod = MAX_TEXTURE_LEVEL; + /* avoid using the 1x1 and 2x2 mipmap levels */ + sampler.max_lod = MAX_TEXTURE_LEVEL - 2; aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); if (aaline->sampler_cso == NULL) @@ -669,8 +671,8 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) assert(draw->rasterizer->line_smooth); - if (draw->rasterizer->line_width <= 3.0) - aaline->half_line_width = 1.5f; + if (draw->rasterizer->line_width <= 2.2) + aaline->half_line_width = 1.1f; else aaline->half_line_width = 0.5f * draw->rasterizer->line_width; diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 122b1c7968..1cf6ee7a7f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -262,6 +262,7 @@ do_clip_tri( struct draw_stage *stage, clipmask &= ~(1<<plane_idx); + assert(n < MAX_CLIPPED_VERTICES); inlist[n] = inlist[0]; /* prevent rotation of vertices */ for (i = 1; i <= n; i++) { @@ -270,11 +271,17 @@ do_clip_tri( struct draw_stage *stage, float dp = dot4( vert->clip, plane ); if (!IS_NEGATIVE(dp_prev)) { + assert(outcount < MAX_CLIPPED_VERTICES); outlist[outcount++] = vert_prev; } if (DIFFERENT_SIGNS(dp, dp_prev)) { - struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++]; + struct vertex_header *new_vert; + + assert(tmpnr < MAX_CLIPPED_VERTICES+1); + new_vert = clipper->stage.tmp[tmpnr++]; + + assert(outcount < MAX_CLIPPED_VERTICES); outlist[outcount++] = new_vert; if (IS_NEGATIVE(dp)) { @@ -317,12 +324,14 @@ do_clip_tri( struct draw_stage *stage, if (clipper->flat) { if (stage->draw->rasterizer->flatshade_first) { if (inlist[0] != header->v[0]) { + 
assert(tmpnr < MAX_CLIPPED_VERTICES + 1); inlist[0] = dup_vert(stage, inlist[0], tmpnr++); copy_colors(stage, inlist[0], header->v[0]); } } else { if (inlist[0] != header->v[2]) { + assert(tmpnr < MAX_CLIPPED_VERTICES + 1); inlist[0] = dup_vert(stage, inlist[0], tmpnr++); copy_colors(stage, inlist[0], header->v[2]); } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index fff960c7eb..ed9a53e154 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -363,8 +363,12 @@ generate_pstip_fs(struct pstip_stage *pstip) assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS); pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); - + FREE((void *)pstip_fs.tokens); + + if (!pstip->fs->pstip_fs) + return FALSE; + return TRUE; } @@ -603,13 +607,16 @@ pstip_destroy(struct draw_stage *stage) } +/** Create a new polygon stipple drawing stage object */ static struct pstip_stage * -draw_pstip_stage(struct draw_context *draw) +draw_pstip_stage(struct draw_context *draw, struct pipe_context *pipe) { struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); if (pstip == NULL) goto fail; + pstip->pipe = pipe; + pstip->stage.draw = draw; pstip->stage.name = "pstip"; pstip->stage.next = NULL; @@ -765,14 +772,12 @@ draw_install_pstipple_stage(struct draw_context *draw, /* * Create / install pgon stipple drawing / prim stage */ - pstip = draw_pstip_stage( draw ); + pstip = draw_pstip_stage( draw, pipe ); if (pstip == NULL) goto fail; draw->pipeline.pstipple = &pstip->stage; - pstip->pipe = pipe; - /* create special texture, sampler state */ if (!pstip_create_texture(pstip)) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 3e6e538995..ee2945c7c9 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -226,6 
+226,7 @@ static void widepoint_first_point( struct draw_stage *stage, if (rast->gl_rasterization_rules) { wide->xbias = 0.125; + wide->ybias = -0.125; } /* Disable triangle culling, stippling, unfilled mode etc. */ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4584033bc2..058aeedc17 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -48,6 +48,7 @@ #ifdef HAVE_LLVM #include <llvm-c/ExecutionEngine.h> +struct draw_llvm; #endif @@ -81,6 +82,9 @@ struct vertex_header { #define UNDEFINED_VERTEX_ID 0xffff +/* maximum number of shader variants we can cache */ +#define DRAW_MAX_SHADER_VARIANTS 1024 + /** * Private context for the drawing module. */ @@ -245,6 +249,7 @@ struct draw_context */ float plane[12][4]; unsigned nr_planes; + boolean depth_clamp; /* If a prim stage introduces new vertex attributes, they'll be stored here */ @@ -259,9 +264,15 @@ struct draw_context unsigned instance_id; #ifdef HAVE_LLVM + struct draw_llvm *llvm; LLVMExecutionEngineRef engine; #endif + struct pipe_sampler_view *sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned num_sampler_views; + const struct pipe_sampler_state *samplers[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned num_samplers; + void *driver_private; }; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 02c97fec81..92d4113b4c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -34,9 +34,11 @@ #include "draw/draw_gs.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" +#include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" #include "util/u_math.h" #include "util/u_prim.h" +#include "util/u_format.h" DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE) @@ -69,7 +71,6 @@ draw_pt_arrays(struct draw_context *draw, struct draw_pt_front_end *frontend = NULL; struct draw_pt_middle_end *middle = NULL; unsigned opt = 0; - unsigned out_prim 
= prim; /* Sanitize primitive length: */ @@ -80,18 +81,19 @@ draw_pt_arrays(struct draw_context *draw, if (count < first) return TRUE; } - if (draw->gs.geometry_shader) { - out_prim = draw->gs.geometry_shader->output_primitive; - } if (!draw->force_passthrough) { + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + if (!draw->render) { opt |= PT_PIPELINE; } if (draw_need_pipeline(draw, draw->rasterizer, - out_prim)) { + gs_out_prim)) { opt |= PT_PIPELINE; } @@ -102,7 +104,7 @@ draw_pt_arrays(struct draw_context *draw, opt |= PT_SHADE; } - if (draw->pt.middle.llvm && !draw->gs.geometry_shader) { + if (draw->pt.middle.llvm) { middle = draw->pt.middle.llvm; } else { if (opt == 0) @@ -122,7 +124,7 @@ draw_pt_arrays(struct draw_context *draw, frontend = draw->pt.front.varray; } - frontend->prepare( frontend, prim, out_prim, middle, opt ); + frontend->prepare( frontend, prim, middle, opt ); frontend->run(frontend, draw_pt_elt_func(draw), @@ -265,31 +267,38 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) case PIPE_FORMAT_R32_FLOAT: { float *v = (float *) ptr; - debug_printf("%f @ %p\n", v[0], (void *) v); + debug_printf("R %f @ %p\n", v[0], (void *) v); } break; case PIPE_FORMAT_R32G32_FLOAT: { float *v = (float *) ptr; - debug_printf("%f %f @ %p\n", v[0], v[1], (void *) v); + debug_printf("RG %f %f @ %p\n", v[0], v[1], (void *) v); } break; case PIPE_FORMAT_R32G32B32_FLOAT: { float *v = (float *) ptr; - debug_printf("%f %f %f @ %p\n", v[0], v[1], v[2], (void *) v); + debug_printf("RGB %f %f %f @ %p\n", v[0], v[1], v[2], (void *) v); } break; case PIPE_FORMAT_R32G32B32A32_FLOAT: { float *v = (float *) ptr; - debug_printf("%f %f %f %f @ %p\n", v[0], v[1], v[2], v[3], + debug_printf("RGBA %f %f %f %f @ %p\n", v[0], v[1], v[2], v[3], (void *) v); } break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + ubyte *u = (ubyte *) ptr; + debug_printf("BGRA %d %d %d %d @ %p\n", u[0], u[1], u[2], 
u[3], + (void *) u); + } + break; default: - debug_printf("other format (fix me)\n"); - ; + debug_printf("other format %s (fix me)\n", + util_format_name(draw->pt.vertex_element[j].src_format)); } } } @@ -297,11 +306,8 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) /** - * Draw vertex arrays - * This is the main entrypoint into the drawing module. - * \param prim one of PIPE_PRIM_x - * \param start index of first vertex to draw - * \param count number of vertices to draw + * Non-instanced drawing. + * \sa draw_arrays_instanced */ void draw_arrays(struct draw_context *draw, unsigned prim, @@ -310,6 +316,20 @@ draw_arrays(struct draw_context *draw, unsigned prim, draw_arrays_instanced(draw, prim, start, count, 0, 1); } + +/** + * Draw vertex arrays. + * This is the main entrypoint into the drawing module. + * If drawing an indexed primitive, the draw_set_mapped_element_buffer_range() + * function should have already been called to specify the element/index buffer + * information. + * + * \param prim one of PIPE_PRIM_x + * \param start index of first vertex to draw + * \param count number of vertices to draw + * \param startInstance number for the first primitive instance (usually 0). 
+ * \param instanceCount number of instances to draw (1=non-instanced) + */ void draw_arrays_instanced(struct draw_context *draw, unsigned mode, @@ -329,26 +349,30 @@ draw_arrays_instanced(struct draw_context *draw, if (0) draw_print_arrays(draw, mode, start, MIN2(count, 20)); -#if 0 - { - int i; + if (0) { + unsigned int i; debug_printf("draw_arrays(mode=%u start=%u count=%u):\n", mode, start, count); tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { - debug_printf(" format=%s\n", + debug_printf(" %u: src_offset=%u inst_div=%u vbuf=%u format=%s\n", + i, + draw->pt.vertex_element[i].src_offset, + draw->pt.vertex_element[i].instance_divisor, + draw->pt.vertex_element[i].vertex_buffer_index, util_format_name(draw->pt.vertex_element[i].src_format)); } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { - debug_printf(" stride=%u offset=%u ptr=%p\n", + debug_printf(" %u: stride=%u maxindex=%u offset=%u ptr=%p\n", + i, draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_buffer[i].max_index, draw->pt.vertex_buffer[i].buffer_offset, draw->pt.user.vbuffer[i]); } } -#endif for (instance = 0; instance < instanceCount; instance++) { draw->instance_id = instance + startInstance; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index b6741ca83c..44356fba4c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -62,8 +62,7 @@ struct draw_vertex_info; */ struct draw_pt_front_end { void (*prepare)( struct draw_pt_front_end *, - unsigned input_prim, - unsigned output_prim, + unsigned prim, struct draw_pt_middle_end *, unsigned opt ); @@ -87,8 +86,7 @@ struct draw_pt_front_end { */ struct draw_pt_middle_end { void (*prepare)( struct draw_pt_middle_end *, - unsigned input_prim, - unsigned output_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ); diff --git 
a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index bf799db352..ae12ee24bd 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -68,31 +68,12 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, fetch->vertex_size = vertex_size; - /* Always emit/leave space for a vertex header. - * - * It's worth considering whether the vertex headers should contain - * a pointer to the 'data', rather than having it inline. - * Something to look at after we've fully switched over to the pt - * paths. + /* Leave the clipmask/edgeflags/pad/vertex_id untouched */ - { - /* Need to set header->vertex_id = 0xffff somehow. - */ - key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; - key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; - key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; - key.element[nr].input_offset = 0; - key.element[nr].instance_divisor = 0; - key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; - key.element[nr].output_offset = dst_offset; - dst_offset += 1 * sizeof(float); - nr++; - - - /* Just leave the clip[] array untouched. - */ - dst_offset += 4 * sizeof(float); - } + dst_offset += 1 * sizeof(float); + /* Just leave the clip[] array untouched. 
+ */ + dst_offset += 4 * sizeof(float); if (instance_id_index != ~0) { num_extra_inputs++; @@ -131,26 +112,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, key.nr_elements = nr; key.output_stride = vertex_size; - if (!fetch->translate || translate_key_compare(&fetch->translate->key, &key) != 0) { translate_key_sanitize(&key); fetch->translate = translate_cache_find(fetch->cache, &key); - - { - static struct vertex_header vh = { 0, - 1, - 0, - UNDEFINED_VERTEX_ID, - { .0f, .0f, .0f, .0f } }; - - fetch->translate->set_buffer(fetch->translate, - draw->pt.nr_vertex_buffers, - &vh, - 0, - ~0); - } } } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index c629d55563..5c8af17c8e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -36,6 +36,7 @@ #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "draw/draw_gs.h" #include "translate/translate.h" #include "translate/translate_cache.h" @@ -90,7 +91,6 @@ struct fetch_emit_middle_end { static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned prim, - unsigned out_prim, unsigned opt, unsigned *max_vertices ) { @@ -101,9 +101,14 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, boolean ok; struct translate_key key; + unsigned gs_out_prim = (draw->gs.geometry_shader ? 
+ draw->gs.geometry_shader->output_primitive : + prim); + + ok = draw->render->set_primitive( draw->render, - out_prim ); + gs_out_prim ); if (!ok) { assert(0); return; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 5483a25f1d..b8270280b6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -68,8 +68,7 @@ struct fetch_shade_emit { static void fse_prepare( struct draw_pt_middle_end *middle, - unsigned in_prim, - unsigned out_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ) { @@ -80,9 +79,12 @@ static void fse_prepare( struct draw_pt_middle_end *middle, unsigned i; unsigned nr_vbs = 0; + /* Can't support geometry shader on this path. + */ + assert(!draw->gs.geometry_shader); if (!draw->render->set_primitive( draw->render, - out_prim )) { + prim )) { assert(0); return; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 43b08a030c..121dfc414a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -48,13 +48,11 @@ struct fetch_pipeline_middle_end { unsigned vertex_data_offset; unsigned vertex_size; unsigned input_prim; - unsigned output_prim; unsigned opt; }; static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - unsigned in_prim, - unsigned out_prim, + unsigned prim, unsigned opt, unsigned *max_vertices ) { @@ -64,6 +62,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned i; unsigned instance_id_index = ~0; + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. 
*/ @@ -79,8 +81,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, } } - fpme->input_prim = in_prim; - fpme->output_prim = out_prim; + fpme->input_prim = prim; fpme->opt = opt; /* Always leave room for the vertex header whether we need it or @@ -102,13 +103,13 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, (boolean)draw->bypass_clipping, (boolean)draw->identity_viewport, (boolean)draw->rasterizer->gl_rasterization_rules, - (draw->vs.edgeflag_output ? true : false) ); + (draw->vs.edgeflag_output ? TRUE : FALSE) ); draw_pt_so_emit_prepare( fpme->so_emit ); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, - out_prim, + gs_out_prim, max_vertices ); *max_vertices = MAX2( *max_vertices, @@ -183,7 +184,7 @@ static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, output_verts->count = input_verts->count; output_verts->verts = (struct vertex_header *)MALLOC(output_verts->vertex_size * - output_verts->count); + align(output_verts->count, 4)); vshader->run_linear(vshader, (const float (*)[4])input_verts->verts->data, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 7d2de58e73..bc074df8c2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -49,48 +49,50 @@ struct llvm_middle_end { unsigned vertex_data_offset; unsigned vertex_size; unsigned input_prim; - unsigned output_prim; unsigned opt; struct draw_llvm *llvm; - struct draw_llvm_variant *variants; struct draw_llvm_variant *current_variant; - int nr_variants; }; static void llvm_middle_end_prepare( struct draw_pt_middle_end *middle, unsigned in_prim, - unsigned out_prim, unsigned opt, unsigned *max_vertices ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vs = 
draw->vs.vertex_shader; + struct llvm_vertex_shader *shader = + llvm_vertex_shader(draw->vs.vertex_shader); struct draw_llvm_variant_key key; struct draw_llvm_variant *variant = NULL; + struct draw_llvm_variant_list_item *li; unsigned i; unsigned instance_id_index = ~0; + + unsigned out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + in_prim); + /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. */ - unsigned nr = MAX2( vs->info.num_inputs, - vs->info.num_outputs + 1 ); + unsigned nr = MAX2( shader->base.info.num_inputs, + shader->base.info.num_outputs + 1 ); /* Scan for instanceID system value. */ - for (i = 0; i < vs->info.num_inputs; i++) { - if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + for (i = 0; i < shader->base.info.num_inputs; i++) { + if (shader->base.info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { instance_id_index = i; break; } } fpme->input_prim = in_prim; - fpme->output_prim = out_prim; fpme->opt = opt; /* Always leave room for the vertex header whether we need it or @@ -107,7 +109,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, (boolean)draw->bypass_clipping, (boolean)(draw->identity_viewport), (boolean)draw->rasterizer->gl_rasterization_rules, - (draw->vs.edgeflag_output ? true : false) ); + (draw->vs.edgeflag_output ? 
TRUE : FALSE) ); draw_pt_so_emit_prepare( fpme->so_emit ); @@ -128,20 +130,41 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, draw_llvm_make_variant_key(fpme->llvm, &key); - variant = fpme->variants; - while(variant) { - if(memcmp(&variant->key, &key, sizeof key) == 0) + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + if(memcmp(&li->base->key, &key, sizeof key) == 0) { + variant = li->base; break; + } + li = next_elem(li); + } - variant = variant->next; + if (variant) { + move_to_head(&fpme->llvm->vs_variants_list, &variant->list_item_global); } + else { + unsigned i; + if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) { + /* + * XXX: should we flush here ? + */ + for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) { + struct draw_llvm_variant_list_item *item = + last_elem(&fpme->llvm->vs_variants_list); + draw_llvm_destroy_variant(item->base); + } + } + + variant = draw_llvm_create_variant(fpme->llvm, nr); - if (!variant) { - variant = draw_llvm_prepare(fpme->llvm, nr); - variant->next = fpme->variants; - fpme->variants = variant; - ++fpme->nr_variants; + if (variant) { + insert_at_head(&shader->variants, &variant->list_item_local); + insert_at_head(&fpme->llvm->vs_variants_list, &variant->list_item_global); + fpme->llvm->nr_variants++; + shader->variants_cached++; + } } + fpme->current_variant = variant; /*XXX we only support one constant buffer */ @@ -210,7 +233,8 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, fetch_info->start, fetch_info->count, fpme->vertex_size, - draw->pt.vertex_buffer ); + draw->pt.vertex_buffer, + draw->instance_id); else fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, llvm_vert_info.verts, @@ -218,7 +242,8 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, fetch_info->elts, fetch_info->count, fpme->vertex_size, - draw->pt.vertex_buffer); + draw->pt.vertex_buffer, + draw->instance_id); /* Finished with fetch and vs: */ @@ -356,31 +381,7 @@ static 
void llvm_middle_end_finish( struct draw_pt_middle_end *middle ) static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; - struct draw_context *draw = fpme->draw; - struct draw_llvm_variant *variant = NULL; - - variant = fpme->variants; - while(variant) { - struct draw_llvm_variant *next = variant->next; - if (variant->function_elts) { - if (variant->function_elts) - LLVMFreeMachineCodeForFunction(draw->engine, - variant->function_elts); - LLVMDeleteFunction(variant->function_elts); - } - - if (variant->function) { - if (variant->function) - LLVMFreeMachineCodeForFunction(draw->engine, - variant->function); - LLVMDeleteFunction(variant->function); - } - - FREE(variant); - - variant = next; - } if (fpme->fetch) draw_pt_fetch_destroy( fpme->fetch ); @@ -393,14 +394,12 @@ static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) if (fpme->post_vs) draw_pt_post_vs_destroy( fpme->post_vs ); - if (fpme->llvm) - draw_llvm_destroy( fpme->llvm ); - FREE(middle); } -struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw ) +struct draw_pt_middle_end * +draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw) { struct llvm_middle_end *fpme = 0; @@ -436,13 +435,11 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_cont if (!fpme->so_emit) goto fail; - fpme->llvm = draw_llvm_create(draw); + fpme->llvm = draw->llvm; if (!fpme->llvm) goto fail; - fpme->variants = NULL; fpme->current_variant = NULL; - fpme->nr_variants = 0; return &fpme->base; diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 112be50f9a..308f927b77 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -38,7 +38,14 @@ struct pt_post_vs { struct draw_vertex_info *info ); }; - +static INLINE void +initialize_vertex_header(struct 
vertex_header *header) +{ + header->clipmask = 0; + header->edgeflag = 1; + header->pad = 0; + header->vertex_id = UNDEFINED_VERTEX_ID; +} static INLINE float dot4(const float *a, const float *b) @@ -49,10 +56,9 @@ dot4(const float *a, const float *b) a[3]*b[3]); } - - static INLINE unsigned -compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) +compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr, + boolean clip_depth) { unsigned mask = 0x0; unsigned i; @@ -69,8 +75,10 @@ compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) if ( clip[0] + clip[3] < 0) mask |= (1<<1); if (-clip[1] + clip[3] < 0) mask |= (1<<2); if ( clip[1] + clip[3] < 0) mask |= (1<<3); - if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */ - if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */ + if (clip_depth) { + if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */ + if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */ + } /* Followed by any remaining ones: */ @@ -103,6 +111,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, for (j = 0; j < info->count; j++) { float *position = out->data[pos]; + initialize_vertex_header(out); #if 0 debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n", j, out, position, position[0], position[1], position[2], position[3]); @@ -114,9 +123,11 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, out->clip[3] = position[3]; out->vertex_id = 0xffff; + /* Disable depth clipping if depth clamping is enabled. 
*/ out->clipmask = compute_clipmask_gl(out->clip, pvs->draw->plane, - pvs->draw->nr_planes); + pvs->draw->nr_planes, + !pvs->draw->depth_clamp); clipped += out->clipmask; if (out->clipmask == 0) @@ -192,6 +203,7 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, for (j = 0; j < info->count; j++) { float *position = out->data[pos]; + initialize_vertex_header(out); /* Viewport mapping only, no cliptest/rhw divide */ position[0] = position[0] * scale[0] + trans[0]; @@ -211,7 +223,16 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, static boolean post_vs_none( struct pt_post_vs *pvs, struct draw_vertex_info *info ) { + struct vertex_header *out = info->verts; + unsigned j; + if (0) debug_printf("%s\n", __FUNCTION__); + /* just initialize the vertex_id in all headers */ + for (j = 0; j < info->count; j++) { + initialize_vertex_header(out); + + out = (struct vertex_header *)((char *)out + info->stride); + } return FALSE; } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 5ea833032f..cd7bb7bf25 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -120,24 +120,27 @@ static void varray_fan_segment(struct varray_frontend *varray, #define FUNC varray_run #include "draw_pt_varray_tmp_linear.h" -static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { +static unsigned decompose_prim[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY + 1] = { PIPE_PRIM_POINTS, PIPE_PRIM_LINES, PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */ PIPE_PRIM_LINE_STRIP, PIPE_PRIM_TRIANGLES, PIPE_PRIM_TRIANGLE_STRIP, - PIPE_PRIM_TRIANGLE_FAN, + PIPE_PRIM_TRIANGLE_FAN, PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP, - PIPE_PRIM_POLYGON + PIPE_PRIM_POLYGON, + PIPE_PRIM_LINES_ADJACENCY, + PIPE_PRIM_LINE_STRIP_ADJACENCY, + PIPE_PRIM_TRIANGLES_ADJACENCY, + PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY }; static void varray_prepare(struct draw_pt_front_end *frontend, unsigned in_prim, - unsigned out_prim, 
struct draw_pt_middle_end *middle, unsigned opt) { @@ -146,11 +149,13 @@ static void varray_prepare(struct draw_pt_front_end *frontend, varray->base.run = varray_run; varray->input_prim = in_prim; - varray->output_prim = decompose_prim[out_prim]; + assert(in_prim < Elements(decompose_prim)); + varray->output_prim = decompose_prim[in_prim]; varray->middle = middle; - middle->prepare(middle, varray->input_prim, - varray->output_prim, opt, &varray->driver_fetch_max ); + middle->prepare(middle, + varray->output_prim, + opt, &varray->driver_fetch_max ); /* check that the max is even */ assert((varray->driver_fetch_max & 1) == 0); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 914c87a9dc..8ef94c3163 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -41,6 +41,7 @@ #define FETCH_MAX 256 #define DRAW_MAX (16*1024) + struct vcache_frontend { struct draw_pt_front_end base; struct draw_context *draw; @@ -64,13 +65,13 @@ struct vcache_frontend { unsigned opt; }; + static INLINE void vcache_flush( struct vcache_frontend *vcache ) { if (vcache->middle_prim != vcache->output_prim) { vcache->middle_prim = vcache->output_prim; vcache->middle->prepare( vcache->middle, - vcache->input_prim, vcache->middle_prim, vcache->opt, &vcache->fetch_max ); @@ -89,12 +90,12 @@ vcache_flush( struct vcache_frontend *vcache ) vcache->draw_count = 0; } + static INLINE void vcache_check_flush( struct vcache_frontend *vcache ) { - if ( vcache->draw_count + 6 >= DRAW_MAX || - vcache->fetch_count + 4 >= FETCH_MAX ) - { + if (vcache->draw_count + 6 >= DRAW_MAX || + vcache->fetch_count + 4 >= FETCH_MAX) { vcache_flush( vcache ); } } @@ -146,6 +147,7 @@ vcache_triangle_flags( struct vcache_frontend *vcache, vcache_check_flush(vcache); } + static INLINE void vcache_line( struct vcache_frontend *vcache, unsigned i0, @@ -177,6 +179,7 @@ vcache_point( struct vcache_frontend *vcache, 
vcache_check_flush(vcache); } + static INLINE void vcache_quad( struct vcache_frontend *vcache, unsigned i0, @@ -196,6 +199,7 @@ vcache_quad( struct vcache_frontend *vcache, } } + static INLINE void vcache_ef_quad( struct vcache_frontend *vcache, unsigned i0, @@ -231,6 +235,7 @@ vcache_ef_quad( struct vcache_frontend *vcache, } } + /* At least for now, we're back to using a template include file for * this. The two paths aren't too different though - it may be * possible to reunify them. @@ -256,23 +261,23 @@ rebase_uint_elts( const unsigned *src, ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i] + delta); } + static INLINE void rebase_ushort_elts( const ushort *src, unsigned count, int delta, - ushort *dest ) + ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i] + delta); } + static INLINE void rebase_ubyte_elts( const ubyte *src, unsigned count, @@ -280,42 +285,39 @@ rebase_ubyte_elts( const ubyte *src, ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i] + delta); } - static INLINE void translate_uint_elts( const unsigned *src, unsigned count, ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i]); } + static INLINE void translate_ushort_elts( const ushort *src, unsigned count, ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i]); } + static INLINE void translate_ubyte_elts( const ubyte *src, unsigned count, ushort *dest ) { unsigned i; - for (i = 0; i < count; i++) dest[i] = (ushort)(src[i]); } @@ -336,6 +338,7 @@ format_from_get_elt( pt_elt_func get_elt ) } #endif + static INLINE void vcache_check_run( struct draw_pt_front_end *frontend, pt_elt_func get_elt, @@ -345,18 +348,46 @@ vcache_check_run( struct draw_pt_front_end *frontend, { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; struct draw_context *draw = vcache->draw; - unsigned min_index = draw->pt.user.min_index; - 
unsigned max_index = draw->pt.user.max_index; - unsigned index_size = draw->pt.user.eltSize; - unsigned fetch_count = max_index + 1 - min_index; + const unsigned min_index = draw->pt.user.min_index; + const unsigned max_index = draw->pt.user.max_index; + const unsigned index_size = draw->pt.user.eltSize; + unsigned fetch_count; const ushort *transformed_elts; ushort *storage = NULL; boolean ok = FALSE; + /* debug: verify indexes are in range [min_index, max_index] */ + if (0) { + unsigned i; + for (i = 0; i < draw_count; i++) { + if (index_size == 1) { + assert( ((const ubyte *) elts)[i] >= min_index); + assert( ((const ubyte *) elts)[i] <= max_index); + } + else if (index_size == 2) { + assert( ((const ushort *) elts)[i] >= min_index); + assert( ((const ushort *) elts)[i] <= max_index); + } + else { + assert(index_size == 4); + assert( ((const uint *) elts)[i] >= min_index); + assert( ((const uint *) elts)[i] <= max_index); + } + } + } + + /* Note: max_index is frequently 0xffffffff so we have to be sure + * that any arithmetic involving max_index doesn't overflow! 
+ */ + if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES) + goto fail; - if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, - vcache->fetch_max, - draw_count); + fetch_count = max_index + 1 - min_index; + + if (0) + debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, + vcache->fetch_max, + draw_count); if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES || fetch_count >= UNDEFINED_VERTEX_ID || @@ -368,23 +399,19 @@ vcache_check_run( struct draw_pt_front_end *frontend, if (vcache->middle_prim != vcache->input_prim) { vcache->middle_prim = vcache->input_prim; vcache->middle->prepare( vcache->middle, - vcache->input_prim, vcache->middle_prim, vcache->opt, &vcache->fetch_max ); } - assert((elt_bias >= 0 && min_index + elt_bias >= min_index) || (elt_bias < 0 && min_index + elt_bias < min_index)); if (min_index == 0 && - index_size == 2) - { + index_size == 2) { transformed_elts = (const ushort *)elts; } - else - { + else { storage = MALLOC( draw_count * sizeof(ushort) ); if (!storage) goto fail; @@ -419,23 +446,23 @@ vcache_check_run( struct draw_pt_front_end *frontend, switch(index_size) { case 1: rebase_ubyte_elts( (const ubyte *)elts, - draw_count, - 0 - (int)min_index, - storage ); + draw_count, + 0 - (int)min_index, + storage ); break; case 2: rebase_ushort_elts( (const ushort *)elts, - draw_count, - 0 - (int)min_index, - storage ); + draw_count, + 0 - (int)min_index, + storage ); break; case 4: rebase_uint_elts( (const uint *)elts, - draw_count, - 0 - (int)min_index, - storage ); + draw_count, + 0 - (int)min_index, + storage ); break; default: @@ -462,7 +489,7 @@ vcache_check_run( struct draw_pt_front_end *frontend, debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n", fetch_count, draw_count); - fail: +fail: vcache_run( frontend, get_elt, elts, elt_bias, draw_count ); } @@ -472,23 +499,26 @@ vcache_check_run( struct draw_pt_front_end *frontend, static void vcache_prepare( struct 
draw_pt_front_end *frontend, unsigned in_prim, - unsigned out_prim, struct draw_pt_middle_end *middle, unsigned opt ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - if (opt & PT_PIPELINE) - { + if (opt & PT_PIPELINE) { vcache->base.run = vcache_run_extras; } - else - { + else { vcache->base.run = vcache_check_run; } + /* VCache will always emit the reduced version of its input + * primitive, ie STRIP/FANS become TRIS, etc. + * + * This is not to be confused with what the GS might be up to, + * which is a separate issue. + */ vcache->input_prim = in_prim; - vcache->output_prim = u_reduced_prim(out_prim); + vcache->output_prim = u_reduced_prim(in_prim); vcache->middle = middle; vcache->opt = opt; @@ -496,12 +526,13 @@ vcache_prepare( struct draw_pt_front_end *frontend, /* Have to run prepare here, but try and guess a good prim for * doing so: */ - vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim; - middle->prepare( middle, vcache->input_prim, - vcache->middle_prim, opt, &vcache->fetch_max ); -} - + vcache->middle_prim = (opt & PT_PIPELINE) + ? 
vcache->output_prim : vcache->input_prim; + middle->prepare( middle, + vcache->middle_prim, + opt, &vcache->fetch_max ); +} static void @@ -512,6 +543,7 @@ vcache_finish( struct draw_pt_front_end *frontend ) vcache->middle = NULL; } + static void vcache_destroy( struct draw_pt_front_end *frontend ) { diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index b9db886a24..57ea63fc06 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -98,6 +98,11 @@ draw_create_vertex_shader(struct draw_context *draw, vs = draw_create_vs_ppc( draw, shader ); #endif } +#if HAVE_LLVM + else { + vs = draw_create_vs_llvm(draw, shader); + } +#endif if (!vs) { vs = draw_create_vs_exec( draw, shader ); diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 6c7e94db43..a731994523 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -165,7 +165,6 @@ draw_create_vs_ppc(struct draw_context *draw, const struct pipe_shader_state *templ); - struct draw_vs_varient_key; struct draw_vertex_shader; @@ -173,6 +172,11 @@ struct draw_vs_varient * draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, const struct draw_vs_varient_key *key ); +#if HAVE_LLVM +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *state); +#endif /******************************************************************************** diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c new file mode 100644 index 0000000000..6c13df7913 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -0,0 +1,120 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vs.h" +#include "draw_llvm.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" + +static void +vs_llvm_prepare(struct draw_vertex_shader *shader, + struct draw_context *draw) +{ + /*struct llvm_vertex_shader *evs = llvm_vertex_shader(shader);*/ +} + +static void +vs_llvm_run_linear( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + /* we should never get here since the entire pipeline is + * generated in draw_pt_fetch_shade_pipeline_llvm.c */ + debug_assert(0); +} + + +static void +vs_llvm_delete( struct draw_vertex_shader *dvs ) +{ + struct llvm_vertex_shader *shader = llvm_vertex_shader(dvs); + struct pipe_fence_handle *fence = NULL; + struct draw_llvm_variant_list_item *li; + struct pipe_context *pipe = dvs->draw->pipe; + + /* + * XXX: This might not be necessary at all. 
+ */ + pipe->flush(pipe, 0, &fence); + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + + + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + struct draw_llvm_variant_list_item *next = next_elem(li); + draw_llvm_destroy_variant(li->base); + li = next; + } + + assert(shader->variants_cached == 0); + FREE((void*) dvs->state.tokens); + FREE( dvs ); +} + + +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *state) +{ + struct llvm_vertex_shader *vs = CALLOC_STRUCT( llvm_vertex_shader ); + + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(state->tokens); + if (!vs->base.state.tokens) { + FREE(vs); + return NULL; + } + + tgsi_scan_shader(state->tokens, &vs->base.info); + + vs->base.draw = draw; + vs->base.prepare = vs_llvm_prepare; + vs->base.run_linear = vs_llvm_run_linear; + vs->base.delete = vs_llvm_delete; + vs->base.create_varient = draw_vs_create_varient_generic; + + make_empty_list(&vs->variants); + + return &vs->base; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index d926b2de18..f5f2623e46 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -56,7 +56,6 @@ #include "lp_bld_intr.h" #include "lp_bld_logic.h" #include "lp_bld_pack.h" -#include "lp_bld_debug.h" #include "lp_bld_arit.h" @@ -847,6 +846,11 @@ lp_build_round_sse41(struct lp_build_context *bld, } +/** + * Return the integer part of a float (vector) value. The returned value is + * a float (vector). + * Ex: trunc(-1.5) = -1.0 + */ LLVMValueRef lp_build_trunc(struct lp_build_context *bld, LLVMValueRef a) @@ -869,6 +873,12 @@ lp_build_trunc(struct lp_build_context *bld, } +/** + * Return float (vector) rounded to nearest integer (vector). 
The returned + * value is a float (vector). + * Ex: round(0.9) = 1.0 + * Ex: round(-1.5) = -2.0 + */ LLVMValueRef lp_build_round(struct lp_build_context *bld, LLVMValueRef a) @@ -890,6 +900,11 @@ lp_build_round(struct lp_build_context *bld, } +/** + * Return floor of float (vector), result is a float (vector) + * Ex: floor(1.1) = 1.0 + * Ex: floor(-1.1) = -2.0 + */ LLVMValueRef lp_build_floor(struct lp_build_context *bld, LLVMValueRef a) @@ -911,6 +926,11 @@ lp_build_floor(struct lp_build_context *bld, } +/** + * Return ceiling of float (vector), returning float (vector). + * Ex: ceil( 1.1) = 2.0 + * Ex: ceil(-1.1) = -1.0 + */ LLVMValueRef lp_build_ceil(struct lp_build_context *bld, LLVMValueRef a) @@ -933,7 +953,7 @@ lp_build_ceil(struct lp_build_context *bld, /** - * Return fractional part of 'a' computed as a - floor(f) + * Return fractional part of 'a' computed as a - floor(a) * Typically used in texture coord arithmetic. */ LLVMValueRef @@ -946,8 +966,9 @@ lp_build_fract(struct lp_build_context *bld, /** - * Convert to integer, through whichever rounding method that's fastest, - * typically truncating toward zero. + * Return the integer part of a float (vector) value. The returned value is + * an integer (vector). + * Ex: itrunc(-1.5) = -1 */ LLVMValueRef lp_build_itrunc(struct lp_build_context *bld, @@ -964,7 +985,10 @@ lp_build_itrunc(struct lp_build_context *bld, /** - * Convert float[] to int[] with round(). + * Return float (vector) rounded to nearest integer (vector). The returned + * value is an integer (vector). + * Ex: iround(0.9) = 1 + * Ex: iround(-1.5) = -2 */ LLVMValueRef lp_build_iround(struct lp_build_context *bld, @@ -1007,7 +1031,9 @@ lp_build_iround(struct lp_build_context *bld, /** - * Convert float[] to int[] with floor(). 
+ * Return floor of float (vector), result is an int (vector) + * Ex: ifloor(1.1) = 1 + * Ex: ifloor(-1.1) = -2 */ LLVMValueRef lp_build_ifloor(struct lp_build_context *bld, @@ -1034,29 +1060,31 @@ lp_build_ifloor(struct lp_build_context *bld, /* sign = a < 0 ? ~0 : 0 */ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), ""); - lp_build_name(sign, "floor.sign"); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign"); /* offset = -0.99999(9)f */ - offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); + offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); offset = LLVMConstBitCast(offset, int_vec_type); - /* offset = a < 0 ? -0.99999(9)f : 0.0f */ + /* offset = a < 0 ? offset : 0.0f */ offset = LLVMBuildAnd(bld->builder, offset, sign, ""); - offset = LLVMBuildBitCast(bld->builder, offset, vec_type, ""); - lp_build_name(offset, "floor.offset"); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); - res = LLVMBuildAdd(bld->builder, a, offset, ""); - lp_build_name(res, "floor.res"); + res = LLVMBuildAdd(bld->builder, a, offset, "ifloor.res"); } - res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); - lp_build_name(res, "floor"); + /* convert to integer, truncating toward zero */ + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "ifloor.res"); return res; } +/** + * Return ceiling of float (vector), returning int (vector). 
+ * Ex: iceil( 1.1) = 2 + * Ex: iceil(-1.1) = -1 + */ LLVMValueRef lp_build_iceil(struct lp_build_context *bld, LLVMValueRef a) @@ -1072,12 +1100,31 @@ lp_build_iceil(struct lp_build_context *bld, res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); } else { - /* TODO: mimic lp_build_ifloor() here */ - assert(0); - res = bld->undef; + LLVMTypeRef vec_type = lp_build_vec_type(type); + unsigned mantissa = lp_mantissa(type); + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef offset; + + /* sign = a < 0 ? 0 : ~0 */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign"); + sign = LLVMBuildNot(bld->builder, sign, "iceil.not"); + + /* offset = 0.99999(9)f */ + offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); + offset = LLVMConstBitCast(offset, int_vec_type); + + /* offset = a < 0 ? 
0.0 : offset */ + offset = LLVMBuildAnd(bld->builder, offset, sign, ""); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); + + res = LLVMBuildAdd(bld->builder, a, offset, "iceil.res"); } - res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + /* round to nearest (toward zero) */ + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "iceil.res"); return res; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index d46b9f882b..7ee8fff140 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -107,4 +107,12 @@ lp_build_const_mask_aos(struct lp_type type, const boolean cond[4]); +static INLINE LLVMValueRef +lp_build_const_int32(int i) +{ + return LLVMConstInt(LLVMInt32Type(), i, 0); +} + + + #endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 3f7f2ebde9..77012f1fac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -83,6 +83,9 @@ * * Although the result values can be scaled to an arbitrary bit width specified * by dst_width, the actual result type will have the same width. + * + * Ex: src = { float, float, float, float } + * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1]. */ LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, @@ -152,6 +155,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, /** * Inverse of lp_build_clamped_float_to_unsigned_norm above. + * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1] + * return {float, float, float, float} with values in range [0, 1]. 
*/ LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, @@ -219,18 +224,19 @@ lp_build_conv(LLVMBuilderRef builder, unsigned num_tmps; unsigned i; - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - /* We must not lose or gain channels. Only precision */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); assert(src_type.length <= LP_MAX_VECTOR_LENGTH); assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); + assert(num_srcs <= LP_MAX_VECTOR_LENGTH); + assert(num_dsts <= LP_MAX_VECTOR_LENGTH); tmp_type = src_type; - for(i = 0; i < num_srcs; ++i) + for(i = 0; i < num_srcs; ++i) { + assert(lp_check_value(src_type, src[i])); tmp[i] = src[i]; + } num_tmps = num_srcs; /* @@ -326,30 +332,25 @@ lp_build_conv(LLVMBuilderRef builder, /* * Truncate or expand bit width + * + * No data conversion should happen here, although the sign bits are + * crucial to avoid bad clamping. */ - assert(!tmp_type.floating || tmp_type.width == dst_type.width); + { + struct lp_type new_type; - if(tmp_type.width > dst_type.width) { - assert(num_dsts == 1); - tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); - tmp_type.width = dst_type.width; - tmp_type.length = dst_type.length; - num_tmps = 1; - } + new_type = tmp_type; + new_type.sign = dst_type.sign; + new_type.width = dst_type.width; + new_type.length = dst_type.length; + + lp_build_resize(builder, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts); - if(tmp_type.width < dst_type.width) { - assert(num_tmps == 1); - lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); - tmp_type.width = dst_type.width; - tmp_type.length = dst_type.length; + tmp_type = new_type; num_tmps = num_dsts; } - assert(tmp_type.width == dst_type.width); - assert(tmp_type.length == dst_type.length); - assert(num_tmps == num_dsts); - /* * Scale to the widest range */ @@ -406,8 +407,10 @@ lp_build_conv(LLVMBuilderRef builder, } } - 
for(i = 0; i < num_dsts; ++i) + for(i = 0; i < num_dsts; ++i) { dst[i] = tmp[i]; + assert(lp_check_value(dst_type, dst[i])); + } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 5f5036e7bd..60e22d727a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -48,9 +48,9 @@ struct lp_build_context; */ LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - LLVMValueRef packed); +lp_build_format_swizzle_aos(const struct util_format_description *desc, + struct lp_build_context *bld, + LLVMValueRef unswizzled); LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, @@ -60,7 +60,9 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, LLVMValueRef lp_build_fetch_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *format_desc, - LLVMValueRef ptr, + struct lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offset, LLVMValueRef i, LLVMValueRef j); @@ -72,7 +74,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, void lp_build_format_swizzle_soa(const struct util_format_description *format_desc, struct lp_build_context *bld, - const LLVMValueRef *unswizzled, + const LLVMValueRef unswizzled[4], LLVMValueRef swizzled_out[4]); void @@ -82,6 +84,11 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, LLVMValueRef packed, LLVMValueRef rgba_out[4]); +void +lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, + struct lp_type dst_type, + LLVMValueRef packed, + LLVMValueRef *rgba); void lp_build_fetch_rgba_soa(LLVMBuilderRef builder, @@ -93,5 +100,18 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, LLVMValueRef j, LLVMValueRef rgba_out[4]); +/* + * YUV + */ + + +LLVMValueRef +lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j); 
#endif /* !LP_BLD_FORMAT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 87e3e72a6e..0f01fc1d75 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -38,33 +38,122 @@ #include "util/u_math.h" #include "util/u_string.h" +#include "lp_bld_arit.h" #include "lp_bld_init.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_gather.h" #include "lp_bld_format.h" /** + * Basic swizzling. Rearrange the order of the unswizzled array elements + * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported + * too. + * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}. + */ +LLVMValueRef +lp_build_format_swizzle_aos(const struct util_format_description *desc, + struct lp_build_context *bld, + LLVMValueRef unswizzled) +{ + unsigned char swizzles[4]; + unsigned chan; + + assert(bld->type.length % 4 == 0); + + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle; + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + /* + * For ZS formats do RGBA = ZZZ1 + */ + if (chan == 3) { + swizzle = UTIL_FORMAT_SWIZZLE_1; + } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { + swizzle = UTIL_FORMAT_SWIZZLE_0; + } else { + swizzle = desc->swizzle[0]; + } + } else { + swizzle = desc->swizzle[chan]; + } + swizzles[chan] = swizzle; + } + + return lp_build_swizzle_aos(bld, unswizzled, swizzles); +} + + +/** + * Whether the format matches the vector type, apart of swizzles. 
+ */ +static INLINE boolean +format_matches_type(const struct util_format_description *desc, + struct lp_type type) +{ + enum util_format_type chan_type; + unsigned chan; + + assert(type.length % 4 == 0); + + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB || + desc->block.width != 1 || + desc->block.height != 1) { + return FALSE; + } + + if (type.floating) { + chan_type = UTIL_FORMAT_TYPE_FLOAT; + } else if (type.fixed) { + chan_type = UTIL_FORMAT_TYPE_FIXED; + } else if (type.sign) { + chan_type = UTIL_FORMAT_TYPE_SIGNED; + } else { + chan_type = UTIL_FORMAT_TYPE_UNSIGNED; + } + + for (chan = 0; chan < desc->nr_channels; ++chan) { + if (desc->channel[chan].size != type.width) { + return FALSE; + } + + if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) { + if (desc->channel[chan].type != chan_type || + desc->channel[chan].normalized != type.norm) { + return FALSE; + } + } + } + + return TRUE; +} + + +/** * Unpack a single pixel into its RGBA components. * * @param desc the pixel format for the packed pixel value * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM * - * @return RGBA in a 4 floats vector. + * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector. 
*/ -LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - LLVMValueRef packed) +static INLINE LLVMValueRef +lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef packed) { LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; LLVMValueRef masks[4]; LLVMValueRef scales[4]; - LLVMValueRef swizzles[4]; - LLVMValueRef aux[4]; + boolean normalized; - int empty_channel; boolean needs_uitofp; unsigned shift; unsigned i; @@ -77,8 +166,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, /* Do the intermediate integer computations with 32bit integers since it * matches floating point size */ - if (desc->block.bits < 32) - packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); + assert (LLVMTypeOf(packed) == LLVMInt32Type()); /* Broadcast the packed value to all four channels * before: packed = BGRA @@ -98,7 +186,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, /* Initialize vector constants */ normalized = FALSE; needs_uitofp = FALSE; - empty_channel = -1; shift = 0; /* Loop over 4 color components */ @@ -109,7 +196,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, shifts[i] = LLVMGetUndef(LLVMInt32Type()); masks[i] = LLVMConstNull(LLVMInt32Type()); scales[i] = LLVMConstNull(LLVMFloatType()); - empty_channel = i; } else { unsigned long long mask = (1ULL << bits) - 1; @@ -158,52 +244,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, else scaled = casted; - for (i = 0; i < 4; ++i) - aux[i] = LLVMGetUndef(LLVMFloatType()); - - /* Build swizzles vector to put components into R,G,B,A order */ - for (i = 0; i < 4; ++i) { - enum util_format_swizzle swizzle; - - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - /* - * For ZS formats do RGBA = ZZZ1 - */ - if (i == 3) { - swizzle = UTIL_FORMAT_SWIZZLE_1; - } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { - swizzle = UTIL_FORMAT_SWIZZLE_0; - } else { - swizzle = 
desc->swizzle[0]; - } - } else { - swizzle = desc->swizzle[i]; - } - - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_X: - case UTIL_FORMAT_SWIZZLE_Y: - case UTIL_FORMAT_SWIZZLE_Z: - case UTIL_FORMAT_SWIZZLE_W: - swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0); - break; - case UTIL_FORMAT_SWIZZLE_0: - assert(empty_channel >= 0); - swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0); - break; - case UTIL_FORMAT_SWIZZLE_1: - swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0); - aux[0] = LLVMConstReal(LLVMFloatType(), 1.0); - break; - case UTIL_FORMAT_SWIZZLE_NONE: - swizzles[i] = LLVMGetUndef(LLVMFloatType()); - assert(0); - break; - } - } - - return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), - LLVMConstVector(swizzles, 4), ""); + return scaled; } @@ -310,22 +351,65 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, } + + /** * Fetch a pixel into a 4 float AoS. * * \param format_desc describes format of the image we're fetching from * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. For non-compressed formats - * these will always be (0,). - * \return valueRef with the float[4] RGBA pixel + * these will always be (0, 0). + * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef lp_build_fetch_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *format_desc, - LLVMValueRef ptr, + struct lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offset, LLVMValueRef i, LLVMValueRef j) { + unsigned num_pixels = type.length / 4; + struct lp_build_context bld; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + assert(type.length % 4 == 0); + + lp_build_context_init(&bld, builder, type); + + /* + * Trivial case + * + * The format matches the type (apart of a swizzle) so no need for + * scaling or converting. 
+ */ + + if (format_matches_type(format_desc, type) && + format_desc->block.bits <= type.width * 4 && + util_is_pot(format_desc->block.bits)) { + LLVMValueRef packed; + + /* + * The format matches the type (apart of a swizzle) so no need for + * scaling or converting. + */ + + packed = lp_build_gather(builder, type.length/4, + format_desc->block.bits, type.width*4, + base_ptr, offset); + + assert(format_desc->block.bits <= type.width * type.length); + + packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(type), ""); + + return lp_build_format_swizzle_aos(format_desc, &bld, packed); + } + + /* + * Bit arithmetic + */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || @@ -337,21 +421,77 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, format_desc->is_bitmask && !format_desc->is_mixed && (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || - format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) - { - LLVMValueRef packed; + format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { + + LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; + LLVMValueRef res; + unsigned k; + + /* + * Unpack a pixel at a time into a <4 x float> RGBA vector + */ + + for (k = 0; k < num_pixels; ++k) { + LLVMValueRef packed; + + packed = lp_build_gather_elem(builder, num_pixels, + format_desc->block.bits, 32, + base_ptr, offset, k); - ptr = LLVMBuildBitCast(builder, ptr, - LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) , - ""); + tmps[k] = lp_build_unpack_arith_rgba_aos(builder, format_desc, + packed); + } + + /* + * Type conversion. + * + * TODO: We could avoid floating conversion for integer to + * integer conversions. 
+ */ - packed = LLVMBuildLoad(builder, ptr, "packed"); + lp_build_conv(builder, + lp_float32_vec4_type(), + type, + tmps, num_pixels, &res, 1); - return lp_build_unpack_rgba_aos(builder, format_desc, packed); + return lp_build_format_swizzle_aos(format_desc, &bld, res); } - else if (format_desc->fetch_rgba_float) { + + /* + * YUV / subsampled formats + */ + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { + struct lp_type tmp_type; + LLVMValueRef tmp; + + memset(&tmp_type, 0, sizeof tmp_type); + tmp_type.width = 8; + tmp_type.length = num_pixels * 4; + tmp_type.norm = TRUE; + + tmp = lp_build_fetch_subsampled_rgba_aos(builder, + format_desc, + num_pixels, + base_ptr, + offset, + i, j); + + lp_build_conv(builder, + tmp_type, type, + &tmp, 1, &tmp, 1); + + return tmp; + } + + /* + * Fallback to util_format_description::fetch_rgba_8unorm(). + */ + + if (format_desc->fetch_rgba_8unorm && + !type.floating && type.width == 8 && !type.sign && type.norm) { /* - * Fallback to calling util_format_description::fetch_rgba_float. + * Fallback to calling util_format_description::fetch_rgba_8unorm. * * This is definitely not the most efficient way of fetching pixels, as * we miss the opportunity to do vectorization, but this it is a @@ -361,9 +501,113 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); char name[256]; + LLVMTypeRef i8t = LLVMInt8Type(); + LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); + LLVMTypeRef i32t = LLVMInt32Type(); LLVMValueRef function; + LLVMValueRef tmp_ptr; LLVMValueRef tmp; - LLVMValueRef args[4]; + LLVMValueRef res; + unsigned k; + + util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm", + format_desc->short_name); + + /* + * Declare and bind format_desc->fetch_rgba_8unorm(). 
+ */ + + function = LLVMGetNamedFunction(module, name); + if (!function) { + LLVMTypeRef ret_type; + LLVMTypeRef arg_types[4]; + LLVMTypeRef function_type; + + ret_type = LLVMVoidType(); + arg_types[0] = pi8t; + arg_types[1] = pi8t; + arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); + function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); + function = LLVMAddFunction(module, name, function_type); + + LLVMSetFunctionCallConv(function, LLVMCCallConv); + LLVMSetLinkage(function, LLVMExternalLinkage); + + assert(LLVMIsDeclaration(function)); + + LLVMAddGlobalMapping(lp_build_engine, function, + func_to_pointer((func_pointer)format_desc->fetch_rgba_8unorm)); + } + + tmp_ptr = lp_build_alloca(builder, i32t, ""); + + res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels)); + + /* + * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result + * in the SoA vectors. + */ + + for (k = 0; k < num_pixels; ++k) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); + LLVMValueRef args[4]; + + args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); + args[1] = lp_build_gather_elem_ptr(builder, num_pixels, + base_ptr, offset, k); + + if (num_pixels == 1) { + args[2] = i; + args[3] = j; + } + else { + args[2] = LLVMBuildExtractElement(builder, i, index, ""); + args[3] = LLVMBuildExtractElement(builder, j, index, ""); + } + + LLVMBuildCall(builder, function, args, Elements(args), ""); + + tmp = LLVMBuildLoad(builder, tmp_ptr, ""); + + if (num_pixels == 1) { + res = tmp; + } + else { + res = LLVMBuildInsertElement(builder, res, tmp, index, ""); + } + } + + /* Bitcast from <n x i32> to <4n x i8> */ + res = LLVMBuildBitCast(builder, res, bld.vec_type, ""); + + return res; + } + + + /* + * Fallback to util_format_description::fetch_rgba_float(). + */ + + if (format_desc->fetch_rgba_float) { + /* + * Fallback to calling util_format_description::fetch_rgba_float. 
+ * + * This is definitely not the most efficient way of fetching pixels, as + * we miss the opportunity to do vectorization, but this it is a + * convenient for formats or scenarios for which there was no opportunity + * or incentive to optimize. + */ + + LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); + char name[256]; + LLVMTypeRef f32t = LLVMFloatType(); + LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); + LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); + LLVMValueRef function; + LLVMValueRef tmp_ptr; + LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; + LLVMValueRef res; + unsigned k; util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name); @@ -379,7 +623,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMTypeRef function_type; ret_type = LLVMVoidType(); - arg_types[0] = LLVMPointerType(LLVMFloatType(), 0); + arg_types[0] = pf32t; arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); @@ -394,25 +638,43 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, func_to_pointer((func_pointer)format_desc->fetch_rgba_float)); } - tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); + tmp_ptr = lp_build_alloca(builder, f32x4t, ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result * in the SoA vectors. 
*/ - args[0] = LLVMBuildBitCast(builder, tmp, - LLVMPointerType(LLVMFloatType(), 0), ""); - args[1] = ptr; - args[2] = i; - args[3] = j; + for (k = 0; k < num_pixels; ++k) { + LLVMValueRef args[4]; - LLVMBuildCall(builder, function, args, Elements(args), ""); + args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); + args[1] = lp_build_gather_elem_ptr(builder, num_pixels, + base_ptr, offset, k); - return LLVMBuildLoad(builder, tmp, ""); - } - else { - assert(0); - return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); + if (num_pixels == 1) { + args[2] = i; + args[3] = j; + } + else { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); + args[2] = LLVMBuildExtractElement(builder, i, index, ""); + args[3] = LLVMBuildExtractElement(builder, j, index, ""); + } + + LLVMBuildCall(builder, function, args, Elements(args), ""); + + tmps[k] = LLVMBuildLoad(builder, tmp_ptr, ""); + } + + lp_build_conv(builder, + lp_float32_vec4_type(), + type, + tmps, num_pixels, &res, 1); + + return res; } + + assert(0); + return lp_build_undef(type); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index e1b94adc85..9f405921b0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -36,7 +36,7 @@ #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_swizzle.h" -#include "lp_bld_sample.h" /* for lp_build_gather */ +#include "lp_bld_gather.h" #include "lp_bld_format.h" @@ -251,6 +251,41 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, } +void +lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, + struct lp_type dst_type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff); + unsigned chan; + + packed = LLVMBuildBitCast(builder, packed, + lp_build_int_vec_type(dst_type), ""); + + /* Decode the input vector components */ + for (chan = 0; chan < 4; ++chan) { + unsigned start = 
chan*8; + unsigned stop = start + 8; + LLVMValueRef input; + + input = packed; + + if (start) + input = LLVMBuildLShr(builder, input, + lp_build_const_int_vec(dst_type, start), ""); + + if (stop < 32) + input = LLVMBuildAnd(builder, input, mask, ""); + + input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); + + rgba[chan] = input; + } +} + + + /** * Fetch a texels from a texture, returning them in SoA layout. * @@ -311,20 +346,49 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, format_desc, type, packed, rgba_out); + return; } - else { - /* - * Fallback to calling lp_build_fetch_rgba_aos for each pixel. - * - * This is not the most efficient way of fetching pixels, as we - * miss some opportunities to do vectorization, but this is - * convenient for formats or scenarios for which there was no - * opportunity or incentive to optimize. - */ + /* + * Try calling lp_build_fetch_rgba_aos for all pixels. + */ + + if (util_format_fits_8unorm(format_desc) && + type.floating && type.width == 32 && type.length == 4) { + struct lp_type tmp_type; + LLVMValueRef tmp; + + memset(&tmp_type, 0, sizeof tmp_type); + tmp_type.width = 8; + tmp_type.length = type.length * 4; + tmp_type.norm = TRUE; + + tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type, + base_ptr, offset, i, j); + + lp_build_rgba8_to_f32_soa(builder, + type, + tmp, + rgba_out); + + return; + } + + /* + * Fallback to calling lp_build_fetch_rgba_aos for each pixel. + * + * This is not the most efficient way of fetching pixels, as we + * miss some opportunities to do vectorization, but this is + * convenient for formats or scenarios for which there was no + * opportunity or incentive to optimize. 
+ */ + + { unsigned k, chan; + struct lp_type tmp_type; - assert(type.floating); + tmp_type = type; + tmp_type.length = 4; for (chan = 0; chan < 4; ++chan) { rgba_out[chan] = lp_build_undef(type); @@ -334,18 +398,17 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, for(k = 0; k < type.length; ++k) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); LLVMValueRef offset_elem; - LLVMValueRef ptr; LLVMValueRef i_elem, j_elem; LLVMValueRef tmp; offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); - ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, ""); i_elem = LLVMBuildExtractElement(builder, i, index, ""); j_elem = LLVMBuildExtractElement(builder, j, index, ""); /* Get a single float[4]={R,G,B,A} pixel */ - tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, + tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type, + base_ptr, offset_elem, i_elem, j_elem); /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c new file mode 100644 index 0000000000..0a5038bc98 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c @@ -0,0 +1,399 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +/** + * @file + * YUV pixel format manipulation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "util/u_format.h" + +#include "lp_bld_arit.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_gather.h" +#include "lp_bld_format.h" + + +/** + * Extract Y, U, V channels from packed UYVY. + * @param packed is a <n x i32> vector with the packed UYVY blocks + * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) + */ +static void +uyvy_to_yuv_soa(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i, + LLVMValueRef *y, + LLVMValueRef *u, + LLVMValueRef *v) +{ + struct lp_type type; + LLVMValueRef shift, mask; + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + assert(lp_check_value(type, packed)); + assert(lp_check_value(type, i)); + + /* + * y = (uyvy >> 16*i) & 0xff + * u = (uyvy ) & 0xff + * v = (uyvy >> 16 ) & 0xff + */ + + shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); + shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), ""); + *y = LLVMBuildLShr(builder, packed, shift, ""); + *u = packed; + *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), ""); + + mask = lp_build_const_int_vec(type, 0xff); + + *y = LLVMBuildAnd(builder, *y, mask, "y"); + *u = LLVMBuildAnd(builder, *u, mask, "u"); + *v = LLVMBuildAnd(builder, *v, mask, "v"); 
+} + + +/** + * Extract Y, U, V channels from packed YUYV. + * @param packed is a <n x i32> vector with the packed YUYV blocks + * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) + */ +static void +yuyv_to_yuv_soa(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i, + LLVMValueRef *y, + LLVMValueRef *u, + LLVMValueRef *v) +{ + struct lp_type type; + LLVMValueRef shift, mask; + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + assert(lp_check_value(type, packed)); + assert(lp_check_value(type, i)); + + /* + * y = (yuyv >> 16*i) & 0xff + * u = (yuyv >> 8 ) & 0xff + * v = (yuyv >> 24 ) & 0xff + */ + + shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); + *y = LLVMBuildLShr(builder, packed, shift, ""); + *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), ""); + *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), ""); + + mask = lp_build_const_int_vec(type, 0xff); + + *y = LLVMBuildAnd(builder, *y, mask, "y"); + *u = LLVMBuildAnd(builder, *u, mask, "u"); + *v = LLVMBuildAnd(builder, *v, mask, "v"); +} + + +static INLINE void +yuv_to_rgb_soa(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, + LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) +{ + struct lp_type type; + struct lp_build_context bld; + + LLVMValueRef c0; + LLVMValueRef c8; + LLVMValueRef c16; + LLVMValueRef c128; + LLVMValueRef c255; + + LLVMValueRef cy; + LLVMValueRef cug; + LLVMValueRef cub; + LLVMValueRef cvr; + LLVMValueRef cvg; + + memset(&type, 0, sizeof type); + type.sign = TRUE; + type.width = 32; + type.length = n; + + lp_build_context_init(&bld, builder, type); + + assert(lp_check_value(type, y)); + assert(lp_check_value(type, u)); + assert(lp_check_value(type, v)); + + /* + * Constants + */ + + c0 = lp_build_const_int_vec(type, 0); + c8 = lp_build_const_int_vec(type, 8); + c16 = lp_build_const_int_vec(type, 16); + c128 = 
lp_build_const_int_vec(type, 128); + c255 = lp_build_const_int_vec(type, 255); + + cy = lp_build_const_int_vec(type, 298); + cug = lp_build_const_int_vec(type, -100); + cub = lp_build_const_int_vec(type, 516); + cvr = lp_build_const_int_vec(type, 409); + cvg = lp_build_const_int_vec(type, -208); + + /* + * y -= 16; + * u -= 128; + * v -= 128; + */ + + y = LLVMBuildSub(builder, y, c16, ""); + u = LLVMBuildSub(builder, u, c128, ""); + v = LLVMBuildSub(builder, v, c128, ""); + + /* + * r = 298 * _y + 409 * _v + 128; + * g = 298 * _y - 100 * _u - 208 * _v + 128; + * b = 298 * _y + 516 * _u + 128; + */ + + y = LLVMBuildMul(builder, y, cy, ""); + y = LLVMBuildAdd(builder, y, c128, ""); + + *r = LLVMBuildMul(builder, v, cvr, ""); + *g = LLVMBuildAdd(builder, + LLVMBuildMul(builder, u, cug, ""), + LLVMBuildMul(builder, v, cvg, ""), + ""); + *b = LLVMBuildMul(builder, u, cub, ""); + + *r = LLVMBuildAdd(builder, *r, y, ""); + *g = LLVMBuildAdd(builder, *g, y, ""); + *b = LLVMBuildAdd(builder, *b, y, ""); + + /* + * r >>= 8; + * g >>= 8; + * b >>= 8; + */ + + *r = LLVMBuildAShr(builder, *r, c8, "r"); + *g = LLVMBuildAShr(builder, *g, c8, "g"); + *b = LLVMBuildAShr(builder, *b, c8, "b"); + + /* + * Clamp + */ + + *r = lp_build_clamp(&bld, *r, c0, c255); + *g = lp_build_clamp(&bld, *g, c0, c255); + *b = lp_build_clamp(&bld, *b, c0, c255); +} + + +static LLVMValueRef +rgb_to_rgba_aos(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef r, LLVMValueRef g, LLVMValueRef b) +{ + struct lp_type type; + LLVMValueRef a; + LLVMValueRef rgba; + + memset(&type, 0, sizeof type); + type.sign = TRUE; + type.width = 32; + type.length = n; + + assert(lp_check_value(type, r)); + assert(lp_check_value(type, g)); + assert(lp_check_value(type, b)); + + /* + * Make a 4 x unorm8 vector + */ + + r = r; + g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), ""); + b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), ""); + a = lp_build_const_int_vec(type, 0xff000000); + + rgba = r; 
+ rgba = LLVMBuildOr(builder, rgba, g, ""); + rgba = LLVMBuildOr(builder, rgba, b, ""); + rgba = LLVMBuildOr(builder, rgba, a, ""); + + rgba = LLVMBuildBitCast(builder, rgba, + LLVMVectorType(LLVMInt8Type(), 4*n), ""); + + return rgba; +} + + +/** + * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS + */ +static LLVMValueRef +uyvy_to_rgba_aos(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i) +{ + LLVMValueRef y, u, v; + LLVMValueRef r, g, b; + LLVMValueRef rgba; + + uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v); + yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b); + rgba = rgb_to_rgba_aos(builder, n, r, g, b); + + return rgba; +} + + +/** + * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS + */ +static LLVMValueRef +yuyv_to_rgba_aos(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i) +{ + LLVMValueRef y, u, v; + LLVMValueRef r, g, b; + LLVMValueRef rgba; + + yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v); + yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b); + rgba = rgb_to_rgba_aos(builder, n, r, g, b); + + return rgba; +} + + +/** + * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS + */ +static LLVMValueRef +rgbg_to_rgba_aos(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i) +{ + LLVMValueRef r, g, b; + LLVMValueRef rgba; + + uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b); + rgba = rgb_to_rgba_aos(builder, n, r, g, b); + + return rgba; +} + + +/** + * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS + */ +static LLVMValueRef +grgb_to_rgba_aos(LLVMBuilderRef builder, + unsigned n, + LLVMValueRef packed, + LLVMValueRef i) +{ + LLVMValueRef r, g, b; + LLVMValueRef rgba; + + yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b); + rgba = rgb_to_rgba_aos(builder, n, r, g, b); + + return rgba; +} + + +/** + * @param n is the number of pixels processed + * @param packed is a <n x i32> vector with the packed YUYV blocks + * @param i is a <n x i32> 
vector with the x pixel coordinate (0 or 1) + * @return a <4*n x i8> vector with the pixel RGBA values in AoS + */ +LLVMValueRef +lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j) +{ + LLVMValueRef packed; + LLVMValueRef rgba; + + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED); + assert(format_desc->block.bits == 32); + assert(format_desc->block.width == 2); + assert(format_desc->block.height == 1); + + packed = lp_build_gather(builder, n, 32, 32, base_ptr, offset); + + (void)j; + + switch (format_desc->format) { + case PIPE_FORMAT_UYVY: + rgba = uyvy_to_rgba_aos(builder, n, packed, i); + break; + case PIPE_FORMAT_YUYV: + rgba = yuyv_to_rgba_aos(builder, n, packed, i); + break; + case PIPE_FORMAT_R8G8_B8G8_UNORM: + rgba = rgbg_to_rgba_aos(builder, n, packed, i); + break; + case PIPE_FORMAT_G8R8_G8B8_UNORM: + rgba = grgb_to_rgba_aos(builder, n, packed, i); + break; + default: + assert(0); + rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n)); + break; + } + + return rgba; +} + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c new file mode 100644 index 0000000000..d60472e065 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c @@ -0,0 +1,148 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "util/u_debug.h" +#include "lp_bld_debug.h" +#include "lp_bld_const.h" +#include "lp_bld_format.h" +#include "lp_bld_gather.h" + + +/** + * Get the pointer to one element from scatter positions in memory. 
+ * + * @sa lp_build_gather() + */ +LLVMValueRef +lp_build_gather_elem_ptr(LLVMBuilderRef builder, + unsigned length, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + unsigned i) +{ + LLVMValueRef offset; + LLVMValueRef ptr; + + assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0)); + + if (length == 1) { + assert(i == 0); + offset = offsets; + } else { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + offset = LLVMBuildExtractElement(builder, offsets, index, ""); + } + + ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, ""); + + return ptr; +} + + +/** + * Gather one element from scatter positions in memory. + * + * @sa lp_build_gather() + */ +LLVMValueRef +lp_build_gather_elem(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + unsigned i) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMValueRef ptr; + LLVMValueRef res; + + assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0)); + + ptr = lp_build_gather_elem_ptr(builder, length, base_ptr, offsets, i); + ptr = LLVMBuildBitCast(builder, ptr, src_ptr_type, ""); + res = LLVMBuildLoad(builder, ptr, ""); + + assert(src_width <= dst_width); + if (src_width > dst_width) + res = LLVMBuildTrunc(builder, res, dst_elem_type, ""); + if (src_width < dst_width) + res = LLVMBuildZExt(builder, res, dst_elem_type, ""); + + return res; +} + + +/** + * Gather elements from scatter positions in memory into a single vector. + * Use for fetching texels from a texture. + * For SSE, typical values are length=4, src_width=32, dst_width=32. + * + * @param length length of the offsets + * @param src_width src element width in bits + * @param dst_width result element width in bits (src will be expanded to fit) + * @param base_ptr base pointer, should be a i8 pointer type. 
+ * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMValueRef res; + + if (length == 1) { + /* Scalar */ + return lp_build_gather_elem(builder, length, + src_width, dst_width, + base_ptr, offsets, 0); + } else { + /* Vector */ + + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for (i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem; + elem = lp_build_gather_elem(builder, length, + src_width, dst_width, + base_ptr, offsets, i); + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + } + + return res; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/src/gallium/auxiliary/gallivm/lp_bld_gather.h new file mode 100644 index 0000000000..131af8ea07 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.h @@ -0,0 +1,61 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef LP_BLD_GATHER_H_ +#define LP_BLD_GATHER_H_ + + +#include "gallivm/lp_bld.h" + + +LLVMValueRef +lp_build_gather_elem_ptr(LLVMBuilderRef builder, + unsigned length, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + unsigned i); + +LLVMValueRef +lp_build_gather_elem(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + unsigned i); + +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +#endif /* LP_BLD_GATHER_H_ */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 44cfdc4d3f..69353dea09 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -32,6 +32,8 @@ #include "lp_bld_debug.h" #include "lp_bld_init.h" +#include <llvm-c/Transforms/Scalar.h> + #ifdef DEBUG unsigned gallivm_debug = 0; @@ -50,6 +52,7 @@ LLVMModuleRef lp_build_module = NULL; LLVMExecutionEngineRef lp_build_engine = NULL; LLVMModuleProviderRef lp_build_provider = NULL; LLVMTargetDataRef lp_build_target = NULL; +LLVMPassManagerRef lp_build_pass = NULL; /* @@ -127,6 +130,33 @@ lp_build_init(void) if (!lp_build_target) lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine); + if (!lp_build_pass) { + lp_build_pass = 
LLVMCreateFunctionPassManager(lp_build_provider); + LLVMAddTargetData(lp_build_target, lp_build_pass); + + if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + /* TODO: Add more passes */ + LLVMAddCFGSimplificationPass(lp_build_pass); + LLVMAddPromoteMemoryToRegisterPass(lp_build_pass); + LLVMAddConstantPropagationPass(lp_build_pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(lp_build_pass); + } + LLVMAddGVNPass(lp_build_pass); + } else { + /* We need at least this pass to prevent the backends to fail in + * unexpected ways. + */ + LLVMAddPromoteMemoryToRegisterPass(lp_build_pass); + } + } + util_cpu_detect(); #if 0 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index 0ec2afcd1b..a32ced9b4c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -38,6 +38,7 @@ extern LLVMModuleRef lp_build_module; extern LLVMExecutionEngineRef lp_build_engine; extern LLVMModuleProviderRef lp_build_provider; extern LLVMTargetDataRef lp_build_target; +extern LLVMPassManagerRef lp_build_pass; void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index d13fa1a5d0..39854e43b1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -34,6 +34,7 @@ #include "util/u_cpu_detect.h" +#include "util/u_memory.h" #include "util/u_debug.h" #include "lp_bld_type.h" @@ -187,12 +188,10 @@ lp_build_compare(LLVMBuilderRef builder, return lp_build_undef(type); } - /* There are no signed byte and unsigned word/dword comparison - * instructions. 
So flip the sign bit so that the results match. + /* There are no unsigned comparison instructions. So flip the sign bit + * so that the results match. */ - if(table[func].gt && - ((type.width == 8 && type.sign) || - (type.width != 8 && !type.sign))) { + if (table[func].gt && !type.sign) { LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); a = LLVMBuildXor(builder, a, msb, ""); b = LLVMBuildXor(builder, b, msb, ""); @@ -384,6 +383,46 @@ lp_build_select(struct lp_build_context *bld, mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); res = LLVMBuildSelect(bld->builder, mask, a, b, ""); } + else if (util_cpu_caps.has_sse4_1 && + type.width * type.length == 128 && + !LLVMIsConstant(a) && + !LLVMIsConstant(b) && + !LLVMIsConstant(mask)) { + const char *intrinsic; + LLVMTypeRef arg_type; + LLVMValueRef args[3]; + + if (type.width == 64) { + intrinsic = "llvm.x86.sse41.blendvpd"; + arg_type = LLVMVectorType(LLVMDoubleType(), 2); + } else if (type.width == 32) { + intrinsic = "llvm.x86.sse41.blendvps"; + arg_type = LLVMVectorType(LLVMFloatType(), 4); + } else { + intrinsic = "llvm.x86.sse41.pblendvb"; + arg_type = LLVMVectorType(LLVMInt8Type(), 16); + } + + if (arg_type != bld->int_vec_type) { + mask = LLVMBuildBitCast(bld->builder, mask, arg_type, ""); + } + + if (arg_type != bld->vec_type) { + a = LLVMBuildBitCast(bld->builder, a, arg_type, ""); + b = LLVMBuildBitCast(bld->builder, b, arg_type, ""); + } + + args[0] = b; + args[1] = a; + args[2] = mask; + + res = lp_build_intrinsic(bld->builder, intrinsic, + arg_type, args, Elements(args)); + + if (arg_type != bld->vec_type) { + res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); + } + } else { if(type.floating) { LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 186f8849b8..7748f8f099 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ 
b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -427,3 +427,123 @@ lp_build_pack(LLVMBuilderRef builder, return tmp[0]; } + + +/** + * Truncate or expand the bitwidth. + * + * NOTE: Getting the right sign flags is crucial here, as we employ some + * intrinsics that do saturation. + */ +void +lp_build_resize(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + const LLVMValueRef *src, unsigned num_srcs, + LLVMValueRef *dst, unsigned num_dsts) +{ + LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + /* + * We don't support float <-> int conversion here. That must be done + * before/after calling this function. + */ + assert(src_type.floating == dst_type.floating); + + /* + * We don't support double <-> float conversion yet, although it could be + * added with little effort. + */ + assert((!src_type.floating && !dst_type.floating) || + src_type.width == dst_type.width); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length * num_srcs == dst_type.length * num_dsts); + + /* We don't support M:N conversion, only 1:N, M:1, or 1:1 */ + assert(num_srcs == 1 || num_dsts == 1); + + assert(src_type.length <= LP_MAX_VECTOR_LENGTH); + assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); + assert(num_srcs <= LP_MAX_VECTOR_LENGTH); + assert(num_dsts <= LP_MAX_VECTOR_LENGTH); + + if (src_type.width > dst_type.width) { + /* + * Truncate bit width. + */ + + assert(num_dsts == 1); + + if (src_type.width * src_type.length == dst_type.width * dst_type.length) { + /* + * Register width remains constant -- use vector packing intrinsics + */ + + tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); + } + else { + /* + * Do it element-wise. 
+ */ + + assert(src_type.length == dst_type.length); + tmp[0] = lp_build_undef(dst_type); + for (i = 0; i < dst_type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, ""); + val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), ""); + tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, ""); + } + } + } + else if (src_type.width < dst_type.width) { + /* + * Expand bit width. + */ + + assert(num_srcs == 1); + + if (src_type.width * src_type.length == dst_type.width * dst_type.length) { + /* + * Register width remains constant -- use vector unpack intrinsics + */ + lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts); + } + else { + /* + * Do it element-wise. + */ + + assert(src_type.length == dst_type.length); + tmp[0] = lp_build_undef(dst_type); + for (i = 0; i < dst_type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, ""); + + if (src_type.sign && dst_type.sign) { + val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), ""); + } else { + val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), ""); + } + tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, ""); + } + } + } + else { + /* + * No-op + */ + + assert(num_srcs == 1); + assert(num_dsts == 1); + + tmp[0] = src[0]; + } + + for(i = 0; i < num_dsts; ++i) + dst[i] = tmp[i]; +} + + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index 41adeed220..e470082b97 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -92,4 +92,12 @@ lp_build_pack(LLVMBuilderRef builder, const LLVMValueRef *src, unsigned num_srcs); +void +lp_build_resize(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + const LLVMValueRef *src, unsigned num_srcs, + 
LLVMValueRef *dst, unsigned num_dsts); + + #endif /* !LP_BLD_PACK_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index 38fd5a39ef..ca36046d22 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -61,8 +61,8 @@ LLVMValueRef lp_build_ddx(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef a_left = lp_build_swizzle1_aos(bld, a, swizzle_left); - LLVMValueRef a_right = lp_build_swizzle1_aos(bld, a, swizzle_right); + LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left); + LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right); return lp_build_sub(bld, a_right, a_left); } @@ -71,8 +71,8 @@ LLVMValueRef lp_build_ddy(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef a_top = lp_build_swizzle1_aos(bld, a, swizzle_top); - LLVMValueRef a_bottom = lp_build_swizzle1_aos(bld, a, swizzle_bottom); + LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top); + LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom); return lp_build_sub(bld, a_bottom, a_top); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 946c23e317..0fd014ab9b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -40,7 +40,6 @@ #include "lp_bld_const.h" #include "lp_bld_arit.h" #include "lp_bld_type.h" -#include "lp_bld_format.h" #include "lp_bld_sample.h" @@ -125,73 +124,53 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, /** - * Gather elements from scatter positions in memory into a single vector. - * Use for fetching texels from a texture. - * For SSE, typical values are length=4, src_width=32, dst_width=32. 
- * - * @param length length of the offsets - * @param src_width src element width in bits - * @param dst_width result element width in bits (src will be expanded to fit) - * @param base_ptr base pointer, should be a i8 pointer type. - * @param offsets vector with offsets - */ -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets) -{ - LLVMTypeRef src_type = LLVMIntType(src_width); - LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); - LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); - LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); - LLVMValueRef res; - unsigned i; - - res = LLVMGetUndef(dst_vec_type); - for(i = 0; i < length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef elem_offset; - LLVMValueRef elem_ptr; - LLVMValueRef elem; - - elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); - elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); - elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); - elem = LLVMBuildLoad(builder, elem_ptr, ""); - - assert(src_width <= dst_width); - if(src_width > dst_width) - elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); - if(src_width < dst_width) - elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); - - res = LLVMBuildInsertElement(builder, res, elem, index, ""); - } - - return res; -} - - -/** * Compute the offset of a pixel block. * - * x, y, z, y_stride, z_stride are vectors, and they refer to pixel blocks, as - * per format description, and not individual pixels. + * x, y, z, y_stride, z_stride are vectors, and they refer to pixels. 
+ * + * Returns the relative offset and i,j sub-block coordinates */ -LLVMValueRef +void lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, LLVMValueRef z, LLVMValueRef y_stride, - LLVMValueRef z_stride) + LLVMValueRef z_stride, + LLVMValueRef *out_offset, + LLVMValueRef *out_i, + LLVMValueRef *out_j) { LLVMValueRef x_stride; LLVMValueRef offset; + LLVMValueRef i; + LLVMValueRef j; + + /* + * Describe the coordinates in terms of pixel blocks. + * + * TODO: pixel blocks are power of two. LLVM should convert rem/div to + * bit arithmetic. Verify this. + */ + + if (format_desc->block.width == 1) { + i = bld->zero; + } + else { + LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width); + i = LLVMBuildURem(bld->builder, x, block_width, ""); + x = LLVMBuildUDiv(bld->builder, x, block_width, ""); + } + + if (format_desc->block.height == 1) { + j = bld->zero; + } + else { + LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height); + j = LLVMBuildURem(bld->builder, y, block_height, ""); + y = LLVMBuildUDiv(bld->builder, y, block_height, ""); + } x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8); offset = lp_build_mul(bld, x, x_stride); @@ -206,5 +185,7 @@ lp_build_sample_offset(struct lp_build_context *bld, offset = lp_build_add(bld, offset, z_offset); } - return offset; + *out_offset = offset; + *out_i = i; + *out_j = j; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 51e98ab2f9..5b8f478094 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -146,23 +146,17 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_sampler_state *sampler); -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - 
LLVMValueRef base_ptr, - LLVMValueRef offsets); - - -LLVMValueRef +void lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, LLVMValueRef z, LLVMValueRef y_stride, - LLVMValueRef z_stride); + LLVMValueRef z_stride, + LLVMValueRef *out_offset, + LLVMValueRef *out_i, + LLVMValueRef *out_j); void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 84c04fe272..1a20d74cac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -50,8 +50,10 @@ #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" #include "lp_bld_flow.h" +#include "lp_bld_gather.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" +#include "lp_bld_quad.h" /** @@ -264,35 +266,11 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, } } - /* - * Describe the coordinates in terms of pixel blocks. - * - * TODO: pixel blocks are power of two. LLVM should convert rem/div to - * bit arithmetic. Verify this. 
- */ - - if (bld->format_desc->block.width == 1) { - i = bld->uint_coord_bld.zero; - } - else { - LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width); - i = LLVMBuildURem(bld->builder, x, block_width, ""); - x = LLVMBuildUDiv(bld->builder, x, block_width, ""); - } - - if (bld->format_desc->block.height == 1) { - j = bld->uint_coord_bld.zero; - } - else { - LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height); - j = LLVMBuildURem(bld->builder, y, block_height, ""); - y = LLVMBuildUDiv(bld->builder, y, block_height, ""); - } - /* convert x,y,z coords to linear offset from start of texture, in bytes */ - offset = lp_build_sample_offset(&bld->uint_coord_bld, - bld->format_desc, - x, y, z, y_stride, z_stride); + lp_build_sample_offset(&bld->uint_coord_bld, + bld->format_desc, + x, y, z, y_stride, z_stride, + &offset, &i, &j); if (use_border) { /* If we can sample the border color, it means that texcoords may @@ -344,6 +322,9 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, } +/** + * Fetch the texels as <4n x i8> in AoS form. 
+ */ static LLVMValueRef lp_build_sample_packed(struct lp_build_sample_context *bld, LLVMValueRef x, @@ -351,25 +332,46 @@ lp_build_sample_packed(struct lp_build_sample_context *bld, LLVMValueRef y_stride, LLVMValueRef data_array) { - LLVMValueRef offset; + LLVMValueRef offset, i, j; LLVMValueRef data_ptr; + LLVMValueRef res; - offset = lp_build_sample_offset(&bld->uint_coord_bld, - bld->format_desc, - x, y, NULL, y_stride, NULL); - - assert(bld->format_desc->block.width == 1); - assert(bld->format_desc->block.height == 1); - assert(bld->format_desc->block.bits <= bld->texel_type.width); + /* convert x,y,z coords to linear offset from start of texture, in bytes */ + lp_build_sample_offset(&bld->uint_coord_bld, + bld->format_desc, + x, y, NULL, y_stride, NULL, + &offset, &i, &j); /* get pointer to mipmap level 0 data */ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); - return lp_build_gather(bld->builder, - bld->texel_type.length, - bld->format_desc->block.bits, - bld->texel_type.width, - data_ptr, offset); + if (util_format_is_rgba8_variant(bld->format_desc)) { + /* Just fetch the data directly without swizzling */ + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); + + res = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); + } + else { + struct lp_type type; + + assert(bld->texel_type.width == 32); + + memset(&type, 0, sizeof type); + type.width = 8; + type.length = bld->texel_type.length*4; + type.norm = TRUE; + + res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type, + data_ptr, offset, i, j); + } + + return res; } @@ -817,9 +819,8 @@ lp_build_minify(struct lp_build_sample_context *bld, /** * Generate code to compute texture level of detail (lambda). 
- * \param s vector of texcoord s values - * \param t vector of texcoord t values - * \param r vector of texcoord r values + * \param ddx partial derivatives of (s, t, r, q) with respect to X + * \param ddy partial derivatives of (s, t, r, q) with respect to Y * \param lod_bias optional float vector with the shader lod bias * \param explicit_lod optional float vector with the explicit lod * \param width scalar int texture width @@ -831,11 +832,8 @@ lp_build_minify(struct lp_build_sample_context *bld, */ static LLVMValueRef lp_build_lod_selector(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - const LLVMValueRef *ddx, - const LLVMValueRef *ddy, + const LLVMValueRef ddx[4], + const LLVMValueRef ddy[4], LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ LLVMValueRef width, @@ -870,14 +868,6 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL; LLVMValueRef rho; - /* - * dsdx = abs(s[1] - s[0]); - * dsdy = abs(s[2] - s[0]); - * dtdx = abs(t[1] - t[0]); - * dtdy = abs(t[2] - t[0]); - * drdx = abs(r[1] - r[0]); - * drdy = abs(r[2] - r[0]); - */ dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); dsdx = lp_build_abs(float_bld, dsdx); dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); @@ -1287,7 +1277,7 @@ lp_build_cube_face(struct lp_build_sample_context *bld, /** - * Generate code to do cube face selection and per-face texcoords. + * Generate code to do cube face selection and compute per-face texcoords. 
*/ static void lp_build_cube_lookup(struct lp_build_sample_context *bld, @@ -1411,7 +1401,6 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, lp_build_endif(&if_ctx2); lp_build_flow_scope_end(flow_ctx2); lp_build_flow_destroy(flow_ctx2); - *face_s = face_s2; *face_t = face_t2; *face = face2; @@ -1457,13 +1446,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, int chan; if (img_filter == PIPE_TEX_FILTER_NEAREST) { + /* sample the first mipmap level */ lp_build_sample_image_nearest(bld, width0_vec, height0_vec, depth0_vec, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, colors0); if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level, and interp */ + /* sample the second mipmap level */ lp_build_sample_image_nearest(bld, width1_vec, height1_vec, depth1_vec, row_stride1_vec, img_stride1_vec, @@ -1473,13 +1463,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); + /* sample the first mipmap level */ lp_build_sample_image_linear(bld, width0_vec, height0_vec, depth0_vec, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, colors0); if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level, and interp */ + /* sample the second mipmap level */ lp_build_sample_image_linear(bld, width1_vec, height1_vec, depth1_vec, row_stride1_vec, img_stride1_vec, @@ -1542,6 +1533,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; LLVMValueRef data_ptr0, data_ptr1 = NULL; + LLVMValueRef face_ddx[4], face_ddy[4]; /* printf("%s mip %d min %d mag %d\n", __FUNCTION__, @@ -1549,6 +1541,30 @@ lp_build_sample_general(struct lp_build_sample_context *bld, */ /* + * Choose cube face, recompute texcoords and derivatives for the chosen face. 
+ */ + if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + LLVMValueRef face, face_s, face_t; + lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); + s = face_s; /* vec */ + t = face_t; /* vec */ + /* use 'r' to indicate cube face */ + r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ + + /* recompute ddx, ddy using the new (s,t) face texcoords */ + face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); + face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); + face_ddx[2] = NULL; + face_ddx[3] = NULL; + face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); + face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); + face_ddy[2] = NULL; + face_ddy[3] = NULL; + ddx = face_ddx; + ddy = face_ddy; + } + + /* * Compute the level of detail (float). */ if (min_filter != mag_filter || @@ -1556,7 +1572,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy, + lod = lp_build_lod_selector(bld, ddx, ddy, lod_bias, explicit_lod, width, height, depth); } @@ -1566,9 +1582,20 @@ lp_build_sample_general(struct lp_build_sample_context *bld, */ if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* always use mip level 0 */ - ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + /* XXX this is a work-around for an apparent bug in LLVM 2.7. + * We should be able to set ilevel0 = const(0) but that causes + * bad x86 code to be emitted. 
+ */ + lod = lp_build_const_elem(bld->coord_bld.type, 0.0); + lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + } + else { + ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + } } else { + assert(lod); if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); } @@ -1623,18 +1650,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld, } /* - * Choose cube face, recompute per-face texcoords. - */ - if (bld->static_state->target == PIPE_TEXTURE_CUBE) { - LLVMValueRef face, face_s, face_t; - lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); - s = face_s; /* vec */ - t = face_t; /* vec */ - /* use 'r' to indicate cube face */ - r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ - } - - /* * Get pointer(s) to image data for mipmap level(s). */ data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); @@ -1712,36 +1727,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld, static void -lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, - struct lp_type dst_type, - LLVMValueRef packed, - LLVMValueRef *rgba) -{ - LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff); - unsigned chan; - - /* Decode the input vector components */ - for (chan = 0; chan < 4; ++chan) { - unsigned start = chan*8; - unsigned stop = start + 8; - LLVMValueRef input; - - input = packed; - - if(start) - input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), ""); - - if(stop < 32) - input = LLVMBuildAnd(builder, input, mask, ""); - - input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); - - rgba[chan] = input; - } -} - - -static void lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, @@ -1935,15 +1920,20 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, * Convert to SoA and swizzle. 
*/ - packed = LLVMBuildBitCast(builder, packed, i32_vec_type, ""); - lp_build_rgba8_to_f32_soa(bld->builder, bld->texel_type, packed, unswizzled); - lp_build_format_swizzle_soa(bld->format_desc, - &bld->texel_bld, - unswizzled, texel_out); + if (util_format_is_rgba8_variant(bld->format_desc)) { + lp_build_format_swizzle_soa(bld->format_desc, + &bld->texel_bld, + unswizzled, texel_out); + } else { + texel_out[0] = unswizzled[0]; + texel_out[1] = unswizzled[1]; + texel_out[2] = unswizzled[2]; + texel_out[3] = unswizzled[3]; + } apply_sampler_swizzle(bld, texel_out); } @@ -2007,6 +1997,8 @@ lp_build_sample_nop(struct lp_build_sample_context *bld, * 'texel' will return a vector of four LLVMValueRefs corresponding to * R, G, B, A. * \param type vector float type to use for coords, etc. + * \param ddx partial derivatives of (s,t,r,q) with respect to x + * \param ddy partial derivatives of (s,t,r,q) with respect to y */ void lp_build_sample_soa(LLVMBuilderRef builder, @@ -2016,8 +2008,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, - const LLVMValueRef *ddx, - const LLVMValueRef *ddy, + const LLVMValueRef ddx[4], + const LLVMValueRef ddy[4], LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ LLVMValueRef texel_out[4]) @@ -2079,7 +2071,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, /* For debug: no-op texture sampling */ lp_build_sample_nop(&bld, texel_out); } - else if (util_format_is_rgba8_variant(bld.format_desc) && + else if (util_format_fits_8unorm(bld.format_desc) && + bld.format_desc->nr_channels > 1 && static_state->target == PIPE_TEXTURE_2D && static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 3c8a7bc09e..20cf96ca66 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ 
b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -110,7 +110,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing * using shuffles here actually causes worst results. More investigation is * needed. */ - if (n <= 4) { + if (type.width >= 16) { /* * Shuffle. */ @@ -132,7 +132,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, * YY00 YY00 .... YY00 * YYYY YYYY .... YYYY <= output */ - struct lp_type type4 = type; + struct lp_type type4; const char shifts[4][2] = { { 1, 2}, {-1, 2}, @@ -147,6 +147,13 @@ lp_build_broadcast_aos(struct lp_build_context *bld, a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), ""); + /* + * Build a type where each element is an integer that cover the four + * channels. + */ + + type4 = type; + type4.floating = FALSE; type4.width *= 4; type4.length /= 4; @@ -176,80 +183,170 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef -lp_build_swizzle1_aos(struct lp_build_context *bld, - LLVMValueRef a, - const unsigned char swizzle[4]) +lp_build_swizzle_aos(struct lp_build_context *bld, + LLVMValueRef a, + const unsigned char swizzles[4]) { - const unsigned n = bld->type.length; + const struct lp_type type = bld->type; + const unsigned n = type.length; unsigned i, j; - if(a == bld->undef || a == bld->zero || a == bld->one) + if (swizzles[0] == PIPE_SWIZZLE_RED && + swizzles[1] == PIPE_SWIZZLE_GREEN && + swizzles[2] == PIPE_SWIZZLE_BLUE && + swizzles[3] == PIPE_SWIZZLE_ALPHA) { return a; + } - if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3]) - return lp_build_broadcast_aos(bld, a, swizzle[0]); + if (swizzles[0] == swizzles[1] && + swizzles[1] == swizzles[2] && + swizzles[2] == swizzles[3]) { + switch (swizzles[0]) { + case PIPE_SWIZZLE_RED: + case PIPE_SWIZZLE_GREEN: + case PIPE_SWIZZLE_BLUE: + case PIPE_SWIZZLE_ALPHA: + return lp_build_broadcast_aos(bld, a, swizzles[0]); + case 
PIPE_SWIZZLE_ZERO: + return bld->zero; + case PIPE_SWIZZLE_ONE: + return bld->one; + default: + assert(0); + return bld->undef; + } + } - { + if (type.width >= 16) { /* * Shuffle. */ - LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type)); + LLVMTypeRef i32t = LLVMInt32Type(); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef aux[LP_MAX_VECTOR_LENGTH]; + + memset(aux, 0, sizeof aux); + + for(j = 0; j < n; j += 4) { + for(i = 0; i < 4; ++i) { + unsigned shuffle; + switch (swizzles[i]) { + default: + assert(0); + /* fall through */ + case PIPE_SWIZZLE_RED: + case PIPE_SWIZZLE_GREEN: + case PIPE_SWIZZLE_BLUE: + case PIPE_SWIZZLE_ALPHA: + shuffle = j + swizzles[i]; + break; + case PIPE_SWIZZLE_ZERO: + shuffle = type.length + 0; + if (!aux[0]) { + aux[0] = lp_build_const_elem(type, 0.0); + } + break; + case PIPE_SWIZZLE_ONE: + shuffle = type.length + 1; + if (!aux[1]) { + aux[1] = lp_build_const_elem(type, 1.0); + } + break; + } + shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); + } + } - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0); - - return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); - } -} - + for (i = 0; i < n; ++i) { + if (!aux[i]) { + aux[i] = undef; + } + } -LLVMValueRef -lp_build_swizzle2_aos(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b, - const unsigned char swizzle[4]) -{ - const unsigned n = bld->type.length; - unsigned i, j; + return LLVMBuildShuffleVector(bld->builder, a, + LLVMConstVector(aux, n), + LLVMConstVector(shuffles, n), ""); + } else { + /* + * Bit mask and shifts. 
+ * + * For example, this will convert BGRA to RGBA by doing + * + * rgba = (bgra & 0x00ff0000) >> 16 + * | (bgra & 0xff00ff00) + * | (bgra & 0x000000ff) << 16 + * + * This is necessary not only for faster cause, but because X86 backend + * will refuse shuffles of <4 x i8> vectors + */ + LLVMValueRef res; + struct lp_type type4; + boolean cond[4]; + unsigned chan; + int shift; - if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4) - return lp_build_swizzle1_aos(bld, a, swizzle); + /* + * Start with a mixture of 1 and 0. + */ + for (chan = 0; chan < 4; ++chan) { + cond[chan] = swizzles[chan] == PIPE_SWIZZLE_ONE ? TRUE : FALSE; + } + res = lp_build_select_aos(bld, bld->one, bld->zero, cond); - if(a == b) { - unsigned char swizzle1[4]; - swizzle1[0] = swizzle[0] % 4; - swizzle1[1] = swizzle[1] % 4; - swizzle1[2] = swizzle[2] % 4; - swizzle1[3] = swizzle[3] % 4; - return lp_build_swizzle1_aos(bld, a, swizzle1); - } + /* + * Build a type where each element is an integer that cover the four + * channels. + */ + type4 = type; + type4.floating = FALSE; + type4.width *= 4; + type4.length /= 4; - if(swizzle[0] % 4 == 0 && - swizzle[1] % 4 == 1 && - swizzle[2] % 4 == 2 && - swizzle[3] % 4 == 3) { - boolean cond[4]; - cond[0] = swizzle[0] / 4; - cond[1] = swizzle[1] / 4; - cond[2] = swizzle[2] / 4; - cond[3] = swizzle[3] / 4; - return lp_build_select_aos(bld, a, b, cond); - } + a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), ""); + res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), ""); - { /* - * Shuffle. 
+ * Mask and shift the channels, trying to group as many channels in the + * same shift as possible */ - LLVMTypeRef elem_type = LLVMInt32Type(); - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0); + for (shift = -3; shift <= 3; ++shift) { + unsigned long long mask = 0; + + assert(type4.width <= sizeof(mask)*8); + + for (chan = 0; chan < 4; ++chan) { + /* FIXME: big endian */ + if (swizzles[chan] < 4 && + chan - swizzles[chan] == shift) { + mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width); + } + } + + if (mask) { + LLVMValueRef masked; + LLVMValueRef shifted; + + if (0) + debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask); + + masked = LLVMBuildAnd(bld->builder, a, + lp_build_const_int_vec(type4, mask), ""); + if (shift > 0) { + shifted = LLVMBuildShl(bld->builder, masked, + lp_build_const_int_vec(type4, shift*type.width), ""); + } else if (shift < 0) { + shifted = LLVMBuildLShr(bld->builder, masked, + lp_build_const_int_vec(type4, -shift*type.width), ""); + } else { + shifted = masked; + } + + res = LLVMBuildOr(bld->builder, res, shifted, ""); + } + } - return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); + return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), ""); } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index 4f4fa777c9..315e1bcb54 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -68,24 +68,12 @@ lp_build_broadcast_aos(struct lp_build_context *bld, /** * Swizzle a vector consisting of an array of XYZW structs. * - * @param swizzle is the in [0,4[ range. + * @param swizzles is the in [0,4[ range. 
*/ LLVMValueRef -lp_build_swizzle1_aos(struct lp_build_context *bld, - LLVMValueRef a, - const unsigned char swizzle[4]); - - -/** - * Swizzle two vector consisting of an array of XYZW structs. - * - * @param swizzle is the in [0,8[ range. Values in [4,8[ range refer to b. - */ -LLVMValueRef -lp_build_swizzle2_aos(struct lp_build_context *bld, - LLVMValueRef a, - LLVMValueRef b, - const unsigned char swizzle[4]); +lp_build_swizzle_aos(struct lp_build_context *bld, + LLVMValueRef a, + const unsigned char swizzles[4]); LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index dec7556138..21236839fb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -49,6 +49,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_arit.h" +#include "lp_bld_gather.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" @@ -132,10 +133,14 @@ struct lp_build_tgsi_soa_context LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS]; - /* we allocate an array of temps if we have indirect - * addressing and then the temps above is unused */ + /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is + * set in the indirect_files field. + * The temps[] array above is unused then. + */ LLVMValueRef temps_array; - boolean has_indirect_addressing; + + /** bitmask indicating which register files are accessed indirectly */ + unsigned indirect_files; struct lp_build_mask_context *mask; struct lp_exec_mask exec_mask; @@ -404,25 +409,92 @@ static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) lp_exec_mask_update(mask); } + +/** + * Return pointer to a temporary register channel (src or dest). + * Note that indirect addressing cannot be handled here. + * \param index which temporary register + * \param chan which channel of the temp register. 
+ */ static LLVMValueRef get_temp_ptr(struct lp_build_tgsi_soa_context *bld, unsigned index, - unsigned chan, - boolean is_indirect, - LLVMValueRef addr) + unsigned chan) { assert(chan < 4); - if (!bld->has_indirect_addressing) { - return bld->temps[index][chan]; - } else { - LLVMValueRef lindex = - LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0); - if (is_indirect) - lindex = lp_build_add(&bld->base, lindex, addr); + if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { + LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); } + else { + return bld->temps[index][chan]; + } } + +/** + * Gather vector. + * XXX the lp_build_gather() function should be capable of doing this + * with a little work. + */ +static LLVMValueRef +build_gather(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef base_ptr, + LLVMValueRef indexes) +{ + LLVMValueRef res = bld->base.undef; + unsigned i; + + /* + * Loop over elements of index_vec, load scalar value, insert it into 'res'. + */ + for (i = 0; i < bld->base.type.length; i++) { + LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder, + indexes, ii, ""); + LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr, + &index, 1, ""); + LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + + res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, ""); + } + + return res; +} + + +/** + * Read the current value of the ADDR register, convert the floats to + * ints, multiply by four and return the vector of offsets. + * The offsets will be used to index into the constant buffer or + * temporary register file. 
+ */ +static LLVMValueRef +get_indirect_offsets(struct lp_build_tgsi_soa_context *bld, + const struct tgsi_src_register *indirect_reg) +{ + /* always use X component of address register */ + const int x = indirect_reg->SwizzleX; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); + uint swizzle = tgsi_util_get_src_register_swizzle(indirect_reg, x); + LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4); + LLVMValueRef addr_vec; + + addr_vec = LLVMBuildLoad(bld->base.builder, + bld->addr[indirect_reg->Index][swizzle], + "load addr reg"); + + /* for indexing we want integers */ + addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec, + int_vec_type, ""); + + /* addr_vec = addr_vec * 4 */ + addr_vec = lp_build_mul(&bld->base, addr_vec, vec4); + + return addr_vec; +} + + /** * Register fetch. */ @@ -430,14 +502,14 @@ static LLVMValueRef emit_fetch( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, - unsigned index, + unsigned src_op, const unsigned chan_index ) { - const struct tgsi_full_src_register *reg = &inst->Src[index]; + const struct tgsi_full_src_register *reg = &inst->Src[src_op]; const unsigned swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); LLVMValueRef res; - LLVMValueRef addr = NULL; + LLVMValueRef addr_vec = NULL; if (swizzle > 3) { assert(0 && "invalid swizzle in emit_fetch()"); @@ -445,32 +517,33 @@ emit_fetch( } if (reg->Register.Indirect) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); - unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index ); - addr = LLVMBuildLoad(bld->base.builder, - bld->addr[reg->Indirect.Index][swizzle], - ""); - /* for indexing we want integers */ - addr = LLVMBuildFPToSI(bld->base.builder, addr, - int_vec_type, ""); - addr = LLVMBuildExtractElement(bld->base.builder, - addr, LLVMConstInt(LLVMInt32Type(), 0, 0), - ""); - addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + 
assert(bld->indirect_files); + addr_vec = get_indirect_offsets(bld, &reg->Indirect); } switch (reg->Register.File) { case TGSI_FILE_CONSTANT: - { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), - reg->Register.Index*4 + swizzle, 0); + if (reg->Register.Indirect) { + LLVMValueRef index_vec; /* index into the const buffer */ + + assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT)); + + /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */ + index_vec = lp_build_const_int_vec(bld->int_bld.type, + reg->Register.Index * 4 + swizzle); + + /* index_vec = index_vec + addr_vec */ + index_vec = lp_build_add(&bld->base, index_vec, addr_vec); + + /* Gather values from the constant buffer */ + res = build_gather(bld, bld->consts_ptr, index_vec); + } + else { + LLVMValueRef index; /* index into the const buffer */ LLVMValueRef scalar, scalar_ptr; - if (reg->Register.Indirect) { - /*lp_build_printf(bld->base.builder, - "\taddr = %d\n", addr);*/ - index = lp_build_add(&bld->base, index, addr); - } + index = lp_build_const_int32(reg->Register.Index*4 + swizzle); + scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); @@ -490,13 +563,38 @@ emit_fetch( break; case TGSI_FILE_TEMPORARY: - { - LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, - swizzle, - reg->Register.Indirect, - addr); + if (reg->Register.Indirect) { + LLVMValueRef vec_len = + lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length); + LLVMValueRef index_vec; /* index into the const buffer */ + LLVMValueRef temps_array; + LLVMTypeRef float4_ptr_type; + + assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)); + + /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */ + index_vec = lp_build_const_int_vec(bld->int_bld.type, + reg->Register.Index * 4 + swizzle); + + /* index_vec += addr_vec */ + index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec); + + /* index_vec *= vector_length */ + 
index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len); + + /* cast temps_array pointer to float* */ + float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0); + temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array, + float4_ptr_type, ""); + + /* Gather values from the temporary register array */ + res = build_gather(bld, temps_array, index_vec); + } + else { + LLVMValueRef temp_ptr; + temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle); res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); - if(!res) + if (!res) return bld->base.undef; } break; @@ -660,8 +758,12 @@ emit_store( } if (reg->Register.Indirect) { + /* XXX use get_indirect_offsets() here eventually */ LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index ); + + assert(bld->indirect_files); + addr = LLVMBuildLoad(bld->base.builder, bld->addr[reg->Indirect.Index][swizzle], ""); @@ -680,14 +782,18 @@ emit_store( bld->outputs[reg->Register.Index][chan_index]); break; - case TGSI_FILE_TEMPORARY: { - LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, - chan_index, - reg->Register.Indirect, - addr); - lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); + case TGSI_FILE_TEMPORARY: + if (reg->Register.Indirect) { + /* XXX not done yet */ + debug_printf("WARNING: LLVM scatter store of temp regs" + " not implemented\n"); + } + else { + LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, + chan_index); + lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); + } break; - } case TGSI_FILE_ADDRESS: lp_exec_mask_store(&bld->exec_mask, pred, value, @@ -905,7 +1011,7 @@ emit_declaration( switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: assert(idx < LP_MAX_TGSI_TEMPS); - if (bld->has_indirect_addressing) { + if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), last*4 + 4, 0); bld->temps_array = 
lp_build_array_alloca(bld->base.builder, @@ -1929,8 +2035,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.outputs = outputs; bld.consts_ptr = consts_ptr; bld.sampler = sampler; - bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 || - info->opcode_count[TGSI_OPCODE_ARL] > 0; + bld.indirect_files = info->indirect_files; bld.instructions = (struct tgsi_full_instruction *) MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); bld.max_instructions = LP_MAX_INSTRUCTIONS; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index df77ef2155..3ffe916f8e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -316,6 +316,54 @@ LLVMTypeRef lp_build_int32_vec4_type(void); +static INLINE struct lp_type +lp_float32_vec4_type(void) +{ + struct lp_type type; + + memset(&type, 0, sizeof(type)); + type.floating = TRUE; + type.sign = TRUE; + type.norm = FALSE; + type.width = 32; + type.length = 4; + + return type; +} + + +static INLINE struct lp_type +lp_int32_vec4_type(void) +{ + struct lp_type type; + + memset(&type, 0, sizeof(type)); + type.floating = FALSE; + type.sign = TRUE; + type.norm = FALSE; + type.width = 32; + type.length = 4; + + return type; +} + + +static INLINE struct lp_type +lp_unorm8_vec4_type(void) +{ + struct lp_type type; + + memset(&type, 0, sizeof(type)); + type.floating = FALSE; + type.sign = FALSE; + type.norm = TRUE; + type.width = 8; + type.length = 4; + + return type; +} + + struct lp_type lp_uint_type(struct lp_type type); diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h index 0238308d20..a084310d4f 100644 --- a/src/gallium/auxiliary/os/os_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -45,7 +45,6 @@ #include <pthread.h> /* POSIX threads headers */ #include <stdio.h> /* for perror() */ -#define PIPE_THREAD_HAVE_CONDVAR /* pipe_thread */ @@ -168,19 +167,59 @@ typedef 
CRITICAL_SECTION pipe_mutex; #define pipe_mutex_unlock(mutex) \ LeaveCriticalSection(&mutex) +/* TODO: Need a macro to declare "I don't care about WinXP compatibilty" */ +#if 0 && defined (_WIN32_WINNT) && (_WIN32_WINNT >= 0x0600) +/* CONDITION_VARIABLE is only available on newer versions of Windows + * (Server 2008/Vista or later). + * http://msdn.microsoft.com/en-us/library/ms682052(VS.85).aspx + * + * pipe_condvar + */ +typedef CONDITION_VARIABLE pipe_condvar; + +#define pipe_static_condvar(cond) \ + /*static*/ pipe_condvar cond = CONDITION_VARIABLE_INIT + +#define pipe_condvar_init(cond) \ + InitializeConditionVariable(&(cond)) + +#define pipe_condvar_destroy(cond) \ + (void) cond /* nothing to do */ + +#define pipe_condvar_wait(cond, mutex) \ + SleepConditionVariableCS(&(cond), &(mutex), INFINITE) + +#define pipe_condvar_signal(cond) \ + WakeConditionVariable(&(cond)) + +#define pipe_condvar_broadcast(cond) \ + WakeAllConditionVariable(&(cond)) + +#else /* need compatibility with pre-Vista Win32 */ /* pipe_condvar (XXX FIX THIS) + * See http://www.cs.wustl.edu/~schmidt/win32-cv-1.html + * for potential pitfalls in implementation. */ -typedef unsigned pipe_condvar; +typedef DWORD pipe_condvar; + +#define pipe_static_condvar(cond) \ + /*static*/ pipe_condvar cond = 1 #define pipe_condvar_init(cond) \ - (void) cond + (void) (cond = 1) #define pipe_condvar_destroy(cond) \ (void) cond +/* Poor man's pthread_cond_wait(): + Just release the mutex and sleep for one millisecond. + The caller's while() loop does all the work. 
*/ #define pipe_condvar_wait(cond, mutex) \ - (void) cond; (void) mutex + do { pipe_mutex_unlock(mutex); \ + Sleep(cond); \ + pipe_mutex_lock(mutex); \ + } while (0) #define pipe_condvar_signal(cond) \ (void) cond @@ -188,9 +227,12 @@ typedef unsigned pipe_condvar; #define pipe_condvar_broadcast(cond) \ (void) cond +#endif /* pre-Vista win32 */ #else +#include "os/os_time.h" + /** Dummy definitions */ typedef unsigned pipe_thread; @@ -214,7 +256,6 @@ static INLINE int pipe_thread_destroy( pipe_thread thread ) } typedef unsigned pipe_mutex; -typedef unsigned pipe_condvar; #define pipe_static_mutex(mutex) \ static pipe_mutex mutex = 0 @@ -231,17 +272,25 @@ typedef unsigned pipe_condvar; #define pipe_mutex_unlock(mutex) \ (void) mutex +typedef int64_t pipe_condvar; + #define pipe_static_condvar(condvar) \ - static unsigned condvar = 0 + static pipe_condvar condvar = 1000 #define pipe_condvar_init(condvar) \ - (void) condvar + (void) (condvar = 1000) #define pipe_condvar_destroy(condvar) \ (void) condvar +/* Poor man's pthread_cond_wait(): + Just release the mutex and sleep for one millisecond. + The caller's while() loop does all the work. 
*/ #define pipe_condvar_wait(condvar, mutex) \ - (void) condvar + do { pipe_mutex_unlock(mutex); \ + os_time_sleep(condvar); \ + pipe_mutex_lock(mutex); \ + } while (0) #define pipe_condvar_signal(condvar) \ (void) condvar @@ -277,27 +326,7 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier) } -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) - -/* XXX FIX THIS */ -typedef unsigned pipe_barrier; - -static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) -{ - /* XXX we could implement barriers with a mutex and condition var */ -} - -static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) -{ -} - -static INLINE void pipe_barrier_wait(pipe_barrier *barrier) -{ - assert(0); -} - - -#else +#else /* If the OS doesn't have its own, implement barriers using a mutex and a condvar */ typedef struct { unsigned count; diff --git a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h new file mode 100644 index 0000000000..0433da6141 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h @@ -0,0 +1,44 @@ + +#ifndef INLINE_DEBUG_HELPER_H +#define INLINE_DEBUG_HELPER_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" + + +/* Helper function to wrap a screen with + * one or more debug driver: rbug, trace. 
+ */ + +#ifdef GALLIUM_TRACE +#include "trace/tr_public.h" +#endif + +#ifdef GALLIUM_RBUG +#include "rbug/rbug_public.h" +#endif + +#ifdef GALLIUM_GALAHAD +#include "galahad/glhd_public.h" +#endif + +static INLINE struct pipe_screen * +debug_screen_wrap(struct pipe_screen *screen) +{ + +#if defined(GALLIUM_RBUG) + screen = rbug_screen_create(screen); +#endif + +#if defined(GALLIUM_TRACE) + screen = trace_screen_create(screen); +#endif + +#if defined(GALLIUM_GALAHAD) + screen = galahad_screen_create(screen); +#endif + + return screen; +} + +#endif diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h new file mode 100644 index 0000000000..036c1ee48a --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -0,0 +1,63 @@ + +#ifndef INLINE_SW_HELPER_H +#define INLINE_SW_HELPER_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "state_tracker/sw_winsys.h" + + +/* Helper function to choose and instantiate one of the software rasterizers: + * cell, llvmpipe, softpipe. 
+ */ + +#ifdef GALLIUM_SOFTPIPE +#include "softpipe/sp_public.h" +#endif + +#ifdef GALLIUM_LLVMPIPE +#include "llvmpipe/lp_public.h" +#endif + +#ifdef GALLIUM_CELL +#include "cell/ppu/cell_public.h" +#endif + +static INLINE struct pipe_screen * +sw_screen_create(struct sw_winsys *winsys) +{ + const char *default_driver; + const char *driver; + struct pipe_screen *screen = NULL; + +#if defined(GALLIUM_CELL) + default_driver = "cell"; +#elif defined(GALLIUM_LLVMPIPE) + default_driver = "llvmpipe"; +#elif defined(GALLIUM_SOFTPIPE) + default_driver = "softpipe"; +#else + default_driver = ""; +#endif + + driver = debug_get_option("GALLIUM_DRIVER", default_driver); + +#if defined(GALLIUM_CELL) + if (screen == NULL && strcmp(driver, "cell") == 0) + screen = cell_create_screen(winsys); +#endif + +#if defined(GALLIUM_LLVMPIPE) + if (screen == NULL && strcmp(driver, "llvmpipe") == 0) + screen = llvmpipe_create_screen(winsys); +#endif + +#if defined(GALLIUM_SOFTPIPE) + if (screen == NULL) + screen = softpipe_create_screen(winsys); +#endif + + return screen; +} + +#endif diff --git a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h new file mode 100644 index 0000000000..0b4e740403 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h @@ -0,0 +1,34 @@ + +#ifndef INLINE_WRAPPER_SW_HELPER_H +#define INLINE_WRAPPER_SW_HELPER_H + +#include "target-helpers/inline_sw_helper.h" +#include "sw/wrapper/wrapper_sw_winsys.h" + +/** + * Try to wrap a hw screen with a software screen. + * On failure will return given screen. 
+ */ +static INLINE struct pipe_screen * +sw_screen_wrap(struct pipe_screen *screen) +{ + struct sw_winsys *sws; + struct pipe_screen *sw_screen; + + sws = wrapper_sw_winsys_warp_pipe_screen(screen); + if (!sws) + goto err; + + sw_screen = sw_screen_create(sws); + if (sw_screen == screen) + goto err_winsys; + + return sw_screen; + +err_winsys: + sws->destroy(sws); +err: + return screen; +} + +#endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 9fcc28f4c9..f71ffb7030 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -176,7 +176,11 @@ static const char *primitive_names[] = "TRIANGLE_FAN", "QUADS", "QUAD_STRIP", - "POLYGON" + "POLYGON", + "LINES_ADJACENCY", + "LINE_STRIP_ADJACENCY", + "TRIANGLES_ADJACENCY", + "TRIANGLE_STRIP_ADJACENCY" }; static const char *fs_coord_origin_names[] = diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index ced9c94f46..90198a4f60 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -109,6 +109,19 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_usage_mask[ind] |= usage_mask; } } + + /* check for indirect register reads */ + if (src->Register.Indirect) { + info->indirect_files |= (1 << src->Register.File); + } + } + + /* check for indirect register writes */ + for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *dst = &fullinst->Dst[i]; + if (dst->Register.Indirect) { + info->indirect_files |= (1 << dst->Register.File); + } } info->num_instructions++; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index e75280336f..f8aa90cf06 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -63,6 +63,12 @@ struct tgsi_shader_info boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; 
/**< KIL or KILP instruction used? */ + /** + * Bitmask indicating which register files are accessed with + * indirect addressing. The bits are (1 << TGSI_FILE_x), etc. + */ + unsigned indirect_files; + struct { unsigned name; unsigned data[8]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 55fccba4d8..b01d2ff468 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -58,7 +58,7 @@ static boolean is_digit_alpha_underscore( const char *cur ) static char uprcase( char c ) { if (c >= 'a' && c <= 'z') - return c += 'A' - 'a'; + return c + 'A' - 'a'; return c; } @@ -732,7 +732,7 @@ parse_optional_swizzle( else if (uprcase( *cur ) == 'W') swizzle[i] = TGSI_SWIZZLE_W; else { - report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); + report_error( ctx, "Expected register swizzle component `x', `y', `z' or `w'" ); return FALSE; } cur++; diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index dfe2101c2e..0d94aaae95 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -52,9 +52,8 @@ struct blitter_context_priv { - struct blitter_context blitter; + struct blitter_context base; - struct pipe_context *pipe; /**< pipe context */ struct pipe_resource *vbuf; /**< quad */ float vertices[4][2][4]; /**< {pos, color} or {pos, texcoord} */ @@ -97,15 +96,25 @@ struct blitter_context_priv /* Rasterizer state. */ void *rs_state; - struct pipe_sampler_view *sampler_view; - /* Viewport state. */ struct pipe_viewport_state viewport; /* Clip state. */ struct pipe_clip_state clip; + + /* Destination surface dimensions. 
*/ + unsigned dst_width; + unsigned dst_height; }; +static void blitter_draw_rectangle(struct blitter_context *blitter, + unsigned x, unsigned y, + unsigned width, unsigned height, + float depth, + enum blitter_attrib_type type, + const float attrib[4]); + + struct blitter_context *util_blitter_create(struct pipe_context *pipe) { struct blitter_context_priv *ctx; @@ -120,19 +129,20 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) if (!ctx) return NULL; - ctx->pipe = pipe; + ctx->base.pipe = pipe; + ctx->base.draw_rectangle = blitter_draw_rectangle; /* init state objects for them to be considered invalid */ - ctx->blitter.saved_blend_state = INVALID_PTR; - ctx->blitter.saved_dsa_state = INVALID_PTR; - ctx->blitter.saved_rs_state = INVALID_PTR; - ctx->blitter.saved_fs = INVALID_PTR; - ctx->blitter.saved_vs = INVALID_PTR; - ctx->blitter.saved_velem_state = INVALID_PTR; - ctx->blitter.saved_fb_state.nr_cbufs = ~0; - ctx->blitter.saved_num_sampler_views = ~0; - ctx->blitter.saved_num_sampler_states = ~0; - ctx->blitter.saved_num_vertex_buffers = ~0; + ctx->base.saved_blend_state = INVALID_PTR; + ctx->base.saved_dsa_state = INVALID_PTR; + ctx->base.saved_rs_state = INVALID_PTR; + ctx->base.saved_fs = INVALID_PTR; + ctx->base.saved_vs = INVALID_PTR; + ctx->base.saved_velem_state = INVALID_PTR; + ctx->base.saved_fb_state.nr_cbufs = ~0; + ctx->base.saved_num_sampler_views = ~0; + ctx->base.saved_num_sampler_states = ~0; + ctx->base.saved_num_vertex_buffers = ~0; /* blend state objects */ memset(&blend, 0, sizeof(blend)); @@ -219,17 +229,17 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->vertices[i][0][3] = 1; /*v.w*/ /* create the vertex buffer */ - ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, + ctx->vbuf = pipe_buffer_create(ctx->base.pipe->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(ctx->vertices)); - return &ctx->blitter; + return &ctx->base; } void util_blitter_destroy(struct blitter_context *blitter) { struct 
blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = blitter->pipe; int i; pipe->delete_blend_state(pipe, ctx->blend_write_color); @@ -260,10 +270,6 @@ void util_blitter_destroy(struct blitter_context *blitter) if (ctx->sampler_state[i]) pipe->delete_sampler_state(pipe, ctx->sampler_state[i]); - if (ctx->sampler_view) { - pipe_sampler_view_reference(&ctx->sampler_view, NULL); - } - pipe_resource_reference(&ctx->vbuf, NULL); FREE(ctx); } @@ -271,112 +277,117 @@ void util_blitter_destroy(struct blitter_context *blitter) static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) { /* make sure these CSOs have been saved */ - assert(ctx->blitter.saved_blend_state != INVALID_PTR && - ctx->blitter.saved_dsa_state != INVALID_PTR && - ctx->blitter.saved_rs_state != INVALID_PTR && - ctx->blitter.saved_fs != INVALID_PTR && - ctx->blitter.saved_vs != INVALID_PTR && - ctx->blitter.saved_velem_state != INVALID_PTR); + assert(ctx->base.saved_blend_state != INVALID_PTR && + ctx->base.saved_dsa_state != INVALID_PTR && + ctx->base.saved_rs_state != INVALID_PTR && + ctx->base.saved_fs != INVALID_PTR && + ctx->base.saved_vs != INVALID_PTR && + ctx->base.saved_velem_state != INVALID_PTR); } static void blitter_restore_CSOs(struct blitter_context_priv *ctx) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; unsigned i; /* restore the state objects which are always required to be saved */ - pipe->bind_blend_state(pipe, ctx->blitter.saved_blend_state); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->blitter.saved_dsa_state); - pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state); - pipe->bind_fs_state(pipe, ctx->blitter.saved_fs); - pipe->bind_vs_state(pipe, ctx->blitter.saved_vs); - pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state); + pipe->bind_blend_state(pipe, ctx->base.saved_blend_state); + 
pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state); + pipe->bind_rasterizer_state(pipe, ctx->base.saved_rs_state); + pipe->bind_fs_state(pipe, ctx->base.saved_fs); + pipe->bind_vs_state(pipe, ctx->base.saved_vs); + pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state); - ctx->blitter.saved_blend_state = INVALID_PTR; - ctx->blitter.saved_dsa_state = INVALID_PTR; - ctx->blitter.saved_rs_state = INVALID_PTR; - ctx->blitter.saved_fs = INVALID_PTR; - ctx->blitter.saved_vs = INVALID_PTR; - ctx->blitter.saved_velem_state = INVALID_PTR; + ctx->base.saved_blend_state = INVALID_PTR; + ctx->base.saved_dsa_state = INVALID_PTR; + ctx->base.saved_rs_state = INVALID_PTR; + ctx->base.saved_fs = INVALID_PTR; + ctx->base.saved_vs = INVALID_PTR; + ctx->base.saved_velem_state = INVALID_PTR; - pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref); + pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref); - pipe->set_viewport_state(pipe, &ctx->blitter.saved_viewport); - pipe->set_clip_state(pipe, &ctx->blitter.saved_clip); + pipe->set_viewport_state(pipe, &ctx->base.saved_viewport); + pipe->set_clip_state(pipe, &ctx->base.saved_clip); /* restore the state objects which are required to be saved before copy/fill */ - if (ctx->blitter.saved_fb_state.nr_cbufs != ~0) { - pipe->set_framebuffer_state(pipe, &ctx->blitter.saved_fb_state); - ctx->blitter.saved_fb_state.nr_cbufs = ~0; + if (ctx->base.saved_fb_state.nr_cbufs != ~0) { + pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state); + util_assign_framebuffer_state(&ctx->base.saved_fb_state, NULL); + ctx->base.saved_fb_state.nr_cbufs = ~0; } - if (ctx->blitter.saved_num_sampler_states != ~0) { + if (ctx->base.saved_num_sampler_states != ~0) { pipe->bind_fragment_sampler_states(pipe, - ctx->blitter.saved_num_sampler_states, - ctx->blitter.saved_sampler_states); - ctx->blitter.saved_num_sampler_states = ~0; + ctx->base.saved_num_sampler_states, + ctx->base.saved_sampler_states); + 
ctx->base.saved_num_sampler_states = ~0; } - if (ctx->blitter.saved_num_sampler_views != ~0) { + if (ctx->base.saved_num_sampler_views != ~0) { pipe->set_fragment_sampler_views(pipe, - ctx->blitter.saved_num_sampler_views, - ctx->blitter.saved_sampler_views); - ctx->blitter.saved_num_sampler_views = ~0; + ctx->base.saved_num_sampler_views, + ctx->base.saved_sampler_views); + + for (i = 0; i < ctx->base.saved_num_sampler_views; i++) + pipe_sampler_view_reference(&ctx->base.saved_sampler_views[i], + NULL); + + ctx->base.saved_num_sampler_views = ~0; } - if (ctx->blitter.saved_num_vertex_buffers != ~0) { + if (ctx->base.saved_num_vertex_buffers != ~0) { pipe->set_vertex_buffers(pipe, - ctx->blitter.saved_num_vertex_buffers, - ctx->blitter.saved_vertex_buffers); + ctx->base.saved_num_vertex_buffers, + ctx->base.saved_vertex_buffers); - for (i = 0; i < ctx->blitter.saved_num_vertex_buffers; i++) { - if (ctx->blitter.saved_vertex_buffers[i].buffer) { - pipe_resource_reference(&ctx->blitter.saved_vertex_buffers[i].buffer, + for (i = 0; i < ctx->base.saved_num_vertex_buffers; i++) { + if (ctx->base.saved_vertex_buffers[i].buffer) { + pipe_resource_reference(&ctx->base.saved_vertex_buffers[i].buffer, NULL); } } - ctx->blitter.saved_num_vertex_buffers = ~0; + ctx->base.saved_num_vertex_buffers = ~0; } } static void blitter_set_rectangle(struct blitter_context_priv *ctx, unsigned x1, unsigned y1, unsigned x2, unsigned y2, - unsigned width, unsigned height, float depth) { int i; /* set vertex positions */ - ctx->vertices[0][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v0.x*/ - ctx->vertices[0][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v0.y*/ + ctx->vertices[0][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v0.x*/ + ctx->vertices[0][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v0.y*/ - ctx->vertices[1][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v1.x*/ - ctx->vertices[1][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v1.y*/ + ctx->vertices[1][0][0] = (float)x2 / 
ctx->dst_width * 2.0f - 1.0f; /*v1.x*/ + ctx->vertices[1][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v1.y*/ - ctx->vertices[2][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v2.x*/ - ctx->vertices[2][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v2.y*/ + ctx->vertices[2][0][0] = (float)x2 / ctx->dst_width * 2.0f - 1.0f; /*v2.x*/ + ctx->vertices[2][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v2.y*/ - ctx->vertices[3][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v3.x*/ - ctx->vertices[3][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v3.y*/ + ctx->vertices[3][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v3.x*/ + ctx->vertices[3][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v3.y*/ for (i = 0; i < 4; i++) ctx->vertices[i][0][2] = depth; /*z*/ /* viewport */ - ctx->viewport.scale[0] = 0.5f * width; - ctx->viewport.scale[1] = 0.5f * height; + ctx->viewport.scale[0] = 0.5f * ctx->dst_width; + ctx->viewport.scale[1] = 0.5f * ctx->dst_height; ctx->viewport.scale[2] = 1.0f; ctx->viewport.scale[3] = 1.0f; - ctx->viewport.translate[0] = 0.5f * width; - ctx->viewport.translate[1] = 0.5f * height; + ctx->viewport.translate[0] = 0.5f * ctx->dst_width; + ctx->viewport.translate[1] = 0.5f * ctx->dst_height; ctx->viewport.translate[2] = 0.0f; ctx->viewport.translate[3] = 0.0f; - ctx->pipe->set_viewport_state(ctx->pipe, &ctx->viewport); + ctx->base.pipe->set_viewport_state(ctx->base.pipe, &ctx->viewport); /* clip */ - ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip); + ctx->base.pipe->set_clip_state(ctx->base.pipe, &ctx->clip); } static void blitter_set_clear_color(struct blitter_context_priv *ctx, @@ -401,29 +412,45 @@ static void blitter_set_clear_color(struct blitter_context_priv *ctx, } } +static void get_normalized_texcoords(struct pipe_resource *src, + struct pipe_subresource subsrc, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2, + float out[4]) +{ + out[0] = x1 / (float)u_minify(src->width0, subsrc.level); + out[1] = y1 / 
(float)u_minify(src->height0, subsrc.level); + out[2] = x2 / (float)u_minify(src->width0, subsrc.level); + out[3] = y2 / (float)u_minify(src->height0, subsrc.level); +} + +static void set_texcoords_in_vertices(const float coord[4], + float *out, unsigned stride) +{ + out[0] = coord[0]; /*t0.s*/ + out[1] = coord[1]; /*t0.t*/ + out += stride; + out[0] = coord[2]; /*t1.s*/ + out[1] = coord[1]; /*t1.t*/ + out += stride; + out[0] = coord[2]; /*t2.s*/ + out[1] = coord[3]; /*t2.t*/ + out += stride; + out[0] = coord[0]; /*t3.s*/ + out[1] = coord[3]; /*t3.t*/ +} + static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx, struct pipe_resource *src, struct pipe_subresource subsrc, unsigned x1, unsigned y1, unsigned x2, unsigned y2) { - int i; - float s1 = x1 / (float)u_minify(src->width0, subsrc.level); - float t1 = y1 / (float)u_minify(src->height0, subsrc.level); - float s2 = x2 / (float)u_minify(src->width0, subsrc.level); - float t2 = y2 / (float)u_minify(src->height0, subsrc.level); - - ctx->vertices[0][1][0] = s1; /*t0.s*/ - ctx->vertices[0][1][1] = t1; /*t0.t*/ - - ctx->vertices[1][1][0] = s2; /*t1.s*/ - ctx->vertices[1][1][1] = t1; /*t1.t*/ - - ctx->vertices[2][1][0] = s2; /*t2.s*/ - ctx->vertices[2][1][1] = t2; /*t2.t*/ + unsigned i; + float coord[4]; - ctx->vertices[3][1][0] = s1; /*t3.s*/ - ctx->vertices[3][1][1] = t2; /*t3.t*/ + get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord); + set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8); for (i = 0; i < 4; i++) { ctx->vertices[i][1][2] = 0; /*r*/ @@ -454,20 +481,11 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, unsigned x2, unsigned y2) { int i; - float s1 = x1 / (float)u_minify(src->width0, subsrc.level); - float t1 = y1 / (float)u_minify(src->height0, subsrc.level); - float s2 = x2 / (float)u_minify(src->width0, subsrc.level); - float t2 = y2 / (float)u_minify(src->height0, subsrc.level); + float coord[4]; float st[4][2]; - st[0][0] = s1; - st[0][1] = t1; 
- st[1][0] = s2; - st[1][1] = t1; - st[2][0] = s2; - st[2][1] = t2; - st[3][0] = s1; - st[3][1] = t2; + get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord); + set_texcoords_in_vertices(coord, &st[0][0], 2); util_map_texcoords2d_onto_cubemap(subsrc.face, /* pointer, stride in floats */ @@ -478,9 +496,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, ctx->vertices[i][1][3] = 1; /*q*/ } +static void blitter_set_dst_dimensions(struct blitter_context_priv *ctx, + unsigned width, unsigned height) +{ + ctx->dst_width = width; + ctx->dst_height = height; +} + static void blitter_draw_quad(struct blitter_context_priv *ctx) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; /* write vertices and draw them */ pipe_buffer_write(pipe, ctx->vbuf, @@ -495,7 +520,7 @@ static INLINE void **blitter_get_sampler_state(struct blitter_context_priv *ctx, int miplevel) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state; assert(miplevel < PIPE_MAX_TEXTURE_LEVELS); @@ -518,7 +543,7 @@ void **blitter_get_sampler_state(struct blitter_context_priv *ctx, static INLINE void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); @@ -531,7 +556,7 @@ void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs) /** Convert PIPE_TEXTURE_x to TGSI_TEXTURE_x */ static unsigned -pipe_tex_to_tgsi_tex(unsigned pipe_tex_target) +pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target) { switch (pipe_tex_target) { case PIPE_TEXTURE_1D: @@ -553,7 +578,7 @@ static INLINE void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, unsigned tex_target) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; 
assert(tex_target < PIPE_MAX_TEXTURE_TYPES); @@ -572,7 +597,7 @@ static INLINE void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx, unsigned tex_target) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; assert(tex_target < PIPE_MAX_TEXTURE_TYPES); @@ -588,6 +613,31 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx, return ctx->fs_texfetch_depth[tex_target]; } +static void blitter_draw_rectangle(struct blitter_context *blitter, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2, + float depth, + enum blitter_attrib_type type, + const float attrib[4]) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + + switch (type) { + case UTIL_BLITTER_ATTRIB_COLOR: + blitter_set_clear_color(ctx, attrib); + break; + + case UTIL_BLITTER_ATTRIB_TEXCOORD: + set_texcoords_in_vertices(attrib, &ctx->vertices[0][1][0], 8); + break; + + default:; + } + + blitter_set_rectangle(ctx, x1, y1, x2, y2, depth); + blitter_draw_quad(ctx); +} + void util_blitter_clear(struct blitter_context *blitter, unsigned width, unsigned height, unsigned num_cbufs, @@ -596,7 +646,7 @@ void util_blitter_clear(struct blitter_context *blitter, double depth, unsigned stencil) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_stencil_ref sr = { { 0 } }; assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); @@ -630,9 +680,9 @@ void util_blitter_clear(struct blitter_context *blitter, pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs)); pipe->bind_vs_state(pipe, ctx->vs_col); - blitter_set_clear_color(ctx, rgba); - blitter_set_rectangle(ctx, 0, 0, width, height, width, height, depth); - blitter_draw_quad(ctx); + blitter_set_dst_dimensions(ctx, width, height); + blitter->draw_rectangle(blitter, 0, 0, width, height, depth, + UTIL_BLITTER_ATTRIB_COLOR, rgba); blitter_restore_CSOs(ctx); } @@ 
-654,7 +704,7 @@ void util_blitter_copy_region(struct blitter_context *blitter, boolean ignore_stencil) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_screen *screen = pipe->screen; struct pipe_surface *dstsurf; struct pipe_framebuffer_state fb_state; @@ -736,11 +786,6 @@ void util_blitter_copy_region(struct blitter_context *blitter, u_sampler_view_default_template(&viewTempl, src, src->format); view = pipe->create_sampler_view(pipe, src, &viewTempl); - if (ctx->sampler_view) { - pipe_sampler_view_reference(&ctx->sampler_view, NULL); - } - ctx->sampler_view = view; - /* Set rasterizer state, shaders, and textures. */ pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_vs_state(pipe, ctx->vs_tex); @@ -750,32 +795,49 @@ void util_blitter_copy_region(struct blitter_context *blitter, pipe->set_fragment_sampler_views(pipe, 1, &view); pipe->set_framebuffer_state(pipe, &fb_state); - /* Set texture coordinates. */ + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + switch (src->target) { + /* Draw the quad with the draw_rectangle callback. */ case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: - blitter_set_texcoords_2d(ctx, src, subsrc, - srcx, srcy, srcx+width, srcy+height); + { + /* Set texture coordinates. */ + float coord[4]; + get_normalized_texcoords(src, subsrc, srcx, srcy, + srcx+width, srcy+height, coord); + + /* Draw. */ + blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0, + UTIL_BLITTER_ATTRIB_TEXCOORD, coord); + } break; + + /* Draw the quad with the generic codepath. */ case PIPE_TEXTURE_3D: - blitter_set_texcoords_3d(ctx, src, subsrc, srcz, - srcx, srcy, srcx+width, srcy+height); - break; case PIPE_TEXTURE_CUBE: - blitter_set_texcoords_cube(ctx, src, subsrc, - srcx, srcy, srcx+width, srcy+height); + /* Set texture coordinates. 
*/ + if (src->target == PIPE_TEXTURE_3D) + blitter_set_texcoords_3d(ctx, src, subsrc, srcz, + srcx, srcy, srcx+width, srcy+height); + else + blitter_set_texcoords_cube(ctx, src, subsrc, + srcx, srcy, srcx+width, srcy+height); + + /* Draw. */ + blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); + blitter_draw_quad(ctx); break; + default: assert(0); return; } - blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, - dstsurf->width, dstsurf->height, 0); - blitter_draw_quad(ctx); blitter_restore_CSOs(ctx); pipe_surface_reference(&dstsurf, NULL); + pipe_sampler_view_reference(&view, NULL); } /* Clear a region of a color surface to a constant value. */ @@ -786,7 +848,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, unsigned width, unsigned height) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_framebuffer_state fb_state; assert(dstsurf->texture); @@ -813,9 +875,9 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, fb_state.zsbuf = 0; pipe->set_framebuffer_state(pipe, &fb_state); - blitter_set_clear_color(ctx, rgba); - blitter_set_rectangle(ctx, 0, 0, width, height, dstsurf->width, dstsurf->height, 0); - blitter_draw_quad(ctx); + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0, + UTIL_BLITTER_ATTRIB_COLOR, rgba); blitter_restore_CSOs(ctx); } @@ -829,7 +891,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned width, unsigned height) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_framebuffer_state fb_state; struct pipe_stencil_ref sr = { { 0 } }; @@ -873,7 +935,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context 
*blitter, fb_state.zsbuf = dstsurf; pipe->set_framebuffer_state(pipe, &fb_state); - blitter_set_rectangle(ctx, 0, 0, width, height, dstsurf->width, dstsurf->height, depth); - blitter_draw_quad(ctx); + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, depth, + UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 22849280ab..ba3f92eca8 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -39,9 +39,48 @@ extern "C" { struct pipe_context; +enum blitter_attrib_type { + UTIL_BLITTER_ATTRIB_NONE, + UTIL_BLITTER_ATTRIB_COLOR, + UTIL_BLITTER_ATTRIB_TEXCOORD +}; + struct blitter_context { + /** + * Draw a rectangle. + * + * \param x1 An X coordinate of the top-left corner. + * \param y1 A Y coordinate of the top-left corner. + * \param x2 An X coordinate of the bottom-right corner. + * \param y2 A Y coordinate of the bottom-right corner. + * \param depth A depth which the rectangle is rendered at. + * + * \param type Semantics of the attributes "attrib". + * If type is UTIL_BLITTER_ATTRIB_NONE, ignore them. + * If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes + * make up a constant RGBA color, and should go to the COLOR0 + * varying slot of a fragment shader. + * If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {a1, a2} and + * {a3, a4} specify top-left and bottom-right texture + * coordinates of the rectangle, respectively, and should go + * to the GENERIC0 varying slot of a fragment shader. + * + * \param attrib See type. + * + * \note A driver may optionally override this callback to implement + * a specialized hardware path for drawing a rectangle, e.g. using + * a rectangular point sprite. 
+ */ + void (*draw_rectangle)(struct blitter_context *blitter, + unsigned x1, unsigned y1, unsigned x2, unsigned y2, + float depth, + enum blitter_attrib_type type, + const float attrib[4]); + /* Private members, really. */ + struct pipe_context *pipe; /**< pipe context */ + void *saved_blend_state; /**< blend state */ void *saved_dsa_state; /**< depth stencil alpha state */ void *saved_velem_state; /**< vertex elements state */ @@ -73,6 +112,15 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe); */ void util_blitter_destroy(struct blitter_context *blitter); +/** + * Return the pipe context associated with a blitter context. + */ +static INLINE +struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter) +{ + return blitter->pipe; +} + /* * These CSOs must be saved before any of the following functions is called: * - blend state @@ -208,11 +256,45 @@ void util_blitter_save_vertex_shader(struct blitter_context *blitter, blitter->saved_vs = vs; } +/* XXX This should probably be moved elsewhere. */ +static INLINE +void util_assign_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + unsigned i; + + if (src) { + /* Reference all surfaces. */ + for (i = 0; i < src->nr_cbufs; i++) { + pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); + } + for (; i < dst->nr_cbufs; i++) { + pipe_surface_reference(&dst->cbufs[i], NULL); + } + + pipe_surface_reference(&dst->zsbuf, src->zsbuf); + + dst->nr_cbufs = src->nr_cbufs; + dst->width = src->width; + dst->height = src->height; + } else { + /* Set all surfaces to NULL. 
*/ + for (i = 0; i < dst->nr_cbufs; i++) { + pipe_surface_reference(&dst->cbufs[i], NULL); + } + + pipe_surface_reference(&dst->zsbuf, NULL); + + dst->nr_cbufs = 0; + } +} + static INLINE void util_blitter_save_framebuffer(struct blitter_context *blitter, - struct pipe_framebuffer_state *state) + const struct pipe_framebuffer_state *state) { - blitter->saved_fb_state = *state; + blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */ + util_assign_framebuffer_state(&blitter->saved_fb_state, state); } static INLINE @@ -247,12 +329,13 @@ util_blitter_save_fragment_sampler_views(struct blitter_context *blitter, int num_views, struct pipe_sampler_view **views) { + unsigned i; assert(num_views <= Elements(blitter->saved_sampler_views)); blitter->saved_num_sampler_views = num_views; - memcpy(blitter->saved_sampler_views, - views, - num_views * sizeof(struct pipe_sampler_view *)); + for (i = 0; i < num_views; i++) + pipe_sampler_view_reference(&blitter->saved_sampler_views[i], + views[i]); } static INLINE void diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c index 294ee37033..94d5bd3027 100644 --- a/src/gallium/auxiliary/util/u_caps.c +++ b/src/gallium/auxiliary/util/u_caps.c @@ -186,6 +186,22 @@ static unsigned caps_opengl_2_1[] = { /* OpenGL 3.0 */ /* UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), */ +/* Shader Model 3 */ +static unsigned caps_sm3[] = { + UTIL_CHECK_INT(MAX_FS_INSTRUCTIONS, 512), + UTIL_CHECK_INT(MAX_FS_INPUTS, 10), + UTIL_CHECK_INT(MAX_FS_TEMPS, 32), + UTIL_CHECK_INT(MAX_FS_ADDRS, 1), + UTIL_CHECK_INT(MAX_FS_CONSTS, 224), + + UTIL_CHECK_INT(MAX_VS_INSTRUCTIONS, 512), + UTIL_CHECK_INT(MAX_VS_INPUTS, 16), + UTIL_CHECK_INT(MAX_VS_TEMPS, 32), + UTIL_CHECK_INT(MAX_VS_ADDRS, 2), + UTIL_CHECK_INT(MAX_VS_CONSTS, 256), + + UTIL_CHECK_TERMINATE +}; /** * Demo function which checks against theoretical caps needed for different APIs. 
@@ -203,6 +219,7 @@ void util_caps_demo_print(struct pipe_screen *screen) {"DX 11", caps_dx_11}, {"OpenGL 2.1", caps_opengl_2_1}, /* {"OpenGL 3.0", caps_opengl_3_0},*/ + {"SM3", caps_sm3}, {NULL, NULL} }; int i, out = 0; diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index a08241971c..23d33af4e4 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -38,7 +38,7 @@ #include "u_cpu_detect.h" #if defined(PIPE_ARCH_PPC) -#if defined(PIPE_OS_DARWIN) +#if defined(PIPE_OS_APPLE) #include <sys/sysctl.h> #else #include <signal.h> @@ -132,7 +132,7 @@ win32_sig_handler_sse(EXCEPTION_POINTERS* ep) #endif /* PIPE_ARCH_X86 */ -#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_DARWIN) +#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE) static jmp_buf __lv_powerpc_jmpbuf; static volatile sig_atomic_t __lv_powerpc_canjump = 0; @@ -153,7 +153,7 @@ sigill_handler(int sig) static void check_os_altivec_support(void) { -#if defined(PIPE_OS_DARWIN) +#if defined(PIPE_OS_APPLE) int sels[2] = {CTL_HW, HW_VECTORUNIT}; int has_vu = 0; int len = sizeof (has_vu); @@ -166,8 +166,8 @@ check_os_altivec_support(void) util_cpu_caps.has_altivec = 1; } } -#else /* !PIPE_OS_DARWIN */ - /* no Darwin, do it the brute-force way */ +#else /* !PIPE_OS_APPLE */ + /* not on Apple/Darwin, do it the brute-force way */ /* this is borrowed from the libmpeg2 library */ signal(SIGILL, sigill_handler); if (setjmp(__lv_powerpc_jmpbuf)) { @@ -184,7 +184,7 @@ check_os_altivec_support(void) signal(SIGILL, SIG_DFL); util_cpu_caps.has_altivec = 1; } -#endif /* PIPE_OS_DARWIN */ +#endif /* !PIPE_OS_APPLE */ } #endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 5e373ff24c..ad162558bc 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -190,11 +190,11 @@ debug_get_flags_option(const char *name, result = dfault; 
else if (!util_strcmp(str, "help")) { result = dfault; - debug_printf("%s: help for %s:\n", __FUNCTION__, name); + _debug_printf("%s: help for %s:\n", __FUNCTION__, name); for (; flags->name; ++flags) namealign = MAX2(namealign, strlen(flags->name)); for (flags = orig; flags->name; ++flags) - debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name, + _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name, (int)sizeof(unsigned long)*CHAR_BIT/4, flags->value, flags->desc ? " " : "", flags->desc ? flags->desc : ""); } diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h index 53bb1342dd..42adb1f069 100644 --- a/src/gallium/auxiliary/util/u_double_list.h +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -98,5 +98,20 @@ struct list_head #define LIST_IS_EMPTY(__list) \ ((__list)->next == (__list)) - +#ifndef container_of +#define container_of(ptr, sample, member) \ + (void *)((char *)(ptr) \ + - ((char *)&(sample)->member - (char *)(sample))) +#endif + +#define LIST_FOR_EACH_ENTRY(pos, head, member) \ + for (pos = container_of((head)->next, pos, member); \ + &pos->member != (head); \ + pos = container_of(pos->member.next, pos, member)) + +#define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \ + for (pos = container_of((head)->next, pos, member), \ + storage = container_of(pos->member.next, pos, member); \ + &pos->member != (head); \ + pos = storage, storage = container_of(storage->member.next, storage, member)) #endif /*_U_DOUBLE_LIST_H_*/ diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c index 3168a1fab4..43d09f1960 100644 --- a/src/gallium/auxiliary/util/u_format.c +++ b/src/gallium/auxiliary/util/u_format.c @@ -120,7 +120,7 @@ util_format_write_4ub(enum pipe_format format, const uint8_t *src, unsigned src_ } -static INLINE boolean +boolean util_format_fits_8unorm(const struct util_format_description *format_desc) { unsigned chan; diff --git 
a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index fd95bea1a7..38254b1096 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -213,6 +213,16 @@ struct util_format_description unsigned width, unsigned height); /** + * Fetch a single pixel (i, j) from a block. + * + * XXX: Only defined for a very few select formats. + */ + void + (*fetch_rgba_8unorm)(uint8_t *dst, + const uint8_t *src, + unsigned i, unsigned j); + + /** * Unpack pixel blocks to R32G32B32A32_FLOAT. * Note: strides are in bytes. * @@ -663,6 +673,9 @@ util_format_write_4ub(enum pipe_format format, * Generic format conversion; */ +boolean +util_format_fits_8unorm(const struct util_format_description *format_desc); + void util_format_translate(enum pipe_format dst_format, void *dst, unsigned dst_stride, diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index ae9a598197..f0b407b8b8 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -132,12 +132,17 @@ def write_format_table(formats): if format.colorspace != ZS: print " &util_format_%s_unpack_rgba_8unorm," % format.short_name() print " &util_format_%s_pack_rgba_8unorm," % format.short_name() + if format.layout == 's3tc': + print " &util_format_%s_fetch_rgba_8unorm," % format.short_name() + else: + print " NULL, /* fetch_rgba_8unorm */" print " &util_format_%s_unpack_rgba_float," % format.short_name() print " &util_format_%s_pack_rgba_float," % format.short_name() print " &util_format_%s_fetch_rgba_float," % format.short_name() else: print " NULL, /* unpack_rgba_8unorm */" print " NULL, /* pack_rgba_8unorm */" + print " NULL, /* fetch_rgba_8unorm */" print " NULL, /* unpack_rgba_float */" print " NULL, /* pack_rgba_float */" print " NULL, /* fetch_rgba_float */" diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 
6370e77986..fe19466436 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -567,12 +567,26 @@ util_bswap16(uint16_t n) #define MAX3( A, B, C ) MAX2( MAX2( A, B ), C ) +/** + * Align a value, only works pot alignemnts. + */ static INLINE int align(int value, int alignment) { return (value + alignment - 1) & ~(alignment - 1); } +/** + * Works like align but on npot alignments. + */ +static INLINE size_t +util_align_npot(size_t value, size_t alignment) +{ + if (value % alignment) + return value + (alignment - (value % alignment)); + return value; +} + static INLINE unsigned u_minify(unsigned value, unsigned levels) { diff --git a/src/gallium/auxiliary/util/u_mempool.c b/src/gallium/auxiliary/util/u_mempool.c new file mode 100644 index 0000000000..1f336b39a1 --- /dev/null +++ b/src/gallium/auxiliary/util/u_mempool.c @@ -0,0 +1,169 @@ +/* + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "util/u_mempool.h" + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" + +#include <stdio.h> + +#define UTIL_MEMPOOL_MAGIC 0xcafe4321 + +/* The block is either allocated memory or free space. */ +struct util_mempool_block { + /* The header. */ + /* The first next free block. */ + struct util_mempool_block *next_free; + + intptr_t magic; + + /* Memory after the last member is dedicated to the block itself. + * The allocated size is always larger than this structure. */ +}; + +static struct util_mempool_block * +util_mempool_get_block(struct util_mempool *pool, + struct util_mempool_page *page, unsigned index) +{ + return (struct util_mempool_block*) + ((uint8_t*)page + sizeof(struct util_mempool_page) + + (pool->block_size * index)); +} + +static void util_mempool_add_new_page(struct util_mempool *pool) +{ + struct util_mempool_page *page; + struct util_mempool_block *block; + int i; + + page = MALLOC(pool->page_size); + insert_at_tail(&pool->list, page); + + /* Mark all blocks as free. */ + for (i = 0; i < pool->num_blocks-1; i++) { + block = util_mempool_get_block(pool, page, i); + block->next_free = util_mempool_get_block(pool, page, i+1); + block->magic = UTIL_MEMPOOL_MAGIC; + } + + block = util_mempool_get_block(pool, page, pool->num_blocks-1); + block->next_free = pool->first_free; + block->magic = UTIL_MEMPOOL_MAGIC; + pool->first_free = util_mempool_get_block(pool, page, 0); + pool->num_pages++; + +#if 0 + fprintf(stderr, "New page! 
Num of pages: %i\n", pool->num_pages); +#endif +} + +static void *util_mempool_malloc_st(struct util_mempool *pool) +{ + struct util_mempool_block *block; + + if (!pool->first_free) + util_mempool_add_new_page(pool); + + block = pool->first_free; + assert(block->magic == UTIL_MEMPOOL_MAGIC); + pool->first_free = block->next_free; + + return (uint8_t*)block + sizeof(struct util_mempool_block); +} + +static void util_mempool_free_st(struct util_mempool *pool, void *ptr) +{ + struct util_mempool_block *block = + (struct util_mempool_block*) + ((uint8_t*)ptr - sizeof(struct util_mempool_block)); + + assert(block->magic == UTIL_MEMPOOL_MAGIC); + block->next_free = pool->first_free; + pool->first_free = block; +} + +static void *util_mempool_malloc_mt(struct util_mempool *pool) +{ + void *mem; + + pipe_mutex_lock(pool->mutex); + mem = util_mempool_malloc_st(pool); + pipe_mutex_unlock(pool->mutex); + return mem; +} + +static void util_mempool_free_mt(struct util_mempool *pool, void *ptr) +{ + pipe_mutex_lock(pool->mutex); + util_mempool_free_st(pool, ptr); + pipe_mutex_unlock(pool->mutex); +} + +void util_mempool_set_thread_safety(struct util_mempool *pool, + enum util_mempool_threading threading) +{ + pool->threading = threading; + + if (threading) { + pool->malloc = util_mempool_malloc_mt; + pool->free = util_mempool_free_mt; + } else { + pool->malloc = util_mempool_malloc_st; + pool->free = util_mempool_free_st; + } +} + +void util_mempool_create(struct util_mempool *pool, + unsigned item_size, + unsigned num_blocks, + enum util_mempool_threading threading) +{ + item_size = align(item_size, sizeof(intptr_t)); + + pool->num_pages = 0; + pool->num_blocks = num_blocks; + pool->block_size = sizeof(struct util_mempool_block) + item_size; + pool->block_size = align(pool->block_size, sizeof(intptr_t)); + pool->page_size = sizeof(struct util_mempool_page) + + num_blocks * pool->block_size; + pool->first_free = NULL; + + make_empty_list(&pool->list); + + 
pipe_mutex_init(pool->mutex); + + util_mempool_set_thread_safety(pool, threading); +} + +void util_mempool_destroy(struct util_mempool *pool) +{ + struct util_mempool_page *page, *temp; + + foreach_s(page, temp, &pool->list) { + remove_from_list(page); + FREE(page); + } + + pipe_mutex_destroy(pool->mutex); +} diff --git a/src/gallium/auxiliary/util/u_mempool.h b/src/gallium/auxiliary/util/u_mempool.h new file mode 100644 index 0000000000..a5b5d6a9b7 --- /dev/null +++ b/src/gallium/auxiliary/util/u_mempool.h @@ -0,0 +1,87 @@ +/* + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * @file + * Simple memory pool for equally sized memory allocations. + * util_mempool_malloc and util_mempool_free are in O(1). + * + * Good for allocations which have very low lifetime and are allocated + * and freed very often. Use a profiler first! 
+ * + * Candidates: get_transfer, user_buffer_create + * + * @author Marek Olšák + */ + +#ifndef U_MEMPOOL_H +#define U_MEMPOOL_H + +#include "os/os_thread.h" + +enum util_mempool_threading { + UTIL_MEMPOOL_SINGLETHREADED = FALSE, + UTIL_MEMPOOL_MULTITHREADED = TRUE +}; + +/* The page is an array of blocks (allocations). */ +struct util_mempool_page { + /* The header (linked-list pointers). */ + struct util_mempool_page *prev, *next; + + /* Memory after the last member is dedicated to the page itself. + * The allocated size is always larger than this structure. */ +}; + +struct util_mempool { + /* Public members. */ + void *(*malloc)(struct util_mempool *pool); + void (*free)(struct util_mempool *pool, void *ptr); + + /* Private members. */ + struct util_mempool_block *first_free; + + struct util_mempool_page list; + + unsigned block_size; + unsigned page_size; + unsigned num_blocks; + unsigned num_pages; + enum util_mempool_threading threading; + + pipe_mutex mutex; +}; + +void util_mempool_create(struct util_mempool *pool, + unsigned item_size, + unsigned num_blocks, + enum util_mempool_threading threading); + +void util_mempool_destroy(struct util_mempool *pool); + +void util_mempool_set_thread_safety(struct util_mempool *pool, + enum util_mempool_threading threading); + +#define util_mempool_malloc(pool) (pool)->malloc(pool) +#define util_mempool_free(pool, ptr) (pool)->free(pool, ptr) + +#endif diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index 87ee0e4768..77f2c5fc7d 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include <winsock2.h> # include <windows.h> -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_CYGWIN) # include <sys/socket.h> # include <netinet/in.h> # include 
<unistd.h> diff --git a/src/gallium/docs/source/cso/blend.rst b/src/gallium/docs/source/cso/blend.rst index c74396284c..7bde10c124 100644 --- a/src/gallium/docs/source/cso/blend.rst +++ b/src/gallium/docs/source/cso/blend.rst @@ -14,21 +14,74 @@ in other modern and legacy drawing APIs. XXX blurb about dual-source blends +Logical Operations +------------------ + +Logical operations, also known as logicops, lops, or rops, are supported. +Only two-operand logicops are available. When logicops are enabled, all other +blend state is ignored, including per-render-target state, so logicops are +performed on all render targets. + +.. warning:: + The blend_enable flag is ignored for all render targets when logical + operations are enabled. + +For a source component `s` and destination component `d`, the logical +operations are defined as taking the bits of each channel of each component, +and performing one of the following operations per-channel: + +* ``CLEAR``: 0 +* ``NOR``: :math:`\lnot(s \lor d)` +* ``AND_INVERTED``: :math:`\lnot s \land d` +* ``COPY_INVERTED``: :math:`\lnot s` +* ``AND_REVERSE``: :math:`s \land \lnot d` +* ``INVERT``: :math:`\lnot d` +* ``XOR``: :math:`s \oplus d` +* ``NAND``: :math:`\lnot(s \land d)` +* ``AND``: :math:`s \land d` +* ``EQUIV``: :math:`\lnot(s \oplus d)` +* ``NOOP``: :math:`d` +* ``OR_INVERTED``: :math:`\lnot s \lor d` +* ``COPY``: :math:`s` +* ``OR_REVERSE``: :math:`s \lor \lnot d` +* ``OR``: :math:`s \lor d` +* ``SET``: 1 + +.. note:: + The logical operation names and definitions match those of the OpenGL API, + and are similar to the ROP2 and ROP3 definitions of GDI. This is + intentional, to ease transitions to Gallium. + Members ------- +These members affect all render targets. + +dither +%%%%%% + +Whether dithering is enabled. + +.. note:: + Dithering is completely implementation-dependent. 
It may be ignored by + drivers for any reason, and some render targets may always or never be + dithered depending on their format or usage flags. + +logicop_enable +%%%%%%%%%%%%%% + +Whether the blender should perform a logicop instead of blending. + +logicop_func +%%%%%%%%%%%% + +The logicop to use. One of ``PIPE_LOGICOP``. + independent_blend_enable If enabled, blend state is different for each render target, and for each render target set in the respective member of the rt array. If disabled, blend state is the same for all render targets, and only the first member of the rt array contains valid data. -logicop_enable - Enables logic ops. Cannot be enabled at the same time as blending, and - is always the same for all render targets. -logicop_func - The logic operation to use if logic ops are enabled. One of PIPE_LOGICOP. -dither - Whether dithering is enabled. Note: Dithering is implementation-dependent. rt Contains the per-rendertarget blend state. diff --git a/src/gallium/docs/source/cso/velems.rst b/src/gallium/docs/source/cso/velems.rst index 92cde014fb..978ad4a243 100644 --- a/src/gallium/docs/source/cso/velems.rst +++ b/src/gallium/docs/source/cso/velems.rst @@ -3,9 +3,44 @@ Vertex Elements =============== -This state controls format etc. of the input attributes contained -in the pipe_vertex_buffer(s). There's one pipe_vertex_element array member -for each input attribute. +This state controls the format of the input attributes contained in +pipe_vertex_buffers. There is one pipe_vertex_element array member for each +input attribute. + +Input Formats +------------- + +Gallium supports a diverse range of formats for vertex data. Drivers are +guaranteed to support 32-bit floating-point vectors of one to four components. 
+Additionally, they may support the following formats: + +* Integers, signed or unsigned, normalized or non-normalized, 8, 16, or 32 + bits wide +* Floating-point, 16, 32, or 64 bits wide + +At this time, support for varied vertex data formats is limited by driver +deficiencies. It is planned to support a single uniform set of formats for all +Gallium drivers at some point. + +Rather than attempt to specify every small nuance of behavior, Gallium uses a +very simple set of rules for padding out unspecified components. If an input +uses less than four components, it will be padded out with the constant vector +``(0, 0, 0, 1)``. + +Fog, point size, the facing bit, and edgeflags, all are in the standard format +of ``(x, 0, 0, 1)``, and so only the first component of those inputs is used. + +Position +%%%%%%%% + +Vertex position may be specified with two to four components. Using less than +two components is not allowed. + +Colors +%%%%%% + +Colors, both front- and back-facing, may omit the alpha component, only using +three components. Using less than three components is not allowed. Members ------- diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 48d9d570b6..e3ef49c862 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -36,7 +36,9 @@ The integer capabilities: bound. * ``OCCLUSION_QUERY``: Whether occlusion queries are available. * ``TIMER_QUERY``: Whether timer queries are available. -* ``TEXTURE_SHADOW_MAP``: XXX +* ``TEXTURE_SHADOW_MAP``: indicates whether the fragment shader hardware + can do the depth texture / Z comparison operation in TEX instructions + for shadow testing. * ``MAX_TEXTURE_2D_LEVELS``: The maximum number of mipmap levels available for a 2D texture. * ``MAX_TEXTURE_3D_LEVELS``: The maximum number of mipmap levels available @@ -55,7 +57,13 @@ The integer capabilities: from color blend equations, in :ref:`Blend` state. 
* ``SM3``: Whether the vertex shader and fragment shader support equivalent opcodes to the Shader Model 3 specification. XXX oh god this is horrible -* ``MAX_PREDICATE_REGISTERS``: XXX +* ``MAX_PREDICATE_REGISTERS``: indicates the number of predicate registers + available. Predicate registers may be set as a side-effect of ALU + instructions to indicate less than, greater than or equal to zero. + Later instructions can use a predicate register to control writing to + each channel of destination registers. NOTE: predicate registers have + not been fully implemented in Gallium at this time. See the + GL_NV_fragment_program extension for more info (look for "condition codes"). * ``MAX_COMBINED_SAMPLERS``: The total number of samplers accessible from the vertex and fragment shader, inclusive. * ``MAX_CONST_BUFFERS``: Maximum number of constant buffers that can be bound diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index ecab7cb809..e588c5b7bd 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -1286,6 +1286,8 @@ wrapping rules. Declaration Semantic ^^^^^^^^^^^^^^^^^^^^^^^^ + Vertex and fragment shader input and output registers may be labeled + with semantic information consisting of a name and index. Follows Declaration token if Semantic bit is set. @@ -1306,90 +1308,115 @@ Declaration Semantic TGSI_SEMANTIC_POSITION """""""""""""""""""""" -Position, sometimes known as HPOS or WPOS for historical reasons, is the -location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z`` -are the Cartesian coordinates, and ``w`` is the homogenous coordinate and used -for the perspective divide, if enabled. +For vertex shaders, TGSI_SEMANTIC_POSITION indicates the vertex shader +output register which contains the homogeneous vertex position in the clip +space coordinate system. 
After clipping, the X, Y and Z components of the +vertex will be divided by the W value to get normalized device coordinates. -As a vertex shader output, position should be scaled to the viewport. When -used in fragment shaders, position will be in window coordinates. The convention -used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties. +For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that +fragment shader input contains the fragment's window position. The X +component starts at zero and always increases from left to right. +The Y component starts at zero and always increases but Y=0 may either +indicate the top of the window or the bottom depending on the fragment +coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN). +The Z coordinate ranges from 0 to 1 to represent depth from the front +to the back of the Z buffer. The W component contains the reciprocal +of the interpolated vertex position W component. -XXX additionally, is there a way to configure the perspective divide? it's -accelerated on most chipsets AFAIK... +Fragment shaders may also declare an output register with +TGSI_SEMANTIC_POSITION. Only the Z component is writable. This allows +the fragment shader to change the fragment's Z position. -Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can -be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``. -XXX usually? can we solidify that? TGSI_SEMANTIC_COLOR """"""""""""""""""" -Colors are used to, well, color the primitives. Colors are always in -``(r, g, b, a)`` format. +For vertex shader outputs or fragment shader inputs/outputs, this +label indicates that the register contains an R,G,B,A color. + +Several shader inputs/outputs may contain colors so the semantic index +is used to distinguish them. For example, color[0] may be the diffuse +color while color[1] may be the specular color. 
+ +This label is needed so that the flat/smooth shading can be applied +to the right interpolants during rasterization. + -If alpha is not specified, it defaults to 1. TGSI_SEMANTIC_BCOLOR """""""""""""""""""" Back-facing colors are only used for back-facing polygons, and are only valid in vertex shader outputs. After rasterization, all polygons are front-facing -and COLOR and BCOLOR end up occupying the same slots in the fragment, so -all BCOLORs effectively become regular COLORs in the fragment shader. +and COLOR and BCOLOR end up occupying the same slots in the fragment shader, +so all BCOLORs effectively become regular COLORs in the fragment shader. + TGSI_SEMANTIC_FOG """"""""""""""""" -The fog coordinate historically has been used to replace the depth coordinate -for generation of fog in dedicated fog blocks. Gallium, however, does not use -dedicated fog acceleration, placing it entirely in the fragment shader -instead. +Vertex shader inputs and outputs and fragment shader inputs may be +labeled with TGSI_SEMANTIC_FOG to indicate that the register contains +a fog coordinate in the form (F, 0, 0, 1). Typically, the fragment +shader will use the fog coordinate to compute a fog blend factor which +is used to blend the normal fragment color with a constant fog color. + +Only the first component matters when writing from the vertex shader; +the driver will ensure that the coordinate is in this format when used +as a fragment shader input. -The fog coordinate should be written in ``(f, 0, 0, 1)`` format. Only the first -component matters when writing from the vertex shader; the driver will ensure -that the coordinate is in this format when used as a fragment shader input. TGSI_SEMANTIC_PSIZE """"""""""""""""""" -PSIZE, or point size, is used to specify point sizes per-vertex. It should -be in ``(s, 0, 0, 1)`` format, where ``s`` is the (possibly clamped) point size. -Only the first component matters when writing from the vertex shader. 
+Vertex shader input and output registers may be labeled with +TGSI_SEMANTIC_PSIZE to indicate that the register contains a point size +in the form (S, 0, 0, 1). The point size controls the width or diameter +of points for rasterization. This label cannot be used in fragment +shaders. When using this semantic, be sure to set the appropriate state in the :ref:`rasterizer` first. + TGSI_SEMANTIC_GENERIC """"""""""""""""""""" -Generic semantics are nearly always used for texture coordinate attributes, -in ``(s, t, r, q)`` format. ``t`` and ``r`` may be unused for certain kinds -of lookups, and ``q`` is the level-of-detail bias for biased sampling. +All vertex/fragment shader inputs/outputs not labeled with any other +semantic label can be considered to be generic attributes. Typical +uses of generic inputs/outputs are texcoords and user-defined values. -These attributes are called "generic" because they may be used for anything -else, including parameters, texture generation information, or anything that -can be stored inside a four-component vector. TGSI_SEMANTIC_NORMAL """""""""""""""""""" -Vertex normal; could be used to implement per-pixel lighting for legacy APIs -that allow mixing fixed-function and programmable stages. +Indicates that a vertex shader input is a normal vector. This is +typically only used for legacy graphics APIs. + TGSI_SEMANTIC_FACE """""""""""""""""" -FACE is the facing bit, to store the facing information for the fragment -shader. ``(f, 0, 0, 1)`` is the format. The first component will be positive -when the fragment is front-facing, and negative when the component is -back-facing. +This label applies to fragment shader inputs only and indicates that +the register contains front/back-face information of the form (F, 0, +0, 1). The first component will be positive when the fragment belongs +to a front-facing polygon, and negative when the fragment belongs to a +back-facing polygon. 
+ TGSI_SEMANTIC_EDGEFLAG """""""""""""""""""""" -XXX no clue +For vertex shaders, this semantic label indicates that an input or +output is a boolean edge flag. The register layout is [F, x, x, x] +where F is 0.0 or 1.0 and x = don't care. Normally, the vertex shader +simply copies the edge flag input to the edgeflag output. + +Edge flags are used to control which lines or points are actually +drawn when the polygon mode converts triangles/quads/polygons into +points or lines. + Properties diff --git a/src/gallium/drivers/galahad/Makefile b/src/gallium/drivers/galahad/Makefile new file mode 100644 index 0000000000..e9c4f7e28c --- /dev/null +++ b/src/gallium/drivers/galahad/Makefile @@ -0,0 +1,11 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = galahad + +C_SOURCES = \ + glhd_objects.c \ + glhd_context.c \ + glhd_screen.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/galahad/SConscript b/src/gallium/drivers/galahad/SConscript new file mode 100644 index 0000000000..b398a3f061 --- /dev/null +++ b/src/gallium/drivers/galahad/SConscript @@ -0,0 +1,13 @@ +Import('*') + +env = env.Clone() + +galahad = env.ConvenienceLibrary( + target = 'identity', + source = [ + 'glhd_context.c', + 'glhd_objects.c', + 'glhd_screen.c', + ]) + +Export('galahad') diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c new file mode 100644 index 0000000000..ab6f17b3ab --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -0,0 +1,997 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_context.h" + +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" + +#include "glhd_context.h" +#include "glhd_objects.h" + + +static void +galahad_destroy(struct pipe_context *_pipe) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->destroy(pipe); + + FREE(glhd_pipe); +} + +static void +galahad_draw_arrays(struct pipe_context *_pipe, + unsigned prim, + unsigned start, + unsigned count) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->draw_arrays(pipe, + prim, + start, + count); +} + +static void +galahad_draw_elements(struct pipe_context *_pipe, + struct pipe_resource *_indexResource, + unsigned indexSize, + int indexBias, + unsigned prim, + unsigned start, + unsigned count) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_resource *glhd_resource = galahad_resource(_indexResource); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *indexResource = glhd_resource->resource; + + pipe->draw_elements(pipe, + indexResource, + indexSize, + indexBias, + prim, + start, + count); +} + +static void +galahad_draw_range_elements(struct pipe_context *_pipe, + struct pipe_resource *_indexResource, + unsigned indexSize, + int indexBias, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_resource *glhd_resource = galahad_resource(_indexResource); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *indexResource = glhd_resource->resource; + + pipe->draw_range_elements(pipe, + indexResource, + indexSize, + indexBias, + minIndex, + maxIndex, + mode, + start, + count); +} + +static struct pipe_query * 
+galahad_create_query(struct pipe_context *_pipe, + unsigned query_type) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + if (query_type == PIPE_QUERY_OCCLUSION_COUNTER && + !pipe->screen->get_param(pipe->screen, PIPE_CAP_OCCLUSION_QUERY)) { + glhd_error("Occlusion query requested but not supported"); + } + + if (query_type == PIPE_QUERY_TIME_ELAPSED && + !pipe->screen->get_param(pipe->screen, PIPE_CAP_TIMER_QUERY)) { + glhd_error("Timer query requested but not supported"); + } + + return pipe->create_query(pipe, + query_type); +} + +static void +galahad_destroy_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->destroy_query(pipe, + query); +} + +static void +galahad_begin_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->begin_query(pipe, + query); +} + +static void +galahad_end_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->end_query(pipe, + query); +} + +static boolean +galahad_get_query_result(struct pipe_context *_pipe, + struct pipe_query *query, + boolean wait, + void *result) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + return pipe->get_query_result(pipe, + query, + wait, + result); +} + +static void * +galahad_create_blend_state(struct pipe_context *_pipe, + const struct pipe_blend_state *blend) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + if (blend->logicop_enable) { + if (blend->rt[0].blend_enable) { + glhd_warn("Blending enabled for render target 0, 
but logicops " + "are enabled"); + } + } + + return pipe->create_blend_state(pipe, + blend); +} + +static void +galahad_bind_blend_state(struct pipe_context *_pipe, + void *blend) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_blend_state(pipe, + blend); +} + +static void +galahad_delete_blend_state(struct pipe_context *_pipe, + void *blend) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_blend_state(pipe, + blend); +} + +static void * +galahad_create_sampler_state(struct pipe_context *_pipe, + const struct pipe_sampler_state *sampler) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + return pipe->create_sampler_state(pipe, + sampler); +} + +static void +galahad_bind_fragment_sampler_states(struct pipe_context *_pipe, + unsigned num_samplers, + void **samplers) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_fragment_sampler_states(pipe, + num_samplers, + samplers); +} + +static void +galahad_bind_vertex_sampler_states(struct pipe_context *_pipe, + unsigned num_samplers, + void **samplers) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_vertex_sampler_states(pipe, + num_samplers, + samplers); +} + +static void +galahad_delete_sampler_state(struct pipe_context *_pipe, + void *sampler) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_sampler_state(pipe, + sampler); +} + +static void * +galahad_create_rasterizer_state(struct pipe_context *_pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + if 
(rasterizer->point_quad_rasterization) { + if (rasterizer->point_smooth) { + glhd_warn("Point smoothing requested but ignored"); + } + } else { + if (rasterizer->sprite_coord_enable) { + glhd_warn("Point sprites requested but ignored"); + } + } + + return pipe->create_rasterizer_state(pipe, + rasterizer); +} + +static void +galahad_bind_rasterizer_state(struct pipe_context *_pipe, + void *rasterizer) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_rasterizer_state(pipe, + rasterizer); +} + +static void +galahad_delete_rasterizer_state(struct pipe_context *_pipe, + void *rasterizer) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_rasterizer_state(pipe, + rasterizer); +} + +static void * +galahad_create_depth_stencil_alpha_state(struct pipe_context *_pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil_alpha) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + return pipe->create_depth_stencil_alpha_state(pipe, + depth_stencil_alpha); +} + +static void +galahad_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, + void *depth_stencil_alpha) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_depth_stencil_alpha_state(pipe, + depth_stencil_alpha); +} + +static void +galahad_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, + void *depth_stencil_alpha) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_depth_stencil_alpha_state(pipe, + depth_stencil_alpha); +} + +static void * +galahad_create_fs_state(struct pipe_context *_pipe, + const struct pipe_shader_state *fs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = 
glhd_pipe->pipe; + + return pipe->create_fs_state(pipe, + fs); +} + +static void +galahad_bind_fs_state(struct pipe_context *_pipe, + void *fs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_fs_state(pipe, + fs); +} + +static void +galahad_delete_fs_state(struct pipe_context *_pipe, + void *fs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_fs_state(pipe, + fs); +} + +static void * +galahad_create_vs_state(struct pipe_context *_pipe, + const struct pipe_shader_state *vs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + return pipe->create_vs_state(pipe, + vs); +} + +static void +galahad_bind_vs_state(struct pipe_context *_pipe, + void *vs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_vs_state(pipe, + vs); +} + +static void +galahad_delete_vs_state(struct pipe_context *_pipe, + void *vs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_vs_state(pipe, + vs); +} + + +static void * +galahad_create_vertex_elements_state(struct pipe_context *_pipe, + unsigned num_elements, + const struct pipe_vertex_element *vertex_elements) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + return pipe->create_vertex_elements_state(pipe, + num_elements, + vertex_elements); +} + +static void +galahad_bind_vertex_elements_state(struct pipe_context *_pipe, + void *velems) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_vertex_elements_state(pipe, + velems); +} + +static void +galahad_delete_vertex_elements_state(struct pipe_context *_pipe, + void *velems) +{ + 
struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_vertex_elements_state(pipe, + velems); +} + +static void +galahad_set_blend_color(struct pipe_context *_pipe, + const struct pipe_blend_color *blend_color) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_blend_color(pipe, + blend_color); +} + +static void +galahad_set_stencil_ref(struct pipe_context *_pipe, + const struct pipe_stencil_ref *stencil_ref) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_stencil_ref(pipe, + stencil_ref); +} + +static void +galahad_set_clip_state(struct pipe_context *_pipe, + const struct pipe_clip_state *clip) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_clip_state(pipe, + clip); +} + +static void +galahad_set_sample_mask(struct pipe_context *_pipe, + unsigned sample_mask) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_sample_mask(pipe, + sample_mask); +} + +static void +galahad_set_constant_buffer(struct pipe_context *_pipe, + uint shader, + uint index, + struct pipe_resource *_resource) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *unwrapped_resource; + struct pipe_resource *resource = NULL; + + /* XXX hmm? 
unwrap the input state */ + if (_resource) { + unwrapped_resource = galahad_resource_unwrap(_resource); + resource = unwrapped_resource; + } + + pipe->set_constant_buffer(pipe, + shader, + index, + resource); +} + +/** + * Sanity-check the framebuffer state, unwrap its surfaces and hand the + * result to the wrapped context. A NULL state is forwarded as NULL. + */ +static void +galahad_set_framebuffer_state(struct pipe_context *_pipe, + const struct pipe_framebuffer_state *_state) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_framebuffer_state unwrapped_state; + struct pipe_framebuffer_state *state = NULL; + unsigned nr_cbufs; + unsigned i; + + /* validate and unwrap the input state; never dereference a NULL state */ + if (_state) { + nr_cbufs = _state->nr_cbufs; + + if (nr_cbufs > PIPE_MAX_COLOR_BUFS) { + glhd_error("%d render targets bound, but only %d are permitted by API", + nr_cbufs, PIPE_MAX_COLOR_BUFS); + /* the loops below index cbufs[] up to PIPE_MAX_COLOR_BUFS; stay in bounds */ + nr_cbufs = PIPE_MAX_COLOR_BUFS; + } else if (nr_cbufs > + pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS)) { + glhd_warn("%d render targets bound, but only %d are supported", + nr_cbufs, + pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS)); + } + + memcpy(&unwrapped_state, _state, sizeof(unwrapped_state)); + unwrapped_state.nr_cbufs = nr_cbufs; + for(i = 0; i < nr_cbufs; i++) + unwrapped_state.cbufs[i] = galahad_surface_unwrap(_state->cbufs[i]); + for (; i < PIPE_MAX_COLOR_BUFS; i++) + unwrapped_state.cbufs[i] = NULL; + unwrapped_state.zsbuf = galahad_surface_unwrap(_state->zsbuf); + state = &unwrapped_state; + } + + pipe->set_framebuffer_state(pipe, + state); +} + +static void +galahad_set_polygon_stipple(struct pipe_context *_pipe, + const struct pipe_poly_stipple *poly_stipple) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_polygon_stipple(pipe, + poly_stipple); +} + +static void +galahad_set_scissor_state(struct pipe_context *_pipe, + const struct pipe_scissor_state *scissor) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_scissor_state(pipe, + scissor); +} + 
+static void +galahad_set_viewport_state(struct pipe_context *_pipe, + const struct pipe_viewport_state *viewport) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_viewport_state(pipe, + viewport); +} + +/** + * Unwrap the fragment sampler views and forward them to the wrapped + * context. A count larger than the local unwrap array is reported and + * clamped so the array is never overrun. + */ +static void +galahad_set_fragment_sampler_views(struct pipe_context *_pipe, + unsigned num, + struct pipe_sampler_view **_views) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_sampler_view *unwrapped_views[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view **views = NULL; + unsigned i; + + /* unwrapped_views[] holds PIPE_MAX_SAMPLERS entries; do not write past it */ + if (num > PIPE_MAX_SAMPLERS) { + glhd_error("%u fragment sampler views bound, but at most %d can be bound", + num, PIPE_MAX_SAMPLERS); + num = PIPE_MAX_SAMPLERS; + } + + if (_views) { + for (i = 0; i < num; i++) + unwrapped_views[i] = galahad_sampler_view_unwrap(_views[i]); + for (; i < PIPE_MAX_SAMPLERS; i++) + unwrapped_views[i] = NULL; + + views = unwrapped_views; + } + + pipe->set_fragment_sampler_views(pipe, num, views); +} + +/** + * Unwrap the vertex sampler views and forward them to the wrapped + * context. A count larger than the local unwrap array is reported and + * clamped so the array is never overrun. + */ +static void +galahad_set_vertex_sampler_views(struct pipe_context *_pipe, + unsigned num, + struct pipe_sampler_view **_views) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_sampler_view *unwrapped_views[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_sampler_view **views = NULL; + unsigned i; + + /* unwrapped_views[] holds PIPE_MAX_VERTEX_SAMPLERS entries; do not write past it */ + if (num > PIPE_MAX_VERTEX_SAMPLERS) { + glhd_error("%u vertex sampler views bound, but at most %d can be bound", + num, PIPE_MAX_VERTEX_SAMPLERS); + num = PIPE_MAX_VERTEX_SAMPLERS; + } + + if (_views) { + for (i = 0; i < num; i++) + unwrapped_views[i] = galahad_sampler_view_unwrap(_views[i]); + for (; i < PIPE_MAX_VERTEX_SAMPLERS; i++) + unwrapped_views[i] = NULL; + + views = unwrapped_views; + } + + pipe->set_vertex_sampler_views(pipe, num, views); +} + +static void +galahad_set_vertex_buffers(struct pipe_context *_pipe, + unsigned num_buffers, + const struct pipe_vertex_buffer *_buffers) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_vertex_buffer unwrapped_buffers[PIPE_MAX_SHADER_INPUTS]; + struct pipe_vertex_buffer *buffers = NULL; + unsigned i; + + if (num_buffers) { + memcpy(unwrapped_buffers, _buffers, 
num_buffers * sizeof(*_buffers)); + for (i = 0; i < num_buffers; i++) + unwrapped_buffers[i].buffer = galahad_resource_unwrap(_buffers[i].buffer); + buffers = unwrapped_buffers; + } + + pipe->set_vertex_buffers(pipe, + num_buffers, + buffers); +} +static void +galahad_resource_copy_region(struct pipe_context *_pipe, + struct pipe_resource *_dst, + struct pipe_subresource subdst, + unsigned dstx, + unsigned dsty, + unsigned dstz, + struct pipe_resource *_src, + struct pipe_subresource subsrc, + unsigned srcx, + unsigned srcy, + unsigned srcz, + unsigned width, + unsigned height) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_resource *glhd_resource_dst = galahad_resource(_dst); + struct galahad_resource *glhd_resource_src = galahad_resource(_src); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *dst = glhd_resource_dst->resource; + struct pipe_resource *src = glhd_resource_src->resource; + + if (_dst->format != _src->format) { + glhd_warn("Format mismatch: Source is %s, destination is %s", + util_format_short_name(_src->format), + util_format_short_name(_dst->format)); + } + + pipe->resource_copy_region(pipe, + dst, + subdst, + dstx, + dsty, + dstz, + src, + subsrc, + srcx, + srcy, + srcz, + width, + height); +} + +static void +galahad_clear(struct pipe_context *_pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->clear(pipe, + buffers, + rgba, + depth, + stencil); +} + +static void +galahad_clear_render_target(struct pipe_context *_pipe, + struct pipe_surface *_dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_surface *glhd_surface_dst = galahad_surface(_dst); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_surface *dst = 
glhd_surface_dst->surface; + + pipe->clear_render_target(pipe, + dst, + rgba, + dstx, + dsty, + width, + height); +} +static void +galahad_clear_depth_stencil(struct pipe_context *_pipe, + struct pipe_surface *_dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_surface *glhd_surface_dst = galahad_surface(_dst); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_surface *dst = glhd_surface_dst->surface; + + pipe->clear_depth_stencil(pipe, + dst, + clear_flags, + depth, + stencil, + dstx, + dsty, + width, + height); + +} + +static void +galahad_flush(struct pipe_context *_pipe, + unsigned flags, + struct pipe_fence_handle **fence) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->flush(pipe, + flags, + fence); +} + +static unsigned int +galahad_is_resource_referenced(struct pipe_context *_pipe, + struct pipe_resource *_resource, + unsigned face, + unsigned level) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *resource = glhd_resource->resource; + + return pipe->is_resource_referenced(pipe, + resource, + face, + level); +} + +static struct pipe_sampler_view * +galahad_context_create_sampler_view(struct pipe_context *_pipe, + struct pipe_resource *_resource, + const struct pipe_sampler_view *templ) +{ + struct galahad_context *glhd_context = galahad_context(_pipe); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *pipe = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + struct pipe_sampler_view *result; + + result = pipe->create_sampler_view(pipe, + resource, + templ); + + if (result) + return 
galahad_sampler_view_create(glhd_context, glhd_resource, result); + return NULL; +} + +static void +galahad_context_sampler_view_destroy(struct pipe_context *_pipe, + struct pipe_sampler_view *_view) +{ + galahad_sampler_view_destroy(galahad_context(_pipe), + galahad_sampler_view(_view)); +} + +static struct pipe_transfer * +galahad_context_get_transfer(struct pipe_context *_context, + struct pipe_resource *_resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *context = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + struct pipe_transfer *result; + + result = context->get_transfer(context, + resource, + sr, + usage, + box); + + if (result) + return galahad_transfer_create(glhd_context, glhd_resource, result); + return NULL; +} + +static void +galahad_context_transfer_destroy(struct pipe_context *_pipe, + struct pipe_transfer *_transfer) +{ + galahad_transfer_destroy(galahad_context(_pipe), + galahad_transfer(_transfer)); +} + +static void * +galahad_context_transfer_map(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_transfer *glhd_transfer = galahad_transfer(_transfer); + struct pipe_context *context = glhd_context->pipe; + struct pipe_transfer *transfer = glhd_transfer->transfer; + + return context->transfer_map(context, + transfer); +} + + + +static void +galahad_context_transfer_flush_region(struct pipe_context *_context, + struct pipe_transfer *_transfer, + const struct pipe_box *box) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_transfer *glhd_transfer = galahad_transfer(_transfer); + struct pipe_context *context = glhd_context->pipe; + struct pipe_transfer *transfer = 
glhd_transfer->transfer; + + context->transfer_flush_region(context, + transfer, + box); +} + + +static void +galahad_context_transfer_unmap(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_transfer *glhd_transfer = galahad_transfer(_transfer); + struct pipe_context *context = glhd_context->pipe; + struct pipe_transfer *transfer = glhd_transfer->transfer; + + context->transfer_unmap(context, + transfer); +} + + +static void +galahad_context_transfer_inline_write(struct pipe_context *_context, + struct pipe_resource *_resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned slice_stride) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *context = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + + context->transfer_inline_write(context, + resource, + sr, + usage, + box, + data, + stride, + slice_stride); +} + + +struct pipe_context * +galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) +{ + struct galahad_context *glhd_pipe; + (void)galahad_screen(_screen); + + glhd_pipe = CALLOC_STRUCT(galahad_context); + if (!glhd_pipe) { + return NULL; + } + + glhd_pipe->base.winsys = NULL; + glhd_pipe->base.screen = _screen; + glhd_pipe->base.priv = pipe->priv; /* expose wrapped data */ + glhd_pipe->base.draw = NULL; + + glhd_pipe->base.destroy = galahad_destroy; + glhd_pipe->base.draw_arrays = galahad_draw_arrays; + glhd_pipe->base.draw_elements = galahad_draw_elements; + glhd_pipe->base.draw_range_elements = galahad_draw_range_elements; + glhd_pipe->base.create_query = galahad_create_query; + glhd_pipe->base.destroy_query = galahad_destroy_query; + glhd_pipe->base.begin_query = galahad_begin_query; + 
glhd_pipe->base.end_query = galahad_end_query; + glhd_pipe->base.get_query_result = galahad_get_query_result; + glhd_pipe->base.create_blend_state = galahad_create_blend_state; + glhd_pipe->base.bind_blend_state = galahad_bind_blend_state; + glhd_pipe->base.delete_blend_state = galahad_delete_blend_state; + glhd_pipe->base.create_sampler_state = galahad_create_sampler_state; + glhd_pipe->base.bind_fragment_sampler_states = galahad_bind_fragment_sampler_states; + glhd_pipe->base.bind_vertex_sampler_states = galahad_bind_vertex_sampler_states; + glhd_pipe->base.delete_sampler_state = galahad_delete_sampler_state; + glhd_pipe->base.create_rasterizer_state = galahad_create_rasterizer_state; + glhd_pipe->base.bind_rasterizer_state = galahad_bind_rasterizer_state; + glhd_pipe->base.delete_rasterizer_state = galahad_delete_rasterizer_state; + glhd_pipe->base.create_depth_stencil_alpha_state = galahad_create_depth_stencil_alpha_state; + glhd_pipe->base.bind_depth_stencil_alpha_state = galahad_bind_depth_stencil_alpha_state; + glhd_pipe->base.delete_depth_stencil_alpha_state = galahad_delete_depth_stencil_alpha_state; + glhd_pipe->base.create_fs_state = galahad_create_fs_state; + glhd_pipe->base.bind_fs_state = galahad_bind_fs_state; + glhd_pipe->base.delete_fs_state = galahad_delete_fs_state; + glhd_pipe->base.create_vs_state = galahad_create_vs_state; + glhd_pipe->base.bind_vs_state = galahad_bind_vs_state; + glhd_pipe->base.delete_vs_state = galahad_delete_vs_state; + glhd_pipe->base.create_vertex_elements_state = galahad_create_vertex_elements_state; + glhd_pipe->base.bind_vertex_elements_state = galahad_bind_vertex_elements_state; + glhd_pipe->base.delete_vertex_elements_state = galahad_delete_vertex_elements_state; + glhd_pipe->base.set_blend_color = galahad_set_blend_color; + glhd_pipe->base.set_stencil_ref = galahad_set_stencil_ref; + glhd_pipe->base.set_clip_state = galahad_set_clip_state; + glhd_pipe->base.set_sample_mask = galahad_set_sample_mask; + 
glhd_pipe->base.set_constant_buffer = galahad_set_constant_buffer; + glhd_pipe->base.set_framebuffer_state = galahad_set_framebuffer_state; + glhd_pipe->base.set_polygon_stipple = galahad_set_polygon_stipple; + glhd_pipe->base.set_scissor_state = galahad_set_scissor_state; + glhd_pipe->base.set_viewport_state = galahad_set_viewport_state; + glhd_pipe->base.set_fragment_sampler_views = galahad_set_fragment_sampler_views; + glhd_pipe->base.set_vertex_sampler_views = galahad_set_vertex_sampler_views; + glhd_pipe->base.set_vertex_buffers = galahad_set_vertex_buffers; + glhd_pipe->base.resource_copy_region = galahad_resource_copy_region; + glhd_pipe->base.clear = galahad_clear; + glhd_pipe->base.clear_render_target = galahad_clear_render_target; + glhd_pipe->base.clear_depth_stencil = galahad_clear_depth_stencil; + glhd_pipe->base.flush = galahad_flush; + glhd_pipe->base.is_resource_referenced = galahad_is_resource_referenced; + glhd_pipe->base.create_sampler_view = galahad_context_create_sampler_view; + glhd_pipe->base.sampler_view_destroy = galahad_context_sampler_view_destroy; + glhd_pipe->base.get_transfer = galahad_context_get_transfer; + glhd_pipe->base.transfer_destroy = galahad_context_transfer_destroy; + glhd_pipe->base.transfer_map = galahad_context_transfer_map; + glhd_pipe->base.transfer_unmap = galahad_context_transfer_unmap; + glhd_pipe->base.transfer_flush_region = galahad_context_transfer_flush_region; + glhd_pipe->base.transfer_inline_write = galahad_context_transfer_inline_write; + + glhd_pipe->pipe = pipe; + + return &glhd_pipe->base; +} diff --git a/src/gallium/drivers/identity/id_drm.c b/src/gallium/drivers/galahad/glhd_context.h index 15d01519f8..4e71753ac3 100644 --- a/src/gallium/drivers/identity/id_drm.c +++ b/src/gallium/drivers/galahad/glhd_context.h @@ -25,69 +25,40 @@ * **************************************************************************/ -#include "state_tracker/drm_api.h" +#ifndef GLHD_CONTEXT_H +#define GLHD_CONTEXT_H -#include 
"util/u_memory.h" -#include "id_drm.h" -#include "id_screen.h" -#include "id_public.h" +#include <stdio.h> -struct identity_drm_api -{ - struct drm_api base; +#include "pipe/p_state.h" +#include "pipe/p_context.h" - struct drm_api *api; -}; -static INLINE struct identity_drm_api * -identity_drm_api(struct drm_api *_api) -{ - return (struct identity_drm_api *)_api; -} +struct galahad_context { + struct pipe_context base; /**< base class */ -static struct pipe_screen * -identity_drm_create_screen(struct drm_api *_api, int fd) -{ - struct identity_drm_api *id_api = identity_drm_api(_api); - struct drm_api *api = id_api->api; - struct pipe_screen *screen; + struct pipe_context *pipe; +}; - screen = api->create_screen(api, fd); - return identity_screen_create(screen); -} +struct pipe_context * +galahad_context_create(struct pipe_screen *screen, struct pipe_context *pipe); -static void -identity_drm_destroy(struct drm_api *_api) -{ - struct identity_drm_api *id_api = identity_drm_api(_api); - struct drm_api *api = id_api->api; - api->destroy(api); - FREE(id_api); -} - -struct drm_api * -identity_drm_create(struct drm_api *api) +static INLINE struct galahad_context * +galahad_context(struct pipe_context *pipe) { - struct identity_drm_api *id_api; - - if (!api) - goto error; - - id_api = CALLOC_STRUCT(identity_drm_api); - - if (!id_api) - goto error; + return (struct galahad_context *)pipe; +} - id_api->base.name = api->name; - id_api->base.driver_name = api->driver_name; - id_api->base.create_screen = identity_drm_create_screen; - id_api->base.destroy = identity_drm_destroy; - id_api->api = api; +#define glhd_warn(...) \ +do { \ + fprintf(stderr, "galahad: %s: ", __FUNCTION__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ +} while (0) - return &id_api->base; +#define glhd_error(...) 
\ + glhd_warn(__VA_ARGS__); -error: - return api; -} +#endif /* GLHD_CONTEXT_H */ diff --git a/src/gallium/drivers/galahad/glhd_objects.c b/src/gallium/drivers/galahad/glhd_objects.c new file mode 100644 index 0000000000..6c5a21ae70 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_objects.c @@ -0,0 +1,187 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_inlines.h" +#include "util/u_memory.h" + +#include "glhd_screen.h" +#include "glhd_objects.h" +#include "glhd_context.h" + + + +struct pipe_resource * +galahad_resource_create(struct galahad_screen *glhd_screen, + struct pipe_resource *resource) +{ + struct galahad_resource *glhd_resource; + + if(!resource) + goto error; + + assert(resource->screen == glhd_screen->screen); + + glhd_resource = CALLOC_STRUCT(galahad_resource); + if(!glhd_resource) + goto error; + + memcpy(&glhd_resource->base, resource, sizeof(struct pipe_resource)); + + pipe_reference_init(&glhd_resource->base.reference, 1); + glhd_resource->base.screen = &glhd_screen->base; + glhd_resource->resource = resource; + + return &glhd_resource->base; + +error: + pipe_resource_reference(&resource, NULL); + return NULL; +} + +void +galahad_resource_destroy(struct galahad_resource *glhd_resource) +{ + pipe_resource_reference(&glhd_resource->resource, NULL); + FREE(glhd_resource); +} + + +struct pipe_surface * +galahad_surface_create(struct galahad_resource *glhd_resource, + struct pipe_surface *surface) +{ + struct galahad_surface *glhd_surface; + + if(!surface) + goto error; + + assert(surface->texture == glhd_resource->resource); + + glhd_surface = CALLOC_STRUCT(galahad_surface); + if(!glhd_surface) + goto error; + + memcpy(&glhd_surface->base, surface, sizeof(struct pipe_surface)); + + pipe_reference_init(&glhd_surface->base.reference, 1); + glhd_surface->base.texture = NULL; + pipe_resource_reference(&glhd_surface->base.texture, &glhd_resource->base); + glhd_surface->surface = surface; + + return &glhd_surface->base; + +error: + pipe_surface_reference(&surface, NULL); + return NULL; +} + +void +galahad_surface_destroy(struct galahad_surface *glhd_surface) +{ + pipe_resource_reference(&glhd_surface->base.texture, NULL); + pipe_surface_reference(&glhd_surface->surface, NULL); + FREE(glhd_surface); +} + + 
+struct pipe_sampler_view * +galahad_sampler_view_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, + struct pipe_sampler_view *view) +{ + struct galahad_sampler_view *glhd_view; + + if (!view) + goto error; + + assert(view->texture == glhd_resource->resource); + + glhd_view = CALLOC_STRUCT(galahad_sampler_view); + + glhd_view->base = *view; + glhd_view->base.reference.count = 1; + glhd_view->base.texture = NULL; + pipe_resource_reference(&glhd_view->base.texture, glhd_resource->resource); + glhd_view->base.context = glhd_context->pipe; + glhd_view->sampler_view = view; + + return &glhd_view->base; +error: + return NULL; +} + +void +galahad_sampler_view_destroy(struct galahad_context *glhd_context, + struct galahad_sampler_view *glhd_view) +{ + pipe_resource_reference(&glhd_view->base.texture, NULL); + glhd_context->pipe->sampler_view_destroy(glhd_context->pipe, + glhd_view->sampler_view); + FREE(glhd_view); +} + + +struct pipe_transfer * +galahad_transfer_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, + struct pipe_transfer *transfer) +{ + struct galahad_transfer *glhd_transfer; + + if(!transfer) + goto error; + + assert(transfer->resource == glhd_resource->resource); + + glhd_transfer = CALLOC_STRUCT(galahad_transfer); + if(!glhd_transfer) + goto error; + + memcpy(&glhd_transfer->base, transfer, sizeof(struct pipe_transfer)); + + glhd_transfer->base.resource = NULL; + glhd_transfer->transfer = transfer; + + pipe_resource_reference(&glhd_transfer->base.resource, &glhd_resource->base); + assert(glhd_transfer->base.resource == &glhd_resource->base); + + return &glhd_transfer->base; + +error: + glhd_context->pipe->transfer_destroy(glhd_context->pipe, transfer); + return NULL; +} + +void +galahad_transfer_destroy(struct galahad_context *glhd_context, + struct galahad_transfer *glhd_transfer) +{ + pipe_resource_reference(&glhd_transfer->base.resource, NULL); + 
glhd_context->pipe->transfer_destroy(glhd_context->pipe, + glhd_transfer->transfer); + FREE(glhd_transfer); +} diff --git a/src/gallium/drivers/galahad/glhd_objects.h b/src/gallium/drivers/galahad/glhd_objects.h new file mode 100644 index 0000000000..935803915d --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_objects.h @@ -0,0 +1,175 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef GLHD_OBJECTS_H +#define GLHD_OBJECTS_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "glhd_screen.h" + +struct galahad_context; + + +struct galahad_resource +{ + struct pipe_resource base; + + struct pipe_resource *resource; +}; + + +struct galahad_sampler_view +{ + struct pipe_sampler_view base; + + struct pipe_sampler_view *sampler_view; +}; + + +struct galahad_surface +{ + struct pipe_surface base; + + struct pipe_surface *surface; +}; + + +struct galahad_transfer +{ + struct pipe_transfer base; + + struct pipe_transfer *transfer; +}; + + +static INLINE struct galahad_resource * +galahad_resource(struct pipe_resource *_resource) +{ + if(!_resource) + return NULL; + (void)galahad_screen(_resource->screen); + return (struct galahad_resource *)_resource; +} + +static INLINE struct galahad_sampler_view * +galahad_sampler_view(struct pipe_sampler_view *_sampler_view) +{ + if (!_sampler_view) { + return NULL; + } + return (struct galahad_sampler_view *)_sampler_view; +} + +static INLINE struct galahad_surface * +galahad_surface(struct pipe_surface *_surface) +{ + if(!_surface) + return NULL; + (void)galahad_resource(_surface->texture); + return (struct galahad_surface *)_surface; +} + +static INLINE struct galahad_transfer * +galahad_transfer(struct pipe_transfer *_transfer) +{ + if(!_transfer) + return NULL; + (void)galahad_resource(_transfer->resource); + return (struct galahad_transfer *)_transfer; +} + +static INLINE struct pipe_resource * +galahad_resource_unwrap(struct pipe_resource *_resource) +{ + if(!_resource) + return NULL; + return galahad_resource(_resource)->resource; +} + +static INLINE struct pipe_sampler_view * +galahad_sampler_view_unwrap(struct pipe_sampler_view *_sampler_view) +{ + if (!_sampler_view) { + return NULL; + } + return galahad_sampler_view(_sampler_view)->sampler_view; +} + +static INLINE struct pipe_surface * 
+galahad_surface_unwrap(struct pipe_surface *_surface) +{ + if(!_surface) + return NULL; + return galahad_surface(_surface)->surface; +} + +static INLINE struct pipe_transfer * +galahad_transfer_unwrap(struct pipe_transfer *_transfer) +{ + if(!_transfer) + return NULL; + return galahad_transfer(_transfer)->transfer; +} + + +struct pipe_resource * +galahad_resource_create(struct galahad_screen *glhd_screen, + struct pipe_resource *resource); + +void +galahad_resource_destroy(struct galahad_resource *glhd_resource); + +struct pipe_surface * +galahad_surface_create(struct galahad_resource *glhd_resource, + struct pipe_surface *surface); + +void +galahad_surface_destroy(struct galahad_surface *glhd_surface); + +struct pipe_sampler_view * +galahad_sampler_view_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, + struct pipe_sampler_view *view); + +void +galahad_sampler_view_destroy(struct galahad_context *glhd_context, + struct galahad_sampler_view *glhd_sampler_view); + +struct pipe_transfer * +galahad_transfer_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, + struct pipe_transfer *transfer); + +void +galahad_transfer_destroy(struct galahad_context *glhd_context, + struct galahad_transfer *glhd_transfer); + + +#endif /* GLHD_OBJECTS_H */ diff --git a/src/gallium/drivers/identity/id_drm.h b/src/gallium/drivers/galahad/glhd_public.h index cf2ad2ce07..77a380196a 100644 --- a/src/gallium/drivers/identity/id_drm.h +++ b/src/gallium/drivers/galahad/glhd_public.h @@ -25,11 +25,13 @@ * **************************************************************************/ -#ifndef ID_DRM_H -#define ID_DRM_H +#ifndef GLHD_PUBLIC_H +#define GLHD_PUBLIC_H -struct drm_api; +struct pipe_screen; +struct pipe_context; -struct drm_api* identity_drm_create(struct drm_api *api); +struct pipe_screen * +galahad_screen_create(struct pipe_screen *screen); -#endif /* ID_DRM_H */ +#endif /* GLHD_PUBLIC_H */ diff --git 
a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c new file mode 100644 index 0000000000..4117485702 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_screen.c @@ -0,0 +1,334 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * 2010 Corbin Simpson <MostAwesomeDude@gmail.com> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_screen.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" + +#include "glhd_public.h" +#include "glhd_screen.h" +#include "glhd_context.h" +#include "glhd_objects.h" + +DEBUG_GET_ONCE_BOOL_OPTION(galahad, "GALLIUM_GALAHAD", FALSE) + +static void +galahad_screen_destroy(struct pipe_screen *_screen) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + screen->destroy(screen); + + FREE(glhd_screen); +} + +static const char * +galahad_screen_get_name(struct pipe_screen *_screen) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_name(screen); +} + +static const char * +galahad_screen_get_vendor(struct pipe_screen *_screen) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_vendor(screen); +} + +static int +galahad_screen_get_param(struct pipe_screen *_screen, + enum pipe_cap param) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_param(screen, + param); +} + +static float +galahad_screen_get_paramf(struct pipe_screen *_screen, + enum pipe_cap param) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_paramf(screen, + param); +} + +static boolean +galahad_screen_is_format_supported(struct pipe_screen *_screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned tex_usage, + unsigned geom_flags) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + if (target >= PIPE_MAX_TEXTURE_TYPES) { + glhd_warn("Received bogus texture 
target %d", target); + } + + return screen->is_format_supported(screen, + format, + target, + sample_count, + tex_usage, + geom_flags); +} + +static struct pipe_context * +galahad_screen_context_create(struct pipe_screen *_screen, + void *priv) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_context *result; + + result = screen->context_create(screen, priv); + if (result) + return galahad_context_create(_screen, result); + return NULL; +} + +static struct pipe_resource * +galahad_screen_resource_create(struct pipe_screen *_screen, + const struct pipe_resource *templat) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *result; + + result = screen->resource_create(screen, + templat); + + if (result) + return galahad_resource_create(glhd_screen, result); + return NULL; +} + +static struct pipe_resource * +galahad_screen_resource_from_handle(struct pipe_screen *_screen, + const struct pipe_resource *templ, + struct winsys_handle *handle) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *result; + + /* TODO trace call */ + + result = screen->resource_from_handle(screen, templ, handle); + + result = galahad_resource_create(galahad_screen(_screen), result); + + return result; +} + +static boolean +galahad_screen_resource_get_handle(struct pipe_screen *_screen, + struct pipe_resource *_resource, + struct winsys_handle *handle) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *resource = glhd_resource->resource; + + /* TODO trace call */ + + return screen->resource_get_handle(screen, resource, handle); +} + + + +static void 
+galahad_screen_resource_destroy(struct pipe_screen *screen, + struct pipe_resource *_resource) +{ + galahad_resource_destroy(galahad_resource(_resource)); +} + +static struct pipe_surface * +galahad_screen_get_tex_surface(struct pipe_screen *_screen, + struct pipe_resource *_resource, + unsigned face, + unsigned level, + unsigned zslice, + unsigned usage) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *resource = glhd_resource->resource; + struct pipe_surface *result; + + result = screen->get_tex_surface(screen, + resource, + face, + level, + zslice, + usage); + + if (result) + return galahad_surface_create(glhd_resource, result); + return NULL; +} + +static void +galahad_screen_tex_surface_destroy(struct pipe_surface *_surface) +{ + galahad_surface_destroy(galahad_surface(_surface)); +} + + + +static struct pipe_resource * +galahad_screen_user_buffer_create(struct pipe_screen *_screen, + void *ptr, + unsigned bytes, + unsigned usage) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *result; + + result = screen->user_buffer_create(screen, + ptr, + bytes, + usage); + + if (result) + return galahad_resource_create(glhd_screen, result); + return NULL; +} + + + +static void +galahad_screen_flush_frontbuffer(struct pipe_screen *_screen, + struct pipe_surface *_surface, + void *context_private) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct galahad_surface *glhd_surface = galahad_surface(_surface); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_surface *surface = glhd_surface->surface; + + screen->flush_frontbuffer(screen, + surface, + context_private); +} + +static void +galahad_screen_fence_reference(struct pipe_screen *_screen, + struct pipe_fence_handle **ptr, + 
struct pipe_fence_handle *fence) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + screen->fence_reference(screen, + ptr, + fence); +} + +static int +galahad_screen_fence_signalled(struct pipe_screen *_screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->fence_signalled(screen, + fence, + flags); +} + +static int +galahad_screen_fence_finish(struct pipe_screen *_screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->fence_finish(screen, + fence, + flags); +} + +struct pipe_screen * +galahad_screen_create(struct pipe_screen *screen) +{ + struct galahad_screen *glhd_screen; + + if (!debug_get_option_galahad()) + return screen; + + glhd_screen = CALLOC_STRUCT(galahad_screen); + if (!glhd_screen) { + return screen; + } + + glhd_screen->base.winsys = NULL; + + glhd_screen->base.destroy = galahad_screen_destroy; + glhd_screen->base.get_name = galahad_screen_get_name; + glhd_screen->base.get_vendor = galahad_screen_get_vendor; + glhd_screen->base.get_param = galahad_screen_get_param; + glhd_screen->base.get_paramf = galahad_screen_get_paramf; + glhd_screen->base.is_format_supported = galahad_screen_is_format_supported; + glhd_screen->base.context_create = galahad_screen_context_create; + glhd_screen->base.resource_create = galahad_screen_resource_create; + glhd_screen->base.resource_from_handle = galahad_screen_resource_from_handle; + glhd_screen->base.resource_get_handle = galahad_screen_resource_get_handle; + glhd_screen->base.resource_destroy = galahad_screen_resource_destroy; + glhd_screen->base.get_tex_surface = galahad_screen_get_tex_surface; + glhd_screen->base.tex_surface_destroy = 
galahad_screen_tex_surface_destroy; + glhd_screen->base.user_buffer_create = galahad_screen_user_buffer_create; + glhd_screen->base.flush_frontbuffer = galahad_screen_flush_frontbuffer; + glhd_screen->base.fence_reference = galahad_screen_fence_reference; + glhd_screen->base.fence_signalled = galahad_screen_fence_signalled; + glhd_screen->base.fence_finish = galahad_screen_fence_finish; + + glhd_screen->screen = screen; + + return &glhd_screen->base; +} diff --git a/src/gallium/drivers/trace/tr_drm.h b/src/gallium/drivers/galahad/glhd_screen.h index 845c66a32a..7862f4af2b 100644 --- a/src/gallium/drivers/trace/tr_drm.h +++ b/src/gallium/drivers/galahad/glhd_screen.h @@ -25,11 +25,24 @@ * **************************************************************************/ -#ifndef TR_DRM_H -#define TR_DRM_H +#ifndef GLHD_SCREEN_H +#define GLHD_SCREEN_H -struct drm_api; +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" -struct drm_api* trace_drm_create(struct drm_api *api); -#endif /* ID_DRM_H */ +struct galahad_screen { + struct pipe_screen base; + + struct pipe_screen *screen; +}; + + +static INLINE struct galahad_screen * +galahad_screen(struct pipe_screen *screen) +{ + return (struct galahad_screen *)screen; +} + +#endif /* GLHD_SCREEN_H */ diff --git a/src/gallium/drivers/i915/Makefile b/src/gallium/drivers/i915/Makefile index 2cefe70850..b3f387f933 100644 --- a/src/gallium/drivers/i915/Makefile +++ b/src/gallium/drivers/i915/Makefile @@ -15,7 +15,9 @@ C_SOURCES = \ i915_state_dynamic.c \ i915_state_derived.c \ i915_state_emit.c \ + i915_state_fpc.c \ i915_state_sampler.c \ + i915_state_static.c \ i915_screen.c \ i915_prim_emit.c \ i915_prim_vbuf.c \ diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript index d6e7a8dbd3..d4bf6fef13 100644 --- a/src/gallium/drivers/i915/SConscript +++ b/src/gallium/drivers/i915/SConscript @@ -24,9 +24,11 @@ i915 = env.ConvenienceLibrary( 'i915_state.c', 'i915_state_derived.c', 
'i915_state_dynamic.c', + 'i915_state_fpc.c', 'i915_state_emit.c', 'i915_state_immediate.c', 'i915_state_sampler.c', + 'i915_state_static.c', 'i915_surface.c', 'i915_resource.c', 'i915_resource_texture.c', diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index f0086695d1..c411b84ccd 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -30,6 +30,7 @@ #include "i915_batchbuffer.h" + #define BEGIN_BATCH(dwords, relocs) \ (i915_winsys_batchbuffer_check(i915->batch, dwords, relocs)) @@ -39,9 +40,14 @@ #define OUT_RELOC(buf, usage, offset) \ i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset) -#define FLUSH_BATCH(fence) do { \ - i915_winsys_batchbuffer_flush(i915->batch, fence); \ - i915->hardware_dirty = ~0; \ -} while (0) +#define FLUSH_BATCH(fence) \ + i915_flush(i915, fence) + + +/************************************************************************ + * i915_flush.c + */ +void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence); + #endif diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index 27ccaa6b1f..c1cd314e7b 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -30,6 +30,8 @@ #include "i915_winsys.h" +struct i915_context; + static INLINE boolean i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch, size_t dwords, @@ -77,11 +79,4 @@ i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch, return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset); } -static INLINE void -i915_winsys_batchbuffer_flush(struct i915_winsys_batchbuffer *batch, - struct pipe_fence_handle **fence) -{ - batch->iws->batchbuffer_flush(batch, fence); -} - #endif diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index c5b5979bf9..cdf20c0055 100644 --- a/src/gallium/drivers/i915/i915_blit.c 
+++ b/src/gallium/drivers/i915/i915_blit.c @@ -31,7 +31,6 @@ #include "i915_batch.h" #include "i915_debug.h" -#define FILE_DEBUG_FLAG DEBUG_BLIT void i915_fill_blit(struct i915_context *i915, @@ -47,10 +46,8 @@ i915_fill_blit(struct i915_context *i915, unsigned BR13, CMD; - I915_DBG(i915, - "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, - dst_buffer, dst_pitch, dst_offset, x, y, w, h); + I915_DBG(DBG_BLIT, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); switch (cpp) { case 1: @@ -79,7 +76,6 @@ i915_fill_blit(struct i915_context *i915, OUT_BATCH(((y + h) << 16) | (x + w)); OUT_RELOC(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); OUT_BATCH(color); - FLUSH_BATCH(NULL); } void @@ -100,11 +96,11 @@ i915_copy_blit(struct i915_context *i915, int dst_x2 = dst_x + w; - I915_DBG(i915, - "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, - src_buffer, src_pitch, src_offset, src_x, src_y, - dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + I915_DBG(DBG_BLIT, + "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_offset, src_x, src_y, + dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); switch (cpp) { case 1: @@ -146,5 +142,4 @@ i915_copy_blit(struct i915_context *i915, OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(((int) src_pitch & 0xffff)); OUT_RELOC(src_buffer, I915_USAGE_2D_SOURCE, src_offset); - FLUSH_BATCH(NULL); } diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index acc0ffe037..b210cb130d 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -237,8 +237,6 @@ struct i915_context struct i915_state current; unsigned hardware_dirty; - - unsigned debug; }; /* A flag for each state_tracker state object: @@ -318,8 +316,6 @@ struct pipe_context *i915_create_context(struct pipe_screen *screen, void *priv); - - 
/*********************************************************************** * Inline conversion functions. These are better-typed than the * macros used previously: @@ -331,5 +327,4 @@ i915_context( struct pipe_context *pipe ) } - #endif diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index 663fac3055..57d3390dea 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -27,11 +27,37 @@ #include "i915_reg.h" #include "i915_context.h" +#include "i915_screen.h" #include "i915_debug.h" +#include "i915_debug_private.h" #include "i915_batch.h" #include "util/u_debug.h" + +static const struct debug_named_value debug_options[] = { + {"blit", DBG_BLIT, "Print when using the 2d blitter"}, + {"emit", DBG_EMIT, "State emit information"}, + {"atoms", DBG_ATOMS, "Print dirty state atoms"}, + {"flush", DBG_FLUSH, "Flushing information"}, + {"texture", DBG_TEXTURE, "Texture information"}, + {"constants", DBG_CONSTANTS, "Constant buffers"}, + DEBUG_NAMED_VALUE_END +}; + +unsigned i915_debug = 0; + +void i915_debug_init(struct i915_screen *screen) +{ + i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0); +} + + + +/*********************************************************************** + * Batchbuffer dumping + */ + static void PRINTF( struct debug_stream *stream, @@ -896,3 +922,66 @@ i915_dump_batchbuffer( struct i915_winsys_batchbuffer *batch ) } + +/*********************************************************************** + * Dirty state atom dumping + */ + +void +i915_dump_dirty(struct i915_context *i915, const char *func) +{ + struct { + unsigned dirty; + const char *name; + } l[] = { + {I915_NEW_VIEWPORT, "viewport"}, + {I915_NEW_RASTERIZER, "rasterizer"}, + {I915_NEW_FS, "fs"}, + {I915_NEW_BLEND, "blend"}, + {I915_NEW_CLIP, "clip"}, + {I915_NEW_SCISSOR, "scissor"}, + {I915_NEW_STIPPLE, "stipple"}, + {I915_NEW_FRAMEBUFFER, "framebuffer"}, + {I915_NEW_ALPHA_TEST, "alpha_test"}, + 
{I915_NEW_DEPTH_STENCIL, "depth_stencil"}, + {I915_NEW_SAMPLER, "sampler"}, + {I915_NEW_SAMPLER_VIEW, "sampler_view"}, + {I915_NEW_CONSTANTS, "constants"}, + {I915_NEW_VBO, "vbo"}, + {I915_NEW_VS, "vs"}, + {0, NULL}, + }; + int i; + + debug_printf("%s: ", func); + for (i = 0; l[i].name; i++) + if (i915->dirty & l[i].dirty) + debug_printf("%s ", l[i].name); + debug_printf("\n"); +} + +void +i915_dump_hardware_dirty(struct i915_context *i915, const char *func) +{ + struct { + unsigned dirty; + const char *name; + } l[] = { + {I915_HW_STATIC, "static"}, + {I915_HW_DYNAMIC, "dynamic"}, + {I915_HW_SAMPLER, "sampler"}, + {I915_HW_MAP, "map"}, + {I915_HW_PROGRAM, "program"}, + {I915_HW_CONSTANTS, "constants"}, + {I915_HW_IMMEDIATE, "immediate"}, + {I915_HW_INVARIENT, "invarient"}, + {0, NULL}, + }; + int i; + + debug_printf("%s: ", func); + for (i = 0; l[i].name; i++) + if (i915->hardware_dirty & l[i].dirty) + debug_printf("%s ", l[i].name); + debug_printf("\n"); +} diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h index 67b8d9c2f6..fa60799d0c 100644 --- a/src/gallium/drivers/i915/i915_debug.h +++ b/src/gallium/drivers/i915/i915_debug.h @@ -26,89 +26,56 @@ **************************************************************************/ /* Authors: Keith Whitwell <keith@tungstengraphics.com> + * Jakob Bornecrantz <wallbraker@gmail.com> */ #ifndef I915_DEBUG_H #define I915_DEBUG_H -#include <stdarg.h> +#include "util/u_debug.h" +struct i915_screen; struct i915_context; +struct i915_winsys_batchbuffer; -struct debug_stream -{ - unsigned offset; /* current gtt offset */ - char *ptr; /* pointer to gtt offset zero */ - char *end; /* pointer to gtt offset zero */ - unsigned print_addresses; -}; - - -/* Internal functions - */ -void i915_disassemble_program(struct debug_stream *stream, - const unsigned *program, unsigned sz); - -void i915_print_ureg(const char *msg, unsigned ureg); - - -#define DEBUG_BATCH 0x1 -#define DEBUG_BLIT 0x2 -#define 
DEBUG_BUFFER 0x4 -#define DEBUG_CONSTANTS 0x8 -#define DEBUG_CONTEXT 0x10 -#define DEBUG_DRAW 0x20 -#define DEBUG_DYNAMIC 0x40 -#define DEBUG_FLUSH 0x80 -#define DEBUG_MAP 0x100 -#define DEBUG_PROGRAM 0x200 -#define DEBUG_REGIONS 0x400 -#define DEBUG_SAMPLER 0x800 -#define DEBUG_STATIC 0x1000 -#define DEBUG_SURFACE 0x2000 -#define DEBUG_WINSYS 0x4000 - -#include "pipe/p_compiler.h" +#define DBG_BLIT 0x1 +#define DBG_EMIT 0x2 +#define DBG_ATOMS 0x4 +#define DBG_FLUSH 0x8 +#define DBG_TEXTURE 0x10 +#define DBG_CONSTANTS 0x20 -#if defined(DEBUG) && defined(FILE_DEBUG_FLAG) +extern unsigned i915_debug; -#include "util/u_simple_screen.h" +#ifdef DEBUG +static INLINE boolean +I915_DBG_ON(unsigned flags) +{ + return i915_debug & flags; +} static INLINE void -I915_DBG( - struct i915_context *i915, - const char *fmt, - ... ) +I915_DBG(unsigned flags, const char *fmt, ...) { - if ((i915)->debug & FILE_DEBUG_FLAG) { + if (I915_DBG_ON(flags)) { va_list args; - va_start( args, fmt ); - debug_vprintf( fmt, args ); - va_end( args ); + va_start(args, fmt); + debug_vprintf(fmt, args); + va_end(args); } } - #else - -static INLINE void -I915_DBG( - struct i915_context *i915, - const char *fmt, - ... ) -{ - (void) i915; - (void) fmt; -} - +#define I915_DBG_ON(flags) (0) +static INLINE void I915_DBG(unsigned flags, const char *fmt, ...) 
{} #endif +void i915_debug_init(struct i915_screen *i915); -struct i915_winsys_batchbuffer; - -void i915_dump_batchbuffer( struct i915_winsys_batchbuffer *i915 ); +void i915_dump_batchbuffer(struct i915_winsys_batchbuffer *i915); -void i915_debug_init( struct i915_context *i915 ); +void i915_dump_dirty(struct i915_context *i915, const char *func); +void i915_dump_hardware_dirty(struct i915_context *i915, const char *func); #endif diff --git a/src/gallium/drivers/i915/i915_debug_fp.c b/src/gallium/drivers/i915/i915_debug_fp.c index f41c51f299..50f49c540f 100644 --- a/src/gallium/drivers/i915/i915_debug_fp.c +++ b/src/gallium/drivers/i915/i915_debug_fp.c @@ -28,6 +28,7 @@ #include "i915_reg.h" #include "i915_debug.h" +#include "i915_debug_private.h" #include "util/u_debug.h" diff --git a/src/gallium/drivers/i915/i915_debug_private.h b/src/gallium/drivers/i915/i915_debug_private.h new file mode 100644 index 0000000000..b3668d0848 --- /dev/null +++ b/src/gallium/drivers/i915/i915_debug_private.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef I915_DEBUG_PRIVATE_H +#define I915_DEBUG_PRIVATE_H + +struct debug_stream +{ + unsigned offset; /* current gtt offset */ + char *ptr; /* pointer to gtt offset zero */ + char *end; /* pointer to gtt offset zero */ + unsigned print_addresses; +}; + +void i915_disassemble_program(struct debug_stream *stream, + const unsigned *program, unsigned sz); + +#endif diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 1582168eba..a2c70b1199 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -35,11 +35,12 @@ #include "i915_context.h" #include "i915_reg.h" #include "i915_batch.h" +#include "i915_debug.h" -static void i915_flush( struct pipe_context *pipe, - unsigned flags, - struct pipe_fence_handle **fence ) +static void i915_flush_pipe( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) { struct i915_context *i915 = i915_context(pipe); @@ -66,21 +67,31 @@ static void i915_flush( struct pipe_context *pipe, } #endif -#if 0 if (i915->batch->map == i915->batch->ptr) { return; } -#endif /* If there are no flags, just flush pending commands to hardware: */ FLUSH_BATCH(fence); i915->vbo_flushed = 1; -} - + I915_DBG(DBG_FLUSH, "%s: #####\n", __FUNCTION__); +} void i915_init_flush_functions( struct i915_context *i915 ) { 
- i915->base.flush = i915_flush; + i915->base.flush = i915_flush_pipe; +} + +/** + * Here we handle all the notifications that need to go out on a flush. + * XXX might move above function to i915_pipe_flush.c and leave this here. + */ +void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) +{ + struct i915_winsys_batchbuffer *batch = i915->batch; + + batch->iws->batchbuffer_flush(batch, fence); + i915->hardware_dirty = ~0; } diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index f8665acbe1..bd046bd905 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -52,8 +52,7 @@ #include "i915_state.h" -#undef VBUF_USE_FIFO -#undef VBUF_MAP_BUFFER +#define VBUF_MAP_BUFFER /** * Primitive renderer for i915. @@ -79,23 +78,18 @@ struct i915_vbuf_render { struct i915_winsys_buffer *vbo; size_t vbo_size; /**< current size of allocated buffer */ size_t vbo_alloc_size; /**< minimum buffer size to allocate */ - size_t vbo_offset; + size_t vbo_hw_offset; /**< offset that we program the hardware with */ + size_t vbo_sw_offset; /**< offset that we work with */ + size_t vbo_index; /**< index offset to be added to all indices */ void *vbo_ptr; size_t vbo_max_used; + size_t vbo_max_index; /**< max_index recorded by the last unmap_vertices call */ #ifndef VBUF_MAP_BUFFER size_t map_used_start; size_t map_used_end; size_t map_size; #endif - -#ifdef VBUF_USE_FIFO - /* Stuff for the pool */ - struct util_fifo *pool_fifo; - unsigned pool_used; - unsigned pool_buffer_size; - boolean pool_not_used; -#endif }; @@ -109,6 +103,35 @@ i915_vbuf_render(struct vbuf_render *render) return (struct i915_vbuf_render *)render; } +/** + * If vbo state differs between renderer and context + * push state to the context. This function pushes + * hw_offset to i915->vbo_offset and vbo to i915->vbo. + * + * Side effects: + * May update the context's vbo_offset and vbo fields. 
+ */ +static void +i915_vbuf_update_vbo_state(struct vbuf_render *render) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + + if (i915->vbo != i915_render->vbo || + i915->vbo_offset != i915_render->vbo_hw_offset) { + i915->vbo = i915_render->vbo; + i915->vbo_offset = i915_render->vbo_hw_offset; + i915->dirty |= I915_NEW_VBO; + } +} + +/** + * Callback exported to the draw module. + * Returns the current vertex_info. + * + * Side effects: + * If state is dirty update derived state. + */ static const struct vertex_info * i915_vbuf_render_get_vertex_info(struct vbuf_render *render) { @@ -123,12 +146,18 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render) return &i915->current.vertex_info; } +/** + * Reserve space in the vbo for vertices. + * + * Side effects: + * None. + */ static boolean i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) { struct i915_context *i915 = i915_render->i915; - if (i915_render->vbo_size < size + i915_render->vbo_offset) + if (i915_render->vbo_size < size + i915_render->vbo_sw_offset) return FALSE; if (i915->vbo_flushed) @@ -137,28 +166,28 @@ i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) return TRUE; } +/** + * Allocate a new vbo buffer should there not be enough space for + * the requested number of vertices by the draw module. + * + * Side effects: + * Updates hw_offset, sw_offset, index and allocates a new buffer. 
+ */ static void i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) { struct i915_context *i915 = i915_render->i915; struct i915_winsys *iws = i915->iws; - if (i915_render->vbo) { -#ifdef VBUF_USE_FIFO - if (i915_render->pool_not_used) - iws->buffer_destroy(iws, i915_render->vbo); - else - u_fifo_add(i915_render->pool_fifo, i915_render->vbo); - i915_render->vbo = NULL; -#else + if (i915_render->vbo) iws->buffer_destroy(iws, i915_render->vbo); -#endif - } i915->vbo_flushed = 0; i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); - i915_render->vbo_offset = 0; + i915_render->vbo_hw_offset = 0; + i915_render->vbo_sw_offset = 0; + i915_render->vbo_index = 0; #ifndef VBUF_MAP_BUFFER if (i915_render->vbo_size > i915_render->map_size) { @@ -168,52 +197,51 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) } #endif -#ifdef VBUF_USE_FIFO - if (i915_render->vbo_size != i915_render->pool_buffer_size) { - i915_render->pool_not_used = TRUE; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - I915_NEW_VERTEX); - } else { - i915_render->pool_not_used = FALSE; - - if (i915_render->pool_used >= 2) { - FLUSH_BATCH(NULL); - i915->vbo_flushed = 0; - i915_render->pool_used = 0; - } - u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo); - } -#else i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, I915_NEW_VERTEX); -#endif } +/** + * Callback exported to the draw module. + * + * Side effects: + * Updates hw_offset, sw_offset, index and may allocate + * a new buffer. Also may update the vbo state + * on the i915 context. 
+ */ static boolean i915_vbuf_render_allocate_vertices(struct vbuf_render *render, ushort vertex_size, ushort nr_vertices) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; size_t size = (size_t)vertex_size * (size_t)nr_vertices; + size_t offset; - /* FIXME: handle failure */ - assert(!i915->vbo); + /* + * Align sw_offset with first multiple of vertex size from hw_offset. + * Set index to be the multiples from hw_offset to sw_offset. + * i915_vbuf_render_new_buf will reset index, sw_offset, hw_offset + * when it allocates a new buffer; this is correct. + */ + { + offset = i915_render->vbo_sw_offset - i915_render->vbo_hw_offset; + offset = util_align_npot(offset, vertex_size); + i915_render->vbo_sw_offset = i915_render->vbo_hw_offset + offset; + i915_render->vbo_index = offset / vertex_size; + } - if (!i915_vbuf_render_reserve(i915_render, size)) { -#ifdef VBUF_USE_FIFO - /* incase we flushed reset the number of pool buffers used */ - if (i915->vbo_flushed) - i915_render->pool_used = 0; -#endif + if (!i915_vbuf_render_reserve(i915_render, size)) i915_vbuf_render_new_buf(i915_render, size); - } + + /* + * If a new buffer has been allocated sw_offset, + * hw_offset & index will be reset by new_buf + */ i915_render->vertex_size = vertex_size; - i915->vbo = i915_render->vbo; - i915->vbo_offset = i915_render->vbo_offset; - i915->dirty |= I915_NEW_VBO; + + i915_vbuf_update_vbo_state(render); if (!i915_render->vbo) return FALSE; @@ -232,7 +260,7 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render) #ifdef VBUF_MAP_BUFFER i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); - return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_offset; + return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_sw_offset; #else (void)iws; return (unsigned char *)i915_render->vbo_ptr; @@ -248,6 +276,7 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render, struct i915_context 
*i915 = i915_render->i915; struct i915_winsys *iws = i915->iws; + i915_render->vbo_max_index = max_index; i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1)); #ifdef VBUF_MAP_BUFFER iws->buffer_unmap(iws, i915_render->vbo); @@ -255,13 +284,36 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render, i915_render->map_used_start = i915_render->vertex_size * min_index; i915_render->map_used_end = i915_render->vertex_size * (max_index + 1); iws->buffer_write(iws, i915_render->vbo, - i915_render->map_used_start + i915_render->vbo_offset, + i915_render->map_used_start + i915_render->vbo_sw_offset, i915_render->map_used_end - i915_render->map_used_start, (unsigned char *)i915_render->vbo_ptr + i915_render->map_used_start); #endif } +/** + * Ensure that the given max_index is not larger than ushort max. + * If it is larger than ushort max it advances the hw_offset to the + * same position in the vbo as sw_offset and sets index to zero. + * + * Side effects: + * On failure update hw_offset and index. 
+ */ +static void +i915_vbuf_ensure_index_bounds(struct vbuf_render *render, + unsigned max_index) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + + if (max_index + i915_render->vbo_index < ((1 << 17) - 1)) + return; + + i915_render->vbo_hw_offset = i915_render->vbo_sw_offset; + i915_render->vbo_index = 0; + + i915_vbuf_update_vbo_state(render); +} + static boolean i915_vbuf_render_set_primitive(struct vbuf_render *render, unsigned prim) @@ -327,7 +379,9 @@ draw_arrays_generate_indices(struct vbuf_render *render, struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; unsigned i; - unsigned end = start + nr; + unsigned end = start + nr + i915_render->vbo_index; + start += i915_render->vbo_index; + switch(type) { case 0: for (i = start; i+1 < end; i += 2) @@ -391,16 +445,18 @@ draw_arrays_fallback(struct vbuf_render *render, struct i915_context *i915 = i915_render->i915; unsigned nr_indices; + nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback); + if (!nr_indices) + return; + + i915_vbuf_ensure_index_bounds(render, start + nr_indices); + if (i915->dirty) i915_update_derived(i915); if (i915->hardware_dirty) i915_emit_hardware_state(i915); - nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback); - if (!nr_indices) - return; - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { FLUSH_BATCH(NULL); @@ -415,6 +471,7 @@ draw_arrays_fallback(struct vbuf_render *render, goto out; } } + OUT_BATCH(_3DPRIMITIVE | PRIM_INDIRECT | i915_render->hwprim | @@ -440,6 +497,9 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, return; } + i915_vbuf_ensure_index_bounds(render, start + nr); + start += i915_render->vbo_index; + if (i915->dirty) i915_update_derived(i915); @@ -485,35 +545,36 @@ draw_generate_indices(struct vbuf_render *render, struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; unsigned i; + unsigned o = 
i915_render->vbo_index; switch(type) { case 0: for (i = 0; i + 1 < nr_indices; i += 2) { - OUT_BATCH(indices[i] | indices[i+1] << 16); + OUT_BATCH((o+indices[i]) | (o+indices[i+1]) << 16); } if (i < nr_indices) { - OUT_BATCH(indices[i]); + OUT_BATCH((o+indices[i])); } break; case PIPE_PRIM_LINE_LOOP: if (nr_indices >= 2) { for (i = 1; i < nr_indices; i++) - OUT_BATCH(indices[i-1] | indices[i] << 16); - OUT_BATCH(indices[i-1] | indices[0] << 16); + OUT_BATCH((o+indices[i-1]) | (o+indices[i]) << 16); + OUT_BATCH((o+indices[i-1]) | (o+indices[0]) << 16); } break; case PIPE_PRIM_QUADS: for (i = 0; i + 3 < nr_indices; i += 4) { - OUT_BATCH(indices[i+0] | indices[i+1] << 16); - OUT_BATCH(indices[i+3] | indices[i+1] << 16); - OUT_BATCH(indices[i+2] | indices[i+3] << 16); + OUT_BATCH((o+indices[i+0]) | (o+indices[i+1]) << 16); + OUT_BATCH((o+indices[i+3]) | (o+indices[i+1]) << 16); + OUT_BATCH((o+indices[i+2]) | (o+indices[i+3]) << 16); } break; case PIPE_PRIM_QUAD_STRIP: for (i = 0; i + 3 < nr_indices; i += 2) { - OUT_BATCH(indices[i+0] | indices[i+1] << 16); - OUT_BATCH(indices[i+3] | indices[i+2] << 16); - OUT_BATCH(indices[i+0] | indices[i+3] << 16); + OUT_BATCH((o+indices[i+0]) | (o+indices[i+1]) << 16); + OUT_BATCH((o+indices[i+3]) | (o+indices[i+2]) << 16); + OUT_BATCH((o+indices[i+0]) | (o+indices[i+3]) << 16); } break; default: @@ -558,6 +619,8 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, if (!nr_indices) return; + i915_vbuf_ensure_index_bounds(render, i915_render->vbo_max_index); + if (i915->dirty) i915_update_derived(i915); @@ -597,14 +660,15 @@ static void i915_vbuf_render_release_vertices(struct vbuf_render *render) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - - assert(i915->vbo); - i915_render->vbo_offset += i915_render->vbo_max_used; + i915_render->vbo_sw_offset += i915_render->vbo_max_used; i915_render->vbo_max_used = 0; - i915->vbo = NULL; - i915->dirty |= 
I915_NEW_VBO; + + /* + * Micro optimization, by calling update here we the offset change + * will be picked up on the next pipe_context::draw_*. + */ + i915_vbuf_update_vbo_state(render); } static void @@ -652,7 +716,8 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->vbo = NULL; i915_render->vbo_ptr = NULL; i915_render->vbo_size = 0; - i915_render->vbo_offset = 0; + i915_render->vbo_hw_offset = 0; + i915_render->vbo_sw_offset = 0; i915_render->vbo_alloc_size = i915_render->base.max_vertex_buffer_bytes * 4; #ifdef VBUF_USE_POOL diff --git a/src/gallium/drivers/i915/i915_public.h b/src/gallium/drivers/i915/i915_public.h new file mode 100644 index 0000000000..588654d608 --- /dev/null +++ b/src/gallium/drivers/i915/i915_public.h @@ -0,0 +1,13 @@ + +#ifndef I915_PUBLIC_H +#define I915_PUBLIC_H + +struct i915_winsys; +struct pipe_screen; + +/** + * Create i915 pipe_screen. + */ +struct pipe_screen * i915_screen_create(struct i915_winsys *iws); + +#endif diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index 17fcdee379..752ddaae7b 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -42,6 +42,7 @@ #include "i915_resource.h" #include "i915_screen.h" #include "i915_winsys.h" +#include "i915_debug.h" #define DEBUG_TEXTURES 0 @@ -800,12 +801,10 @@ i915_texture_create(struct pipe_screen *screen, ws->buffer_unmap(ws, tex->buffer); #endif -#if DEBUG_TEXTURES - debug_printf("%s: %p size %u, stride %u, blocks (%u, %u)\n", __func__, - tex, (unsigned int)tex_size, tex->stride, - tex->stride / util_format_get_blocksize(tex->b.b.format), - tex->total_nblocksy); -#endif + I915_DBG(DBG_TEXTURE, "%s: %p size %u, stride %u, blocks (%u, %u)\n", __func__, + tex, (unsigned int)tex_size, tex->stride, + tex->stride / util_format_get_blocksize(tex->b.b.format), + tex->total_nblocksy); return &tex->b.b; @@ -846,12 +845,18 @@ 
i915_texture_from_handle(struct pipe_screen * screen, tex->b.b.screen = screen; tex->stride = stride; + tex->total_nblocksy = align_nblocksy(tex->b.b.format, tex->b.b.height0, 8); i915_texture_set_level_info(tex, 0, 1); i915_texture_set_image_offset(tex, 0, 0, 0, 0); tex->buffer = buffer; + I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%ux%u)\n", __func__, + tex, tex->stride, + tex->stride / util_format_get_blocksize(tex->b.b.format), + tex->total_nblocksy); + return &tex->b.b; } diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index f82426520c..77345d5f71 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -31,11 +31,13 @@ #include "util/u_string.h" #include "i915_reg.h" +#include "i915_debug.h" #include "i915_context.h" #include "i915_screen.h" #include "i915_surface.h" #include "i915_resource.h" #include "i915_winsys.h" +#include "i915_public.h" /* @@ -330,5 +332,7 @@ i915_screen_create(struct i915_winsys *iws) i915_init_screen_resource_functions(is); i915_init_screen_surface_functions(is); + i915_debug_init(is); + return &is->base; } diff --git a/src/gallium/drivers/i915/i915_state.h b/src/gallium/drivers/i915/i915_state.h index 86c6b0027d..b4074dc35b 100644 --- a/src/gallium/drivers/i915/i915_state.h +++ b/src/gallium/drivers/i915/i915_state.h @@ -35,16 +35,22 @@ struct i915_context; struct i915_tracked_state { + const char *name; + void (*update)(struct i915_context *); unsigned dirty; - void (*update)( struct i915_context * ); }; -void i915_update_immediate( struct i915_context *i915 ); -void i915_update_dynamic( struct i915_context *i915 ); -void i915_update_derived( struct i915_context *i915 ); -void i915_update_samplers( struct i915_context *i915 ); -void i915_update_textures(struct i915_context *i915); +extern struct i915_tracked_state i915_update_vertex_layout; -void i915_emit_hardware_state( struct i915_context *i915 ); +extern struct i915_tracked_state 
i915_hw_samplers; +extern struct i915_tracked_state i915_hw_sampler_views; +extern struct i915_tracked_state i915_hw_immediate; +extern struct i915_tracked_state i915_hw_dynamic; +extern struct i915_tracked_state i915_hw_fs; +extern struct i915_tracked_state i915_hw_framebuffer; +extern struct i915_tracked_state i915_hw_constants; + +void i915_update_derived(struct i915_context *i915); +void i915_emit_hardware_state(struct i915_context *i915); #endif diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 4da46772b5..1d4026a214 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -32,15 +32,16 @@ #include "draw/draw_vertex.h" #include "i915_context.h" #include "i915_state.h" +#include "i915_debug.h" #include "i915_reg.h" -/** +/*********************************************************************** * Determine the hardware vertex layout. * Depends on vertex/fragment shader state. */ -static void calculate_vertex_layout( struct i915_context *i915 ) +static void calculate_vertex_layout(struct i915_context *i915) { const struct i915_fragment_shader *fs = i915->fs; const enum interp_mode colorInterp = i915->rasterizer->color_interp; @@ -146,37 +147,38 @@ static void calculate_vertex_layout( struct i915_context *i915 ) } } +struct i915_tracked_state i915_update_vertex_layout = { + "vertex_layout", + calculate_vertex_layout, + I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS +}; -/* Hopefully this will remain quite simple, otherwise need to pull in - * something like the state tracker mechanism. 
+/*********************************************************************** */ -void i915_update_derived( struct i915_context *i915 ) +static struct i915_tracked_state *atoms[] = { + &i915_update_vertex_layout, + &i915_hw_samplers, + &i915_hw_sampler_views, + &i915_hw_immediate, + &i915_hw_dynamic, + &i915_hw_fs, + &i915_hw_framebuffer, + &i915_hw_constants, + NULL, +}; + +void i915_update_derived(struct i915_context *i915) { - if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS)) - calculate_vertex_layout( i915 ); + int i; - if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_SAMPLER_VIEW)) - i915_update_samplers(i915); + if (I915_DBG_ON(DBG_ATOMS)) + i915_dump_dirty(i915, __FUNCTION__); - if (i915->dirty & I915_NEW_SAMPLER_VIEW) - i915_update_textures(i915); - - if (i915->dirty) - i915_update_immediate( i915 ); - - if (i915->dirty) - i915_update_dynamic( i915 ); - - if (i915->dirty & I915_NEW_FS) { - i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ - } - - /* HW emit currently references framebuffer state directly: - */ - if (i915->dirty & I915_NEW_FRAMEBUFFER) - i915->hardware_dirty |= I915_HW_STATIC; + for (i = 0; atoms[i]; i++) + if (atoms[i]->dirty & i915->dirty) + atoms[i]->update(i915); i915->dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c index 9c6723b391..d61a8c3407 100644 --- a/src/gallium/drivers/i915/i915_state_dynamic.c +++ b/src/gallium/drivers/i915/i915_state_dynamic.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ #include "i915_batch.h" @@ -30,14 +30,13 @@ #include "i915_context.h" #include "i915_reg.h" #include "i915_state.h" -#include "util/u_math.h" + #include "util/u_memory.h" #include "util/u_pack_color.h" -#define FILE_DEBUG_FLAG DEBUG_STATE /* State that we have chosen to store in the DYNAMIC segment of the - * i915 indirect state mechanism. + * i915 indirect state mechanism. * * Can't cache these in the way we do the static state, as there is no * start/size in the command packet, instead an 'end' value that gets @@ -47,13 +46,16 @@ * (active) state every time a 4kb boundary is crossed. 
*/ -static INLINE void set_dynamic_indirect( struct i915_context *i915, - unsigned offset, - const unsigned *src, - unsigned dwords ) +static INLINE void set_dynamic_indirect(struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords) { unsigned i; + if (!memcmp(src, &i915->current.dynamic[offset], dwords * 4)) + return; + for (i = 0; i < dwords; i++) i915->current.dynamic[offset + i] = src[i]; @@ -61,38 +63,41 @@ static INLINE void set_dynamic_indirect( struct i915_context *i915, } + /*********************************************************************** - * Modes4: stencil masks and logicop + * Modes4: stencil masks and logicop */ -static void upload_MODES4( struct i915_context *i915 ) +static void upload_MODES4(struct i915_context *i915) { unsigned modes4 = 0; - /* I915_NEW_STENCIL */ + /* I915_NEW_STENCIL + */ modes4 |= i915->depth_stencil->stencil_modes4; - /* I915_NEW_BLEND */ + + /* I915_NEW_BLEND + */ modes4 |= i915->blend->modes4; - /* Always, so that we know when state is in-active: + /* Always, so that we know when state is in-active: */ - set_dynamic_indirect( i915, - I915_DYNAMIC_MODES4, - &modes4, - 1 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_MODES4, + &modes4, + 1); } const struct i915_tracked_state i915_upload_MODES4 = { - I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, - upload_MODES4 + "MODES4", + upload_MODES4, + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL }; - /*********************************************************************** */ - -static void upload_BFO( struct i915_context *i915 ) +static void upload_BFO(struct i915_context *i915) { unsigned bfo[2]; bfo[0] = i915->depth_stencil->bfo[0]; @@ -101,88 +106,89 @@ static void upload_BFO( struct i915_context *i915 ) if (bfo[0] & BFO_ENABLE_STENCIL_REF) { bfo[0] |= i915->stencil_ref.ref_value[1] << BFO_STENCIL_REF_SHIFT; } - set_dynamic_indirect( i915, - I915_DYNAMIC_BFO_0, - &(bfo[0]), - 2 ); + + set_dynamic_indirect(i915, + I915_DYNAMIC_BFO_0, + &(bfo[0]), + 2); } const 
struct i915_tracked_state i915_upload_BFO = { - I915_NEW_DEPTH_STENCIL, - upload_BFO + "BFO", + upload_BFO, + I915_NEW_DEPTH_STENCIL }; + /*********************************************************************** */ - - -static void upload_BLENDCOLOR( struct i915_context *i915 ) +static void upload_BLENDCOLOR(struct i915_context *i915) { unsigned bc[2]; - memset( bc, 0, sizeof(bc) ); + memset(bc, 0, sizeof(bc)); - /* I915_NEW_BLEND {_COLOR} + /* I915_NEW_BLEND */ { const float *color = i915->blend_color.color; bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD; - bc[1] = pack_ui32_float4( color[0], - color[1], - color[2], - color[3] ); + bc[1] = pack_ui32_float4(color[0], + color[1], + color[2], + color[3]); } - set_dynamic_indirect( i915, - I915_DYNAMIC_BC_0, - bc, - 2 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_BC_0, + bc, + 2); } const struct i915_tracked_state i915_upload_BLENDCOLOR = { - I915_NEW_BLEND, - upload_BLENDCOLOR + "BLENDCOLOR", + upload_BLENDCOLOR, + I915_NEW_BLEND }; -/*********************************************************************** - */ -static void upload_IAB( struct i915_context *i915 ) +/*********************************************************************** + */ +static void upload_IAB(struct i915_context *i915) { unsigned iab = i915->blend->iab; - - set_dynamic_indirect( i915, - I915_DYNAMIC_IAB, - &iab, - 1 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_IAB, + &iab, + 1); } const struct i915_tracked_state i915_upload_IAB = { - I915_NEW_BLEND, - upload_IAB + "IAB", + upload_IAB, + I915_NEW_BLEND }; + /*********************************************************************** */ - - - -static void upload_DEPTHSCALE( struct i915_context *i915 ) +static void upload_DEPTHSCALE(struct i915_context *i915) { - set_dynamic_indirect( i915, - I915_DYNAMIC_DEPTHSCALE_0, - &(i915->rasterizer->ds[0].u), - 2 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_DEPTHSCALE_0, + &(i915->rasterizer->ds[0].u), + 2); } const struct i915_tracked_state i915_upload_DEPTHSCALE = 
{ - I915_NEW_RASTERIZER, - upload_DEPTHSCALE + "DEPTHSCALE", + upload_DEPTHSCALE, + I915_NEW_RASTERIZER }; @@ -196,10 +202,9 @@ const struct i915_tracked_state i915_upload_DEPTHSCALE = { * XXX: does stipple pattern need to be adjusted according to * the window position? * - * XXX: possibly need workaround for conform paths test. + * XXX: possibly need workaround for conform paths test. */ - -static void upload_STIPPLE( struct i915_context *i915 ) +static void upload_STIPPLE(struct i915_context *i915) { unsigned st[2]; @@ -210,7 +215,6 @@ static void upload_STIPPLE( struct i915_context *i915 ) */ st[1] |= i915->rasterizer->st; - /* I915_NEW_STIPPLE */ { @@ -225,73 +229,75 @@ static void upload_STIPPLE( struct i915_context *i915 ) /* Not sure what to do about fallbacks, so for now just dont: */ st[1] |= ((p[0] << 0) | - (p[1] << 4) | - (p[2] << 8) | - (p[3] << 12)); + (p[1] << 4) | + (p[2] << 8) | + (p[3] << 12)); } - - set_dynamic_indirect( i915, - I915_DYNAMIC_STP_0, - &st[0], - 2 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_STP_0, + &st[0], + 2); } - const struct i915_tracked_state i915_upload_STIPPLE = { - I915_NEW_RASTERIZER | I915_NEW_STIPPLE, - upload_STIPPLE + "STIPPLE", + upload_STIPPLE, + I915_NEW_RASTERIZER | I915_NEW_STIPPLE }; /*********************************************************************** - * Scissor. 
+ * Scissor enable */ static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) { - set_dynamic_indirect( i915, - I915_DYNAMIC_SC_ENA_0, - &(i915->rasterizer->sc[0]), - 1 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_SC_ENA_0, + &(i915->rasterizer->sc[0]), + 1); } const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { - I915_NEW_RASTERIZER, - upload_SCISSOR_ENABLE + "SCISSOR ENABLE", + upload_SCISSOR_ENABLE, + I915_NEW_RASTERIZER }; -static void upload_SCISSOR_RECT( struct i915_context *i915 ) +/*********************************************************************** + * Scissor rect + */ +static void upload_SCISSOR_RECT(struct i915_context *i915) { unsigned x1 = i915->scissor.minx; unsigned y1 = i915->scissor.miny; unsigned x2 = i915->scissor.maxx; unsigned y2 = i915->scissor.maxy; unsigned sc[3]; - + sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; sc[1] = (y1 << 16) | (x1 & 0xffff); sc[2] = (y2 << 16) | (x2 & 0xffff); - set_dynamic_indirect( i915, - I915_DYNAMIC_SC_RECT_0, - &sc[0], - 3 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_SC_RECT_0, + &sc[0], + 3); } - const struct i915_tracked_state i915_upload_SCISSOR_RECT = { - I915_NEW_SCISSOR, - upload_SCISSOR_RECT + "SCISSOR RECT", + upload_SCISSOR_RECT, + I915_NEW_SCISSOR }; - - - +/*********************************************************************** + */ static const struct i915_tracked_state *atoms[] = { &i915_upload_MODES4, &i915_upload_BFO, @@ -306,12 +312,17 @@ static const struct i915_tracked_state *atoms[] = { /* These will be dynamic indirect state commands, but for now just end * up on the batch buffer with everything else. 
*/ -void i915_update_dynamic( struct i915_context *i915 ) +static void update_dynamic(struct i915_context *i915) { int i; for (i = 0; i < Elements(atoms); i++) if (i915->dirty & atoms[i]->dirty) - atoms[i]->update( i915 ); + atoms[i]->update(i915); } +struct i915_tracked_state i915_hw_dynamic = { + "dynamic", + update_dynamic, + ~0 /* all state atoms, becuase we do internal checking */ +}; diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 22082fece8..7bb7893d93 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -29,6 +29,7 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_batch.h" +#include "i915_debug.h" #include "i915_reg.h" #include "i915_resource.h" @@ -111,15 +112,20 @@ i915_emit_hardware_state(struct i915_context *i915 ) 3 ) * 3/2; /* plus 50% margin */ -#if 0 - debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); -#endif - + uintptr_t save_ptr; + size_t save_relocs; + + if (I915_DBG_ON(DBG_ATOMS)) + i915_dump_hardware_dirty(i915, __FUNCTION__); + if(!BEGIN_BATCH(dwords, relocs)) { FLUSH_BATCH(NULL); assert(BEGIN_BATCH(dwords, relocs)); } + save_ptr = (uintptr_t)i915->batch->ptr; + save_relocs = i915->batch->relocs; + /* 14 dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_INVARIENT) { @@ -169,7 +175,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); OUT_BATCH(0); } - + /* 7 dwords, 1 relocs */ if (i915->hardware_dirty & I915_HW_IMMEDIATE) { @@ -195,7 +201,8 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); } - + +#if 01 /* I915_MAX_DYNAMIC dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_DYNAMIC) { @@ -204,7 +211,9 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(i915->current.dynamic[i]); } } - +#endif + +#if 01 /* 
8 dwords, 2 relocs */ if (i915->hardware_dirty & I915_HW_STATIC) { @@ -253,10 +262,10 @@ i915_emit_hardware_state(struct i915_context *i915 ) I915_USAGE_RENDER, depth_surface->offset); } - + { unsigned cformat, zformat = 0; - + if (cbuf_surface) cformat = cbuf_surface->format; else @@ -275,6 +284,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) zformat ); } } +#endif #if 01 /* texture images */ @@ -314,7 +324,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) { if (i915->current.sampler_enable_nr) { int i; - + OUT_BATCH( _3DSTATE_SAMPLER_STATE | (3 * i915->current.sampler_enable_nr) ); @@ -331,9 +341,10 @@ i915_emit_hardware_state(struct i915_context *i915 ) } #endif +#if 01 /* constants */ /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_PROGRAM) + if (i915->hardware_dirty & I915_HW_CONSTANTS) { /* Collate the user-defined constants with the fragment shader's * immediates according to the constant_flags[] array. @@ -370,7 +381,9 @@ i915_emit_hardware_state(struct i915_context *i915 ) } } } +#endif +#if 01 /* Fragment program */ /* i915->current.program_len dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_PROGRAM) @@ -382,7 +395,9 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(i915->fs->program[i]); } } +#endif +#if 01 /* drawing surface size */ /* 6 dwords, 0 relocs */ { @@ -398,7 +413,11 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(0); OUT_BATCH(0); } +#endif + I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__, + ((uintptr_t)i915->batch->ptr - save_ptr) / 4, + i915->batch->relocs - save_relocs); i915->hardware_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_fpc.c b/src/gallium/drivers/i915/i915_state_fpc.c new file mode 100644 index 0000000000..ec7cec0e47 --- /dev/null +++ b/src/gallium/drivers/i915/i915_state_fpc.c @@ -0,0 +1,59 @@ +/************************************************************************** + * + * Copyright © 2010 
Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_state.h" + + + +/*********************************************************************** + */ +static void update_hw_constants(struct i915_context *i915) +{ + i915->hardware_dirty |= I915_HW_CONSTANTS; +} + +struct i915_tracked_state i915_hw_constants = { + "hw_constants", + update_hw_constants, + I915_NEW_CONSTANTS | I915_NEW_FS +}; + + + +/*********************************************************************** + */ +static void update_fs(struct i915_context *i915) +{ + i915->hardware_dirty |= I915_HW_PROGRAM; +} + +struct i915_tracked_state i915_hw_fs = { + "fs", + update_fs, + I915_NEW_FS +}; diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index 8cec699285..f9ade7077f 100644 --- a/src/gallium/drivers/i915/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
@@ -22,13 +22,13 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ /* * Authors: * Keith Whitwell <keith@tungstengraphics.com> */ - + #include "i915_state_inlines.h" #include "i915_context.h" #include "i915_state.h" @@ -46,30 +46,31 @@ /*********************************************************************** - * S0,S1: Vertex buffer state. + * S0,S1: Vertex buffer state. */ static void upload_S0S1(struct i915_context *i915) { unsigned LIS0, LIS1; - /* I915_NEW_VBO */ - /* TODO: re-use vertex buffers here? */ + /* I915_NEW_VBO + */ LIS0 = i915->vbo_offset; - /* I915_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! + /* I915_NEW_VERTEX_SIZE */ + /* XXX do this where the vertex size is calculated! */ { unsigned vertex_size = i915->current.vertex_info.size; LIS1 = ((vertex_size << 24) | - (vertex_size << 16)); + (vertex_size << 16)); } - /* I915_NEW_VBO */ - /* TODO: use a vertex generation number to track vbo changes */ + /* I915_NEW_VBO + */ if (1 || i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || - i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) + i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) { i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; @@ -78,13 +79,13 @@ static void upload_S0S1(struct i915_context *i915) } const struct i915_tracked_state i915_upload_S0S1 = { - I915_NEW_VBO | I915_NEW_VERTEX_FORMAT, - upload_S0S1 + "imm S0 S1", + upload_S0S1, + I915_NEW_VBO | I915_NEW_VERTEX_FORMAT }; - /*********************************************************************** * S4: Vertex format, rasterization state */ @@ -92,7 +93,8 @@ static void upload_S2S4(struct i915_context *i915) { unsigned LIS2, LIS4; - /* I915_NEW_VERTEX_FORMAT */ + /* 
I915_NEW_VERTEX_FORMAT + */ { LIS2 = i915->current.vertex_info.hwfmt[1]; LIS4 = i915->current.vertex_info.hwfmt[0]; @@ -113,35 +115,38 @@ static void upload_S2S4(struct i915_context *i915) } } - const struct i915_tracked_state i915_upload_S2S4 = { - I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT, - upload_S2S4 + "imm S2 S4", + upload_S2S4, + I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT }; /*********************************************************************** - * */ -static void upload_S5( struct i915_context *i915 ) +static void upload_S5(struct i915_context *i915) { unsigned LIS5 = 0; + /* I915_NEW_DEPTH_STENCIL + */ LIS5 |= i915->depth_stencil->stencil_LIS5; /* hope it's safe to set stencil ref value even if stencil test is disabled? */ LIS5 |= i915->stencil_ref.ref_value[0] << S5_STENCIL_REF_SHIFT; + /* I915_NEW_BLEND + */ LIS5 |= i915->blend->LIS5; #if 0 - /* I915_NEW_RASTERIZER */ + /* I915_NEW_RASTERIZER + */ if (i915->state.Polygon->OffsetFill) { LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; } #endif - if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; i915->hardware_dirty |= I915_HW_IMMEDIATE; @@ -149,14 +154,16 @@ static void upload_S5( struct i915_context *i915 ) } const struct i915_tracked_state i915_upload_S5 = { - (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER), - upload_S5 + "imm S5", + upload_S5, + I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER }; + /*********************************************************************** */ -static void upload_S6( struct i915_context *i915 ) +static void upload_S6(struct i915_context *i915) { unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT); @@ -180,14 +187,16 @@ static void upload_S6( struct i915_context *i915 ) } const struct i915_tracked_state i915_upload_S6 = { - I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER, - upload_S6 + "imm s6", + upload_S6, + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER }; + 
/*********************************************************************** */ -static void upload_S7( struct i915_context *i915 ) +static void upload_S7(struct i915_context *i915) { unsigned LIS7; @@ -202,11 +211,15 @@ static void upload_S7( struct i915_context *i915 ) } const struct i915_tracked_state i915_upload_S7 = { - I915_NEW_RASTERIZER, - upload_S7 + "imm S7", + upload_S7, + I915_NEW_RASTERIZER }; + +/*********************************************************************** + */ static const struct i915_tracked_state *atoms[] = { &i915_upload_S0S1, &i915_upload_S2S4, @@ -215,13 +228,17 @@ static const struct i915_tracked_state *atoms[] = { &i915_upload_S7 }; -/* - */ -void i915_update_immediate( struct i915_context *i915 ) +static void update_immediate(struct i915_context *i915) { int i; for (i = 0; i < Elements(atoms); i++) if (i915->dirty & atoms[i]->dirty) - atoms[i]->update( i915 ); + atoms[i]->update(i915); } + +struct i915_tracked_state i915_hw_immediate = { + "immediate", + update_immediate, + ~0 /* all state atoms, becuase we do internal checking */ +}; diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index 77b9bccbb7..4667e0b78d 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -53,17 +53,23 @@ * * So we need to update the map state when we change samplers and * we need to be change the sampler state when map state is changed. - * The first part is done by calling i915_update_texture in - * i915_update_samplers and the second part is done else where in - * code tracking the state changes. + * The first part is done by calling update_texture in update_samplers + * and the second part is done else where in code tracking the state + * changes. 
+ */ + +static void update_map(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[2]); + + + +/*********************************************************************** + * Samplers */ -static void -i915_update_texture(struct i915_context *i915, - uint unit, - const struct i915_texture *tex, - const struct i915_sampler_state *sampler, - uint state[6]); /** * Compute i915 texture sampling state. * @@ -74,16 +80,13 @@ i915_update_texture(struct i915_context *i915, */ static void update_sampler(struct i915_context *i915, uint unit, - const struct i915_sampler_state *sampler, - const struct i915_texture *tex, - unsigned state[3] ) + const struct i915_sampler_state *sampler, + const struct i915_texture *tex, + unsigned state[3]) { const struct pipe_resource *pt = &tex->b.b; unsigned minlod, lastlod; - /* Need to do this after updating the maps, which call the - * intel_finalize_mipmap_tree and hence can update firstLevel: - */ state[0] = sampler->state[0]; state[1] = sampler->state[1]; state[2] = sampler->state[2]; @@ -118,7 +121,7 @@ static void update_sampler(struct i915_context *i915, wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) { if (i915->conformance_mode > 0) { assert(0); - /* sampler->fallback = true; */ + /* sampler->fallback = true; */ /* TODO */ } } @@ -137,8 +140,7 @@ static void update_sampler(struct i915_context *i915, state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); } - -void i915_update_samplers( struct i915_context *i915 ) +static void update_samplers(struct i915_context *i915) { uint unit; @@ -152,29 +154,38 @@ void i915_update_samplers( struct i915_context *i915 ) if (i915->fragment_sampler_views[unit]) { struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - update_sampler( i915, - unit, - i915->sampler[unit], /* sampler state */ - texture, /* texture */ - i915->current.sampler[unit] /* the result */ - ); - i915_update_texture( i915, - unit, - 
texture, /* texture */ - i915->sampler[unit], /* sampler state */ - i915->current.texbuffer[unit] ); - - i915->current.sampler_enable_nr++; - i915->current.sampler_enable_flags |= (1 << unit); + update_sampler(i915, + unit, + i915->sampler[unit], /* sampler state */ + texture, /* texture */ + i915->current.sampler[unit]); /* the result */ + update_map(i915, + unit, + texture, /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit]); /* the result */ + + i915->current.sampler_enable_nr++; + i915->current.sampler_enable_flags |= (1 << unit); } } i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP; } +struct i915_tracked_state i915_hw_samplers = { + "samplers", + update_samplers, + I915_NEW_SAMPLER | I915_NEW_SAMPLER_VIEW +}; -static uint -translate_texture_format(enum pipe_format pipeFormat) + + +/*********************************************************************** + * Sampler views + */ + +static uint translate_texture_format(enum pipe_format pipeFormat) { switch (pipeFormat) { case PIPE_FORMAT_L8_UNORM: @@ -226,19 +237,17 @@ translate_texture_format(enum pipe_format pipeFormat) return (MAPSURF_32BIT | MT_32BIT_xI824); default: debug_printf("i915: translate_texture_format() bad image format %x\n", - pipeFormat); + pipeFormat); assert(0); return 0; } } - -static void -i915_update_texture(struct i915_context *i915, - uint unit, - const struct i915_texture *tex, - const struct i915_sampler_state *sampler, - uint state[6]) +static void update_map(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[2]) { const struct pipe_resource *pt = &tex->b.b; uint format, pitch; @@ -287,9 +296,7 @@ i915_update_texture(struct i915_context *i915, | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); } - -void -i915_update_textures(struct i915_context *i915) +static void update_maps(struct i915_context *i915) { uint unit; @@ -300,13 +307,19 @@ i915_update_textures(struct 
i915_context *i915) if (i915->fragment_sampler_views[unit]) { struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - i915_update_texture( i915, - unit, - texture, /* texture */ - i915->sampler[unit], /* sampler state */ - i915->current.texbuffer[unit] ); + update_map(i915, + unit, + texture, /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit]); } } i915->hardware_dirty |= I915_HW_MAP; } + +struct i915_tracked_state i915_hw_sampler_views = { + "sampler_views", + update_maps, + I915_NEW_SAMPLER_VIEW +}; diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c new file mode 100644 index 0000000000..dc9a4c1e2f --- /dev/null +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright © 2010 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_state.h" + + + +/*********************************************************************** + * Update framebuffer state + */ +static void update_framebuffer(struct i915_context *i915) +{ + /* HW emit currently references framebuffer state directly: + */ + i915->hardware_dirty |= I915_HW_STATIC; +} + +struct i915_tracked_state i915_hw_framebuffer = { + "framebuffer", + update_framebuffer, + I915_NEW_FRAMEBUFFER +}; diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 3aba19fe6a..5385e403d2 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -222,11 +222,4 @@ struct i915_winsys { void (*destroy)(struct i915_winsys *iws); }; - -/** - * Create i915 pipe_screen. - */ -struct pipe_screen *i915_screen_create(struct i915_winsys *iws); - - #endif diff --git a/src/gallium/drivers/i965/brw_public.h b/src/gallium/drivers/i965/brw_public.h new file mode 100644 index 0000000000..be2cd6b5c4 --- /dev/null +++ b/src/gallium/drivers/i965/brw_public.h @@ -0,0 +1,13 @@ + +#ifndef BRW_PUBLIC_H +#define BRW_PUBLIC_H + +struct brw_winsys_screen; +struct pipe_screen; + +/** + * Create brw AKA i965 pipe_screen. 
+ */ +struct pipe_screen * brw_screen_create(struct brw_winsys_screen *bws); + +#endif diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 50a446db91..bdfead73cc 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -34,6 +34,7 @@ #include "brw_context.h" #include "brw_screen.h" #include "brw_winsys.h" +#include "brw_public.h" #include "brw_debug.h" #include "brw_resource.h" @@ -350,7 +351,7 @@ brw_destroy_screen(struct pipe_screen *screen) * Create a new brw_screen object */ struct pipe_screen * -brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) +brw_screen_create(struct brw_winsys_screen *sws) { struct brw_screen *bscreen; struct brw_chipset chipset; @@ -365,9 +366,9 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) memset(&chipset, 0, sizeof chipset); - chipset.pci_id = pci_id; + chipset.pci_id = sws->pci_id; - switch (pci_id) { + switch (chipset.pci_id) { case PCI_CHIP_I965_G: case PCI_CHIP_I965_Q: case PCI_CHIP_I965_G_1: @@ -393,7 +394,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) default: debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", - __FUNCTION__, pci_id); + __FUNCTION__, chipset.pci_id); return NULL; } diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index f30c7f1813..a06f8bb7d6 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -147,6 +147,7 @@ static INLINE void make_reloc(struct brw_winsys_reloc *reloc, struct brw_winsys_screen { + unsigned pci_id; /** * Buffer functions. @@ -261,12 +262,6 @@ bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf) } -/** - * Create brw pipe_screen. 
- */ -struct pipe_screen *brw_create_screen(struct brw_winsys_screen *iws, unsigned pci_id); - - /************************************************************************* * Cooperative dumping between winsys and driver. TODO: make this diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 9c67759ad0..f7ee55cc1c 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -678,7 +678,7 @@ static void precalc_tex( struct brw_wm_compile *c, struct brw_fp_src src0, struct brw_fp_src sampler ) { - struct brw_fp_src coord = src_undef(); + struct brw_fp_src coord; struct brw_fp_dst tmp = dst_undef(); assert(unit < BRW_MAX_TEX_UNIT); diff --git a/src/gallium/drivers/identity/Makefile b/src/gallium/drivers/identity/Makefile index e32b9102e5..74692d9761 100644 --- a/src/gallium/drivers/identity/Makefile +++ b/src/gallium/drivers/identity/Makefile @@ -6,7 +6,6 @@ LIBNAME = identity C_SOURCES = \ id_objects.c \ id_context.c \ - id_screen.c \ - id_drm.c + id_screen.c include ../../Makefile.template diff --git a/src/gallium/drivers/identity/SConscript b/src/gallium/drivers/identity/SConscript index 2a68891c28..b364e0acc8 100644 --- a/src/gallium/drivers/identity/SConscript +++ b/src/gallium/drivers/identity/SConscript @@ -6,7 +6,6 @@ identity = env.ConvenienceLibrary( target = 'identity', source = [ 'id_context.c', - 'id_drm.c', 'id_objects.c', 'id_screen.c', ]) diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c index ca4743f9ef..593928f399 100644 --- a/src/gallium/drivers/identity/id_objects.c +++ b/src/gallium/drivers/identity/id_objects.c @@ -120,13 +120,14 @@ identity_sampler_view_create(struct identity_context *id_context, assert(view->texture == id_resource->resource); - id_view = MALLOC(sizeof(struct identity_sampler_view)); + id_view = CALLOC_STRUCT(identity_sampler_view); id_view->base = *view; id_view->base.reference.count = 1; 
id_view->base.texture = NULL; pipe_resource_reference(&id_view->base.texture, id_resource->resource); id_view->base.context = id_context->pipe; + id_view->sampler_view = view; return &id_view->base; error: @@ -180,8 +181,8 @@ identity_transfer_destroy(struct identity_context *id_context, struct identity_transfer *id_transfer) { pipe_resource_reference(&id_transfer->base.resource, NULL); - id_transfer->pipe->transfer_destroy(id_context->pipe, - id_transfer->transfer); + id_context->pipe->transfer_destroy(id_context->pipe, + id_transfer->transfer); FREE(id_transfer); } diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h index 5eea10b0b5..e8deabf4fc 100644 --- a/src/gallium/drivers/identity/id_objects.h +++ b/src/gallium/drivers/identity/id_objects.h @@ -65,7 +65,6 @@ struct identity_transfer { struct pipe_transfer base; - struct pipe_context *pipe; struct pipe_transfer *transfer; }; diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore index a1b6f56e0d..6ebd2b8a63 100644 --- a/src/gallium/drivers/llvmpipe/.gitignore +++ b/src/gallium/drivers/llvmpipe/.gitignore @@ -3,3 +3,5 @@ lp_test_blend lp_test_conv lp_test_format lp_test_printf +lp_test_round +lp_test_sincos diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index ee28179c30..2892b62920 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -18,6 +18,7 @@ C_SOURCES = \ lp_fence.c \ lp_flush.c \ lp_jit.c \ + lp_memory.c \ lp_perf.c \ lp_query.c \ lp_rast.c \ @@ -53,8 +54,12 @@ PROGS := lp_test_format \ lp_test_blend \ lp_test_conv \ lp_test_printf \ + lp_test_round \ lp_test_sincos +# Need this for the lp_test_*.o files +CLEAN_EXTRA = *.o + lp_test_sincos.o : sse_mathfun.h PROGS_DEPS := ../../auxiliary/libgallium.a diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index a1ef71da89..fd6ba1561e 100644 
--- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -1,3 +1,5 @@ +import distutils.version + Import('*') if not env['llvm']: @@ -23,6 +25,16 @@ env.Depends('lp_tile_soa.c', [ '#src/gallium/auxiliary/util/u_format_pack.py', ]) + +# Only enable SSSE3 for lp_tile_soa_sse3.c +ssse3_env = env.Clone() +if env['gcc'] \ + and distutils.version.LooseVersion(env['CCVERSION']) >= distutils.version.LooseVersion('4.3') \ + and env['machine'] in ('x86', 'x86_64') : + ssse3_env.Append(CCFLAGS = ['-mssse3']) +lp_tile_soa_os = ssse3_env.SharedObject('lp_tile_soa.c') + + llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ @@ -38,6 +50,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_fence.c', 'lp_flush.c', 'lp_jit.c', + 'lp_memory.c', 'lp_perf.c', 'lp_query.c', 'lp_rast.c', @@ -65,7 +78,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_tex_sample.c', 'lp_texture.c', 'lp_tile_image.c', - 'lp_tile_soa.c', + lp_tile_soa_os, ]) @@ -82,6 +95,9 @@ if env['platform'] != 'embedded': 'sincos', ] + if not msvc: + tests.append('round') + for test in tests: target = env.Program( target = 'lp_test_' + test, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index 70d08e71f6..09e9833057 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -190,30 +190,27 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, enum lp_build_blend_swizzle rgb_swizzle, unsigned alpha_swizzle) { - if(rgb == alpha) { - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) - return rgb; - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) - return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle); + LLVMValueRef swizzled_rgb; + + switch (rgb_swizzle) { + case LP_BUILD_BLEND_SWIZZLE_RGBA: + swizzled_rgb = rgb; + break; + case LP_BUILD_BLEND_SWIZZLE_AAAA: + swizzled_rgb = lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle); + break; + default: + 
assert(0); + swizzled_rgb = bld->base.undef; } - else { - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) { - boolean cond[4] = {0, 0, 0, 0}; - cond[alpha_swizzle] = 1; - return lp_build_select_aos(&bld->base, alpha, rgb, cond); - } - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) { - unsigned char swizzle[4]; - swizzle[0] = alpha_swizzle; - swizzle[1] = alpha_swizzle; - swizzle[2] = alpha_swizzle; - swizzle[3] = alpha_swizzle; - swizzle[alpha_swizzle] += 4; - return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle); - } + + if (rgb != alpha) { + boolean cond[4] = {0, 0, 0, 0}; + cond[alpha_swizzle] = 1; + swizzled_rgb = lp_build_select_aos(&bld->base, alpha, swizzled_rgb, cond); } - assert(0); - return bld->base.undef; + + return swizzled_rgb; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 90d2b26f9f..78744da500 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -261,7 +261,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) const unsigned interp = bld->interp[attrib]; for(chan = 0; chan < NUM_CHANNELS; ++chan) { if(mask & (1 << chan)) { - LLVMValueRef a = coeff_bld->undef; + LLVMValueRef a; if (interp == LP_INTERP_CONSTANT || interp == LP_INTERP_FACING) { a = bld->a[attrib][chan]; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 986e604ce7..b2643ab33c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -83,6 +83,7 @@ struct llvmpipe_context { int so_count[PIPE_MAX_SO_BUFFERS]; int num_buffers; } so_target; + struct pipe_resource *mapped_vs_tex[PIPE_MAX_VERTEX_SAMPLERS]; unsigned num_samplers; unsigned num_fragment_sampler_views; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 98780d7631..625d0c8a8c 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -43,18 +43,23 @@ /** - * Draw vertex arrays, with optional indexing. + * Draw vertex arrays, with optional indexing, optional instancing. + * All the other drawing functions are implemented in terms of this function. * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. */ static void -llvmpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) +llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) { struct llvmpipe_context *lp = llvmpipe_context(pipe); struct draw_context *draw = lp->draw; @@ -74,9 +79,11 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes = llvmpipe_resource_data(indexBuffer); - draw_set_mapped_element_buffer_range(draw, indexSize, indexBias, - min_index, - max_index, + draw_set_mapped_element_buffer_range(draw, + indexSize, + indexBias, + minIndex, + maxIndex, mapped_indexes); } else { @@ -84,9 +91,13 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_element_buffer_range(draw, 0, 0, start, start + count - 1, NULL); } + llvmpipe_prepare_vertex_sampling(lp, + lp->num_vertex_sampler_views, + lp->vertex_sampler_views); /* draw! 
*/ - draw_arrays(draw, mode, start, count); + draw_arrays_instanced(draw, mode, start, count, + startInstance, instanceCount); /* * unmap vertex/index buffers @@ -97,6 +108,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, if (indexBuffer) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } + llvmpipe_cleanup_vertex_sampling(lp); /* * TODO: Flush only when a user vertex/index buffer is present @@ -108,24 +120,102 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, static void +llvmpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + llvmpipe_draw_range_elements_instanced(pipe, + NULL, /* no indexBuffer */ + 0, 0, /* indexSize, indexBias */ + 0, ~0, /* minIndex, maxIndex */ + mode, + start, + count, + startInstance, + instanceCount); +} + + +static void +llvmpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + llvmpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, indexBias, + 0, ~0, /* minIndex, maxIndex */ + mode, + start, + count, + startInstance, + instanceCount); +} + + +static void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) + unsigned mode, + unsigned start, + unsigned count) +{ + llvmpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, indexBias, + 0, 0xffffffff, /* min, maxIndex */ + mode, start, count, + 0, /* startInstance */ + 1); /* instanceCount */ +} + + +static void +llvmpipe_draw_range_elements(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned min_index, + unsigned max_index, + unsigned mode, + unsigned start, + 
unsigned count) { - llvmpipe_draw_range_elements( pipe, indexBuffer, - indexSize, indexBias, - 0, 0xffffffff, - mode, start, count ); + llvmpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, indexBias, + min_index, max_index, + mode, start, count, + 0, /* startInstance */ + 1); /* instanceCount */ } static void -llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) +llvmpipe_draw_arrays(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count) { - llvmpipe_draw_elements(pipe, NULL, 0, 0, mode, start, count); + llvmpipe_draw_range_elements_instanced(pipe, + NULL, /* indexBuffer */ + 0, /* indexSize */ + 0, /* indexBias */ + 0, ~0, /* min, maxIndex */ + mode, start, count, + 0, /* startInstance */ + 1); /* instanceCount */ } @@ -135,4 +225,6 @@ llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; + llvmpipe->pipe.draw_arrays_instanced = llvmpipe_draw_arrays_instanced; + llvmpipe->pipe.draw_elements_instanced = llvmpipe_draw_elements_instanced; } diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c index 75d8d2b825..f9805e5d68 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -28,7 +28,6 @@ #include "pipe/p_screen.h" #include "util/u_memory.h" -#include "util/u_inlines.h" #include "lp_debug.h" #include "lp_fence.h" @@ -59,7 +58,7 @@ lp_fence_create(unsigned rank) /** Destroy a fence. Called when refcount hits zero. 
*/ -static void +void lp_fence_destroy(struct lp_fence *fence) { pipe_mutex_destroy(fence->mutex); @@ -77,12 +76,10 @@ llvmpipe_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - struct lp_fence *old = (struct lp_fence *) *ptr; + struct lp_fence **old = (struct lp_fence **) ptr; struct lp_fence *f = (struct lp_fence *) fence; - if (pipe_reference(&old->reference, &f->reference)) { - lp_fence_destroy(old); - } + lp_fence_reference(old, f); } diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h index d9270f5784..13358fb99f 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.h +++ b/src/gallium/drivers/llvmpipe/lp_fence.h @@ -32,6 +32,7 @@ #include "os/os_thread.h" #include "pipe/p_state.h" +#include "util/u_inlines.h" struct pipe_screen; @@ -61,4 +62,21 @@ void llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen); +void +lp_fence_destroy(struct lp_fence *fence); + +static INLINE void +lp_fence_reference(struct lp_fence **ptr, + struct lp_fence *f) +{ + struct lp_fence *old = *ptr; + + if (pipe_reference(&old->reference, &f->reference)) { + lp_fence_destroy(old); + } + + *ptr = f; +} + + #endif /* LP_FENCE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 0cd288bb73..845292f4ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -40,27 +40,19 @@ /** * \param flags bitmask of PIPE_FLUSH_x flags - * \param fence if non-null, returns pointer to a fench which can be waited on + * \param fence if non-null, returns pointer to a fence which can be waited on */ void llvmpipe_flush( struct pipe_context *pipe, - unsigned flags, + unsigned flags, struct pipe_fence_handle **fence ) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); draw_flush(llvmpipe->draw); - if (fence) { - /* if we're going to flush the setup/rasterization modules, emit - * a fence. 
- * XXX this (and the code below) may need fine tuning... - */ - *fence = lp_setup_fence( llvmpipe->setup ); - } - /* ask the setup module to flush */ - lp_setup_flush(llvmpipe->setup, flags); + lp_setup_flush(llvmpipe->setup, flags, fence); /* Enable to dump BMPs of the color/depth buffers each frame */ if (0) { diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 23aa34ddec..8e6dfb293d 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -103,10 +103,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatType(); elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = LLVMInt32Type(); elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32Type(); - elem_types[LP_JIT_CTX_SCISSOR_XMIN] = LLVMFloatType(); - elem_types[LP_JIT_CTX_SCISSOR_YMIN] = LLVMFloatType(); - elem_types[LP_JIT_CTX_SCISSOR_XMAX] = LLVMFloatType(); - elem_types[LP_JIT_CTX_SCISSOR_YMAX] = LLVMFloatType(); elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8Type(), 0); elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); @@ -125,18 +121,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back, screen->target, context_type, LP_JIT_CTX_STENCIL_REF_BACK); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmin, - screen->target, context_type, - LP_JIT_CTX_SCISSOR_XMIN); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymin, - screen->target, context_type, - LP_JIT_CTX_SCISSOR_YMIN); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmax, - screen->target, context_type, - LP_JIT_CTX_SCISSOR_XMAX); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymax, - screen->target, context_type, - LP_JIT_CTX_SCISSOR_YMAX); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, screen->target, context_type, LP_JIT_CTX_BLEND_COLOR); diff --git 
a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 8d06e65725..c94189413a 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -89,9 +89,6 @@ struct lp_jit_context uint32_t stencil_ref_front, stencil_ref_back; - /** floats, not ints */ - float scissor_xmin, scissor_ymin, scissor_xmax, scissor_ymax; - /* FIXME: store (also?) in floats */ uint8_t *blend_color; @@ -108,10 +105,6 @@ enum { LP_JIT_CTX_ALPHA_REF, LP_JIT_CTX_STENCIL_REF_FRONT, LP_JIT_CTX_STENCIL_REF_BACK, - LP_JIT_CTX_SCISSOR_XMIN, - LP_JIT_CTX_SCISSOR_YMIN, - LP_JIT_CTX_SCISSOR_XMAX, - LP_JIT_CTX_SCISSOR_YMAX, LP_JIT_CTX_BLEND_COLOR, LP_JIT_CTX_TEXTURES, LP_JIT_CTX_COUNT @@ -130,18 +123,6 @@ enum { #define lp_jit_context_stencil_ref_back_value(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back") -#define lp_jit_context_scissor_xmin_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMIN, "scissor_xmin") - -#define lp_jit_context_scissor_ymin_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMIN, "scissor_ymin") - -#define lp_jit_context_scissor_xmax_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMAX, "scissor_xmax") - -#define lp_jit_context_scissor_ymax_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMAX, "scissor_ymax") - #define lp_jit_context_blend_color(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color") @@ -160,12 +141,7 @@ typedef void const void *dady, uint8_t **color, void *depth, - const int32_t c1, - const int32_t c2, - const int32_t c3, - const int32_t *step1, - const int32_t *step2, - const int32_t *step3, + uint32_t mask, uint32_t *counter); diff --git a/src/gallium/drivers/llvmpipe/lp_memory.c b/src/gallium/drivers/llvmpipe/lp_memory.c new file mode 100644 index 0000000000..0f55d4a80a --- 
/dev/null +++ b/src/gallium/drivers/llvmpipe/lp_memory.c @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_debug.h" +#include "lp_limits.h" +#include "lp_memory.h" + +/** + * 32bpp RGBA swizzled tiles. One for each thread and each + * possible colorbuf. Adds up to quite a bit 8*8*64*64*4 == 1MB. + * Several schemes exist to reduce this, such as scaling back the + * number of threads or using a smaller tilesize when multiple + * colorbuffers are bound. + */ +PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4]; + + +/* A single dummy tile used in a couple of out-of-memory situations. 
+ */ +PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4]; + diff --git a/src/gallium/drivers/llvmpipe/lp_memory.h b/src/gallium/drivers/llvmpipe/lp_memory.h new file mode 100644 index 0000000000..f7418f5e08 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_memory.h @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_MEMORY_H +#define LP_MEMORY_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "lp_limits.h" + +extern PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4]; + +extern PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4]; + +#endif /* LP_MEMORY_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c index a316597675..083e7e30a5 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.c +++ b/src/gallium/drivers/llvmpipe/lp_perf.c @@ -46,10 +46,10 @@ lp_print_counters(void) { if (LP_DEBUG & DEBUG_COUNTERS) { unsigned total_64, total_16, total_4; - float p1, p2, p3; + float p1, p2, p3, p4; - debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris); - debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris); + debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris); + debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris); total_64 = (lp_count.nr_empty_64 + lp_count.nr_fully_covered_64 + @@ -58,10 +58,13 @@ lp_print_counters(void) p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64; p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64; p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64; + p4 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64; - debug_printf("llvmpipe: nr_empty_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64); - debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64); - debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64); + debug_printf("llvmpipe: nr_64x64: %9u\n", total_64); + debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", 
lp_count.nr_fully_covered_64, p2, total_64); + debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p4, total_64); + debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64); + debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64); total_16 = (lp_count.nr_empty_16 + lp_count.nr_fully_covered_16 + @@ -71,25 +74,27 @@ lp_print_counters(void) p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16; p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16; - debug_printf("llvmpipe: nr_empty_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16); - debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16); - debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16); + debug_printf("llvmpipe: nr_16x16: %9u\n", total_16); + debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16); + debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16); + debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16); total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4); p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4; p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4; - debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); - debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4); + debug_printf("llvmpipe: nr_4x4: %9u\n", total_4); + debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); + debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of 
%u)\n", lp_count.nr_non_empty_4, p2, total_4); - debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear); - debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load); - debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store); + debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear); + debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load); + debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store); - debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles); - debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0); - debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles); + debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles); + debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0); + debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles); } } diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h index a9629dae3c..4774f64550 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.h +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -44,6 +44,7 @@ struct lp_counters unsigned nr_empty_64; unsigned nr_fully_covered_64; unsigned nr_partially_covered_64; + unsigned nr_shade_opaque_64; unsigned nr_empty_16; unsigned nr_fully_covered_16; unsigned nr_partially_covered_16; diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index c902c04684..02eeaf6487 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -48,7 +48,7 @@ static struct llvmpipe_query *llvmpipe_query( struct pipe_query *p ) static struct pipe_query * 
llvmpipe_create_query(struct pipe_context *pipe, - unsigned type) + unsigned type) { struct llvmpipe_query *pq; @@ -67,6 +67,16 @@ static void llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) { struct llvmpipe_query *pq = llvmpipe_query(q); + /* query might still be in progress if we never waited for the result */ + if (!pq->done) { + struct pipe_fence_handle *fence = NULL; + llvmpipe_flush(pipe, 0, &fence); + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + } + pipe_mutex_destroy(pq->mutex); FREE(pq); } @@ -74,16 +84,26 @@ llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) static boolean llvmpipe_get_query_result(struct pipe_context *pipe, - struct pipe_query *q, - boolean wait, - void *vresult) + struct pipe_query *q, + boolean wait, + void *vresult) { - struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); struct llvmpipe_query *pq = llvmpipe_query(q); uint64_t *result = (uint64_t *)vresult; if (!pq->done) { - lp_setup_flush(llvmpipe->setup, 0); + if (wait) { + struct pipe_fence_handle *fence = NULL; + llvmpipe_flush(pipe, 0, &fence); + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + } + /* this is a bit inconsistent but should be ok */ + else { + llvmpipe_flush(pipe, 0, NULL); + } } if (pq->done) { diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 50e44dcb2b..654f4ea48e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -28,6 +28,7 @@ #include <limits.h> #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_rect.h" #include "util/u_surface.h" #include "lp_scene_queue.h" @@ -66,7 +67,7 @@ lp_rast_begin( struct lp_rasterizer *rast, cbuf->level, cbuf->zslice, LP_TEX_USAGE_READ_WRITE, - LP_TEX_LAYOUT_NONE); + LP_TEX_LAYOUT_LINEAR); } 
if (fb->zsbuf) { @@ -81,7 +82,6 @@ lp_rast_begin( struct lp_rasterizer *rast, zsbuf->zslice, LP_TEX_USAGE_READ_WRITE, LP_TEX_LAYOUT_NONE); - assert(rast->zsbuf.map); } lp_scene_bin_iter_begin( scene ); @@ -137,7 +137,6 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, struct lp_rasterizer *rast = task->rast; struct lp_scene *scene = rast->curr_scene; enum lp_texture_usage usage; - unsigned buf; LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); @@ -147,24 +146,8 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, task->x = x; task->y = y; - if (scene->has_color_clear) - usage = LP_TEX_USAGE_WRITE_ALL; - else - usage = LP_TEX_USAGE_READ_WRITE; - - /* get pointers to color tile(s) */ - for (buf = 0; buf < rast->state.nr_cbufs; buf++) { - struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; - struct llvmpipe_resource *lpt; - assert(cbuf); - lpt = llvmpipe_resource(cbuf->texture); - task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, - cbuf->face + cbuf->zslice, - cbuf->level, - usage, - x, y); - assert(task->color_tiles[buf]); - } + /* reset pointers to color tile(s) */ + memset(task->color_tiles, 0, sizeof(task->color_tiles)); /* get pointer to depth/stencil tile */ { @@ -188,7 +171,7 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, /* Get actual pointer to the tile data. Note that depth/stencil * data is tiled differently than color data. 
*/ - task->depth_tile = lp_rast_get_depth_block_pointer(rast, x, y); + task->depth_tile = lp_rast_get_depth_block_pointer(task, x, y); assert(task->depth_tile); } @@ -223,7 +206,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, clear_color[2] == clear_color[3]) { /* clear to grayscale value {x, x, x, x} */ for (i = 0; i < rast->state.nr_cbufs; i++) { - uint8_t *ptr = task->color_tiles[i]; + uint8_t *ptr = + lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL); memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4); } } @@ -235,7 +219,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, */ const unsigned chunk = TILE_SIZE / 4; for (i = 0; i < rast->state.nr_cbufs; i++) { - uint8_t *c = task->color_tiles[i]; + uint8_t *c = + lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL); unsigned j; for (j = 0; j < 4 * TILE_SIZE; j++) { @@ -286,8 +271,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, dst = task->depth_tile; - assert(dst == lp_rast_get_depth_block_pointer(rast, task->x, task->y)); - switch (block_size) { case 1: memset(dst, (uint8_t) clear_value, height * width); @@ -376,8 +359,8 @@ lp_rast_load_color(struct lp_rasterizer_task *task, * This is a bin command which is stored in all bins. 
*/ void -lp_rast_store_color( struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) +lp_rast_store_linear_color( struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { struct lp_rasterizer *rast = task->rast; struct lp_scene *scene = rast->curr_scene; @@ -387,30 +370,20 @@ lp_rast_store_color( struct lp_rasterizer_task *task, struct pipe_surface *cbuf = scene->fb.cbufs[buf]; const unsigned face = cbuf->face, level = cbuf->level; struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); - /* this will convert the tiled data to linear if needed */ - (void) llvmpipe_get_texture_tile_linear(lpt, face, level, - LP_TEX_USAGE_READ, - task->x, task->y); - } -} - -/** - * This is a bin command called during bin processing. - */ -void -lp_rast_set_state(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) -{ - const struct lp_rast_state *state = arg.set_state; + if (!task->color_tiles[buf]) + continue; - LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); - - /* just set the current state pointer for this rasterizer */ - task->current_state = state; + llvmpipe_unswizzle_cbuf_tile(lpt, + face, + level, + task->x, task->y, + task->color_tiles[buf]); + } } + /** * Run the shader on all blocks in a tile. This is used when a tile is * completely contained inside a triangle. 
@@ -421,8 +394,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { struct lp_rasterizer *rast = task->rast; - const struct lp_rast_state *state = task->current_state; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + const struct lp_rast_state *state = inputs->state; struct lp_fragment_shader_variant *variant = state->variant; const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; @@ -442,36 +415,60 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, tile_x + x, tile_y + y); /* depth buffer */ - depth = lp_rast_get_depth_block_pointer(rast, tile_x + x, tile_y + y); + depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y); /* run shader on 4x4 block */ variant->jit_function[RAST_WHOLE]( &state->jit_context, - tile_x + x, tile_y + y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - INT_MIN, INT_MIN, INT_MIN, - NULL, NULL, NULL, &task->vis_counter); + tile_x + x, tile_y + y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + 0xffff, + &task->vis_counter); } } } /** - * Compute shading for a 4x4 block of pixels. + * Run the shader on all blocks in a tile. This is used when a tile is + * completely contained inside a triangle, and the shader is opaque. + * This is a bin command called during bin processing. + */ +void +lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + struct lp_rasterizer *rast = task->rast; + unsigned i; + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + /* this will prevent converting the layout from tiled to linear */ + for (i = 0; i < rast->state.nr_cbufs; i++) { + (void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL); + } + + lp_rast_shade_tile(task, arg); +} + + +/** + * Compute shading for a 4x4 block of pixels inside a triangle. * This is a bin command called during bin processing. 
* \param x X position of quad in window coords * \param y Y position of quad in window coords */ -void lp_rast_shade_quads( struct lp_rasterizer_task *task, - const struct lp_rast_shader_inputs *inputs, - unsigned x, unsigned y, - int32_t c1, int32_t c2, int32_t c3) +void +lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + unsigned mask) { - const struct lp_rast_state *state = task->current_state; + const struct lp_rast_state *state = inputs->state; struct lp_fragment_shader_variant *variant = state->variant; struct lp_rasterizer *rast = task->rast; uint8_t *color[PIPE_MAX_COLOR_BUFS]; @@ -494,32 +491,26 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, } /* depth buffer */ - depth = lp_rast_get_depth_block_pointer(rast, x, y); + depth = lp_rast_get_depth_block_pointer(task, x, y); assert(lp_check_alignment(state->jit_context.blend_color, 16)); - assert(lp_check_alignment(inputs->step[0], 16)); - assert(lp_check_alignment(inputs->step[1], 16)); - assert(lp_check_alignment(inputs->step[2], 16)); - /* run shader on 4x4 block */ - variant->jit_function[RAST_EDGE_TEST]( &state->jit_context, - x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - c1, c2, c3, - inputs->step[0], - inputs->step[1], - inputs->step[2], - &task->vis_counter); + variant->jit_function[RAST_EDGE_TEST](&state->jit_context, + x, y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + mask, + &task->vis_counter); } + /** * Set top row and left column of the tile's pixels to white. For debugging. 
*/ @@ -598,6 +589,11 @@ lp_rast_tile_end(struct lp_rasterizer_task *task) (void) outline_subtiles; #endif + { + union lp_rast_cmd_arg dummy = {0}; + lp_rast_store_linear_color(task, dummy); + } + /* debug */ memset(task->color_tiles, 0, sizeof(task->color_tiles)); task->depth_tile = NULL; @@ -627,7 +623,7 @@ void lp_rast_begin_query(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { - /* Reset the the per-task counter */ + /* Reset the per-task counter */ task->vis_counter = 0; } @@ -715,10 +711,16 @@ static struct { { RAST(clear_color), RAST(clear_zstencil), - RAST(triangle), + RAST(triangle_1), + RAST(triangle_2), + RAST(triangle_3), + RAST(triangle_4), + RAST(triangle_5), + RAST(triangle_6), + RAST(triangle_7), RAST(shade_tile), - RAST(set_state), - RAST(store_color), + RAST(shade_tile_opaque), + RAST(store_linear_color), RAST(fence), RAST(begin_query), RAST(end_query), @@ -754,30 +756,8 @@ debug_bin( const struct cmd_bin *bin ) static boolean is_empty_bin( const struct cmd_bin *bin ) { - const struct cmd_block *head = bin->commands.head; - int i; - - if (0) - debug_bin(bin); - - /* We emit at most two load-tile commands at the start of the first - * command block. In addition we seem to emit a couple of - * set-state commands even in empty bins. - * - * As a heuristic, if a bin has more than 4 commands, consider it - * non-empty. 
- */ - if (head->next != NULL || - head->count > 4) { - return FALSE; - } - - for (i = 0; i < head->count; i++) - if (head->cmd[i] != lp_rast_set_state) { - return FALSE; - } - - return TRUE; + if (0) debug_bin(bin); + return bin->commands.head->count == 0; } @@ -813,6 +793,10 @@ rasterize_scene(struct lp_rasterizer_task *task, } } #endif + + if (scene->fence) { + lp_rast_fence(task, lp_rast_arg_fence(scene->fence)); + } } @@ -983,6 +967,10 @@ lp_rast_create( unsigned num_threads ) /* for synchronizing rasterization threads */ pipe_barrier_init( &rast->barrier, rast->num_threads ); + memset(lp_swizzled_cbuf, 0, sizeof lp_swizzled_cbuf); + + memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); + return rast; } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 80ca68f5a2..eaf2a6f334 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -84,8 +84,7 @@ struct lp_rast_shader_inputs { float (*dadx)[4]; float (*dady)[4]; - /* edge/step info for 3 edges and 4x4 block of pixels */ - PIPE_ALIGN_VAR(16) int step[3][16]; + const struct lp_rast_state *state; }; struct lp_rast_clearzs { @@ -93,6 +92,22 @@ struct lp_rast_clearzs { unsigned clearzs_mask; }; +struct lp_rast_plane { + /* one-pixel sized trivial accept offsets for each plane */ + int ei; + + /* one-pixel sized trivial reject offsets for each plane */ + int eo; + + /* edge function values at minx,miny ?? */ + int c; + + int dcdx; + int dcdy; + + /* edge/step info for 3 edges and 4x4 block of pixels */ + const int *step; +}; /** * Rasterization information for a triangle known to be in this bin, @@ -101,35 +116,16 @@ struct lp_rast_clearzs { * Objects of this type are put into the lp_setup_context::data buffer. 
*/ struct lp_rast_triangle { + /* inputs for the shader */ + struct lp_rast_shader_inputs inputs; + + int step[3][16]; + #ifdef DEBUG float v[3][2]; #endif - /* one-pixel sized trivial accept offsets for each plane */ - int ei1; - int ei2; - int ei3; - - /* one-pixel sized trivial reject offsets for each plane */ - int eo1; - int eo2; - int eo3; - - /* y deltas for vertex pairs (in fixed pt) */ - int dy12; - int dy23; - int dy31; - - /* x deltas for vertex pairs (in fixed pt) */ - int dx12; - int dx23; - int dx31; - - /* edge function values at minx,miny ?? */ - int c1, c2, c3; - - /* inputs for the shader */ - PIPE_ALIGN_VAR(16) struct lp_rast_shader_inputs inputs; + struct lp_rast_plane plane[7]; /* NOTE: may allocate fewer planes */ }; @@ -153,7 +149,10 @@ lp_rast_finish( struct lp_rasterizer *rast ); union lp_rast_cmd_arg { const struct lp_rast_shader_inputs *shade_tile; - const struct lp_rast_triangle *triangle; + struct { + const struct lp_rast_triangle *tri; + unsigned plane_mask; + } triangle; const struct lp_rast_state *set_state; uint8_t clear_color[4]; const struct lp_rast_clearzs *clear_zstencil; @@ -173,10 +172,12 @@ lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile ) } static INLINE union lp_rast_cmd_arg -lp_rast_arg_triangle( const struct lp_rast_triangle *triangle ) +lp_rast_arg_triangle( const struct lp_rast_triangle *triangle, + unsigned plane_mask) { union lp_rast_cmd_arg arg; - arg.triangle = triangle; + arg.triangle.tri = triangle; + arg.triangle.plane_mask = plane_mask; return arg; } @@ -226,19 +227,31 @@ void lp_rast_clear_color( struct lp_rasterizer_task *, void lp_rast_clear_zstencil( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_set_state( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - -void lp_rast_triangle( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); +void lp_rast_triangle_1( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void 
lp_rast_triangle_2( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_3( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_4( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_5( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_6( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_7( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); void lp_rast_shade_tile( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void lp_rast_shade_tile_opaque( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + void lp_rast_fence( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_store_color( struct lp_rasterizer_task *, +void lp_rast_store_linear_color( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index d33dd49f3a..b4a48cfd02 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -31,6 +31,7 @@ #include "os/os_thread.h" #include "util/u_format.h" #include "gallivm/lp_bld_debug.h" +#include "lp_memory.h" #include "lp_rast.h" #include "lp_scene.h" #include "lp_state.h" @@ -52,8 +53,6 @@ struct lp_rasterizer_task uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS]; uint8_t *depth_tile; - const struct lp_rast_state *current_state; - /** "back" pointer */ struct lp_rasterizer *rast; @@ -118,10 +117,12 @@ struct lp_rasterizer }; -void lp_rast_shade_quads( struct lp_rasterizer_task *task, - const struct lp_rast_shader_inputs *inputs, - unsigned x, unsigned y, - int32_t c1, int32_t c2, int32_t c3); +void +lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + unsigned mask); + /** @@ -132,18 +133,23 @@ void lp_rast_shade_quads( 
struct lp_rasterizer_task *task, * \param x, y location of 4x4 block in window coords */ static INLINE void * -lp_rast_get_depth_block_pointer(const struct lp_rasterizer *rast, +lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task, unsigned x, unsigned y) { + const struct lp_rasterizer *rast = task->rast; void *depth; assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); - assert(rast->zsbuf.map || !rast->curr_scene->fb.zsbuf); - - if (!rast->zsbuf.map) - return NULL; + if (!rast->zsbuf.map) { + /* Either out of memory or no zsbuf. Can't tell without access + * to the state. Just use dummy tile memory, but don't print + * the oom warning as this is most likely because there is no + * zsbuf. + */ + return lp_dummy_tile; + } depth = (rast->zsbuf.map + rast->zsbuf.stride * y + @@ -155,6 +161,39 @@ lp_rast_get_depth_block_pointer(const struct lp_rasterizer *rast, /** + * Get pointer to the swizzled color tile + */ +static INLINE uint8_t * +lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task, + unsigned buf, enum lp_texture_usage usage) +{ + struct lp_rasterizer *rast = task->rast; + + assert(task->x % TILE_SIZE == 0); + assert(task->y % TILE_SIZE == 0); + assert(buf < rast->state.nr_cbufs); + + if (!task->color_tiles[buf]) { + struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; + struct llvmpipe_resource *lpt; + assert(cbuf); + lpt = llvmpipe_resource(cbuf->texture); + task->color_tiles[buf] = lp_swizzled_cbuf[task->thread_index][buf]; + + if (usage != LP_TEX_USAGE_WRITE_ALL) { + llvmpipe_swizzle_cbuf_tile(lpt, + cbuf->face + cbuf->zslice, + cbuf->level, + task->x, task->y, + task->color_tiles[buf]); + } + } + + return task->color_tiles[buf]; +} + + +/** * Get the pointer to a 4x4 color block (within a 64x64 tile). * We'll map the color buffer on demand here. 
* Note that this may be called even when there's no color buffers - return @@ -171,7 +210,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task, assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); - color = task->color_tiles[buf]; + color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE); assert(color); px = x % TILE_SIZE; @@ -196,8 +235,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y ) { - struct lp_rasterizer *rast = task->rast; - const struct lp_rast_state *state = task->current_state; + const struct lp_rasterizer *rast = task->rast; + const struct lp_rast_state *state = inputs->state; struct lp_fragment_shader_variant *variant = state->variant; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -207,19 +246,19 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, for (i = 0; i < rast->state.nr_cbufs; i++) color[i] = lp_rast_get_color_block_pointer(task, i, x, y); - depth = lp_rast_get_depth_block_pointer(rast, x, y); + depth = lp_rast_get_depth_block_pointer(task, x, y); /* run shader on 4x4 block */ variant->jit_function[RAST_WHOLE]( &state->jit_context, - x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - INT_MIN, INT_MIN, INT_MIN, - NULL, NULL, NULL, &task->vis_counter ); + x, y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + 0xffff, + &task->vis_counter ); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index a5f0d14c95..ebe9a8e92b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -113,168 +113,31 @@ block_full_16(struct lp_rasterizer_task *task, block_full_4(task, tri, x + ix, y + iy); } +#define TAG(x) x##_1 +#define NR_PLANES 1 +#include "lp_rast_tri_tmp.h" -/** - * Pass the 4x4 pixel block to the shader function. 
- * Determination of which of the 16 pixels lies inside the triangle - * will be done as part of the fragment shader. - */ -static void -do_block_4(struct lp_rasterizer_task *task, - const struct lp_rast_triangle *tri, - int x, int y, - int c1, int c2, int c3) -{ - assert(x >= 0); - assert(y >= 0); - - lp_rast_shade_quads(task, &tri->inputs, x, y, -c1, -c2, -c3); -} - - -/** - * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out - * of the triangle's bounds. - */ -static void -do_block_16(struct lp_rasterizer_task *task, - const struct lp_rast_triangle *tri, - int x, int y, - int c0, int c1, int c2) -{ - unsigned mask = 0; - int eo[3]; - int c[3]; - int i, j; - - assert(x >= 0); - assert(y >= 0); - assert(x % 16 == 0); - assert(y % 16 == 0); - - eo[0] = tri->eo1 * 4; - eo[1] = tri->eo2 * 4; - eo[2] = tri->eo3 * 4; - - c[0] = c0; - c[1] = c1; - c[2] = c2; - - for (j = 0; j < 3; j++) { - const int *step = tri->inputs.step[j]; - const int cx = c[j] + eo[j]; - - /* Mask has bits set whenever we are outside any of the edges. - */ - for (i = 0; i < 16; i++) { - int out = cx + step[i] * 4; - mask |= (out >> 31) & (1 << i); - } - } +#define TAG(x) x##_2 +#define NR_PLANES 2 +#include "lp_rast_tri_tmp.h" - mask = ~mask & 0xffff; - while (mask) { - int i = ffs(mask) - 1; - int px = x + pos_table4[i][0]; - int py = y + pos_table4[i][1]; - int cx1 = c0 + tri->inputs.step[0][i] * 4; - int cx2 = c1 + tri->inputs.step[1][i] * 4; - int cx3 = c2 + tri->inputs.step[2][i] * 4; +#define TAG(x) x##_3 +#define NR_PLANES 3 +#include "lp_rast_tri_tmp.h" - mask &= ~(1 << i); +#define TAG(x) x##_4 +#define NR_PLANES 4 +#include "lp_rast_tri_tmp.h" - /* Don't bother testing if the 4x4 block is entirely in/out of - * the triangle. It's a little faster to do it in the jit code. - */ - LP_COUNT(nr_non_empty_4); - do_block_4(task, tri, px, py, cx1, cx2, cx3); - } -} - - -/** - * Scan the tile in chunks and figure out which pixels to rasterize - * for this triangle. 
- */ -void -lp_rast_triangle(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) -{ - const struct lp_rast_triangle *tri = arg.triangle; - const int x = task->x, y = task->y; - int ei[3], eo[3], c[3]; - unsigned outmask, inmask, partial_mask; - unsigned i, j; - - c[0] = tri->c1 + tri->dx12 * y - tri->dy12 * x; - c[1] = tri->c2 + tri->dx23 * y - tri->dy23 * x; - c[2] = tri->c3 + tri->dx31 * y - tri->dy31 * x; - - eo[0] = tri->eo1 * 16; - eo[1] = tri->eo2 * 16; - eo[2] = tri->eo3 * 16; - - ei[0] = tri->ei1 * 16; - ei[1] = tri->ei2 * 16; - ei[2] = tri->ei3 * 16; - - outmask = 0; - inmask = 0xffff; +#define TAG(x) x##_5 +#define NR_PLANES 5 +#include "lp_rast_tri_tmp.h" - for (j = 0; j < 3; j++) { - const int *step = tri->inputs.step[j]; - const int cox = c[j] + eo[j]; - const int cio = ei[j]- eo[j]; +#define TAG(x) x##_6 +#define NR_PLANES 6 +#include "lp_rast_tri_tmp.h" - /* Outmask has bits set whenever we are outside any of the - * edges. - */ - /* Inmask has bits set whenever we are inside all of the edges. 
- */ - for (i = 0; i < 16; i++) { - int out = cox + step[i] * 16; - int in = out + cio; - outmask |= (out >> 31) & (1 << i); - inmask &= ~((in >> 31) & (1 << i)); - } - } +#define TAG(x) x##_7 +#define NR_PLANES 7 +#include "lp_rast_tri_tmp.h" - assert((outmask & inmask) == 0); - - if (outmask == 0xffff) - return; - - /* Invert mask, so that bits are set whenever we are at least - * partially inside all of the edges: - */ - partial_mask = ~inmask & ~outmask & 0xffff; - - /* Iterate over partials: - */ - while (partial_mask) { - int i = ffs(partial_mask) - 1; - int px = x + pos_table16[i][0]; - int py = y + pos_table16[i][1]; - int cx1 = c[0] + tri->inputs.step[0][i] * 16; - int cx2 = c[1] + tri->inputs.step[1][i] * 16; - int cx3 = c[2] + tri->inputs.step[2][i] * 16; - - partial_mask &= ~(1 << i); - - LP_COUNT(nr_partially_covered_16); - do_block_16(task, tri, px, py, cx1, cx2, cx3); - } - - /* Iterate over fulls: - */ - while (inmask) { - int i = ffs(inmask) - 1; - int px = x + pos_table16[i][0]; - int py = y + pos_table16[i][1]; - - inmask &= ~(1 << i); - - LP_COUNT(nr_fully_covered_16); - block_full_16(task, tri, px, py); - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h new file mode 100644 index 0000000000..a410c611a3 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -0,0 +1,238 @@ +/************************************************************************** + * + * Copyright 2007-2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Rasterization for binned triangles within a tile + */ + + + +/** + * Prototype for a 7 plane rasterizer function. Will codegenerate + * several of these. + * + * XXX: Variants for more/fewer planes. + * XXX: Need ways of dropping planes as we descend. 
+ * XXX: SIMD + */ +static void +TAG(do_block_4)(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + const struct lp_rast_plane *plane, + int x, int y, + const int *c) +{ + unsigned mask = 0; + int i; + + for (i = 0; i < 16; i++) { + int any_negative = 0; + int j; + + for (j = 0; j < NR_PLANES; j++) + any_negative |= (c[j] - 1 + plane[j].step[i]); + + any_negative >>= 31; + + mask |= (~any_negative) & (1 << i); + } + + /* Now pass to the shader: + */ + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); +} + +/** + * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out + * of the triangle's bounds. + */ +static void +TAG(do_block_16)(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + const struct lp_rast_plane *plane, + int x, int y, + const int *c) +{ + unsigned outmask, inmask, partmask, partial_mask; + unsigned i, j; + + outmask = 0; /* outside one or more trivial reject planes */ + partmask = 0; /* outside one or more trivial accept planes */ + + for (j = 0; j < NR_PLANES; j++) { + const int *step = plane[j].step; + const int eo = plane[j].eo * 4; + const int ei = plane[j].ei * 4; + const int cox = c[j] + eo; + const int cio = ei - 1 - eo; + + for (i = 0; i < 16; i++) { + int out = cox + step[i] * 4; + int part = out + cio; + outmask |= (out >> 31) & (1 << i); + partmask |= (part >> 31) & (1 << i); + } + } + + if (outmask == 0xffff) + return; + + /* Mask of sub-blocks which are inside all trivial accept planes: + */ + inmask = ~partmask & 0xffff; + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = partmask & ~outmask; + + assert((partial_mask & inmask) == 0); + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int px = x + pos_table4[i][0]; + int py = y + pos_table4[i][1]; + int cx[NR_PLANES]; + + for (j = 0; j < NR_PLANES; j++) + cx[j] = c[j] + 
plane[j].step[i] * 4; + + partial_mask &= ~(1 << i); + + TAG(do_block_4)(task, tri, plane, px, py, cx); + } + + /* Iterate over fulls: + */ + while (inmask) { + int i = ffs(inmask) - 1; + int px = x + pos_table4[i][0]; + int py = y + pos_table4[i][1]; + + inmask &= ~(1 << i); + + block_full_4(task, tri, px, py); + } +} + + +/** + * Scan the tile in chunks and figure out which pixels to rasterize + * for this triangle. + */ +void +TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + unsigned plane_mask = arg.triangle.plane_mask; + const int x = task->x, y = task->y; + struct lp_rast_plane plane[NR_PLANES]; + int c[NR_PLANES]; + unsigned outmask, inmask, partmask, partial_mask; + unsigned i, j, nr_planes = 0; + + while (plane_mask) { + int i = ffs(plane_mask) - 1; + plane[nr_planes] = tri->plane[i]; + plane_mask &= ~(1 << i); + nr_planes++; + }; + + assert(nr_planes == NR_PLANES); + outmask = 0; /* outside one or more trivial reject planes */ + partmask = 0; /* outside one or more trivial accept planes */ + + for (j = 0; j < NR_PLANES; j++) { + const int *step = plane[j].step; + const int eo = plane[j].eo * 16; + const int ei = plane[j].ei * 16; + int cox, cio; + + c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; + cox = c[j] + eo; + cio = ei - 1 - eo; + + for (i = 0; i < 16; i++) { + int out = cox + step[i] * 16; + int part = out + cio; + outmask |= (out >> 31) & (1 << i); + partmask |= (part >> 31) & (1 << i); + } + } + + if (outmask == 0xffff) + return; + + /* Mask of sub-blocks which are inside all trivial accept planes: + */ + inmask = ~partmask & 0xffff; + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = partmask & ~outmask; + + assert((partial_mask & inmask) == 0); + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int 
px = x + pos_table16[i][0]; + int py = y + pos_table16[i][1]; + int cx[NR_PLANES]; + + for (j = 0; j < NR_PLANES; j++) + cx[j] = c[j] + plane[j].step[i] * 16; + + partial_mask &= ~(1 << i); + + LP_COUNT(nr_partially_covered_16); + TAG(do_block_16)(task, tri, plane, px, py, cx); + } + + /* Iterate over fulls: + */ + while (inmask) { + int i = ffs(inmask) - 1; + int px = x + pos_table16[i][0]; + int py = y + pos_table16[i][1]; + + inmask &= ~(1 << i); + + LP_COUNT(nr_fully_covered_16); + block_full_16(task, tri, px, py); + } +} + +#undef TAG +#undef NR_PLANES + diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 845c175cf2..f88a759fe7 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -32,6 +32,7 @@ #include "util/u_simple_list.h" #include "lp_scene.h" #include "lp_scene_queue.h" +#include "lp_fence.h" /** List of texture references */ @@ -162,8 +163,8 @@ lp_scene_reset(struct lp_scene *scene ) /* Free all but last binner command lists: */ - for (i = 0; i < scene->tiles_x; i++) { - for (j = 0; j < scene->tiles_y; j++) { + for (i = 0; i < TILES_X; i++) { + for (j = 0; j < TILES_Y; j++) { lp_scene_bin_reset(scene, i, j); } } @@ -198,6 +199,8 @@ lp_scene_reset(struct lp_scene *scene ) make_empty_list(ref_list); } + lp_fence_reference(&scene->fence, NULL); + scene->scene_size = 0; scene->has_color_clear = FALSE; @@ -303,60 +306,6 @@ lp_scene_is_resource_referenced(const struct lp_scene *scene, } -/** - * Return last command in the bin - */ -static lp_rast_cmd -lp_get_last_command( const struct cmd_bin *bin ) -{ - const struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - if (i > 0) - return tail->cmd[i - 1]; - else - return NULL; -} - - -/** - * Replace the arg of the last command in the bin. 
- */ -static void -lp_replace_last_command_arg( struct cmd_bin *bin, - const union lp_rast_cmd_arg arg ) -{ - struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - assert(i > 0); - tail->arg[i - 1] = arg; -} - - - -/** - * Put a state-change command into all bins. - * If we find that the last command in a bin was also a state-change - * command, we can simply replace that one with the new one. - */ -void -lp_scene_bin_state_command( struct lp_scene *scene, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) -{ - unsigned i, j; - for (i = 0; i < scene->tiles_x; i++) { - for (j = 0; j < scene->tiles_y; j++) { - struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - lp_rast_cmd last_cmd = lp_get_last_command(bin); - if (last_cmd == cmd) { - lp_replace_last_command_arg(bin, arg); - } - else { - lp_scene_bin_command( scene, i, j, cmd, arg ); - } - } - } -} /** advance curr_x,y to the next bin */ diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 4e55d43174..fa1b311fa1 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -112,6 +112,7 @@ struct resource_ref { */ struct lp_scene { struct pipe_context *pipe; + struct lp_fence *fence; /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 6432cea862..167cb2ee2e 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -43,6 +43,7 @@ #include "lp_debug.h" #include "lp_public.h" #include "lp_limits.h" +#include "lp_rast.h" #include "state_tracker/sw_winsys.h" @@ -86,7 +87,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; + return PIPE_MAX_VERTEX_SAMPLERS; case 
PIPE_CAP_MAX_COMBINED_SAMPLERS: return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: @@ -166,6 +167,10 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return LP_MAX_TGSI_PREDS; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 1; + case PIPE_CAP_GEOMETRY_SHADER4: + return 1; + case PIPE_CAP_DEPTH_CLAMP: + return 0; default: assert(0); return 0; @@ -294,11 +299,16 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen ) struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct sw_winsys *winsys = screen->winsys; + if (screen->rast) + lp_rast_destroy(screen->rast); + lp_jit_screen_cleanup(screen); if(winsys->destroy) winsys->destroy(winsys); + pipe_mutex_destroy(screen->rast_mutex); + FREE(screen); } @@ -347,11 +357,6 @@ llvmpipe_create_screen(struct sw_winsys *winsys) lp_jit_screen_init(screen); -#ifdef PIPE_OS_WINDOWS - /* Multithreading not supported on windows until conditions and barriers are - * properly implemented. */ - screen->num_threads = 0; -#else #ifdef PIPE_OS_EMBEDDED screen->num_threads = 0; #else @@ -359,7 +364,14 @@ llvmpipe_create_screen(struct sw_winsys *winsys) #endif screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads); screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS); -#endif + + screen->rast = lp_rast_create(screen->num_threads); + if (!screen->rast) { + lp_jit_screen_cleanup(screen); + FREE(screen); + return NULL; + } + pipe_mutex_init(screen->rast_mutex); util_format_s3tc_init(); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h index eb40f6823f..731526dfab 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/src/gallium/drivers/llvmpipe/lp_screen.h @@ -37,6 +37,7 @@ #include "gallivm/lp_bld.h" #include <llvm-c/ExecutionEngine.h> +#include "os/os_thread.h" #include "pipe/p_screen.h" #include "pipe/p_defines.h" @@ -63,6 +64,9 @@ struct llvmpipe_screen /* Increments whenever textures are 
modified. Contexts can track this. */ unsigned timestamp; + + struct lp_rasterizer *rast; + pipe_mutex rast_mutex; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index e8aafee33f..556e571585 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -40,6 +40,7 @@ #include "util/u_memory.h" #include "util/u_pack_color.h" #include "lp_context.h" +#include "lp_memory.h" #include "lp_scene.h" #include "lp_scene_queue.h" #include "lp_texture.h" @@ -63,15 +64,7 @@ struct lp_scene * lp_setup_get_current_scene(struct lp_setup_context *setup) { if (!setup->scene) { - - /* wait for a free/empty scene - */ - setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE); - - assert(lp_scene_is_empty(setup->scene)); - - lp_scene_begin_binning(setup->scene, - &setup->fb ); + set_scene_state( setup, SETUP_EMPTY ); } return setup->scene; } @@ -159,8 +152,11 @@ static void lp_setup_rasterize_scene( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen); - lp_scene_rasterize(scene, setup->rast); + pipe_mutex_lock(screen->rast_mutex); + lp_scene_rasterize(scene, screen->rast); + pipe_mutex_unlock(screen->rast_mutex); reset_context( setup ); @@ -233,22 +229,36 @@ set_scene_state( struct lp_setup_context *setup, LP_DBG(DEBUG_SETUP, "%s old %d new %d\n", __FUNCTION__, old_state, new_state); switch (new_state) { - case SETUP_ACTIVE: - begin_binning( setup ); + case SETUP_EMPTY: + assert(old_state == SETUP_FLUSHED); + assert(setup->scene == NULL); + + /* wait for a free/empty scene + */ + setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE); + assert(lp_scene_is_empty(setup->scene)); + lp_scene_begin_binning(setup->scene, + &setup->fb ); break; case SETUP_CLEARED: - if (old_state == SETUP_ACTIVE) { - assert(0); - return; - } + assert(old_state == SETUP_EMPTY); + assert(setup->scene 
!= NULL); break; - + + case SETUP_ACTIVE: + assert(old_state == SETUP_EMPTY || + old_state == SETUP_CLEARED); + assert(setup->scene != NULL); + begin_binning( setup ); + break; + case SETUP_FLUSHED: if (old_state == SETUP_CLEARED) execute_clears( setup ); else lp_setup_rasterize_scene( setup ); + assert(setup->scene == NULL); break; default: @@ -264,23 +274,19 @@ set_scene_state( struct lp_setup_context *setup, */ void lp_setup_flush( struct lp_setup_context *setup, - unsigned flags ) + unsigned flags, + struct pipe_fence_handle **fence) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); if (setup->scene) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); - union lp_rast_cmd_arg dummy = {0}; - - if (flags & (PIPE_FLUSH_SWAPBUFFERS | - PIPE_FLUSH_FRAME)) { - /* Store colors in the linear color buffer(s). - * If we don't do this here, we'll end up converting the tiled - * data to linear in the texture_unmap() function, which will - * not be a parallel/threaded operation as here. + if (fence) { + /* if we're going to flush the setup/rasterization modules, emit + * a fence. */ - lp_scene_bin_everywhere(scene, lp_rast_store_color, dummy); + *fence = lp_setup_fence( setup ); } + } set_scene_state( setup, SETUP_FLUSHED ); @@ -297,6 +303,11 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup, */ set_scene_state( setup, SETUP_FLUSHED ); + /* + * Ensure the old scene is not reused. + */ + assert(!setup->scene); + /* Set new state. This will be picked up later when we next need a * scene. 
*/ @@ -421,24 +432,27 @@ lp_setup_clear( struct lp_setup_context *setup, struct pipe_fence_handle * lp_setup_fence( struct lp_setup_context *setup ) { - if (setup->num_threads == 0) { + if (setup->scene == NULL) return NULL; - } - else { + else if (setup->num_threads == 0) + return NULL; + else + { struct lp_scene *scene = lp_setup_get_current_scene(setup); - const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ - struct lp_fence *fence = lp_fence_create(rank); - - LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); + const unsigned rank = setup->num_threads; set_scene_state( setup, SETUP_ACTIVE ); + + assert(scene->fence == NULL); + + /* The caller gets a reference, we keep a copy too, so need to + * bump the refcount: + */ + lp_fence_reference(&scene->fence, lp_fence_create(rank)); - /* insert the fence into all command bins */ - lp_scene_bin_everywhere( scene, - lp_rast_fence, - lp_rast_arg_fence(fence) ); + LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); - return (struct pipe_fence_handle *) fence; + return (struct pipe_fence_handle *) scene->fence; } } @@ -611,6 +625,17 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, LP_TEX_LAYOUT_LINEAR); jit_tex->row_stride[j] = lp_tex->row_stride[j]; jit_tex->img_stride[j] = lp_tex->img_stride[j]; + + if (!jit_tex->data[j]) { + /* out of memory - use dummy tile memory */ + jit_tex->data[j] = lp_dummy_tile; + jit_tex->width = TILE_SIZE; + jit_tex->height = TILE_SIZE; + jit_tex->depth = 1; + jit_tex->last_level = 0; + jit_tex->row_stride[j] = 0; + jit_tex->img_stride[j] = 0; + } } } else { @@ -618,7 +643,6 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, /* * XXX: Where should this be unmapped? 
*/ - struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen); struct sw_winsys *winsys = screen->winsys; jit_tex->data[0] = winsys->displaytarget_map(winsys, lp_tex->dt, @@ -717,28 +741,6 @@ lp_setup_update_state( struct lp_setup_context *setup ) setup->dirty |= LP_SETUP_NEW_FS; } - if (setup->dirty & LP_SETUP_NEW_SCISSOR) { - float *stored; - - stored = lp_scene_alloc_aligned(scene, 4 * sizeof(int32_t), 16); - - if (stored) { - stored[0] = (float) setup->scissor.current.minx; - stored[1] = (float) setup->scissor.current.miny; - stored[2] = (float) setup->scissor.current.maxx; - stored[3] = (float) setup->scissor.current.maxy; - - setup->scissor.stored = stored; - - setup->fs.current.jit_context.scissor_xmin = stored[0]; - setup->fs.current.jit_context.scissor_ymin = stored[1]; - setup->fs.current.jit_context.scissor_xmax = stored[2]; - setup->fs.current.jit_context.scissor_ymax = stored[3]; - } - - setup->dirty |= LP_SETUP_NEW_FS; - } - if(setup->dirty & LP_SETUP_NEW_CONSTANTS) { struct pipe_resource *buffer = setup->constants.current; @@ -792,11 +794,6 @@ lp_setup_update_state( struct lp_setup_context *setup ) &setup->fs.current, sizeof setup->fs.current); setup->fs.stored = stored; - - /* put the state-set command into all bins */ - lp_scene_bin_state_command( scene, - lp_rast_set_state, - lp_rast_arg_state(setup->fs.stored) ); } /* The scene now references the textures in the rasterization @@ -843,8 +840,6 @@ lp_setup_destroy( struct lp_setup_context *setup ) lp_scene_queue_destroy(setup->empty_scenes); - lp_rast_destroy( setup->rast ); - FREE( setup ); } @@ -871,13 +866,7 @@ lp_setup_create( struct pipe_context *pipe, if (!setup->empty_scenes) goto fail; - /* XXX: move this to the screen and share between contexts: - */ setup->num_threads = screen->num_threads; - setup->rast = lp_rast_create(screen->num_threads); - if (!setup->rast) - goto fail; - setup->vbuf = draw_vbuf_stage(draw, &setup->base); if (!setup->vbuf) goto fail; @@ -901,9 +890,6 @@ 
lp_setup_create( struct pipe_context *pipe, return setup; fail: - if (setup->rast) - lp_rast_destroy( setup->rast ); - if (setup->vbuf) ; @@ -933,6 +919,8 @@ lp_setup_begin_query(struct lp_setup_context *setup, memset(pq->count, 0, sizeof(pq->count)); /* reset all counters */ + set_scene_state( setup, SETUP_ACTIVE ); + cmd_arg.query_obj = pq; lp_scene_bin_everywhere(scene, lp_rast_begin_query, cmd_arg); pq->binned = TRUE; @@ -948,6 +936,8 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) struct lp_scene * scene = lp_setup_get_current_scene(setup); union lp_rast_cmd_arg cmd_arg; + set_scene_state( setup, SETUP_ACTIVE ); + cmd_arg.query_obj = pq; lp_scene_bin_everywhere(scene, lp_rast_end_query, cmd_arg); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 6a0dc55129..73b1c85325 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -84,7 +84,8 @@ lp_setup_fence( struct lp_setup_context *setup ); void lp_setup_flush( struct lp_setup_context *setup, - unsigned flags ); + unsigned flags, + struct pipe_fence_handle **fence); void diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index c8b8a2480b..a0606f5034 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -81,7 +81,6 @@ struct lp_setup_context */ struct draw_stage *vbuf; unsigned num_threads; - struct lp_rasterizer *rast; struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */ struct lp_scene *scene; /**< current scene being built */ struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ @@ -101,9 +100,10 @@ struct lp_setup_context } clear; enum setup_state { - SETUP_FLUSHED, - SETUP_CLEARED, - SETUP_ACTIVE + SETUP_FLUSHED, /**< scene is null */ + SETUP_EMPTY, /**< scene exists but has only state changes */ + SETUP_CLEARED, /**< scene exists but has 
only clears */ + SETUP_ACTIVE /**< scene exists and has at least one draw/query */ } state; struct { @@ -129,7 +129,6 @@ struct lp_setup_context struct { struct pipe_scissor_state current; - const void *stored; } scissor; unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 0557d35f8b..7e432503c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -38,12 +38,78 @@ #define NUM_CHANNELS 4 +struct tri_info { + + float pixel_offset; + + /* fixed point vertex coordinates */ + int x[3]; + int y[3]; + + /* float x,y deltas - all from the original coordinates + */ + float dy01, dy20; + float dx01, dx20; + float oneoverarea; + + const float (*v0)[4]; + const float (*v1)[4]; + const float (*v2)[4]; + + boolean frontfacing; +}; + + + +static const int step_scissor_minx[16] = { + 0, 1, 0, 1, + 2, 3, 2, 3, + 0, 1, 0, 1, + 2, 3, 2, 3 +}; + +static const int step_scissor_maxx[16] = { + 0, -1, 0, -1, + -2, -3, -2, -3, + 0, -1, 0, -1, + -2, -3, -2, -3 +}; + +static const int step_scissor_miny[16] = { + 0, 0, 1, 1, + 0, 0, 1, 1, + 2, 2, 3, 3, + 2, 2, 3, 3 +}; + +static const int step_scissor_maxy[16] = { + 0, 0, -1, -1, + 0, 0, -1, -1, + -2, -2, -3, -3, + -2, -2, -3, -3 +}; + + + + +static INLINE int +subpixel_snap(float a) +{ + return util_iround(FIXED_ONE * a); +} + +static INLINE float +fixed_to_float(int a) +{ + return a * (1.0 / FIXED_ONE); +} + + /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ -static void constant_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, +static void constant_coef( struct lp_rast_triangle *tri, unsigned slot, const float value, unsigned i ) @@ -54,28 +120,21 @@ static void constant_coef( struct lp_setup_context *setup, } -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. 
- */ -static void linear_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, + +static void linear_coef( struct lp_rast_triangle *tri, + const struct tri_info *info, unsigned slot, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], unsigned vert_attr, unsigned i) { - float a1 = v1[vert_attr][i]; - float a2 = v2[vert_attr][i]; - float a3 = v3[vert_attr][i]; + float a0 = info->v0[vert_attr][i]; + float a1 = info->v1[vert_attr][i]; + float a2 = info->v2[vert_attr][i]; - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; + float da01 = a0 - a1; + float da20 = a2 - a0; + float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea; + float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea; tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; @@ -92,9 +151,9 @@ static void linear_coef( struct lp_setup_context *setup, * to define a0 as the sample at a pixel center somewhere near vmin * instead - i'll switch to this later. */ - tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - setup->pixel_offset) + - dady * (v1[0][1] - setup->pixel_offset))); + tri->inputs.a0[slot][i] = (a0 - + (dadx * (info->v0[0][0] - info->pixel_offset) + + dady * (info->v0[0][1] - info->pixel_offset))); } @@ -106,31 +165,27 @@ static void linear_coef( struct lp_setup_context *setup, * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. 
*/ -static void perspective_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, +static void perspective_coef( struct lp_rast_triangle *tri, + const struct tri_info *info, unsigned slot, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], unsigned vert_attr, unsigned i) { /* premultiply by 1/w (v[0][3] is always 1/w): */ - float a1 = v1[vert_attr][i] * v1[0][3]; - float a2 = v2[vert_attr][i] * v2[0][3]; - float a3 = v3[vert_attr][i] * v3[0][3]; - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; + float a0 = info->v0[vert_attr][i] * info->v0[0][3]; + float a1 = info->v1[vert_attr][i] * info->v1[0][3]; + float a2 = info->v2[vert_attr][i] * info->v2[0][3]; + float da01 = a0 - a1; + float da20 = a2 - a0; + float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea; + float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea; tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - setup->pixel_offset) + - dady * (v1[0][1] - setup->pixel_offset))); + tri->inputs.a0[slot][i] = (a0 - + (dadx * (info->v0[0][0] - info->pixel_offset) + + dady * (info->v0[0][1] - info->pixel_offset))); } @@ -141,13 +196,9 @@ static void perspective_coef( struct lp_setup_context *setup, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
*/ static void -setup_fragcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, +setup_fragcoord_coef(struct lp_rast_triangle *tri, + const struct tri_info *info, unsigned slot, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], unsigned usage_mask) { /*X*/ @@ -166,12 +217,12 @@ setup_fragcoord_coef(struct lp_setup_context *setup, /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2); + linear_coef(tri, info, slot, 0, 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3); + linear_coef(tri, info, slot, 0, 3); } } @@ -180,24 +231,23 @@ setup_fragcoord_coef(struct lp_setup_context *setup, * Setup the fragment input attribute with the front-facing value. * \param frontface is the triangle front facing? */ -static void setup_facing_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, +static void setup_facing_coef( struct lp_rast_triangle *tri, unsigned slot, boolean frontface, unsigned usage_mask) { /* convert TRUE to 1.0 and FALSE to -1.0 */ if (usage_mask & TGSI_WRITEMASK_X) - constant_coef( setup, tri, slot, 2.0f * frontface - 1.0f, 0 ); + constant_coef( tri, slot, 2.0f * frontface - 1.0f, 0 ); if (usage_mask & TGSI_WRITEMASK_Y) - constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ if (usage_mask & TGSI_WRITEMASK_Z) - constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ if (usage_mask & TGSI_WRITEMASK_W) - constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ } @@ -206,11 +256,7 @@ static void setup_facing_coef( struct lp_setup_context *setup, */ static void setup_tri_coefficients( struct lp_setup_context *setup, struct lp_rast_triangle *tri, - float oneoverarea, - const float (*v1)[4], - const float (*v2)[4], - 
const float (*v3)[4], - boolean frontface) + const struct tri_info *info) { unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; @@ -227,25 +273,25 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, if (setup->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, v1[vert_attr][i], i); + constant_coef(tri, slot+1, info->v0[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, v3[vert_attr][i], i); + constant_coef(tri, slot+1, info->v2[vert_attr][i], i); } break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + linear_coef(tri, info, slot+1, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + perspective_coef(tri, info, slot+1, vert_attr, i); fragcoord_usage_mask |= TGSI_WRITEMASK_W; break; @@ -259,7 +305,7 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, break; case LP_INTERP_FACING: - setup_facing_coef(setup, tri, slot+1, frontface, usage_mask); + setup_facing_coef(tri, slot+1, info->frontfacing, usage_mask); break; default: @@ -269,16 +315,11 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3, - fragcoord_usage_mask); + setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask); } -static INLINE int subpixel_snap( float a ) -{ - return util_iround(FIXED_ONE * a - (FIXED_ONE / 2)); -} @@ -291,21 +332,23 @@ static INLINE int subpixel_snap( float a ) * \return pointer to triangle space */ static INLINE struct lp_rast_triangle * -alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned 
*tri_size) +alloc_triangle(struct lp_scene *scene, + unsigned nr_inputs, + unsigned nr_planes, + unsigned *tri_size) { unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); struct lp_rast_triangle *tri; - unsigned bytes; + unsigned tri_bytes, bytes; char *inputs; - assert(sizeof(*tri) % 16 == 0); - - bytes = sizeof(*tri) + (3 * input_array_sz); + tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16); + bytes = tri_bytes + (3 * input_array_sz); tri = lp_scene_alloc_aligned( scene, bytes, 16 ); if (tri) { - inputs = (char *) (tri + 1); + inputs = ((char *)tri) + tri_bytes; tri->inputs.a0 = (float (*)[4]) inputs; tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); @@ -329,52 +372,71 @@ print_triangle(struct lp_setup_context *setup, uint i; debug_printf("llvmpipe triangle\n"); - for (i = 0; i < setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { debug_printf(" v1[%d]: %f %f %f %f\n", i, v1[i][0], v1[i][1], v1[i][2], v1[i][3]); } - for (i = 0; i < setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { debug_printf(" v2[%d]: %f %f %f %f\n", i, v2[i][0], v2[i][1], v2[i][2], v2[i][3]); } - for (i = 0; i < setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { debug_printf(" v3[%d]: %f %f %f %f\n", i, v3[i][0], v3[i][1], v3[i][2], v3[i][3]); } } +lp_rast_cmd lp_rast_tri_tab[8] = { + NULL, /* should be impossible */ + lp_rast_triangle_1, + lp_rast_triangle_2, + lp_rast_triangle_3, + lp_rast_triangle_4, + lp_rast_triangle_5, + lp_rast_triangle_6, + lp_rast_triangle_7 +}; + /** * Do basic setup for triangle rasterization and determine which * framebuffer tiles are touched. Put the triangle in the scene's * bins for the tiles which we overlap. 
*/ -static void +static void do_triangle_ccw(struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontfacing ) { - /* x/y positions in fixed point */ - const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset); - const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset); - const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset); - const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset); - const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset); - const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset); struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_fragment_shader_variant *variant = setup->fs.current.variant; struct lp_rast_triangle *tri; + struct tri_info info; int area; - float oneoverarea; int minx, maxx, miny, maxy; + int ix0, ix1, iy0, iy1; unsigned tri_bytes; - + int i; + int nr_planes = 3; + if (0) print_triangle(setup, v1, v2, v3); - tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes); + if (setup->scissor_test) { + nr_planes = 7; + } + else { + nr_planes = 3; + } + + + tri = alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &tri_bytes); if (!tri) return; @@ -387,15 +449,24 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->v[2][1] = v3[0][1]; #endif - tri->dx12 = x1 - x2; - tri->dx23 = x2 - x3; - tri->dx31 = x3 - x1; + /* x/y positions in fixed point */ + info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset); + info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset); + info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset); + info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset); + info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset); + info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset); + + tri->plane[0].dcdy = info.x[0] - info.x[1]; + tri->plane[1].dcdy = info.x[1] - info.x[2]; + tri->plane[2].dcdy = info.x[2] - info.x[0]; - tri->dy12 = y1 - y2; - tri->dy23 = y2 - y3; 
- tri->dy31 = y3 - y1; + tri->plane[0].dcdx = info.y[0] - info.y[1]; + tri->plane[1].dcdx = info.y[1] - info.y[2]; + tri->plane[2].dcdx = info.y[2] - info.y[0]; - area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12); + area = (tri->plane[0].dcdy * tri->plane[2].dcdx - + tri->plane[2].dcdy * tri->plane[0].dcdx); LP_COUNT(nr_tris); @@ -410,20 +481,35 @@ do_triangle_ccw(struct lp_setup_context *setup, } /* Bounding rectangle (in pixels) */ - minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; - maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; - miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; - maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; - + { + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->pixel_offset != 0) ? 1 : 0; + + minx = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + maxx = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + miny = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + maxy = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + } + if (setup->scissor_test) { minx = MAX2(minx, setup->scissor.current.minx); maxx = MIN2(maxx, setup->scissor.current.maxx); miny = MAX2(miny, setup->scissor.current.miny); maxy = MIN2(maxy, setup->scissor.current.maxy); } + else { + minx = MAX2(minx, 0); + miny = MAX2(miny, 0); + maxx = MIN2(maxx, scene->fb.width); + maxy = MIN2(maxy, scene->fb.height); + } + - if (miny == maxy || - minx == maxx) { + if (miny >= maxy || minx >= maxx) { lp_scene_putback_data( scene, tri_bytes ); LP_COUNT(nr_culled_tris); return; @@ -431,75 +517,88 @@ do_triangle_ccw(struct lp_setup_context *setup, /* */ - oneoverarea = ((float)FIXED_ONE) / (float)area; + info.pixel_offset = setup->pixel_offset; + 
info.v0 = v1; + info.v1 = v2; + info.v2 = v3; + info.dx01 = info.v0[0][0] - info.v1[0][0]; + info.dx20 = info.v2[0][0] - info.v0[0][0]; + info.dy01 = info.v0[0][1] - info.v1[0][1]; + info.dy20 = info.v2[0][1] - info.v0[0][1]; + info.oneoverarea = 1.0 / (info.dx01 * info.dy20 - info.dx20 * info.dy01); + info.frontfacing = frontfacing; /* Setup parameter interpolants: */ - setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing ); + setup_tri_coefficients( setup, tri, &info ); tri->inputs.facing = frontfacing ? 1.0F : -1.0F; + tri->inputs.state = setup->fs.stored; - /* half-edge constants, will be interated over the whole render target. - */ - tri->c1 = tri->dy12 * x1 - tri->dx12 * y1; - tri->c2 = tri->dy23 * x2 - tri->dx23 * y2; - tri->c3 = tri->dy31 * x3 - tri->dx31 * y3; - /* correct for top-left fill convention: - */ - if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++; - if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; - if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; - - tri->dy12 *= FIXED_ONE; - tri->dy23 *= FIXED_ONE; - tri->dy31 *= FIXED_ONE; - - tri->dx12 *= FIXED_ONE; - tri->dx23 *= FIXED_ONE; - tri->dx31 *= FIXED_ONE; - - /* find trivial reject offsets for each edge for a single-pixel - * sized block. These will be scaled up at each recursive level to - * match the active blocksize. Scaling in this way works best if - * the blocks are square. - */ - tri->eo1 = 0; - if (tri->dy12 < 0) tri->eo1 -= tri->dy12; - if (tri->dx12 > 0) tri->eo1 += tri->dx12; + + for (i = 0; i < 3; i++) { + struct lp_rast_plane *plane = &tri->plane[i]; - tri->eo2 = 0; - if (tri->dy23 < 0) tri->eo2 -= tri->dy23; - if (tri->dx23 > 0) tri->eo2 += tri->dx23; + /* half-edge constants, will be interated over the whole render + * target. + */ + plane->c = plane->dcdx * info.x[i] - plane->dcdy * info.y[i]; + + /* correct for top-left vs. bottom-left fill convention. 
+ * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. + */ + if (plane->dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane->c++; + } + else if (plane->dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane->dcdy > 0) plane->c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane->dcdy < 0) plane->c++; + } + } - tri->eo3 = 0; - if (tri->dy31 < 0) tri->eo3 -= tri->dy31; - if (tri->dx31 > 0) tri->eo3 += tri->dx31; + plane->dcdx *= FIXED_ONE; + plane->dcdy *= FIXED_ONE; - /* Calculate trivial accept offsets from the above. - */ - tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1; - tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; - tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane->eo = 0; + if (plane->dcdx < 0) plane->eo -= plane->dcdx; + if (plane->dcdy > 0) plane->eo += plane->dcdy; - /* Fill in the inputs.step[][] arrays. - * We've manually unrolled some loops here. 
- */ - { - const int xstep1 = -tri->dy12; - const int xstep2 = -tri->dy23; - const int xstep3 = -tri->dy31; - const int ystep1 = tri->dx12; - const int ystep2 = tri->dx23; - const int ystep3 = tri->dx31; - -#define SETUP_STEP(i, x, y) \ - do { \ - tri->inputs.step[0][i] = x * xstep1 + y * ystep1; \ - tri->inputs.step[1][i] = x * xstep2 + y * ystep2; \ - tri->inputs.step[2][i] = x * xstep3 + y * ystep3; \ - } while (0) + /* Calculate trivial accept offsets from the above. + */ + plane->ei = plane->dcdy - plane->dcdx - plane->eo; + plane->step = tri->step[i]; + + /* Fill in the inputs.step[][] arrays. + * We've manually unrolled some loops here. + */ +#define SETUP_STEP(j, x, y) \ + tri->step[i][j] = y * plane->dcdy - x * plane->dcdx + SETUP_STEP(0, 0, 0); SETUP_STEP(1, 1, 0); SETUP_STEP(2, 0, 1); @@ -522,63 +621,106 @@ do_triangle_ccw(struct lp_setup_context *setup, #undef STEP } + + /* + * When rasterizing scissored tris, use the intersection of the + * triangle bounding box and the scissor rect to generate the + * scissor planes. + * + * This permits us to cut off the triangle "tails" that are present + * in the intermediate recursive levels caused when two of the + * triangles edges don't diverge quickly enough to trivially reject + * exterior blocks from the triangle. + * + * It's not really clear if it's worth worrying about these tails, + * but since we generate the planes for each scissored tri, it's + * free to trim them in this case. + * + * Note that otherwise, the scissor planes only vary in 'C' value, + * and even then only on state-changes. Could alternatively store + * these planes elsewhere. 
+ */ + if (nr_planes == 7) { + tri->plane[3].step = step_scissor_minx; + tri->plane[3].dcdx = -1; + tri->plane[3].dcdy = 0; + tri->plane[3].c = 1-minx; + tri->plane[3].ei = 0; + tri->plane[3].eo = 1; + + tri->plane[4].step = step_scissor_maxx; + tri->plane[4].dcdx = 1; + tri->plane[4].dcdy = 0; + tri->plane[4].c = maxx; + tri->plane[4].ei = -1; + tri->plane[4].eo = 0; + + tri->plane[5].step = step_scissor_miny; + tri->plane[5].dcdx = 0; + tri->plane[5].dcdy = 1; + tri->plane[5].c = 1-miny; + tri->plane[5].ei = 0; + tri->plane[5].eo = 1; + + tri->plane[6].step = step_scissor_maxy; + tri->plane[6].dcdx = 0; + tri->plane[6].dcdy = -1; + tri->plane[6].c = maxy; + tri->plane[6].ei = -1; + tri->plane[6].eo = 0; + } + + /* * All fields of 'tri' are now set. The remaining code here is * concerned with binning. */ - /* Convert to tile coordinates: + /* Convert to tile coordinates, and inclusive ranges: */ - minx = minx / TILE_SIZE; - miny = miny / TILE_SIZE; - maxx = maxx / TILE_SIZE; - maxy = maxy / TILE_SIZE; + ix0 = minx / TILE_SIZE; + iy0 = miny / TILE_SIZE; + ix1 = (maxx-1) / TILE_SIZE; + iy1 = (maxy-1) / TILE_SIZE; /* * Clamp to framebuffer size */ - minx = MAX2(minx, 0); - miny = MAX2(miny, 0); - maxx = MIN2(maxx, scene->tiles_x - 1); - maxy = MIN2(maxy, scene->tiles_y - 1); + assert(ix0 == MAX2(ix0, 0)); + assert(iy0 == MAX2(iy0, 0)); + assert(ix1 == MIN2(ix1, scene->tiles_x - 1)); + assert(iy1 == MIN2(iy1, scene->tiles_y - 1)); /* Determine which tile(s) intersect the triangle's bounding box */ - if (miny == maxy && minx == maxx) + if (iy0 == iy1 && ix0 == ix1) { /* Triangle is contained in a single tile: */ - lp_scene_bin_command( scene, minx, miny, lp_rast_triangle, - lp_rast_arg_triangle(tri) ); + lp_scene_bin_command( scene, ix0, iy0, + lp_rast_tri_tab[nr_planes], + lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); } - else + else { - int c1 = (tri->c1 + - tri->dx12 * miny * TILE_SIZE - - tri->dy12 * minx * TILE_SIZE); - int c2 = (tri->c2 + - tri->dx23 * miny * 
TILE_SIZE - - tri->dy23 * minx * TILE_SIZE); - int c3 = (tri->c3 + - tri->dx31 * miny * TILE_SIZE - - tri->dy31 * minx * TILE_SIZE); - - int ei1 = tri->ei1 << TILE_ORDER; - int ei2 = tri->ei2 << TILE_ORDER; - int ei3 = tri->ei3 << TILE_ORDER; - - int eo1 = tri->eo1 << TILE_ORDER; - int eo2 = tri->eo2 << TILE_ORDER; - int eo3 = tri->eo3 << TILE_ORDER; - - int xstep1 = -(tri->dy12 << TILE_ORDER); - int xstep2 = -(tri->dy23 << TILE_ORDER); - int xstep3 = -(tri->dy31 << TILE_ORDER); - - int ystep1 = tri->dx12 << TILE_ORDER; - int ystep2 = tri->dx23 << TILE_ORDER; - int ystep3 = tri->dx31 << TILE_ORDER; + int c[7]; + int ei[7]; + int eo[7]; + int xstep[7]; + int ystep[7]; int x, y; + + for (i = 0; i < nr_planes; i++) { + c[i] = (tri->plane[i].c + + tri->plane[i].dcdy * iy0 * TILE_SIZE - + tri->plane[i].dcdx * ix0 * TILE_SIZE); + + ei[i] = tri->plane[i].ei << TILE_ORDER; + eo[i] = tri->plane[i].eo << TILE_ORDER; + xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER); + ystep[i] = tri->plane[i].dcdy << TILE_ORDER; + } + /* Test tile-sized blocks against the triangle. @@ -586,63 +728,67 @@ do_triangle_ccw(struct lp_setup_context *setup, * contained inside the tri, bin an lp_rast_shade_tile command. * Else, bin a lp_rast_triangle command. */ - for (y = miny; y <= maxy; y++) + for (y = iy0; y <= iy1; y++) { - int cx1 = c1; - int cx2 = c2; - int cx3 = c3; boolean in = FALSE; /* are we inside the triangle? 
*/ + int cx[7]; + + for (i = 0; i < nr_planes; i++) + cx[i] = c[i]; - for (x = minx; x <= maxx; x++) + for (x = ix0; x <= ix1; x++) { - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { - /* do nothing */ + int out = 0; + int partial = 0; + + for (i = 0; i < nr_planes; i++) { + int planeout = cx[i] + eo[i]; + int planepartial = cx[i] + ei[i] - 1; + out |= (planeout >> 31); + partial |= (planepartial >> 31) & (1<<i); + } + + if (out) { + /* do nothing */ + if (in) + break; /* exiting triangle, all done with this row */ LP_COUNT(nr_empty_64); - if (in) - break; /* exiting triangle, all done with this row */ - } - else if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) - { + } + else if (partial) { + /* Not trivially accepted by at least one plane - + * rasterize/shade partial tile + */ + int count = util_bitcount(partial); + in = TRUE; + lp_scene_bin_command( scene, x, y, + lp_rast_tri_tab[count], + lp_rast_arg_triangle(tri, partial) ); + + LP_COUNT(nr_partially_covered_64); + } + else { /* triangle covers the whole tile- shade whole tile */ LP_COUNT(nr_fully_covered_64); - in = TRUE; - if (setup->fs.current.variant->opaque) { + in = TRUE; + if (variant->opaque && + !setup->fb.zsbuf) { lp_scene_bin_reset( scene, x, y ); - lp_scene_bin_command( scene, x, y, - lp_rast_set_state, - lp_rast_arg_state(setup->fs.stored) ); } lp_scene_bin_command( scene, x, y, lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); - } - else - { - /* rasterizer/shade partial tile */ - LP_COUNT(nr_partially_covered_64); - in = TRUE; - lp_scene_bin_command( scene, x, y, - lp_rast_triangle, - lp_rast_arg_triangle(tri) ); - } + } /* Iterate cx values across the region: */ - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; + for (i = 0; i < nr_planes; i++) + cx[i] += xstep[i]; } /* Iterate c values down the region: */ - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; + for (i = 0; i < nr_planes; i++) + c[i] += ystep[i]; } } } diff --git 
a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index f6a424f25a..51948f5bf2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -61,7 +61,9 @@ lp_setup_get_vertex_info(struct vbuf_render *vbr) { struct lp_setup_context *setup = lp_setup_context(vbr); - /* vertex size/info depends on the latest state */ + /* Vertex size/info depends on the latest state. + * The draw module may have issued additional state-change commands. + */ lp_setup_update_state(setup); return setup->vertex_info; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 05d1b93794..86313e1c48 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -130,6 +130,12 @@ llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe); void llvmpipe_init_so_funcs(struct llvmpipe_context *llvmpipe); +void +llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *ctx, + unsigned num, + struct pipe_sampler_view **views); +void +llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index d20a5218d4..77bec4640b 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -189,7 +189,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) llvmpipe->constants[PIPE_SHADER_FRAGMENT][0]); if (llvmpipe->dirty & LP_NEW_SAMPLER_VIEW) - lp_setup_set_fragment_sampler_views(llvmpipe->setup, + lp_setup_set_fragment_sampler_views(llvmpipe->setup, llvmpipe->num_fragment_sampler_views, llvmpipe->fragment_sampler_views); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 65115052cd..5953d690a4 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -31,9 +31,6 @@ * Code generate the whole fragment pipeline. * * The fragment pipeline consists of the following stages: - * - triangle edge in/out testing - * - scissor test - * - stipple (TBI) * - early depth test * - fragment shader * - alpha test @@ -97,6 +94,7 @@ #include "lp_state.h" #include "lp_tex_sample.h" #include "lp_flush.h" +#include "lp_state_fs.h" #include <llvm-c/Analysis.h> @@ -170,177 +168,63 @@ generate_depth_stencil(LLVMBuilderRef builder, /** - * Generate the code to do inside/outside triangle testing for the + * Expand the relevent bits of mask_input to a 4-dword mask for the * four pixels in a 2x2 quad. This will set the four elements of the * quad mask vector to 0 or ~0. - * \param i which quad of the quad group to test, in [0,3] + * + * \param quad which quad of the quad group to test, in [0,3] + * \param mask_input bitwise mask for the whole 4x4 stamp */ -static void -generate_tri_edge_mask(LLVMBuilderRef builder, - unsigned i, - LLVMValueRef *mask, /* ivec4, out */ - LLVMValueRef c0, /* int32 */ - LLVMValueRef c1, /* int32 */ - LLVMValueRef c2, /* int32 */ - LLVMValueRef step0_ptr, /* ivec4 */ - LLVMValueRef step1_ptr, /* ivec4 */ - LLVMValueRef step2_ptr) /* ivec4 */ +static LLVMValueRef +generate_quad_mask(LLVMBuilderRef builder, + struct lp_type fs_type, + unsigned quad, + LLVMValueRef mask_input) /* int32 */ { -#define OPTIMIZE_IN_OUT_TEST 0 -#if OPTIMIZE_IN_OUT_TEST - struct lp_build_if_state ifctx; - LLVMValueRef not_draw_all; -#endif - struct lp_build_flow_context *flow; - struct lp_type i32_type; - LLVMTypeRef i32vec4_type; - LLVMValueRef c0_vec, c1_vec, c2_vec; - LLVMValueRef in_out_mask; - - assert(i < 4); - - /* int32 vector type */ - memset(&i32_type, 0, sizeof i32_type); - i32_type.floating = FALSE; /* values are integers */ - i32_type.sign = TRUE; /* values are signed */ - i32_type.norm = FALSE; /* values are not normalized */ - i32_type.width = 32; /* 32-bit int values */ - 
i32_type.length = 4; /* 4 elements per vector */ - - i32vec4_type = lp_build_int32_vec4_type(); + struct lp_type mask_type; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef bits[4]; + LLVMValueRef mask; /* - * Use a conditional here to do detailed pixel in/out testing. - * We only have to do this if c0 != INT_MIN. + * XXX: We'll need a different path for 16 x u8 */ - flow = lp_build_flow_create(builder); - lp_build_flow_scope_begin(flow); - - { -#if OPTIMIZE_IN_OUT_TEST - /* not_draw_all = (c0 != INT_MIN) */ - not_draw_all = LLVMBuildICmp(builder, - LLVMIntNE, - c0, - LLVMConstInt(LLVMInt32Type(), INT_MIN, 0), - ""); - - in_out_mask = lp_build_const_int_vec(i32_type, ~0); - - - lp_build_flow_scope_declare(flow, &in_out_mask); - - /* if (not_draw_all) {... */ - lp_build_if(&ifctx, flow, builder, not_draw_all); -#endif - { - LLVMValueRef step0_vec, step1_vec, step2_vec; - LLVMValueRef m0_vec, m1_vec, m2_vec; - LLVMValueRef index, m; - - /* c0_vec = {c0, c0, c0, c0} - * Note that we emit this code four times but LLVM optimizes away - * three instances of it. 
- */ - c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); - c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); - c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); - lp_build_name(c0_vec, "edgeconst0vec"); - lp_build_name(c1_vec, "edgeconst1vec"); - lp_build_name(c2_vec, "edgeconst2vec"); - - /* load step0vec, step1, step2 vec from memory */ - index = LLVMConstInt(LLVMInt32Type(), i, 0); - step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); - step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); - step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); - lp_build_name(step0_vec, "step0vec"); - lp_build_name(step1_vec, "step1vec"); - lp_build_name(step2_vec, "step2vec"); - - /* m0_vec = step0_ptr[i] > c0_vec */ - m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); - m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); - m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); - - /* in_out_mask = m0_vec & m1_vec & m2_vec */ - m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); - in_out_mask = LLVMBuildAnd(builder, m, m2_vec, ""); - lp_build_name(in_out_mask, "inoutmaskvec"); - } -#if OPTIMIZE_IN_OUT_TEST - lp_build_endif(&ifctx); -#endif - - } - lp_build_flow_scope_end(flow); - lp_build_flow_destroy(flow); + assert(fs_type.width == 32); + assert(fs_type.length == 4); + mask_type = lp_int_type(fs_type); - /* This is the initial alive/dead pixel mask for a quad of four pixels. - * It's an int[4] vector with each word set to 0 or ~0. - * Words will get cleared when pixels faile the Z test, etc. 
+ /* + * mask_input >>= (quad * 4) */ - *mask = in_out_mask; -} - - -static LLVMValueRef -generate_scissor_test(LLVMBuilderRef builder, - LLVMValueRef context_ptr, - const struct lp_build_interp_soa_context *interp, - struct lp_type type) -{ - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1]; - LLVMValueRef xmin, ymin, xmax, ymax; - LLVMValueRef m0, m1, m2, m3, m; - - /* xpos, ypos contain the window coords for the four pixels in the quad */ - assert(xpos); - assert(ypos); - - /* get the current scissor bounds, convert to vectors */ - xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr); - xmin = lp_build_broadcast(builder, vec_type, xmin); - - ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr); - ymin = lp_build_broadcast(builder, vec_type, ymin); - xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr); - xmax = lp_build_broadcast(builder, vec_type, xmax); + mask_input = LLVMBuildLShr(builder, + mask_input, + LLVMConstInt(i32t, quad * 4, 0), + ""); - ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr); - ymax = lp_build_broadcast(builder, vec_type, ymax); + /* + * mask = { mask_input & (1 << i), for i in [0,3] } + */ - /* compare the fragment's position coordinates against the scissor bounds */ - m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin); - m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin); - m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax); - m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax); + mask = lp_build_broadcast(builder, lp_build_vec_type(mask_type), mask_input); - /* AND all the masks together */ - m = LLVMBuildAnd(builder, m0, m1, ""); - m = LLVMBuildAnd(builder, m, m2, ""); - m = LLVMBuildAnd(builder, m, m3, ""); + bits[0] = LLVMConstInt(i32t, 1 << 0, 0); + bits[1] = LLVMConstInt(i32t, 1 << 1, 0); + bits[2] = LLVMConstInt(i32t, 1 << 2, 0); + bits[3] = LLVMConstInt(i32t, 1 << 3, 0); 
- lp_build_name(m, "scissormask"); + mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), ""); - return m; -} + /* + * mask = mask != 0 ? ~0 : 0 + */ + mask = lp_build_compare(builder, + mask_type, PIPE_FUNC_NOTEQUAL, + mask, + lp_build_const_int_vec(mask_type, 0)); -static LLVMValueRef -build_int32_vec_const(int value) -{ - struct lp_type i32_type; - - memset(&i32_type, 0, sizeof i32_type); - i32_type.floating = FALSE; /* values are integers */ - i32_type.sign = TRUE; /* values are signed */ - i32_type.norm = FALSE; /* values are not normalized */ - i32_type.width = 32; /* 32-bit int values */ - i32_type.length = 4; /* 4 elements per vector */ - return lp_build_const_int_vec(i32_type, value); + return mask; } @@ -348,7 +232,7 @@ build_int32_vec_const(int value) /** * Generate the fragment shader, depth/stencil test, and alpha tests. * \param i which quad in the tile, in range [0,3] - * \param do_tri_test if 1, do triangle edge in/out testing + * \param partial_mask if 1, do mask_input testing */ static void generate_fs(struct llvmpipe_context *lp, @@ -364,13 +248,8 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef (*color)[4], LLVMValueRef depth_ptr, LLVMValueRef facing, - unsigned do_tri_test, - LLVMValueRef c0, - LLVMValueRef c1, - LLVMValueRef c2, - LLVMValueRef step0_ptr, - LLVMValueRef step1_ptr, - LLVMValueRef step2_ptr, + unsigned partial_mask, + LLVMValueRef mask_input, LLVMValueRef counter) { const struct tgsi_token *tokens = shader->base.tokens; @@ -411,23 +290,17 @@ generate_fs(struct llvmpipe_context *lp, lp_build_flow_scope_declare(flow, &z); /* do triangle edge testing */ - if (do_tri_test) { - generate_tri_edge_mask(builder, i, pmask, - c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + if (partial_mask) { + *pmask = generate_quad_mask(builder, type, + i, mask_input); } else { - *pmask = build_int32_vec_const(~0); + *pmask = lp_build_const_int_vec(type, ~0); } /* 'mask' will control execution based on quad's pixel alive/killed state */ 
lp_build_mask_begin(&mask, flow, type, *pmask); - if (key->scissor) { - LLVMValueRef smask = - generate_scissor_test(builder, context_ptr, interp, type); - lp_build_mask_update(&mask, smask); - } - early_depth_stencil_test = (key->depth.enabled || key->stencil[0].enabled) && !key->alpha.enabled && @@ -579,7 +452,7 @@ static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, struct lp_fragment_shader_variant *variant, - unsigned do_tri_test) + unsigned partial_mask) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; @@ -589,9 +462,8 @@ generate_fragment(struct llvmpipe_context *lp, LLVMTypeRef fs_elem_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; - LLVMTypeRef arg_types[16]; + LLVMTypeRef arg_types[11]; LLVMTypeRef func_type; - LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; @@ -600,7 +472,8 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef dady_ptr; LLVMValueRef color_ptr_ptr; LLVMValueRef depth_ptr; - LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr, counter = NULL; + LLVMValueRef mask_input; + LLVMValueRef counter = NULL; LLVMBasicBlockRef block; LLVMBuilderRef builder; struct lp_build_sampler_soa *sampler; @@ -645,7 +518,7 @@ generate_fragment(struct llvmpipe_context *lp, blend_vec_type = lp_build_vec_type(blend_type); util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", - shader->no, variant->no, do_tri_test ? "edge" : "whole"); + shader->no, variant->no, partial_mask ? 
"partial" : "whole"); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ @@ -656,23 +529,15 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ - arg_types[9] = LLVMInt32Type(); /* c0 */ - arg_types[10] = LLVMInt32Type(); /* c1 */ - arg_types[11] = LLVMInt32Type(); /* c2 */ - /* Note: the step arrays are built as int32[16] but we interpret - * them here as int32_vec4[4]. - */ - arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ - arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ - arg_types[14] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ - arg_types[15] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ + arg_types[9] = LLVMInt32Type(); /* mask_input */ + arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); function = LLVMAddFunction(screen->module, func_name, func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); - variant->function[do_tri_test] = function; + variant->function[partial_mask] = function; /* XXX: need to propagate noalias down into color param now we are @@ -691,12 +556,7 @@ generate_fragment(struct llvmpipe_context *lp, dady_ptr = LLVMGetParam(function, 6); color_ptr_ptr = LLVMGetParam(function, 7); depth_ptr = LLVMGetParam(function, 8); - c0 = LLVMGetParam(function, 9); - c1 = LLVMGetParam(function, 10); - c2 = LLVMGetParam(function, 11); - step0_ptr = LLVMGetParam(function, 12); - step1_ptr = LLVMGetParam(function, 13); - step2_ptr = LLVMGetParam(function, 14); + mask_input = LLVMGetParam(function, 9); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -706,15 +566,10 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(dady_ptr, "dady"); 
lp_build_name(color_ptr_ptr, "color_ptr_ptr"); lp_build_name(depth_ptr, "depth"); - lp_build_name(c0, "c0"); - lp_build_name(c1, "c1"); - lp_build_name(c2, "c2"); - lp_build_name(step0_ptr, "step0"); - lp_build_name(step1_ptr, "step1"); - lp_build_name(step2_ptr, "step2"); + lp_build_name(mask_input, "mask_input"); if (key->occlusion_count) { - counter = LLVMGetParam(function, 15); + counter = LLVMGetParam(function, 10); lp_build_name(counter, "counter"); } @@ -763,9 +618,9 @@ generate_fragment(struct llvmpipe_context *lp, out_color, depth_ptr_i, facing, - do_tri_test, - c0, c1, c2, - step0_ptr, step1_ptr, step2_ptr, counter); + partial_mask, + mask_input, + counter); for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) for(chan = 0; chan < NUM_CHANNELS; ++chan) @@ -792,9 +647,13 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } - lp_build_conv_mask(builder, fs_type, blend_type, - fs_mask, num_fs, - &blend_mask, 1); + if (partial_mask || !variant->opaque) { + lp_build_conv_mask(builder, fs_type, blend_type, + fs_mask, num_fs, + &blend_mask, 1); + } else { + blend_mask = lp_build_const_int_vec(blend_type, ~0); + } color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), @@ -832,8 +691,7 @@ generate_fragment(struct llvmpipe_context *lp, #endif /* Apply optimizations to LLVM IR */ - if (1) - LLVMRunFunctionPassManager(screen->pass, function); + LLVMRunFunctionPassManager(screen->pass, function); if (gallivm_debug & GALLIVM_DEBUG_IR) { /* Print the LLVM IR to stderr */ @@ -847,7 +705,7 @@ generate_fragment(struct llvmpipe_context *lp, { void *f = LLVMGetPointerToGlobal(screen->engine, function); - variant->jit_function[do_tri_test] = (lp_jit_frag_func)pointer_to_func(f); + variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f); if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(f); @@ -963,7 +821,6 @@ generate_variant(struct llvmpipe_context *lp, 
!key->stencil[0].enabled && !key->alpha.enabled && !key->depth.enabled && - !key->scissor && !shader->info.uses_kill ? TRUE : FALSE; @@ -1182,7 +1039,6 @@ make_variant_key(struct llvmpipe_context *lp, /* alpha.ref_value is passed in jit_context */ key->flatshade = lp->rasterizer->flatshade; - key->scissor = lp->rasterizer->scissor; if (lp->active_query_count) { key->occlusion_count = TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 593cd4de6b..37900fc544 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -54,7 +54,6 @@ struct lp_fragment_shader_variant_key enum pipe_format zsbuf_format; unsigned nr_cbufs:8; unsigned flatshade:1; - unsigned scissor:1; unsigned occlusion_count:1; struct { diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index e94065fb6a..715ce2f02e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -35,10 +35,9 @@ #include "draw/draw_context.h" #include "lp_context.h" -#include "lp_context.h" +#include "lp_screen.h" #include "lp_state.h" -#include "draw/draw_context.h" - +#include "state_tracker/sw_winsys.h" static void * @@ -100,6 +99,10 @@ llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, llvmpipe->num_vertex_samplers = num_samplers; + draw_set_samplers(llvmpipe->draw, + llvmpipe->vertex_samplers, + llvmpipe->num_vertex_samplers); + llvmpipe->dirty |= LP_NEW_SAMPLER; } @@ -166,6 +169,10 @@ llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe, llvmpipe->num_vertex_sampler_views = num; + draw_set_sampler_views(llvmpipe->draw, + llvmpipe->vertex_sampler_views, + llvmpipe->num_vertex_sampler_views); + llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW; } @@ -214,6 +221,77 @@ llvmpipe_delete_sampler_state(struct pipe_context *pipe, } +/** + * Called during state validation when LP_NEW_SAMPLER_VIEW 
is set. + */ +void +llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp, + unsigned num, + struct pipe_sampler_view **views) +{ + unsigned i; + uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; + const void *data[DRAW_MAX_TEXTURE_LEVELS]; + + assert(num <= PIPE_MAX_VERTEX_SAMPLERS); + if (!num) + return; + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + struct pipe_sampler_view *view = i < num ? views[i] : NULL; + + if (view) { + struct pipe_resource *tex = view->texture; + struct llvmpipe_resource *lp_tex = llvmpipe_resource(tex); + + /* We're referencing the texture's internal data, so save a + * reference to it. + */ + pipe_resource_reference(&lp->mapped_vs_tex[i], tex); + + if (!lp_tex->dt) { + /* regular texture - setup array of mipmap level pointers */ + int j; + for (j = 0; j <= tex->last_level; j++) { + data[j] = + llvmpipe_get_texture_image_all(lp_tex, j, LP_TEX_USAGE_READ, + LP_TEX_LAYOUT_LINEAR); + row_stride[j] = lp_tex->row_stride[j]; + img_stride[j] = lp_tex->img_stride[j]; + } + } + else { + /* display target texture/surface */ + /* + * XXX: Where should this be unmapped? 
+ */ + struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen); + struct sw_winsys *winsys = screen->winsys; + data[0] = winsys->displaytarget_map(winsys, lp_tex->dt, + PIPE_TRANSFER_READ); + row_stride[0] = lp_tex->row_stride[0]; + img_stride[0] = lp_tex->img_stride[0]; + assert(data[0]); + } + draw_set_mapped_texture(lp->draw, + i, + tex->width0, tex->height0, tex->depth0, + tex->last_level, + row_stride, img_stride, data); + } + } +} + +void +llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx) +{ + unsigned i; + for (i = 0; i < Elements(ctx->mapped_vs_tex); i++) { + pipe_resource_reference(&ctx->mapped_vs_tex[i], NULL); + } +} + void llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_so.c b/src/gallium/drivers/llvmpipe/lp_state_so.c index 4c64a5b142..30b17c9881 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_so.c +++ b/src/gallium/drivers/llvmpipe/lp_state_so.c @@ -29,7 +29,6 @@ #include "lp_state.h" #include "lp_texture.h" -#include "util/u_format.h" #include "util/u_memory.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 76b3fce1fa..f761e82850 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -67,14 +67,14 @@ lp_resource_copy(struct pipe_context *pipe, dst, subdst.face, subdst.level, 0, /* flush_flags */ FALSE, /* read_only */ - FALSE, /* cpu_access */ + TRUE, /* cpu_access */ FALSE); /* do_not_block */ llvmpipe_flush_resource(pipe, src, subsrc.face, subsrc.level, 0, /* flush_flags */ TRUE, /* read_only */ - FALSE, /* cpu_access */ + TRUE, /* cpu_access */ FALSE); /* do_not_block */ /* @@ -106,19 +106,27 @@ lp_resource_copy(struct pipe_context *pipe, unsigned x, y; enum lp_texture_usage usage; - /* XXX for the tiles which are completely contained by the - * dest rectangle, we could set the usage mode to WRITE_ALL. 
- * Just test for the case of replacing the whole dest region for now. - */ - if (width == dst_tex->base.width0 && height == dst_tex->base.height0) - usage = LP_TEX_USAGE_WRITE_ALL; - else - usage = LP_TEX_USAGE_READ_WRITE; - adjust_to_tile_bounds(dstx, dsty, width, height, &tx, &ty, &tw, &th); for (y = 0; y < th; y += TILE_SIZE) { + boolean contained_y = ty + y >= dsty && + ty + y + TILE_SIZE <= dsty + height ? + TRUE : FALSE; + for (x = 0; x < tw; x += TILE_SIZE) { + boolean contained_x = tx + x >= dstx && + tx + x + TILE_SIZE <= dstx + width ? + TRUE : FALSE; + + /* + * Set the usage mode to WRITE_ALL for the tiles which are + * completely contained by the dest rectangle. + */ + if (contained_y && contained_x) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + (void) llvmpipe_get_texture_tile_linear(dst_tex, subdst.face, subdst.level, usage, @@ -138,13 +146,15 @@ lp_resource_copy(struct pipe_context *pipe, subdst.level, LP_TEX_LAYOUT_LINEAR); - util_copy_rect(dst_linear_ptr, format, - llvmpipe_resource_stride(&dst_tex->base, subdst.level), - dstx, dsty, - width, height, - src_linear_ptr, - llvmpipe_resource_stride(&src_tex->base, subsrc.level), - srcx, srcy); + if (dst_linear_ptr && src_linear_ptr) { + util_copy_rect(dst_linear_ptr, format, + llvmpipe_resource_stride(&dst_tex->base, subdst.level), + dstx, dsty, + width, height, + src_linear_ptr, + llvmpipe_resource_stride(&src_tex->base, subsrc.level), + srcx, srcy); + } } } diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 9b02f436c5..cf41b40581 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -167,19 +167,26 @@ test_one(unsigned verbose, unsigned i, j; void *code; + if (src_type.width * src_type.length != dst_type.width * dst_type.length && + src_type.length != dst_type.length) { + return TRUE; + } + if(verbose >= 1) dump_conv_types(stdout, src_type, dst_type); - 
if(src_type.length > dst_type.length) { + if (src_type.length > dst_type.length) { num_srcs = 1; num_dsts = src_type.length/dst_type.length; } - else { + else if (src_type.length < dst_type.length) { num_dsts = 1; num_srcs = dst_type.length/src_type.length; } - - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + else { + num_dsts = 1; + num_srcs = 1; + } /* We must not loose or gain channels. Only precision */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); @@ -381,6 +388,11 @@ const struct lp_type conv_types[] = { { FALSE, FALSE, TRUE, FALSE, 8, 16 }, { FALSE, FALSE, FALSE, TRUE, 8, 16 }, { FALSE, FALSE, FALSE, FALSE, 8, 16 }, + + { FALSE, FALSE, TRUE, TRUE, 8, 4 }, + { FALSE, FALSE, TRUE, FALSE, 8, 4 }, + { FALSE, FALSE, FALSE, TRUE, 8, 4 }, + { FALSE, FALSE, FALSE, FALSE, 8, 4 }, }; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 8b6dc1c7f5..2855d7cea4 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -31,6 +31,7 @@ #include <float.h> #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_init.h" #include <llvm-c/Analysis.h> #include <llvm-c/Target.h> @@ -38,6 +39,7 @@ #include "util/u_memory.h" #include "util/u_pointer.h" +#include "util/u_string.h" #include "util/u_format.h" #include "util/u_format_tests.h" #include "util/u_format_s3tc.h" @@ -71,17 +73,20 @@ write_tsv_row(FILE *fp, typedef void -(*fetch_ptr_t)(float *, const void *packed, +(*fetch_ptr_t)(void *unpacked, const void *packed, unsigned i, unsigned j); static LLVMValueRef -add_fetch_rgba_test(LLVMModuleRef lp_build_module, - const struct util_format_description *desc) +add_fetch_rgba_test(unsigned verbose, + const struct util_format_description *desc, + struct lp_type type) { + char name[256]; LLVMTypeRef args[4]; LLVMValueRef func; LLVMValueRef packed_ptr; + LLVMValueRef offset = 
LLVMConstNull(LLVMInt32Type()); LLVMValueRef rgba_ptr; LLVMValueRef i; LLVMValueRef j; @@ -89,11 +94,15 @@ add_fetch_rgba_test(LLVMModuleRef lp_build_module, LLVMBuilderRef builder; LLVMValueRef rgba; - args[0] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); + util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name, + type.floating ? "float" : "unorm8"); + + args[0] = LLVMPointerType(lp_build_vec_type(type), 0); args[1] = LLVMPointerType(LLVMInt8Type(), 0); args[3] = args[2] = LLVMInt32Type(); - func = LLVMAddFunction(lp_build_module, "fetch", LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0)); + func = LLVMAddFunction(lp_build_module, name, + LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); rgba_ptr = LLVMGetParam(func, 0); packed_ptr = LLVMGetParam(func, 1); @@ -104,91 +113,104 @@ add_fetch_rgba_test(LLVMModuleRef lp_build_module, builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - rgba = lp_build_fetch_rgba_aos(builder, desc, packed_ptr, i, j); + rgba = lp_build_fetch_rgba_aos(builder, desc, type, + packed_ptr, offset, i, j); LLVMBuildStore(builder, rgba, rgba_ptr); LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); + + if (LLVMVerifyFunction(func, LLVMPrintMessageAction)) { + LLVMDumpValue(func); + abort(); + } + + LLVMRunFunctionPassManager(lp_build_pass, func); + + if (verbose >= 1) { + LLVMDumpValue(func); + } + return func; } PIPE_ALIGN_STACK static boolean -test_format(unsigned verbose, FILE *fp, - const struct util_format_description *desc, - const struct util_format_test_case *test) +test_format_float(unsigned verbose, FILE *fp, + const struct util_format_description *desc) { LLVMValueRef fetch = NULL; - LLVMPassManagerRef pass = NULL; fetch_ptr_t fetch_ptr; PIPE_ALIGN_VAR(16) float unpacked[4]; - boolean success; - unsigned i, j, k; + boolean first = TRUE; + boolean success = TRUE; + unsigned i, j, k, l; + void *f; - fetch = 
add_fetch_rgba_test(lp_build_module, desc); + fetch = add_fetch_rgba_test(verbose, desc, lp_float32_vec4_type()); - if (LLVMVerifyFunction(fetch, LLVMPrintMessageAction)) { - LLVMDumpValue(fetch); - abort(); + f = LLVMGetPointerToGlobal(lp_build_engine, fetch); + fetch_ptr = (fetch_ptr_t) pointer_to_func(f); + + if (verbose >= 2) { + lp_disassemble(f); } -#if 0 - pass = LLVMCreatePassManager(); - LLVMAddTargetData(LLVMGetExecutionEngineTargetData(lp_build_engine), pass); - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. */ - LLVMAddConstantPropagationPass(pass); - LLVMAddInstructionCombiningPass(pass); - LLVMAddPromoteMemoryToRegisterPass(pass); - LLVMAddGVNPass(pass); - LLVMAddCFGSimplificationPass(pass); - LLVMRunPassManager(pass, lp_build_module); -#else - (void)pass; -#endif - - fetch_ptr = (fetch_ptr_t)pointer_to_func(LLVMGetPointerToGlobal(lp_build_engine, fetch)); - - for (i = 0; i < desc->block.height; ++i) { - for (j = 0; j < desc->block.width; ++j) { - - memset(unpacked, 0, sizeof unpacked); - - fetch_ptr(unpacked, test->packed, j, i); - - success = TRUE; - for(k = 0; k < 4; ++k) - if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON) - success = FALSE; - - if (!success) { - printf("FAILED\n"); - printf(" Packed: %02x %02x %02x %02x\n", - test->packed[0], test->packed[1], test->packed[2], test->packed[3]); - printf(" Unpacked (%u,%u): %f %f %f %f obtained\n", - j, i, - unpacked[0], unpacked[1], unpacked[2], unpacked[3]); - printf(" %f %f %f %f expected\n", - test->unpacked[i][j][0], - test->unpacked[i][j][1], - test->unpacked[i][j][2], - test->unpacked[i][j][3]); + for (l = 0; l < util_format_nr_test_cases; ++l) { + const struct util_format_test_case *test = &util_format_test_cases[l]; + + if (test->format == desc->format) { + + if (first) { + printf("Testing %s (float) ...\n", + desc->name); + first = FALSE; + } + + for (i = 0; i < desc->block.height; ++i) { + for (j = 0; j < 
desc->block.width; ++j) { + boolean match; + + memset(unpacked, 0, sizeof unpacked); + + fetch_ptr(unpacked, test->packed, j, i); + + match = TRUE; + for(k = 0; k < 4; ++k) + if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON) + match = FALSE; + + if (!match) { + printf("FAILED\n"); + printf(" Packed: %02x %02x %02x %02x\n", + test->packed[0], test->packed[1], test->packed[2], test->packed[3]); + printf(" Unpacked (%u,%u): %f %f %f %f obtained\n", + j, i, + unpacked[0], unpacked[1], unpacked[2], unpacked[3]); + printf(" %f %f %f %f expected\n", + test->unpacked[i][j][0], + test->unpacked[i][j][1], + test->unpacked[i][j][2], + test->unpacked[i][j][3]); + success = FALSE; + } + } } } } - if (!success) - LLVMDumpValue(fetch); + if (!success) { + if (verbose < 1) { + LLVMDumpValue(fetch); + } + } LLVMFreeMachineCodeForFunction(lp_build_engine, fetch); LLVMDeleteFunction(fetch); - if(pass) - LLVMDisposePassManager(pass); - if(fp) write_tsv_row(fp, desc, success); @@ -196,32 +218,104 @@ test_format(unsigned verbose, FILE *fp, } - +PIPE_ALIGN_STACK static boolean -test_one(unsigned verbose, FILE *fp, - const struct util_format_description *format_desc) +test_format_unorm8(unsigned verbose, FILE *fp, + const struct util_format_description *desc) { - unsigned i; + LLVMValueRef fetch = NULL; + fetch_ptr_t fetch_ptr; + uint8_t unpacked[4]; boolean first = TRUE; boolean success = TRUE; + unsigned i, j, k, l; + void *f; - for (i = 0; i < util_format_nr_test_cases; ++i) { - const struct util_format_test_case *test = &util_format_test_cases[i]; + fetch = add_fetch_rgba_test(verbose, desc, lp_unorm8_vec4_type()); - if (test->format == format_desc->format) { + f = LLVMGetPointerToGlobal(lp_build_engine, fetch); + fetch_ptr = (fetch_ptr_t) pointer_to_func(f); + + if (verbose >= 2) { + lp_disassemble(f); + } + + for (l = 0; l < util_format_nr_test_cases; ++l) { + const struct util_format_test_case *test = &util_format_test_cases[l]; + + if (test->format == 
desc->format) { if (first) { - printf("Testing %s ...\n", - format_desc->name); + printf("Testing %s (unorm8) ...\n", + desc->name); first = FALSE; } - if (!test_format(verbose, fp, format_desc, test)) { - success = FALSE; + for (i = 0; i < desc->block.height; ++i) { + for (j = 0; j < desc->block.width; ++j) { + boolean match; + + memset(unpacked, 0, sizeof unpacked); + + fetch_ptr(unpacked, test->packed, j, i); + + match = TRUE; + for(k = 0; k < 4; ++k) { + int error = float_to_ubyte(test->unpacked[i][j][k]) - unpacked[k]; + if (error < 0) + error = -error; + if (error > 1) + match = FALSE; + } + + if (!match) { + printf("FAILED\n"); + printf(" Packed: %02x %02x %02x %02x\n", + test->packed[0], test->packed[1], test->packed[2], test->packed[3]); + printf(" Unpacked (%u,%u): %02x %02x %02x %02x obtained\n", + j, i, + unpacked[0], unpacked[1], unpacked[2], unpacked[3]); + printf(" %02x %02x %02x %02x expected\n", + float_to_ubyte(test->unpacked[i][j][0]), + float_to_ubyte(test->unpacked[i][j][1]), + float_to_ubyte(test->unpacked[i][j][2]), + float_to_ubyte(test->unpacked[i][j][3])); + success = FALSE; + } + } } } } + if (!success) + LLVMDumpValue(fetch); + + LLVMFreeMachineCodeForFunction(lp_build_engine, fetch); + LLVMDeleteFunction(fetch); + + if(fp) + write_tsv_row(fp, desc, success); + + return success; +} + + + + +static boolean +test_one(unsigned verbose, FILE *fp, + const struct util_format_description *format_desc) +{ + boolean success = TRUE; + + if (!test_format_float(verbose, fp, format_desc)) { + success = FALSE; + } + + if (!test_format_unorm8(verbose, fp, format_desc)) { + success = FALSE; + } + return success; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c new file mode 100644 index 0000000000..f571a81a4a --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -0,0 +1,277 @@ +/************************************************************************** + * + * Copyright 2010 VMware, 
Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include <stdlib.h> +#include <stdio.h> + +#include "util/u_pointer.h" +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_arit.h" + +#include <llvm-c/Analysis.h> +#include <llvm-c/ExecutionEngine.h> +#include <llvm-c/Target.h> +#include <llvm-c/Transforms/Scalar.h> + +#include "lp_test.h" + + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +#ifdef PIPE_ARCH_SSE + +#define USE_SSE2 +#include "sse_mathfun.h" + +typedef __m128 (*test_round_t)(__m128); + +typedef LLVMValueRef (*lp_func_t)(struct lp_build_context *, LLVMValueRef); + + +static LLVMValueRef +add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func) +{ + LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4); + LLVMTypeRef args[1] = { v4sf }; + LLVMValueRef func = LLVMAddFunction(module, name, LLVMFunctionType(v4sf, args, 1, 0)); + LLVMValueRef arg1 = LLVMGetParam(func, 0); + LLVMBuilderRef builder = LLVMCreateBuilder(); + LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry"); + LLVMValueRef ret; + struct lp_build_context bld; + + bld.builder = builder; + bld.type.floating = 1; + bld.type.width = 32; + bld.type.length = 4; + + LLVMSetFunctionCallConv(func, LLVMCCallConv); + + LLVMPositionBuilderAtEnd(builder, block); + + ret = lp_func(&bld, arg1); + + LLVMBuildRet(builder, ret); + LLVMDisposeBuilder(builder); + return func; +} + +static void +printv(char* string, v4sf value) +{ + v4sf v = value; + float *f = (float *)&v; + printf("%s: %10f %10f %10f %10f\n", string, + f[0], f[1], f[2], f[3]); +} + +static void +compare(v4sf x, v4sf y) +{ + float *xp = (float *) &x; + float *yp = (float *) &y; + if (xp[0] != yp[0] || + xp[1] != yp[1] || + xp[2] != yp[2] || + xp[3] != yp[3]) { + printf(" Incorrect result! 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n"); + } +} + + + +PIPE_ALIGN_STACK +static boolean +test_round(unsigned verbose, FILE *fp) +{ + LLVMModuleRef module = NULL; + LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil; + LLVMExecutionEngineRef engine = NULL; + LLVMModuleProviderRef provider = NULL; + LLVMPassManagerRef pass = NULL; + char *error = NULL; + test_round_t round_func, trunc_func, floor_func, ceil_func; + float unpacked[4]; + unsigned packed; + boolean success = TRUE; + int i; + + module = LLVMModuleCreateWithName("test"); + + test_round = add_test(module, "round", lp_build_round); + test_trunc = add_test(module, "trunc", lp_build_trunc); + test_floor = add_test(module, "floor", lp_build_floor); + test_ceil = add_test(module, "ceil", lp_build_ceil); + + if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { + printf("LLVMVerifyModule: %s\n", error); + LLVMDumpModule(module); + abort(); + } + LLVMDisposeMessage(error); + + provider = LLVMCreateModuleProviderForExistingModule(module); + if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { + fprintf(stderr, "%s\n", error); + LLVMDisposeMessage(error); + abort(); + } + +#if 0 + pass = LLVMCreatePassManager(); + LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. 
*/ + LLVMAddConstantPropagationPass(pass); + LLVMAddInstructionCombiningPass(pass); + LLVMAddPromoteMemoryToRegisterPass(pass); + LLVMAddGVNPass(pass); + LLVMAddCFGSimplificationPass(pass); + LLVMRunPassManager(pass, module); +#else + (void)pass; +#endif + + round_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_round)); + trunc_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_trunc)); + floor_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_floor)); + ceil_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_ceil)); + + memset(unpacked, 0, sizeof unpacked); + packed = 0; + + if (0) + LLVMDumpModule(module); + + for (i = 0; i < 3; i++) { + v4sf xvals[3] = { + {-10.0, -1, 0, 12.0}, + {-1.5, -0.25, 1.25, 2.5}, + {-0.99, -0.01, 0.01, 0.99} + }; + v4sf x = xvals[i]; + v4sf y, ref; + float *xp = (float *) &x; + float *refp = (float *) &ref; + + printf("\n"); + printv("x ", x); + + refp[0] = round(xp[0]); + refp[1] = round(xp[1]); + refp[2] = round(xp[2]); + refp[3] = round(xp[3]); + y = round_func(x); + printv("C round(x) ", ref); + printv("LLVM round(x)", y); + compare(ref, y); + + refp[0] = trunc(xp[0]); + refp[1] = trunc(xp[1]); + refp[2] = trunc(xp[2]); + refp[3] = trunc(xp[3]); + y = trunc_func(x); + printv("C trunc(x) ", ref); + printv("LLVM trunc(x)", y); + compare(ref, y); + + refp[0] = floor(xp[0]); + refp[1] = floor(xp[1]); + refp[2] = floor(xp[2]); + refp[3] = floor(xp[3]); + y = floor_func(x); + printv("C floor(x) ", ref); + printv("LLVM floor(x)", y); + compare(ref, y); + + refp[0] = ceil(xp[0]); + refp[1] = ceil(xp[1]); + refp[2] = ceil(xp[2]); + refp[3] = ceil(xp[3]); + y = ceil_func(x); + printv("C ceil(x) ", ref); + printv("LLVM ceil(x) ", y); + compare(ref, y); + } + + LLVMFreeMachineCodeForFunction(engine, test_round); + LLVMFreeMachineCodeForFunction(engine, test_trunc); + LLVMFreeMachineCodeForFunction(engine, test_floor); + 
LLVMFreeMachineCodeForFunction(engine, test_ceil); + + LLVMDisposeExecutionEngine(engine); + if(pass) + LLVMDisposePassManager(pass); + + return success; +} + +#else /* !PIPE_ARCH_SSE */ + +static boolean +test_round(unsigned verbose, FILE *fp) +{ + return TRUE; +} + +#endif /* !PIPE_ARCH_SSE */ + + +boolean +test_all(unsigned verbose, FILE *fp) +{ + boolean success = TRUE; + + test_round(verbose, fp); + + return success; +} + + +boolean +test_some(unsigned verbose, FILE *fp, unsigned long n) +{ + return test_all(verbose, fp); +} + +boolean +test_single(unsigned verbose, FILE *fp) +{ + printf("no test_single()"); + return TRUE; +} diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index c7a903a025..1366ecddcb 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -108,7 +108,6 @@ test_sincos(unsigned verbose, FILE *fp) test_sincos_t sin_func; test_sincos_t cos_func; float unpacked[4]; - unsigned packed; boolean success = TRUE; module = LLVMModuleCreateWithName("test"); @@ -149,7 +148,6 @@ test_sincos(unsigned verbose, FILE *fp) cos_func = (test_sincos_t)LLVMGetPointerToGlobal(engine, test_cos); memset(unpacked, 0, sizeof unpacked); - packed = 0; // LLVMDumpModule(module); diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 0d526ead89..25112c10a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -36,6 +36,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_cpu_detect.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -55,6 +56,7 @@ #ifdef DEBUG static struct llvmpipe_resource resource_list; #endif +static unsigned id_counter = 0; static INLINE boolean @@ -183,8 +185,8 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, */ const unsigned width = align(lpr->base.width0, 
TILE_SIZE); const unsigned height = align(lpr->base.height0, TILE_SIZE); - const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE; - const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE; + const unsigned width_t = width / TILE_SIZE; + const unsigned height_t = height / TILE_SIZE; lpr->tiles_per_row[0] = width_t; lpr->tiles_per_image[0] = width_t * height_t; @@ -209,7 +211,6 @@ static struct pipe_resource * llvmpipe_resource_create(struct pipe_screen *_screen, const struct pipe_resource *templat) { - static unsigned id_counter = 0; struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource); if (!lpr) @@ -389,7 +390,6 @@ llvmpipe_resource_map(struct pipe_resource *resource, map = llvmpipe_get_texture_image(lpr, face + zslice, level, tex_usage, layout); - assert(map); return map; } else { @@ -446,6 +446,10 @@ llvmpipe_resource_from_handle(struct pipe_screen *screen, { struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys; struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource); + unsigned width, height, width_t, height_t; + + /* XXX Seems like from_handled depth textures doesn't work that well */ + if (!lpr) return NULL; @@ -453,6 +457,25 @@ llvmpipe_resource_from_handle(struct pipe_screen *screen, pipe_reference_init(&lpr->base.reference, 1); lpr->base.screen = screen; + width = align(lpr->base.width0, TILE_SIZE); + height = align(lpr->base.height0, TILE_SIZE); + width_t = width / TILE_SIZE; + height_t = height / TILE_SIZE; + + /* + * Looks like unaligned displaytargets work just fine, + * at least sampler/render ones. 
+ */ +#if 0 + assert(lpr->base.width0 == width); + assert(lpr->base.height0 == height); +#endif + + lpr->tiles_per_row[0] = width_t; + lpr->tiles_per_image[0] = width_t * height_t; + lpr->num_slices_faces[0] = 1; + lpr->img_stride[0] = 0; + lpr->dt = winsys->displaytarget_from_handle(winsys, template, whandle, @@ -460,6 +483,17 @@ llvmpipe_resource_from_handle(struct pipe_screen *screen, if (!lpr->dt) goto fail; + lpr->layout[0] = alloc_layout_array(1, lpr->base.width0, lpr->base.height0); + + assert(lpr->layout[0]); + assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE); + + lpr->id = id_counter++; + +#ifdef DEBUG + insert_at_tail(&resource_list, lpr); +#endif + return &lpr->base; fail: @@ -899,13 +933,15 @@ static void alloc_image_data(struct llvmpipe_resource *lpr, unsigned level, enum lp_texture_layout layout) { + uint alignment = MAX2(16, util_cpu_caps.cacheline); + if (lpr->dt) assert(level == 0); if (layout == LP_TEX_LAYOUT_TILED) { /* tiled data is stored in regular memory */ uint buffer_size = tex_image_size(lpr, level, layout); - lpr->tiled[level].data = align_malloc(buffer_size, 16); + lpr->tiled[level].data = align_malloc(buffer_size, alignment); } else { assert(layout == LP_TEX_LAYOUT_LINEAR); @@ -921,7 +957,7 @@ alloc_image_data(struct llvmpipe_resource *lpr, unsigned level, else { /* not a display target - allocate regular memory */ uint buffer_size = tex_image_size(lpr, level, LP_TEX_LAYOUT_LINEAR); - lpr->linear[level].data = align_malloc(buffer_size, 16); + lpr->linear[level].data = align_malloc(buffer_size, alignment); } } } @@ -1035,7 +1071,7 @@ llvmpipe_get_texture_image(struct llvmpipe_resource *lpr, layout_logic(cur_layout, layout, usage, &new_layout, &convert); - if (convert) { + if (convert && other_data && target_data) { if (layout == LP_TEX_LAYOUT_TILED) { lp_linear_to_tiled(other_data, target_data, x * TILE_SIZE, y * TILE_SIZE, @@ -1067,8 +1103,6 @@ llvmpipe_get_texture_image(struct llvmpipe_resource *lpr, width_t, height_t, layout); } - 
assert(target_data); - return target_data; } @@ -1138,7 +1172,7 @@ llvmpipe_get_texture_tile_linear(struct llvmpipe_resource *lpr, layout_logic(cur_layout, LP_TEX_LAYOUT_LINEAR, usage, &new_layout, &convert); - if (convert) { + if (convert && tiled_image && linear_image) { lp_tiled_to_linear(tiled_image, linear_image, x, y, TILE_SIZE, TILE_SIZE, lpr->base.format, lpr->row_stride[level], @@ -1187,13 +1221,16 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, cur_layout = llvmpipe_get_texture_tile_layout(lpr, face_slice, level, tx, ty); layout_logic(cur_layout, LP_TEX_LAYOUT_TILED, usage, &new_layout, &convert); - if (convert) { + if (convert && linear_image && tiled_image) { lp_linear_to_tiled(linear_image, tiled_image, x, y, TILE_SIZE, TILE_SIZE, lpr->base.format, lpr->row_stride[level], lpr->tiles_per_row[level]); } + if (!tiled_image) + return NULL; + if (new_layout != cur_layout) llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty, new_layout); @@ -1206,6 +1243,94 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, /** + * Get pointer to tiled data for rendering. 
+ * \return pointer to the tiled data at the given tile position + */ +void +llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + unsigned x, unsigned y, + uint8_t *tile) +{ + struct llvmpipe_texture_image *linear_img = &lpr->linear[level]; + const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE; + uint8_t *linear_image; + + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + + if (!linear_img->data) { + /* allocate memory for the linear image now */ + alloc_image_data(lpr, level, LP_TEX_LAYOUT_LINEAR); + } + + /* compute address of the slice/face of the image that contains the tile */ + linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level, + LP_TEX_LAYOUT_LINEAR); + + { + uint ii = x, jj = y; + uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE; + uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4; + + /* Note that lp_tiled_to_linear expects the tile parameter to + * point at the first tile in a whole-image sized array. In + * this code, we have only a single tile and have to do some + * pointer arithmetic to figure out where the "image" would have + * started. + */ + lp_tiled_to_linear(tile - byte_offset, linear_image, + x, y, TILE_SIZE, TILE_SIZE, + lpr->base.format, + lpr->row_stride[level], + 1); /* tiles per row */ + } + + llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty, + LP_TEX_LAYOUT_LINEAR); +} + + +/** + * Get pointer to tiled data for rendering. 
+ * \return pointer to the tiled data at the given tile position + */ +void +llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + unsigned x, unsigned y, + uint8_t *tile) +{ + uint8_t *linear_image; + + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + + /* compute address of the slice/face of the image that contains the tile */ + linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level, + LP_TEX_LAYOUT_LINEAR); + + if (linear_image) { + uint ii = x, jj = y; + uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE; + uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4; + + /* Note that lp_linear_to_tiled expects the tile parameter to + * point at the first tile in a whole-image sized array. In + * this code, we have only a single tile and have to do some + * pointer arithmetic to figure out where the "image" would have + * started. + */ + lp_linear_to_tiled(linear_image, tile - byte_offset, + x, y, TILE_SIZE, TILE_SIZE, + lpr->base.format, + lpr->row_stride[level], + 1); /* tiles per row */ + } +} + + +/** * Return size of resource in bytes */ unsigned diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 503b6a19a8..4e4a65dcb4 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -223,6 +223,17 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, unsigned x, unsigned y); +void +llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + unsigned x, unsigned y, + uint8_t *tile); + +void +llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + unsigned x, unsigned y, + uint8_t *tile); extern void llvmpipe_print_resources(void); diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c index 2b63992dd7..0938f7aea7 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_tile_image.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c @@ -204,7 +204,7 @@ lp_tiled_to_linear(const void *src, void *dst, lp_tile_unswizzle_4ub(format, src_tile, dst, dst_stride, - ii, jj, tile_w, tile_h); + ii, jj); } } } @@ -293,7 +293,7 @@ lp_linear_to_tiled(const void *src, void *dst, lp_tile_swizzle_4ub(format, dst_tile, src, src_stride, - ii, jj, tile_w, tile_h); + ii, jj); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_shuffle_mask.py b/src/gallium/drivers/llvmpipe/lp_tile_shuffle_mask.py new file mode 100644 index 0000000000..ea2fc0f375 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tile_shuffle_mask.py @@ -0,0 +1,32 @@ + +tile = [[0,1,4,5], + [2,3,6,7], + [8,9,12,13], + [10,11,14,15]] +shift = 0 +align = 1 +value = 0L +holder = [] + +import sys + +basemask = [0x +fd = sys.stdout +indent = " "*9 +for c in range(4): + fd.write(indent + "*pdst++ = \n"); + for l,line in enumerate(tile): + fd.write(indent + " %s_mm_shuffle_epi8(line%d, (__m128i){"%(l and '+' or ' ',l)) + for i,pos in enumerate(line): + mask = 0x00ffffffff & (~(0xffL << shift)) + value = mask | ((pos) << shift) + holder.append(value) + if holder and (i + 1) %2 == 0: + fd.write("0x%8.0x"%(holder[0] + (holder[1] << 32))) + holder = [] + if (i) %4 == 1: + fd.write( ',') + + fd.write("})%s\n"%((l == 3) and ';' or '')) + print + shift += 8 diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 07f71b8411..12dac1da6c 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -79,14 +79,14 @@ void lp_tile_swizzle_4ub(enum pipe_format format, uint8_t *dst, const void *src, unsigned src_stride, - unsigned x, unsigned y, unsigned w, unsigned h); + unsigned x, unsigned y); void lp_tile_unswizzle_4ub(enum pipe_format format, const uint8_t *src, void *dst, unsigned dst_stride, - unsigned x, unsigned y, unsigned w, unsigned h); + unsigned x, unsigned y); 
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 5ab63cbac6..c71ec8066c 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -75,13 +75,13 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): src_native_type = native_type(format) print 'static void' - print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type) + print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type) print '{' print ' unsigned x, y;' print ' const uint8_t *src_row = src + y0*src_stride;' - print ' for (y = 0; y < h; ++y) {' + print ' for (y = 0; y < TILE_SIZE; ++y) {' print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride()) - print ' for (x = 0; x < w; ++x) {' + print ' for (x = 0; x < TILE_SIZE; ++x) {' names = ['']*4 if format.colorspace in ('rgb', 'srgb'): @@ -202,9 +202,9 @@ def emit_unrolled_unswizzle_code(format, src_channel): print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type) print ' unsigned int qx, qy, i;' print - print ' for (qy = 0; qy < h; qy += TILE_VECTOR_HEIGHT) {' + print ' for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {' print ' const unsigned py = y0 + qy;' - print ' for (qx = 0; qx < w; qx += TILE_VECTOR_WIDTH) {' + print ' for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {' print ' const unsigned px = x0 + qx;' print ' const uint8_t *r = src + 0 * TILE_C_STRIDE;' print ' const uint8_t *g = src + 1 * TILE_C_STRIDE;' @@ -231,9 +231,9 @@ def emit_tile_pixel_unswizzle_code(format, src_channel): print ' unsigned x, y;' print ' uint8_t *dst_row = dst + y0*dst_stride;' - print ' for (y = 0; y < h; ++y) {' + print ' for (y = 0; y < TILE_SIZE; ++y) {' print ' %s 
*dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) - print ' for (x = 0; x < w; ++x) {' + print ' for (x = 0; x < TILE_SIZE; ++x) {' if format.layout == PLAIN: if not format.is_array(): @@ -273,7 +273,7 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): name = format.short_name() print 'static void' - print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) + print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type) print '{' if format.layout == PLAIN \ and format.colorspace == 'rgb' \ @@ -289,6 +289,202 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): print +def generate_ssse3(): + print ''' +#if defined(PIPE_ARCH_SSE) + + +#if defined(PIPE_ARCH_SSSE3) + +#include <tmmintrin.h> + +#else + +#include <emmintrin.h> + +/** + * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases + * where -mssse3 is not supported/enabled. + * + * MSVC will never get in here as its intrinsics support do not rely on + * compiler command line options. 
+ */ +static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_epi8(__m128i a, __m128i mask) +{ + __m128i result; + __asm__("pshufb %1, %0" + : "=x" (result) + : "xm" (mask), "0" (a)); + return result; +} + +#endif + + +static void +lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, + const uint8_t *src, unsigned src_stride, + unsigned x0, unsigned y0) +{ + + unsigned x, y; + __m128i *pdst = (__m128i*) dst; + const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t); + unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - sizeof(uint32_t)*TILE_VECTOR_WIDTH; + unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT; + + const __m128i shuffle00 = _mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); + const __m128i shuffle01 = _mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); + const __m128i shuffle02 = _mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); + const __m128i shuffle03 = _mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); + + const __m128i shuffle10 = _mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); + const __m128i shuffle11 = _mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); + const __m128i shuffle12 = _mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); + const __m128i shuffle13 = _mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); + + const __m128i shuffle20 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff); + const __m128i shuffle21 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff); + const __m128i shuffle22 = 
_mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff); + const __m128i shuffle23 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff); + + const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e); + const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d); + const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c); + const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f); + + for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { + __m128i line0 = *(__m128i*)ysrc0; + const uint8_t *ysrc = ysrc0 + src_stride; + ysrc0 += tile_stridey; + + for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { + __m128i r, g, b, a, line1; + line1 = *(__m128i*)ysrc; + PIPE_READ_WRITE_BARRIER(); + ysrc += src_stride; + r = _mm_shuffle_epi8(line0, shuffle00); + g = _mm_shuffle_epi8(line0, shuffle01); + b = _mm_shuffle_epi8(line0, shuffle02); + a = _mm_shuffle_epi8(line0, shuffle03); + + line0 = *(__m128i*)ysrc; + PIPE_READ_WRITE_BARRIER(); + ysrc += src_stride; + r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle10)); + g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle11)); + b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle12)); + a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle13)); + + line1 = *(__m128i*)ysrc; + PIPE_READ_WRITE_BARRIER(); + ysrc -= tile_stridex; + r = _mm_or_si128(r, _mm_shuffle_epi8(line0, shuffle20)); + g = _mm_or_si128(g, _mm_shuffle_epi8(line0, shuffle21)); + b = _mm_or_si128(b, _mm_shuffle_epi8(line0, shuffle22)); + a = _mm_or_si128(a, _mm_shuffle_epi8(line0, shuffle23)); + + if (x + 1 < TILE_SIZE) { + line0 = *(__m128i*)ysrc; + ysrc += src_stride; + } + + PIPE_READ_WRITE_BARRIER(); + r = _mm_or_si128(r, 
_mm_shuffle_epi8(line1, shuffle30)); + g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle31)); + b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle32)); + a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle33)); + + *pdst++ = r; + *pdst++ = g; + *pdst++ = b; + *pdst++ = a; + } + } + +} + +static void +lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src, + uint8_t *dst, unsigned dst_stride, + unsigned x0, unsigned y0) +{ + unsigned int x, y; + const __m128i *psrc = (__m128i*) src; + const __m128i *end = (__m128i*) (src + (y0 + TILE_SIZE - 1)*dst_stride + (x0 + TILE_SIZE - 1)*sizeof(uint32_t)); + uint8_t *pdst = dst + y0 * dst_stride + x0 * sizeof(uint32_t); + __m128i c0 = *psrc++; + __m128i c1; + + const __m128i shuffle00 = _mm_setr_epi8(0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff); + const __m128i shuffle01 = _mm_setr_epi8(0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff); + const __m128i shuffle02 = _mm_setr_epi8(0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff); + const __m128i shuffle03 = _mm_setr_epi8(0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff); + + const __m128i shuffle10 = _mm_setr_epi8(0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff); + const __m128i shuffle11 = _mm_setr_epi8(0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff); + const __m128i shuffle12 = _mm_setr_epi8(0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff); + const __m128i shuffle13 = _mm_setr_epi8(0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff); + + const __m128i shuffle20 = _mm_setr_epi8(0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff,0xff); + const __m128i shuffle21 = _mm_setr_epi8(0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff,0xff); + const __m128i shuffle22 = 
_mm_setr_epi8(0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff,0xff); + const __m128i shuffle23 = _mm_setr_epi8(0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff,0xff); + + const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05); + const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07); + const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d); + const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f); + + for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { + __m128i *tile = (__m128i*) pdst; + pdst += dst_stride * TILE_VECTOR_HEIGHT; + for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { + uint8_t *linep = (uint8_t*) (tile++); + __m128i line0, line1, line2, line3; + + c1 = *psrc++; /* r */ + PIPE_READ_WRITE_BARRIER(); + line0 = _mm_shuffle_epi8(c0, shuffle00); + line1 = _mm_shuffle_epi8(c0, shuffle01); + line2 = _mm_shuffle_epi8(c0, shuffle02); + line3 = _mm_shuffle_epi8(c0, shuffle03); + + c0 = *psrc++; /* g */ + PIPE_READ_WRITE_BARRIER(); + line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle10)); + line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle11)); + line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle12)); + line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle13)); + + c1 = *psrc++; /* b */ + PIPE_READ_WRITE_BARRIER(); + line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c0, shuffle20)); + line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c0, shuffle21)); + line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c0, shuffle22)); + line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c0, shuffle23)); + + if (psrc != end) + c0 = *psrc++; /* a */ + PIPE_READ_WRITE_BARRIER(); + line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle30)); + line1 = 
_mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle31)); + line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle32)); + line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle33)); + + *(__m128i*) (linep) = line0; + *(__m128i*) (((char*)linep) + dst_stride) = line1; + *(__m128i*) (((char*)linep) + 2 * dst_stride) = line2; + *(__m128i*) (((char*)linep) + 3 * dst_stride) = line3; + } + } +} + +#endif /* PIPE_ARCH_SSSE3 */ +''' + + def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): '''Generate the dispatch function to read pixels from any format''' @@ -297,9 +493,9 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): generate_format_read(format, dst_channel, dst_native_type, dst_suffix) print 'void' - print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) + print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)' % (dst_suffix, dst_native_type) print '{' - print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type + print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type print '#ifdef DEBUG' print ' lp_tile_swizzle_count += 1;' print '#endif' @@ -307,13 +503,21 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_swizzle_%s;' % (format.short_name(), dst_suffix) + func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix) + if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': + print '#ifdef PIPE_ARCH_SSE' + print ' func = util_cpu_caps.has_ssse3 ? 
%s_ssse3 : %s;' % (func_name, func_name) + print '#else' + print ' func = %s;' % (func_name,) + print '#endif' + else: + print ' func = %s;' % (func_name,) print ' break;' print ' default:' print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' print ' return;' print ' }' - print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);' + print ' func(dst, (const uint8_t *)src, src_stride, x, y);' print '}' print @@ -326,10 +530,10 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): generate_format_write(format, src_channel, src_native_type, src_suffix) print 'void' - print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) + print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)' % (src_suffix, src_native_type) print '{' - print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type + print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type print '#ifdef DEBUG' print ' lp_tile_unswizzle_count += 1;' print '#endif' @@ -337,13 +541,21 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_unswizzle_%s;' % (format.short_name(), src_suffix) + func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix) + if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': + print '#ifdef PIPE_ARCH_SSE' + print ' func = util_cpu_caps.has_ssse3 ? 
%s_ssse3 : %s;' % (func_name, func_name) + print '#else' + print ' func = %s;' % (func_name,) + print '#endif' + else: + print ' func = %s;' % (func_name,) print ' break;' print ' default:' print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' print ' return;' print ' }' - print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);' + print ' func(src, (uint8_t *)dst, dst_stride, x, y);' print '}' print @@ -362,6 +574,7 @@ def main(): print '#include "util/u_format.h"' print '#include "util/u_math.h"' print '#include "util/u_half.h"' + print '#include "util/u_cpu_detect.h"' print '#include "lp_tile_soa.h"' print print '#ifdef DEBUG' @@ -391,6 +604,8 @@ def main(): print '};' print + generate_ssse3() + channel = Channel(UNSIGNED, True, 8) native_type = 'uint8_t' suffix = '4ub' diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 60bdd7276a..513e5e02bc 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -6,6 +6,7 @@ #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_format_s3tc.h" +#include "util/u_string.h" #include <stdio.h> #include <errno.h> @@ -15,7 +16,7 @@ #include "nouveau_screen.h" /* XXX this should go away */ -#include "state_tracker/drm_api.h" +#include "state_tracker/drm_driver.h" #include "util/u_simple_screen.h" static const char * @@ -24,7 +25,7 @@ nouveau_screen_get_name(struct pipe_screen *pscreen) struct nouveau_device *dev = nouveau_screen(pscreen)->device; static char buffer[128]; - snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); return buffer; } @@ -181,7 +182,7 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, ret = nouveau_bo_handle_ref(dev, whandle->handle, &bo); if (ret) { debug_printf("%s: ref name 0x%08x failed with %d\n", - __func__, whandle->handle, ret); + __FUNCTION__, 
whandle->handle, ret); return NULL; } diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index 8eacdff035..8c290273fb 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -14,7 +14,7 @@ struct nouveau_screen { unsigned index_buffer_flags; }; -static inline struct nouveau_screen * +static INLINE struct nouveau_screen * nouveau_screen(struct pipe_screen *pscreen) { return (struct nouveau_screen *)pscreen; @@ -67,13 +67,13 @@ void nouveau_screen_fini(struct nouveau_screen *); -static __inline__ unsigned +static INLINE unsigned RING_3D(unsigned mthd, unsigned size) { return (7 << 13) | (size << 18) | mthd; } -static __inline__ unsigned +static INLINE unsigned RING_3D_NI(unsigned mthd, unsigned size) { return 0x40000000 | (7 << 13) | (size << 18) | mthd; diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h index ed6e643785..a5e8537533 100644 --- a/src/gallium/drivers/nouveau/nouveau_util.h +++ b/src/gallium/drivers/nouveau/nouveau_util.h @@ -103,7 +103,7 @@ struct u_split_prim { uint edgeflag_off:1; }; -static inline void +static INLINE void u_split_prim_init(struct u_split_prim *s, unsigned mode, unsigned start, unsigned count) { diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index cd7da9977d..df79ca89ca 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -13,7 +13,7 @@ #include "nouveau/nouveau_resource.h" #include "nouveau/nouveau_pushbuf.h" -static inline uint32_t +static INLINE uint32_t nouveau_screen_transfer_flags(unsigned pipe) { uint32_t flags = 0; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 61807dd999..12c4a93a9b 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -21,7 +21,7 @@ 
#include "nv50_program.h" #define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); @@ -50,6 +50,7 @@ #define NV50_NEW_SAMPLER (1 << 15) #define NV50_NEW_TEXTURE (1 << 16) #define NV50_NEW_STENCIL_REF (1 << 17) +#define NV50_NEW_CLIP (1 << 18) struct nv50_blend_stateobj { struct pipe_blend_state pipe; @@ -140,6 +141,7 @@ struct nv50_context { struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; + struct pipe_clip_state clip; struct nv50_program *vertprog; struct nv50_program *fragprog; struct nv50_program *geomprog; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 21908bcd3c..ca4b01b12b 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -186,6 +186,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MAX_VS_TEMPS: case PIPE_CAP_MAX_FS_TEMPS: /* no spilling atm */ return 128 / 4; + case PIPE_CAP_DEPTH_CLAMP: + return 1; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; @@ -525,6 +527,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RINGf (chan, 0.0f); OUT_RINGf (chan, 1.0f); + BEGIN_RING(chan, screen->tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); + OUT_RING (chan, 1); + /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */ BEGIN_RING(chan, screen->tesla, NV50TCL_LINKED_TSC, 1); OUT_RING (chan, 1); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index f8bff764f2..42c5a58318 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -658,6 +658,10 @@ static void nv50_set_clip_state(struct pipe_context *pipe, const struct pipe_clip_state *clip) { + struct 
nv50_context *nv50 = nv50_context(pipe); + + nv50->clip.depth_clamp = clip->depth_clamp; + nv50->dirty |= NV50_NEW_CLIP; } static void diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 14c3490599..524696f35d 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -277,7 +277,7 @@ static struct nouveau_stateobj * validate_viewport(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(5, 9, 0); + struct nouveau_stateobj *so = so_new(3, 7, 0); so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); so_data (so, fui(nv50->viewport.translate[0])); @@ -288,15 +288,6 @@ validate_viewport(struct nv50_context *nv50) so_data (so, fui(nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); - so_data (so, 1); - /* 0x0000 = remove whole primitive only (xyz) - * 0x1018 = remove whole primitive only (xy), clamp z - * 0x1080 = clip primitive (xyz) - * 0x1098 = clip primitive (xy), clamp z - */ - so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); - so_data (so, 0x1080); /* no idea what 0f90 does */ so_method(so, tesla, 0x0f90, 1); so_data (so, 0); @@ -341,6 +332,26 @@ validate_vtxattr(struct nv50_context *nv50) return so; } +static struct nouveau_stateobj * +validate_clip(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so = so_new(1, 1, 0); + uint32_t vvcc; + + /* 0x0000 = remove whole primitive only (xyz) + * 0x1018 = remove whole primitive only (xy), clamp z + * 0x1080 = clip primitive (xyz) + * 0x1098 = clip primitive (xy), clamp z + */ + vvcc = nv50->clip.depth_clamp ? 
0x1098 : 0x1080; + + so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); + so_data (so, vvcc); + + return so; +} + struct state_validate { struct nouveau_stateobj *(*func)(struct nv50_context *nv50); unsigned states; @@ -365,6 +376,7 @@ struct state_validate { { nv50_vbo_validate , NV50_NEW_ARRAYS }, { validate_vtxbuf , NV50_NEW_ARRAYS }, { validate_vtxattr , NV50_NEW_ARRAYS }, + { validate_clip , NV50_NEW_CLIP }, {} }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index 6772d9bd51..ee41f03b9b 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -842,7 +842,6 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) struct nouveau_channel* chan = nvfx->screen->base.channel; struct nvfx_fragment_program *fp = nvfx->fragprog; int update = 0; - int i; if (!fp->translated) { @@ -895,6 +894,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) { struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + fp->prog_size * fp->progs_per_bo, 16); char *map, *buf; + int i; if(fp->fpbo) { @@ -910,7 +910,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) map = fpbo->bo->map; buf = fpbo->insn; - for(int i = 0; i < fp->progs_per_bo; ++i) + for(i = 0; i < fp->progs_per_bo; ++i) { memcpy(buf, fp->insn, fp->insn_len * 4); nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4); @@ -931,6 +931,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer); uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset); uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset); + int i; for (i = 0; i < fp->nr_consts; ++i) { unsigned off = fp->consts[i].offset; unsigned idx = fp->consts[i].index * 4; diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c 
index a78d2411a0..80db28a07c 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -56,6 +56,8 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; + case PIPE_CAP_TEXTURE_SWIZZLE: + return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 13; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: @@ -127,6 +129,8 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 2; case PIPE_CAP_MAX_VS_PREDS: return screen->is_nv4x ? 1 : 0; + case PIPE_CAP_GEOMETRY_SHADER4: + return 0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index dd897f6072..728bc40a5b 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -21,10 +21,10 @@ C_SOURCES = \ r300_screen_buffer.c \ r300_state.c \ r300_state_derived.c \ - r300_state_invariant.c \ r300_vs.c \ r300_vs_draw.c \ r300_texture.c \ + r300_texture_desc.c \ r300_tgsi_to_rc.c \ r300_transfer.c diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index ee19e9d278..bf023daaa5 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -31,10 +31,10 @@ r300 = env.ConvenienceLibrary( 'r300_screen_buffer.c', 'r300_state.c', 'r300_state_derived.c', - 'r300_state_invariant.c', 'r300_vs.c', 'r300_vs_draw.c', 'r300_texture.c', + 'r300_texture_desc.c', 'r300_tgsi_to_rc.c', 'r300_transfer.c', ] + r300compiler) + r300compiler diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 2a47701291..d125196b6d 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -24,12 +24,13 @@ #include "r300_texture.h" #include "util/u_format.h" +#include "util/u_pack_color.h" -enum r300_blitter_op +enum r300_blitter_op /* bitmask */ { - R300_CLEAR, - 
R300_CLEAR_SURFACE, - R300_COPY + R300_CLEAR = 1, + R300_CLEAR_SURFACE = 2, + R300_COPY = 4 }; static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op op) @@ -79,6 +80,31 @@ static void r300_blitter_end(struct r300_context *r300) } } +static uint32_t r300_depth_clear_cb_value(enum pipe_format format, + const float* rgba) +{ + union util_color uc; + util_pack_color(rgba, format, &uc); + + if (util_format_get_blocksizebits(format) == 32) + return uc.ui; + else + return uc.us | (uc.us << 16); +} + +static boolean r300_cbzb_clear_allowed(struct r300_context *r300, + unsigned clear_buffers) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + /* Only color clear allowed, and only one colorbuffer. */ + if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) + return FALSE; + + return r300_surface(fb->cbufs[0])->cbzb_allowed; +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -86,39 +112,81 @@ static void r300_clear(struct pipe_context* pipe, double depth, unsigned stencil) { - /* XXX Implement fastfill. + /* My notes about fastfill: + * + * 1) Only the zbuffer is cleared. + * + * 2) The zbuffer must be micro-tiled and whole microtiles must be + * written. If microtiling is disabled, it locks up. * - * If fastfill is enabled, a few facts should be considered: + * 3) There is Z Mask RAM which contains a compressed zbuffer and + * it interacts with fastfill. We should figure out how to use it + * to get more performance. + * This is what we know about the Z Mask: * - * 1) Zbuffer must be micro-tiled and whole microtiles must be - * written. + * Each dword of the Z Mask contains compression information + * for 16 4x4 pixel blocks, that is 2 bits for each block. + * On chips with 2 Z pipes, every other dword maps to a different + * pipe. * - * 2) ZB_DEPTHCLEARVALUE is used to clear a zbuffer and Z Mask must be - * equal to 0. 
+ * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must + * be equal to 0. (clear the Z Mask RAM with zeros) * - * 3) For 16-bit integer buffering, compression causes a hung with one or + * 5) For 16-bit zbuffer, compression causes a hung with one or * two samples and should not be used. * - * 4) Fastfill must not be used if reading of compressed Z data is disabled + * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears + * to avoid needless decompression. + * + * 7) Fastfill must not be used if reading of compressed Z data is disabled * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE), * i.e. it cannot be used to compress the zbuffer. - * (what the hell does that mean and how does it fit in clearing - * the buffers?) + * + * 8) ZB_CB_CLEAR does not interact with fastfill in any way. * * - Marek */ struct r300_context* r300 = r300_context(pipe); - struct pipe_framebuffer_state* fb = + struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; + uint32_t width = fb->width; + uint32_t height = fb->height; + + /* Enable CBZB clear. */ + if (r300_cbzb_clear_allowed(r300, buffers)) { + struct r300_surface *surf = r300_surface(fb->cbufs[0]); + + hyperz->zb_depthclearvalue = + r300_depth_clear_cb_value(surf->base.format, rgba); + + width = surf->cbzb_width; + height = surf->cbzb_height; + + r300->cbzb_clear = TRUE; + r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + } + /* Clear. */ r300_blitter_begin(r300, R300_CLEAR); util_blitter_clear(r300->blitter, - fb->width, - fb->height, + width, + height, fb->nr_cbufs, buffers, rgba, depth, stencil); r300_blitter_end(r300); + + /* Disable CBZB clear. 
*/ + if (r300->cbzb_clear) { + r300->cbzb_clear = FALSE; + r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + } + + /* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */ + if (r300->flush_counter == 0) + pipe->flush(pipe, 0, NULL); } /* Clear a region of a color surface to a constant value. */ @@ -185,14 +253,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe, enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; - if (dst->format != src->format) { - debug_printf("r300: Implementation error: Format mismatch in %s\n" - " : src: %s dst: %s\n", __FUNCTION__, - util_format_short_name(src->format), - util_format_short_name(dst->format)); - debug_assert(0); - } - if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h index 6987471244..9d3d4fc1b1 100644 --- a/src/gallium/drivers/r300/r300_cb.h +++ b/src/gallium/drivers/r300/r300_cb.h @@ -89,9 +89,6 @@ CB_DEBUG(cs_count = size;) \ } while (0) -#define BEGIN_CS_AS_CB(r300, size) \ - BEGIN_CB(r300->rws->get_cs_pointer(r300->rws, dwords), dwords) - #define END_CB do { \ CB_DEBUG(if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index e6dca66d4a..21f3b9d261 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -36,6 +36,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->num_vert_fpus = 2; caps->num_tex_units = 16; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? 
FALSE : TRUE; + caps->has_hiz = TRUE; caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -76,6 +77,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4E54: case 0x4E56: caps->family = CHIP_FAMILY_RV350; + caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; break; @@ -106,6 +108,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5B64: case 0x5B65: caps->family = CHIP_FAMILY_RV370; + caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; break; @@ -201,24 +204,28 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5954: case 0x5955: caps->family = CHIP_FAMILY_RS480; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; case 0x5974: case 0x5975: caps->family = CHIP_FAMILY_RS482; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; case 0x5A41: case 0x5A42: caps->family = CHIP_FAMILY_RS400; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; case 0x5A61: case 0x5A62: caps->family = CHIP_FAMILY_RC410; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index ab649c3857..65750f54e7 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -42,6 +42,8 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Some chipsets do not have HiZ RAM. */ + boolean has_hiz; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. 
The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 16a75aa612..df90359058 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -32,23 +32,72 @@ #include "r300_emit.h" #include "r300_screen.h" #include "r300_screen_buffer.h" -#include "r300_state_invariant.h" #include "r300_winsys.h" #include <inttypes.h> -static void r300_destroy_context(struct pipe_context* context) +static void r300_update_num_contexts(struct r300_screen *r300screen, + int diff) { - struct r300_context* r300 = r300_context(context); + if (diff > 0) { + p_atomic_inc(&r300screen->num_contexts); + + if (r300screen->num_contexts > 1) + util_mempool_set_thread_safety(&r300screen->pool_buffers, + UTIL_MEMPOOL_MULTITHREADED); + } else { + p_atomic_dec(&r300screen->num_contexts); + + if (r300screen->num_contexts <= 1) + util_mempool_set_thread_safety(&r300screen->pool_buffers, + UTIL_MEMPOOL_SINGLETHREADED); + } +} + +static void r300_release_referenced_objects(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_textures_state *textures = + (struct r300_textures_state*)r300->textures_state.state; struct r300_query *query, *temp; - struct r300_atom *atom; + unsigned i; + /* Framebuffer state. */ + util_assign_framebuffer_state(fb, NULL); + + /* Textures. */ + for (i = 0; i < textures->sampler_view_count; i++) + pipe_sampler_view_reference( + (struct pipe_sampler_view**)&textures->sampler_views[i], NULL); + + /* The special dummy texture for texkill. */ if (r300->texkill_sampler) { pipe_sampler_view_reference( (struct pipe_sampler_view**)&r300->texkill_sampler, NULL); } + /* The SWTCL VBO. */ + pipe_resource_reference(&r300->vbo, NULL); + + /* Vertex buffers. 
*/ + for (i = 0; i < r300->vertex_buffer_count; i++) { + pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); + } + + /* If there are any queries pending or not destroyed, remove them now. */ + foreach_s(query, temp, &r300->query_list) { + remove_from_list(query); + FREE(query); + } +} + +static void r300_destroy_context(struct pipe_context* context) +{ + struct r300_context* r300 = r300_context(context); + struct r300_atom *atom; + util_blitter_destroy(r300->blitter); draw_destroy(r300->draw); @@ -62,23 +111,30 @@ static void r300_destroy_context(struct pipe_context* context) } } - /* If there are any queries pending or not destroyed, remove them now. */ - foreach_s(query, temp, &r300->query_list) { - remove_from_list(query); - FREE(query); - } - u_upload_destroy(r300->upload_vb); u_upload_destroy(r300->upload_ib); translate_cache_destroy(r300->tran.translate_cache); + r300_release_referenced_objects(r300); + + r300->rws->cs_destroy(r300->cs); + + util_mempool_destroy(&r300->pool_transfers); + + r300_update_num_contexts(r300->screen, -1); + + FREE(r300->aa_state.state); FREE(r300->blend_color_state.state); FREE(r300->clip_state.state); FREE(r300->fb_state.state); + FREE(r300->gpu_flush.state); + FREE(r300->hyperz_state.state); + FREE(r300->invariant_state.state); FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); FREE(r300->textures_state.state); + FREE(r300->vap_invariant_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300->fs_constants.state); @@ -89,7 +145,7 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300); } -static void r300_flush_cb(void *data) +void r300_flush_cb(void *data) { struct r300_context* const cs_context_copy = data; @@ -106,8 +162,10 @@ static void r300_flush_cb(void *data) static void r300_setup_atoms(struct r300_context* r300) { + boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = 
r300->screen->caps.has_tcl; + boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); /* Create the actual atom list. * @@ -115,44 +173,75 @@ static void r300_setup_atoms(struct r300_context* r300) * can affect performance and conformance if not handled with care. * * Some atoms never change size, others change every emit - those have - * the size of 0 here. */ + * the size of 0 here. + * + * NOTE: The framebuffer state is split into these atoms: + * - gpu_flush (unpipelined regs) + * - aa_state (unpipelined regs) + * - fb_state (unpipelined regs) + * - hyperz_state (unpipelined regs followed by pipelined ones) + * - fb_state_pipelined (pipelined regs) + * The motivation behind this is to be able to emit a strict + * subset of the regs, and to have reasonable register ordering. */ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(invariant_state, 71); + /* SC, GB (unpipelined), RB3D (unpipelined), ZB (unpipelined). */ + R300_INIT_ATOM(gpu_flush, 9); + R300_INIT_ATOM(aa_state, 4); + R300_INIT_ATOM(fb_state, 0); + /* ZB (unpipelined), SC. */ + R300_INIT_ATOM(hyperz_state, 6); R300_INIT_ATOM(ztop_state, 2); - R300_INIT_ATOM(query_start, 4); + /* ZB, FG. */ + R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); + /* RB3D. */ R300_INIT_ATOM(blend_state, 8); R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); - R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2); - R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); - R300_INIT_ATOM(fb_state, 0); - R300_INIT_ATOM(rs_state, 0); + /* SC. */ R300_INIT_ATOM(scissor_state, 3); + /* GB, FG, GA, SU, SC, RB3D. */ + R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0)); + /* VAP. 
*/ R300_INIT_ATOM(viewport_state, 9); - R300_INIT_ATOM(rs_block_state, 0); - R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(pvs_flush, 2); + R300_INIT_ATOM(vap_invariant_state, 9); + R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); - R300_INIT_ATOM(texture_cache_inval, 2); - R300_INIT_ATOM(textures_state, 0); + R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2); + /* VAP, RS, GA, GB, SU, SC. */ + R300_INIT_ATOM(rs_block_state, 0); + R300_INIT_ATOM(rs_state, 0); + /* SC, US. */ + R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0)); + /* US. */ R300_INIT_ATOM(fs, 0); R300_INIT_ATOM(fs_rc_constant_state, 0); R300_INIT_ATOM(fs_constants, 0); + /* TX. */ + R300_INIT_ATOM(texture_cache_inval, 2); + R300_INIT_ATOM(textures_state, 0); + /* ZB (unpipelined), SU. */ + R300_INIT_ATOM(query_start, 4); /* Replace emission functions for r500. */ - if (r300->screen->caps.is_r500) { + if (is_r500) { r300->fs.emit = r500_emit_fs; r300->fs_rc_constant_state.emit = r500_emit_fs_rc_constant_state; r300->fs_constants.emit = r500_emit_fs_constants; } /* Some non-CSO atoms need explicit space to store the state locally. 
*/ + r300->aa_state.state = CALLOC_STRUCT(r300_aa_state); r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); r300->clip_state.state = CALLOC_STRUCT(r300_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); + r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state); + r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); + r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state); r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); + r300->vap_invariant_state.state = CALLOC_STRUCT(r300_vap_invariant_state); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer); @@ -162,27 +251,45 @@ static void r300_setup_atoms(struct r300_context* r300) } /* Some non-CSO atoms don't use the state pointer. */ - r300->invariant_state.allow_null_state = TRUE; + r300->fb_state_pipelined.allow_null_state = TRUE; r300->fs_rc_constant_state.allow_null_state = TRUE; r300->pvs_flush.allow_null_state = TRUE; r300->query_start.allow_null_state = TRUE; r300->texture_cache_inval.allow_null_state = TRUE; + + /* Some states must be marked as dirty here to properly set up + * hardware in the first command stream. */ + r300->invariant_state.dirty = TRUE; + r300->pvs_flush.dirty = TRUE; + r300->vap_invariant_state.dirty = TRUE; + r300->texture_cache_inval.dirty = TRUE; + r300->textures_state.dirty = TRUE; } /* Not every state tracker calls every driver function before the first draw * call and we must initialize the command buffers somehow. 
*/ static void r300_init_states(struct pipe_context *pipe) { + struct r300_context *r300 = r300_context(pipe); struct pipe_blend_color bc = {{0}}; struct pipe_clip_state cs = {{{0}}}; struct pipe_scissor_state ss = {0}; struct r300_clip_state *clip = - (struct r300_clip_state*)r300_context(pipe)->clip_state.state; + (struct r300_clip_state*)r300->clip_state.state; + struct r300_gpu_flush *gpuflush = + (struct r300_gpu_flush*)r300->gpu_flush.state; + struct r300_vap_invariant_state *vap_invariant = + (struct r300_vap_invariant_state*)r300->vap_invariant_state.state; + struct r300_invariant_state *invariant = + (struct r300_invariant_state*)r300->invariant_state.state; + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; CB_LOCALS; pipe->set_blend_color(pipe, &bc); pipe->set_scissor_state(pipe, &ss); + /* Initialize the clip state. */ if (r300_context(pipe)->screen->caps.has_tcl) { pipe->set_clip_state(pipe, &cs); } else { @@ -190,6 +297,66 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); END_CB; } + + /* Initialize the GPU flush. */ + { + BEGIN_CB(gpuflush->cb_flush_clean, 6); + + /* Flush and free renderbuffer caches. */ + OUT_CB_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + + /* Wait until the GPU is idle. + * This fixes random pixels sometimes appearing probably caused + * by incomplete rendering. */ + OUT_CB_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); + END_CB; + } + + /* Initialize the VAP invariant state. 
*/ + { + BEGIN_CB(vap_invariant->cb, 9); + OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); + OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); + OUT_CB_32F(1.0); + OUT_CB_32F(1.0); + OUT_CB_32F(1.0); + OUT_CB_32F(1.0); + OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); + END_CB; + } + + /* Initialize the invariant state. */ + { + BEGIN_CB(invariant->cb, r300->invariant_state.size); + OUT_CB_REG(R300_GB_SELECT, 0); + OUT_CB_REG(R300_FG_FOG_BLEND, 0); + OUT_CB_REG(R300_GA_ROUND_MODE, 1); + OUT_CB_REG(R300_GA_OFFSET, 0); + OUT_CB_REG(R300_SU_TEX_WRAP, 0); + OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); + OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0); + OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525); + + if (r300->screen->caps.is_rv350) { + OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); + OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); + } + END_CB; + } + + /* Initialize the hyperz state. */ + { + BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size); + OUT_CB_REG(R300_ZB_BW_CNTL, 0); + OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); + OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); + END_CB; + } } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -202,6 +369,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300) return NULL; + r300_update_num_contexts(r300screen, 1); + r300->rws = rws; r300->screen = r300screen; @@ -211,6 +380,12 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.destroy = r300_destroy_context; + r300->cs = rws->cs_create(rws); + + util_mempool_create(&r300->pool_transfers, + sizeof(struct pipe_transfer), 64, + UTIL_MEMPOOL_SINGLETHREADED); + if (!r300screen->caps.has_tcl) { /* Create a Draw. This is used for SW TCL. 
*/ r300->draw = draw_create(&r300->context); @@ -230,16 +405,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_blit_functions(r300); r300_init_flush_functions(r300); r300_init_query_functions(r300); - r300_init_render_functions(r300); r300_init_state_functions(r300); r300_init_resource_functions(r300); - r300->invariant_state.dirty = TRUE; + r300->blitter = util_blitter_create(&r300->context); - rws->set_flush_cb(r300->rws, r300_flush_cb, r300); - r300->dirty_hw++; + /* Render functions must be initialized after blitter. */ + r300_init_render_functions(r300); - r300->blitter = util_blitter_create(&r300->context); + rws->cs_set_flush(r300->cs, r300_flush_cb, r300); r300->upload_ib = u_upload_create(&r300->context, 32 * 1024, 16, @@ -280,11 +454,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.create_sampler_view(&r300->context, tex, &vtempl); pipe_resource_reference(&tex, NULL); - - /* This will make sure that the dummy texture is set up - * from the beginning even if an application does not use - * textures. 
*/ - r300->textures_state.dirty = TRUE; } return &r300->context; @@ -296,11 +465,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, return NULL; } -boolean r300_check_cs(struct r300_context *r300, unsigned size) -{ - return size <= r300->rws->get_cs_free_dwords(r300->rws); -} - void r300_finish(struct r300_context *r300) { struct pipe_framebuffer_state *fb; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 8d0b4bb3d3..b4256c6278 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -61,6 +61,13 @@ struct r300_atom { boolean allow_null_state; }; +struct r300_aa_state { + struct r300_surface *dest; + + uint32_t aa_config; + uint32_t aaresolve_ctl; +}; + struct r300_blend_state { uint32_t cb[8]; uint32_t cb_no_readwrite[8]; @@ -98,40 +105,39 @@ struct r300_dsa_state { boolean two_sided_stencil_ref; }; +struct r300_hyperz_state { + /* This is actually a command buffer with named dwords. */ + uint32_t cb_begin; + uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */ + uint32_t cb_reg1; + uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */ + uint32_t cb_reg2; + uint32_t sc_hyperz; /* R300_SC_HYPERZ */ +}; + +struct r300_gpu_flush { + uint32_t cb_flush_clean[6]; +}; + struct r300_rs_state { /* Original rasterizer state. */ struct pipe_rasterizer_state rs; /* Draw-specific rasterizer state. 
*/ struct pipe_rasterizer_state rs_draw; - uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ - uint32_t multisample_position_0;/* R300_GB_MSPOS0: 0x4010 */ - uint32_t multisample_position_1;/* R300_GB_MSPOS1: 0x4014 */ - uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */ - uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ - uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ - uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ - float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ - /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ - float depth_offset; /* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ - /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ - uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ - uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ - uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ - uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ + /* Command buffers. */ + uint32_t cb_main[25]; + uint32_t cb_poly_offset_zb16[5]; + uint32_t cb_poly_offset_zb24[5]; + + /* The index to cb_main where the cull_mode register value resides. */ + unsigned cull_mode_index; + + /* Whether polygon offset is enabled. */ + boolean polygon_offset_enable; + + /* This is emitted in the draw function. */ uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */ - uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ - uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */ - - /* Specifies top of Raster pipe specific enable controls, - * i.e. 
texture coordinates stuffing for points, lines, triangles */ - uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */ - - /* Point sprites texture coordinates, 0: lower left, 1: upper right */ - float point_texcoord_left; /* R300_GA_POINT_S0: 0x4200 */ - float point_texcoord_bottom; /* R300_GA_POINT_T0: 0x4204 */ - float point_texcoord_right; /* R300_GA_POINT_S1: 0x4208 */ - float point_texcoord_top; /* R300_GA_POINT_T1: 0x420c */ }; struct r300_rs_block { @@ -214,6 +220,14 @@ struct r300_vertex_stream_state { unsigned count; }; +struct r300_invariant_state { + uint32_t cb[20]; +}; + +struct r300_vap_invariant_state { + uint32_t cb[9]; +}; + struct r300_viewport_state { float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ @@ -233,8 +247,8 @@ struct r300_ztop_state { struct r300_constant_buffer { /* Buffer of constants */ - uint32_t constants[256][4]; - /* Total number of constants */ + uint32_t *ptr; + /* Total number of vec4s */ unsigned count; }; @@ -294,32 +308,48 @@ struct r300_surface { enum r300_buffer_domain domain; - uint32_t offset; + uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ - uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT. */ + uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ + + /* Parameters dedicated to the CBZB clear. */ + uint32_t cbzb_width; /* Aligned width. */ + uint32_t cbzb_height; /* Half of the height. */ + uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */ + uint32_t cbzb_pitch; /* DEPTHPITCH. */ + uint32_t cbzb_format; /* ZB_FORMAT. */ + + /* Whether the CBZB clear is allowed on the surface. */ + boolean cbzb_allowed; }; -struct r300_texture { - /* Parent class */ +struct r300_texture_desc { + /* Parent class. */ struct u_resource b; - enum r300_buffer_domain domain; + /* Buffer tiling. + * Macrotiling is specified per-level because small mipmaps cannot + * be macrotiled. 
*/ + enum r300_buffer_tiling microtile; + enum r300_buffer_tiling macrotile[R300_MAX_TEXTURE_LEVELS]; /* Offsets into the buffer. */ - unsigned offset[R300_MAX_TEXTURE_LEVELS]; + unsigned offset_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* A pitch for each mip-level */ - unsigned pitch[R300_MAX_TEXTURE_LEVELS]; + /* Strides for each mip-level. */ + unsigned stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + unsigned stride_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* A pitch multiplied by blockwidth as hardware wants - * the number of pixels instead of the number of blocks. */ - unsigned hwpitch[R300_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face or 2D image based on the texture target. */ + unsigned layer_size_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* Size of one zslice or face based on the texture target */ - unsigned layer_size[R300_MAX_TEXTURE_LEVELS]; + /* Total size of this texture, in bytes, + * derived from the texture properties. */ + unsigned size_in_bytes; - /* Whether the mipmap level is macrotiled. */ - enum r300_buffer_tiling mip_macrotile[R300_MAX_TEXTURE_LEVELS]; + /* Total size of the buffer backing this texture, in bytes. + * It must be >= size. */ + unsigned buffer_size_in_bytes; /** * If non-zero, override the natural texture layout with @@ -329,16 +359,24 @@ struct r300_texture { * * \sa r300_texture_get_stride */ - unsigned stride_override; + unsigned stride_in_bytes_override; - /* Total size of this texture, in bytes. */ - unsigned size; + /* Whether this texture has non-power-of-two dimensions. + * It can be either a regular texture or a rectangle one. */ + boolean is_npot; - /* Whether this texture has non-power-of-two dimensions - * or a user-specified pitch. - * It can be either a regular texture or a rectangle one. - */ - boolean uses_pitch; + /* This flag says that hardware must use the stride for addressing + * instead of the width. */ + boolean uses_stride_addressing; + + /* Whether CBZB fast color clear is allowed on the miplevel. 
*/ + boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS]; +}; + +struct r300_texture { + struct r300_texture_desc desc; + + enum r300_buffer_domain domain; /* Pipe buffer backing this texture. */ struct r300_winsys_buffer *buffer; @@ -349,8 +387,9 @@ struct r300_texture { /* All bits should be filled in. */ struct r300_texture_fb_state fb_state; - /* Buffer tiling */ - enum r300_buffer_tiling microtile, macrotile; + /* This is the level tiling flags were last time set for. + * It's used to prevent redundant tiling-flags changes from happening.*/ + unsigned surface_level; }; struct r300_vertex_element_state { @@ -391,6 +430,8 @@ struct r300_context { /* The interface to the windowing system, etc. */ struct r300_winsys_screen *rws; + /* The command stream. */ + struct r300_winsys_cs *cs; /* Screen. */ struct r300_screen *screen; /* Draw module. Used mostly for SW TCL. */ @@ -421,6 +462,8 @@ struct r300_context { /* Various CSO state objects. */ /* Beginning of atom list. */ struct r300_atom atom_list; + /* Anti-aliasing (MSAA) state. */ + struct r300_atom aa_state; /* Blend state. */ struct r300_atom blend_state; /* Blend color state. */ @@ -437,6 +480,10 @@ struct r300_context { struct r300_atom fs_constants; /* Framebuffer state. */ struct r300_atom fb_state; + /* Framebuffer state (pipelined regs). */ + struct r300_atom fb_state_pipelined; + /* HyperZ state (various SC/ZB bits). */ + struct r300_atom hyperz_state; /* Occlusion query. */ struct r300_atom query_start; /* Rasterizer state. */ @@ -459,8 +506,12 @@ struct r300_context { struct r300_atom ztop_state; /* PVS flush. */ struct r300_atom pvs_flush; + /* VAP invariant state. */ + struct r300_atom vap_invariant_state; /* Texture cache invalidate. */ struct r300_atom texture_cache_inval; + /* GPU flush. */ + struct r300_atom gpu_flush; /* Invariant state. This must be emitted to get the engine started. 
*/ struct r300_atom invariant_state; @@ -497,10 +548,13 @@ struct r300_context { /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; + boolean cbzb_clear; /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; + struct util_mempool pool_transfers; + /* Stat counter. */ uint64_t flush_counter; }; @@ -534,8 +588,8 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300) struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv); -boolean r300_check_cs(struct r300_context *r300, unsigned size); void r300_finish(struct r300_context *r300); +void r300_flush_cb(void *data); /* Context initialization. */ struct draw_stage* r300_draw_stage(struct r300_context* r300); @@ -563,6 +617,13 @@ void r300_translate_index_buffer(struct r300_context *r300, void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); /* r300_state.c */ +enum r300_fb_state_change { + R300_CHANGED_FB_STATE = 0, + R300_CHANGED_CBZB_FLAG +}; + +void r300_mark_fb_state_dirty(struct r300_context *r300, + enum r300_fb_state_change change); void r300_mark_fs_code_dirty(struct r300_context *r300); /* r300_debug.c */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 1db7da642b..c194d6a1b0 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -46,12 +46,12 @@ */ #define CS_LOCALS(context) \ - struct r300_context* const cs_context_copy = (context); \ - struct r300_winsys_screen *cs_winsys = cs_context_copy->rws; \ - CS_DEBUG(int cs_count = 0; (void) cs_count;) + struct r300_winsys_cs *cs_copy = (context)->cs; \ + struct r300_winsys_screen *cs_winsys = (context)->rws; \ + int cs_count = 0; (void) cs_count; (void) cs_winsys; #define BEGIN_CS(size) do { \ - assert(r300_check_cs(cs_context_copy, (size))); \ + assert(size <= (cs_copy->ndw - cs_copy->cdw)); \ CS_DEBUG(cs_count = size;) \ } while (0) @@ 
-66,49 +66,39 @@ #define END_CS #endif + /** * Writing pure DWORDs. */ #define OUT_CS(value) do { \ - cs_winsys->write_cs_dword(cs_winsys, (value)); \ + cs_copy->ptr[cs_copy->cdw++] = (value); \ CS_DEBUG(cs_count--;) \ } while (0) -#define OUT_CS_32F(value) do { \ - cs_winsys->write_cs_dword(cs_winsys, fui(value)); \ - CS_DEBUG(cs_count--;) \ -} while (0) +#define OUT_CS_32F(value) \ + OUT_CS(fui(value)) #define OUT_CS_REG(register, value) do { \ - assert(register); \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0(register, 0)); \ - cs_winsys->write_cs_dword(cs_winsys, value); \ - CS_DEBUG(cs_count -= 2;) \ + OUT_CS(CP_PACKET0(register, 0)); \ + OUT_CS(value); \ } while (0) /* Note: This expects count to be the number of registers, * not the actual packet0 count! */ -#define OUT_CS_REG_SEQ(register, count) do { \ - assert(register); \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1))); \ - CS_DEBUG(cs_count--;) \ -} while (0) +#define OUT_CS_REG_SEQ(register, count) \ + OUT_CS(CP_PACKET0((register), ((count) - 1))) -#define OUT_CS_TABLE(values, count) do { \ - cs_winsys->write_cs_table(cs_winsys, values, count); \ - CS_DEBUG(cs_count -= count;) \ -} while (0) +#define OUT_CS_ONE_REG(register, count) \ + OUT_CS(CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR) -#define OUT_CS_ONE_REG(register, count) do { \ - assert(register); \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \ - CS_DEBUG(cs_count--;) \ -} while (0) +#define OUT_CS_PKT3(op, count) \ + OUT_CS(CP_PACKET3(op, count)) -#define OUT_CS_PKT3(op, count) do { \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \ - CS_DEBUG(cs_count--;) \ +#define OUT_CS_TABLE(values, count) do { \ + memcpy(cs_copy->ptr + cs_copy->cdw, values, count * 4); \ + cs_copy->cdw += count; \ + CS_DEBUG(cs_count -= count;) \ } while (0) @@ -116,26 +106,26 @@ * Writing relocations. 
*/ -#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ +#define OUT_CS_RELOC(bo, offset, rd, wd) do { \ assert(bo); \ - cs_winsys->write_cs_dword(cs_winsys, offset); \ - cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ - CS_DEBUG(cs_count -= 3;) \ + OUT_CS(offset); \ + cs_winsys->cs_write_reloc(cs_copy, bo, rd, wd); \ + CS_DEBUG(cs_count -= 2;) \ } while (0) -#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ +#define OUT_CS_BUF_RELOC(bo, offset, rd, wd) do { \ assert(bo); \ - OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd, flags); \ + OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd); \ } while (0) -#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \ +#define OUT_CS_TEX_RELOC(tex, offset, rd, wd) do { \ assert(tex); \ - OUT_CS_RELOC(tex->buffer, offset, rd, wd, flags); \ + OUT_CS_RELOC(tex->buffer, offset, rd, wd); \ } while (0) -#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ +#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd) do { \ assert(bo); \ - cs_winsys->write_cs_reloc(cs_winsys, r300_buffer(bo)->buf, rd, wd, flags); \ + cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->buf, rd, wd); \ CS_DEBUG(cs_count -= 2;) \ } while (0) @@ -146,7 +136,8 @@ #define WRITE_CS_TABLE(values, count) do { \ CS_DEBUG(assert(cs_count == 0);) \ - cs_winsys->write_cs_table(cs_winsys, values, count); \ + memcpy(cs_copy->ptr + cs_copy->cdw, (values), (count) * 4); \ + cs_copy->cdw += (count); \ } while (0) #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index a6cd86e392..053a64ea6d 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -29,17 +29,21 @@ static const struct debug_named_value debug_options[] = { { "fp", DBG_FP, "Fragment program handling (for debugging)" }, { "vp", DBG_VP, "Vertex program handling (for debugging)" }, - { "draw", DBG_DRAW, "Draw and emit (for debugging)" }, + { "draw", DBG_DRAW, "Draw calls (for 
debugging)" }, + { "swtcl", DBG_SWTCL, "SWTCL-specific info (for debugging)" }, + { "rsblock", DBG_RS_BLOCK, "Rasterizer registers (for debugging)" }, + { "psc", DBG_PSC, "Vertex stream registers (for debugging)" }, { "tex", DBG_TEX, "Textures (for debugging)" }, { "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" }, { "fall", DBG_FALL, "Fallbacks (for debugging)" }, { "rs", DBG_RS, "Rasterizer (for debugging)" }, { "fb", DBG_FB, "Framebuffer (for debugging)" }, + { "cbzb", DBG_CBZB, "Fast color clear info (for debugging)" }, + { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, - { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for lulz)" }, - { "stats", DBG_STATS, "Gather statistics (for lulz)" }, + { "stats", DBG_STATS, "Gather statistics" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h index d510d80a7b..896aeef395 100644 --- a/src/gallium/drivers/r300/r300_defines.h +++ b/src/gallium/drivers/r300/r300_defines.h @@ -36,7 +36,10 @@ enum r300_buffer_tiling { R300_BUFFER_LINEAR = 0, R300_BUFFER_TILED, - R300_BUFFER_SQUARETILED + R300_BUFFER_SQUARETILED, + + R300_BUFFER_UNKNOWN, + R300_BUFFER_SELECT_LAYOUT = R300_BUFFER_UNKNOWN }; enum r300_buffer_domain { /* bitfield */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index e2c40d823d..36a26a7871 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -170,15 +170,18 @@ void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat { struct r300_fragment_shader *fs = r300_fs(r300); struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; - unsigned 
count = fs->shader->externals_count * 4; + unsigned count = fs->shader->externals_count; + unsigned i, j; CS_LOCALS(r300); if (count == 0) return; BEGIN_CS(size); - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count); - OUT_CS_TABLE(buf->constants, count); + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4); + for (i = 0; i < count; i++) + for (j = 0; j < 4; j++) + OUT_CS(pack_float24(*(float*)&buf->ptr[i*4+j])); END_CS; } @@ -190,7 +193,6 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo unsigned count = fs->shader->rc_state_count; unsigned first = fs->shader->externals_count; unsigned end = constants->Count; - uint32_t cdata[4]; unsigned j; CS_LOCALS(r300); @@ -203,11 +205,9 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo const float *data = get_rc_constant_state(r300, &constants->Constants[i]); - for (j = 0; j < 4; j++) - cdata[j] = pack_float24(data[j]); - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4); - OUT_CS_TABLE(cdata, 4); + for (j = 0; j < 4; j++) + OUT_CS(pack_float24(data[j])); } } END_CS; @@ -234,7 +234,7 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat BEGIN_CS(size); OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count); - OUT_CS_TABLE(buf->constants, count); + OUT_CS_TABLE(buf->ptr, count); END_CS; } @@ -267,13 +267,22 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo END_CS; } -void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) +void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) { - struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; - struct r300_surface* surf; - unsigned i; + struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state; + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height = fb->height; + uint32_t width 
= fb->width; CS_LOCALS(r300); + if (r300->cbzb_clear) { + struct r300_surface *surf = r300_surface(fb->cbufs[0]); + + height = surf->cbzb_height; + width = surf->cbzb_width; + } + BEGIN_CS(size); /* Set up scissors. @@ -281,27 +290,48 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); if (r300->screen->caps.is_r500) { OUT_CS(0); - OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) | - ((fb->height - 1) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) | + ((height - 1) << R300_SCISSORS_Y_SHIFT)); } else { OUT_CS((1440 << R300_SCISSORS_X_SHIFT) | (1440 << R300_SCISSORS_Y_SHIFT)); - OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) | - ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) | + ((height + 1440-1) << R300_SCISSORS_Y_SHIFT)); + } + + /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */ + OUT_CS_TABLE(gpuflush->cb_flush_clean, 6); + END_CS; +} + +void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) +{ + struct r300_aa_state *aa = (struct r300_aa_state*)state; + CS_LOCALS(r300); + + BEGIN_CS(size); + OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); + + if (aa->dest) { + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); + OUT_CS_RELOC(aa->dest->buffer, aa->dest->offset, 0, aa->dest->domain); + + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); + OUT_CS_RELOC(aa->dest->buffer, aa->dest->pitch, 0, aa->dest->domain); } - /* Flush and free renderbuffer caches. */ - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); + END_CS; +} - /* Wait until the GPU is idle. 
- * This fixes random pixels sometimes appearing probably caused - * by incomplete rendering. */ - OUT_CS_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); +void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) +{ + struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; + struct r300_surface* surf; + unsigned i; + CS_LOCALS(r300); + + BEGIN_CS(size); /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not * what we usually want. */ @@ -317,28 +347,123 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) surf = r300_surface(fb->cbufs[i]); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); - - OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), surf->format); - } - for (; i < 4; i++) { - OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED); + OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); } - /* Set up a zbuffer. */ - if (fb->zsbuf) { - surf = r300_surface(fb->zsbuf); + /* Set up the ZB part of the CBZB clear. */ + if (r300->cbzb_clear) { + surf = r300_surface(fb->cbufs[0]); + + OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain); + + OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); + OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain); + } + /* Set up a zbuffer. 
*/ + else if (fb->zsbuf) { + surf = r300_surface(fb->zsbuf); OUT_CS_REG(R300_ZB_FORMAT, surf->format); + OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain); + OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); + + /* HiZ RAM. */ + if (r300->screen->caps.has_hiz) { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + } + + /* Z Mask RAM. (compressed zbuffer) */ + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); + } + + END_CS; +} + +void r300_emit_hyperz_state(struct r300_context *r300, + unsigned size, void *state) +{ + CS_LOCALS(r300); + WRITE_CS_TABLE(state, size); +} + +void r300_emit_hyperz_end(struct r300_context *r300) +{ + struct r300_hyperz_state z = + *(struct r300_hyperz_state*)r300->hyperz_state.state; + + z.zb_bw_cntl = 0; + z.zb_depthclearvalue = 0; + z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; + + r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); +} + +void r300_emit_fb_state_pipelined(struct r300_context *r300, + unsigned size, void *state) +{ + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + unsigned i; + CS_LOCALS(r300); + + BEGIN_CS(size); + + /* Colorbuffer format in the US block. + * (must be written after unpipelined regs) */ + OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); + for (i = 0; i < fb->nr_cbufs; i++) { + OUT_CS(r300_surface(fb->cbufs[i])->format); + } + for (; i < 4; i++) { + OUT_CS(R300_US_OUT_FMT_UNUSED); + } + + /* Multisampling. Depends on framebuffer sample count. + * These are pipelined regs and as such cannot be moved + * to the AA state. */ + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + unsigned mspos0 = 0x66666666; + unsigned mspos1 = 0x6666666; + + if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { + /* Subsample placement. These may not be optimal. 
*/ + switch (fb->cbufs[0]->texture->nr_samples) { + case 2: + mspos0 = 0x33996633; + mspos1 = 0x6666663; + break; + case 3: + mspos0 = 0x33936933; + mspos1 = 0x6666663; + break; + case 4: + mspos0 = 0x33939933; + mspos1 = 0x3966663; + break; + case 6: + mspos0 = 0x22a2aa22; + mspos1 = 0x2a65672; + break; + default: + debug_printf("r300: Bad number of multisamples!\n"); + } + } + + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(mspos0); + OUT_CS(mspos1); } END_CS; } @@ -387,13 +512,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 3) * 4, - 0, query->domain, 0); + 0, query->domain); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 2) * 4, - 0, query->domain, 0); + 0, query->domain); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ @@ -401,13 +526,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, 1 << (caps->high_second_pipe ? 
3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 1) * 4, - 0, query->domain, 0); + 0, query->domain); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 0) * 4, - 0, query->domain, 0); + 0, query->domain); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -429,7 +554,7 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain, 0); + OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -443,10 +568,10 @@ static void rv530_emit_query_end_double_z(struct r300_context *r300, BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -480,102 +605,27 @@ void r300_emit_query_end(struct r300_context* r300) } } +void r300_emit_invariant_state(struct r300_context *r300, + unsigned size, void *state) +{ + CS_LOCALS(r300); + WRITE_CS_TABLE(state, size); +} + void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) { struct r300_rs_state* rs = state; - struct pipe_framebuffer_state* fb = r300->fb_state.state; - float scale, offset; - unsigned mspos0, mspos1, aa_config; CS_LOCALS(r300); 
BEGIN_CS(size); - OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); - - /* Multisampling. Depends on framebuffer sample count. */ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { - aa_config = R300_GB_AA_CONFIG_AA_ENABLE; - /* Subsample placement. These may not be optimal. */ - switch (fb->cbufs[0]->texture->nr_samples) { - case 2: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; - mspos0 = 0x33996633; - mspos1 = 0x6666663; - break; - case 3: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; - mspos0 = 0x33936933; - mspos1 = 0x6666663; - break; - case 4: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; - mspos0 = 0x33939933; - mspos1 = 0x3966663; - break; - case 6: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; - mspos0 = 0x22a2aa22; - mspos1 = 0x2a65672; - break; - default: - debug_printf("r300: Bad number of multisamples!\n"); - mspos0 = rs->multisample_position_0; - mspos1 = rs->multisample_position_1; - break; - } - - OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); - OUT_CS(mspos0); - OUT_CS(mspos1); - - OUT_CS_REG(R300_GB_AA_CONFIG, aa_config); - } else { - OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); - OUT_CS(rs->multisample_position_0); - OUT_CS(rs->multisample_position_1); - - OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config); - } - } - - OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); - OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); - OUT_CS(rs->point_minmax); - OUT_CS(rs->line_control); - + OUT_CS_TABLE(rs->cb_main, 25); if (rs->polygon_offset_enable) { - scale = rs->depth_scale * 12; - offset = rs->depth_offset; - - switch (r300->zbuffer_bpp) { - case 16: - offset *= 4; - break; - case 24: - offset *= 2; - break; + if (r300->zbuffer_bpp == 16) { + OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5); + } else { + OUT_CS_TABLE(rs->cb_poly_offset_zb24, 5); } - - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); - OUT_CS_32F(scale); - OUT_CS_32F(offset); - OUT_CS_32F(scale); - 
OUT_CS_32F(offset); } - - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); - OUT_CS(rs->polygon_offset_enable); - OUT_CS(rs->cull_mode); - OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); - OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value); - OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode); - OUT_CS_REG(R300_SC_CLIP_RULE, rs->clip_rule); - OUT_CS_REG(R300_GB_ENABLE, rs->stuffing_enable); - OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4); - OUT_CS_32F(rs->point_texcoord_left); - OUT_CS_32F(rs->point_texcoord_bottom); - OUT_CS_32F(rs->point_texcoord_right); - OUT_CS_32F(rs->point_texcoord_top); END_CS; } @@ -588,11 +638,20 @@ void r300_emit_rs_block_state(struct r300_context* r300, unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1; CS_LOCALS(r300); - if (SCREEN_DBG_ON(r300->screen, DBG_DRAW)) { + if (DBG_ON(r300, DBG_RS_BLOCK)) { r500_dump_rs_block(rs); - } - DBG(r300, DBG_DRAW, "r300: RS emit:\n"); + fprintf(stderr, "r300: RS emit:\n"); + + for (i = 0; i < count; i++) + fprintf(stderr, " : ip %d: 0x%08x\n", i, rs->ip[i]); + + for (i = 0; i < count; i++) + fprintf(stderr, " : inst %d: 0x%08x\n", i, rs->inst[i]); + + fprintf(stderr, " : count: 0x%08x inst_count: 0x%08x\n", + rs->count, rs->inst_count); + } BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); @@ -608,9 +667,6 @@ void r300_emit_rs_block_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RS_IP_0, count); } OUT_CS_TABLE(rs->ip, count); - for (i = 0; i < count; i++) { - DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]); - } OUT_CS_REG_SEQ(R300_RS_COUNT, 2); OUT_CS(rs->count); @@ -622,13 +678,6 @@ void r300_emit_rs_block_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RS_INST_0, count); } OUT_CS_TABLE(rs->inst, count); - for (i = 0; i < count; i++) { - DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]); - } - - DBG(r300, DBG_DRAW, " : count: 0x%08x inst_count: 0x%08x\n", - rs->count, rs->inst_count); - END_CS; } @@ -682,7 +731,7 @@ void 
r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain, - 0, 0); + 0); } } END_CS; @@ -725,7 +774,7 @@ void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) for (i = 0; i < aos_count; i++) { buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer); - OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0, 0); + OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0); } END_CS; } @@ -734,7 +783,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) { CS_LOCALS(r300); - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " + DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, " "vertex size %d\n", r300->vbo, r300->vertex_info.size); /* Set the pointer to our vertex buffer. The emitted values are this: @@ -750,7 +799,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->vbo_offset); - OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0, 0); + OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0); END_CS; } @@ -762,21 +811,25 @@ void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned i; CS_LOCALS(r300); - DBG(r300, DBG_DRAW, "r300: PSC emit:\n"); + if (DBG_ON(r300, DBG_PSC)) { + fprintf(stderr, "r300: PSC emit:\n"); + + for (i = 0; i < streams->count; i++) { + fprintf(stderr, " : prog_stream_cntl%d: 0x%08x\n", i, + streams->vap_prog_stream_cntl[i]); + } + + for (i = 0; i < streams->count; i++) { + fprintf(stderr, " : prog_stream_cntl_ext%d: 0x%08x\n", i, + streams->vap_prog_stream_cntl_ext[i]); + } + } BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count); - for (i = 0; i < streams->count; i++) { - DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, - 
streams->vap_prog_stream_cntl[i]); - } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count); - for (i = 0; i < streams->count; i++) { - DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, - streams->vap_prog_stream_cntl_ext[i]); - } END_CS; } @@ -789,6 +842,13 @@ void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) END_CS; } +void r300_emit_vap_invariant_state(struct r300_context *r300, + unsigned size, void *state) +{ + CS_LOCALS(r300); + WRITE_CS_TABLE(state, size); +} + void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) { struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state; @@ -813,6 +873,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) CS_LOCALS(r300); BEGIN_CS(size); + /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 @@ -865,7 +926,7 @@ void r300_emit_vs_constants(struct r300_context* r300, (r300->screen->caps.is_r500 ? R500_PVS_CONST_START : R300_PVS_CONST_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4); - OUT_CS_TABLE(buf->constants, count * 4); + OUT_CS_TABLE(buf->ptr, count * 4); END_CS; } @@ -924,27 +985,22 @@ void r300_emit_buffer_validate(struct r300_context *r300, } /* Clean out BOs. */ - r300->rws->reset_bos(r300->rws); + r300->rws->cs_reset_buffers(r300->cs); validate: /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { tex = r300_texture(fb->cbufs[i]->texture); assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - if (!r300_add_texture(r300->rws, tex, 0, tex->domain)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0, + r300_surface(fb->cbufs[i])->domain); } /* ...depth buffer... 
*/ if (fb->zsbuf) { tex = r300_texture(fb->zsbuf->texture); assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - if (!r300_add_texture(r300->rws, tex, - 0, tex->domain)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0, + r300_surface(fb->zsbuf)->domain); } /* ...textures... */ for (i = 0; i < texstate->count; i++) { @@ -953,48 +1009,31 @@ validate: } tex = r300_texture(texstate->sampler_views[i]->base.texture); - if (!r300_add_texture(r300->rws, tex, tex->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, tex->buffer, tex->domain, 0); } /* ...occlusion query buffer... */ - if (r300->query_current) { - if (!r300->rws->add_buffer(r300->rws, r300->query_current->buffer, - 0, r300->query_current->domain)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } + if (r300->query_current) + r300->rws->cs_add_buffer(r300->cs, r300->query_current->buffer, + 0, r300->query_current->domain); /* ...vertex buffer for SWTCL path... */ - if (r300->vbo) { - if (!r300_add_buffer(r300->rws, r300->vbo, - r300_buffer(r300->vbo)->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } + if (r300->vbo) + r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->buf, + r300_buffer(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers) { for (i = 0; i < r300->velems->count; i++) { pbuf = vbuf[velem[i].vertex_buffer_index].buffer; - if (!r300_add_buffer(r300->rws, pbuf, - r300_buffer(pbuf)->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, r300_buffer(pbuf)->buf, + r300_buffer(pbuf)->domain, 0); } } /* ...and index buffer for HWTCL path. 
*/ - if (index_buffer) { - if (!r300_add_buffer(r300->rws, index_buffer, - r300_buffer(index_buffer)->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } - if (!r300->rws->validate(r300->rws)) { + if (index_buffer) + r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->buf, + r300_buffer(index_buffer)->domain, 0); + + if (!r300->rws->cs_validate(r300->cs)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { /* Well, hell. */ diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 36a29894d0..5d05039669 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -45,6 +45,11 @@ void r300_emit_clip_state(struct r300_context* r300, void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state); +void r300_emit_hyperz_state(struct r300_context *r300, + unsigned size, void *state); + +void r300_emit_hyperz_end(struct r300_context *r300); + void r300_emit_fs(struct r300_context* r300, unsigned size, void *state); void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state); @@ -59,6 +64,13 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state); +void r300_emit_fb_state_pipelined(struct r300_context *r300, + unsigned size, void *state); + +void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state); + +void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state); + void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state); void r300_emit_query_end(struct r300_context* r300); @@ -76,6 +88,9 @@ void r300_emit_textures_state(struct r300_context *r300, void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed); +void r300_emit_vap_invariant_state(struct r300_context *r300, + unsigned size, void *state); + void 
r300_emit_vertex_stream_state(struct r300_context* r300, unsigned size, void* state); @@ -94,6 +109,9 @@ void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state); void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state); +void r300_emit_invariant_state(struct r300_context *r300, + unsigned size, void *state); + unsigned r300_get_num_dirty_dwords(struct r300_context *r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index ba840bfff8..ae7b5759e7 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -25,6 +25,7 @@ #include "draw/draw_private.h" #include "util/u_simple_list.h" +#include "util/u_upload_mgr.h" #include "r300_context.h" #include "r300_cs.h" @@ -39,6 +40,9 @@ static void r300_flush(struct pipe_context* pipe, struct r300_atom *atom; struct r300_fence **rfence = (struct r300_fence**)fence; + u_upload_flush(r300->upload_vb); + u_upload_flush(r300->upload_ib); + /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. * @@ -48,12 +52,11 @@ static void r300_flush(struct pipe_context* pipe, } if (r300->dirty_hw) { + r300_emit_hyperz_end(r300); r300_emit_query_end(r300); - if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { - r300->flush_counter++; - } - r300->rws->flush_cs(r300->rws); + r300->flush_counter++; + r300->rws->cs_flush(r300->cs); r300->dirty_hw = 0; /* New kitchen sink, baby. */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index e585394304..db5269912e 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -173,7 +173,7 @@ static void get_external_state( t = (struct r300_texture*)texstate->sampler_views[i]->base.texture; /* XXX this should probably take into account STR, not just S. 
*/ - if (t->uses_pitch) { + if (t->desc.is_npot) { switch (s->state.wrap_s) { case PIPE_TEX_WRAP_REPEAT: state->unit[i].wrap_mode = RC_WRAP_REPEAT; @@ -246,13 +246,14 @@ static void r300_emit_fs_code_to_buffer( if (r300->screen->caps.is_r500) { struct r500_fragment_program_code *code = &generic_code->code.r500; - shader->cb_code_size = 17 + + shader->cb_code_size = 19 + ((code->inst_end + 1) * 6) + imm_count * 7; NEW_CB(shader->cb_code, shader->cb_code_size); OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx); + OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl); OUT_CB_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); OUT_CB_REG(R500_US_CODE_OFFSET, 0); @@ -288,11 +289,16 @@ static void r300_emit_fs_code_to_buffer( struct r300_fragment_program_code *code = &generic_code->code.r300; shader->cb_code_size = 19 + + (r300->screen->caps.is_r400 ? 2 : 0) + code->alu.length * 4 + (code->tex.length ? (1 + code->tex.length) : 0) + imm_count * 5; NEW_CB(shader->cb_code, shader->cb_code_size); + + if (r300->screen->caps.is_r400) + OUT_CB_REG(R400_US_CODE_BANK, 0); + OUT_CB_REG(R300_US_CONFIG, code->config); OUT_CB_REG(R300_US_PIXSIZE, code->pixsize); OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset); diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index e5c7658952..e952895601 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -21,13 +21,28 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "r300_hyperz.h" #include "r300_context.h" +#include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" /*****************************************************************************/ +/* The HyperZ setup */ +/*****************************************************************************/ + +static void r300_update_hyperz(struct r300_context* r300) +{ + struct r300_hyperz_state *z = + (struct r300_hyperz_state*)r300->hyperz_state.state; + + z->zb_bw_cntl = 0; + z->sc_hyperz = R300_SC_HYPERZ_ADJ_2; + + if (r300->cbzb_clear) + z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY; +} + +/*****************************************************************************/ /* The ZTOP state */ /*****************************************************************************/ @@ -119,4 +134,7 @@ static void r300_update_ztop(struct r300_context* r300) void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); + if (r300->hyperz_state.dirty) { + r300_update_hyperz(r300); + } } diff --git a/src/gallium/drivers/r300/r300_public.h b/src/gallium/drivers/r300/r300_public.h new file mode 100644 index 0000000000..8e7a963c55 --- /dev/null +++ b/src/gallium/drivers/r300/r300_public.h @@ -0,0 +1,9 @@ + +#ifndef R300_PUBLIC_H +#define R300_PUBLIC_H + +struct r300_winsys_screen; + +struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws); + +#endif diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 10cb468dfc..5b0121ce9e 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -37,7 +37,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, struct r300_screen *r300screen = r300->screen; struct r300_query *q; - assert(query_type == PIPE_QUERY_OCCLUSION_COUNTER); + if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) { + return NULL; + } q = CALLOC_STRUCT(r300_query); if (!q) @@ -55,7 +57,9 @@ static struct pipe_query 
*r300_create_query(struct pipe_context *pipe, insert_at_tail(&r300->query_list, q); /* Open up the occlusion query buffer. */ - q->buffer = r300->rws->buffer_create(r300->rws, 4096, 0, q->domain, q->buffer_size); + q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, + PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, + q->domain); return (struct pipe_query*)q; } @@ -132,7 +136,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0); - map = r300->rws->buffer_map(r300->rws, q->buffer, flags); + map = r300->rws->buffer_map(r300->rws, q->buffer, r300->cs, flags); if (!map) return FALSE; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index c783998c78..2acc1a903e 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2617,7 +2617,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_WR_COMP_DISABLE (0 << 4) # define R300_WR_COMP_ENABLE (1 << 4) # define R300_ZB_CB_CLEAR_RMW (0 << 5) -# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5) +# define R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY (1 << 5) # define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6) # define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6) @@ -2673,6 +2673,24 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Z Buffer Clear Value */ #define R300_ZB_DEPTHCLEARVALUE 0x4f28 +/* Z Mask RAM is a Z compression buffer. + * Each dword of the Z Mask contains compression info for 16 4x4 pixel blocks, + * that is 2 bits for each block. + * On chips with 2 Z pipes, every other dword maps to a different pipe. + */ + +/* The dword offset into Z mask RAM (bits 18:4) */ +#define R300_ZB_ZMASK_OFFSET 0x4f30 + +/* Z Mask Pitch. */ +#define R300_ZB_ZMASK_PITCH 0x4f34 + +/* Access to Z Mask RAM in a manner similar to HiZ RAM. + * The indices are autoincrementing. 
*/ +#define R300_ZB_ZMASK_WRINDEX 0x4f38 +#define R300_ZB_ZMASK_DWORD 0x4f3c +#define R300_ZB_ZMASK_RDINDEX 0x4f40 + /* Hierarchical Z Memory Offset */ #define R300_ZB_HIZ_OFFSET 0x4f44 @@ -3264,8 +3282,8 @@ enum { # define R500_FC_B_OP0_NONE (0 << 24) # define R500_FC_B_OP0_DECR (1 << 24) # define R500_FC_B_OP0_INCR (2 << 24) -# define R500_FC_B_OP1_DECR (0 << 26) -# define R500_FC_B_OP1_NONE (1 << 26) +# define R500_FC_B_OP1_NONE (0 << 26) +# define R500_FC_B_OP1_DECR (1 << 26) # define R500_FC_B_OP1_INCR (2 << 26) # define R500_FC_IGNORE_UNCOVERED (1 << 28) #define R500_US_FC_INT_CONST_0 0x4c00 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 4afd124c0e..bae02135da 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -35,7 +35,6 @@ #include "util/u_prim.h" #include "r300_cs.h" -#include "r300_cb.h" #include "r300_context.h" #include "r300_screen_buffer.h" #include "r300_emit.h" @@ -224,11 +223,12 @@ static void r300_prepare_for_rendering(struct r300_context *r300, /* Emitted in flush. */ end_dwords += 26; /* emit_query_end */ + end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */ cs_dwords += end_dwords; /* Reserve requested CS space. */ - if (!r300_check_cs(r300, cs_dwords)) { + if (cs_dwords > (r300->cs->ndw - r300->cs->cdw)) { r300->context.flush(&r300->context, 0, NULL); flushed = TRUE; } @@ -278,7 +278,6 @@ static boolean immd_is_good_idea(struct r300_context *r300, /* We shouldn't map buffers referenced by CS, busy buffers, * and ones placed in VRAM. */ - /* XXX Check for VRAM buffers. 
*/ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; vbi = velem->vertex_buffer_index; @@ -286,6 +285,10 @@ static boolean immd_is_good_idea(struct r300_context *r300, if (!checked[vbi]) { vbuf = &r300->vertex_buffer[vbi]; + if (!(r300_buffer(vbuf->buffer)->domain & R300_DOMAIN_GTT)) { + return FALSE; + } + if (r300_buffer_is_referenced(&r300->context, vbuf->buffer, R300_REF_CS | R300_REF_HW)) { @@ -299,8 +302,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, } /***************************************************************************** - * The emission of draw packets for r500. Older GPUs may use these functions * - * after resolving fallback issues (e.g. stencil ref two-sided). * + * The HWTCL draw functions. * ****************************************************************************/ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, @@ -316,74 +318,70 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, /* Size of the vertex, in dwords. */ unsigned vertex_size = r300->velems->vertex_size_dwords; - /* Offsets of the attribute, in dwords, from the start of the vertex. */ - unsigned offset[PIPE_MAX_ATTRIBS]; - /* Size of the vertex element, in dwords. */ unsigned size[PIPE_MAX_ATTRIBS]; /* Stride to the same attrib in the next vertex in the vertex buffer, * in dwords. */ - unsigned stride[PIPE_MAX_ATTRIBS] = {0}; + unsigned stride[PIPE_MAX_ATTRIBS]; /* Mapped vertex buffers. */ - uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; - struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {NULL}; + uint32_t* map[PIPE_MAX_ATTRIBS]; + uint32_t* mapelem[PIPE_MAX_ATTRIBS]; + struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0}; - CB_LOCALS; + CS_LOCALS(r300); /* Calculate the vertex size, offsets, strides etc. and map the buffers. 
*/ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; - offset[i] = velem->src_offset / 4; size[i] = r300->velems->hw_format_size[i] / 4; vbi = velem->vertex_buffer_index; + vbuf = &r300->vertex_buffer[vbi]; + stride[i] = vbuf->stride / 4; /* Map the buffer. */ - if (!map[vbi]) { - vbuf = &r300->vertex_buffer[vbi]; + if (!transfer[vbi]) { map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, vbuf->buffer, PIPE_TRANSFER_READ, &transfer[vbi]); - stride[vbi] = vbuf->stride / 4; - map[vbi] += vbuf->buffer_offset / 4 + stride[vbi] * start; + map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; } + mapelem[i] = map[vbi] + (velem->src_offset / 4); } dwords = 9 + count * vertex_size; r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); - BEGIN_CS_AS_CB(r300, dwords); - OUT_CB_REG(R300_GA_COLOR_CONTROL, + BEGIN_CS(dwords); + OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); - OUT_CB_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CB_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CB(count - 1); - OUT_CB(0); - OUT_CB_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); - OUT_CB(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(count - 1); + OUT_CS(0); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); /* Emit vertices. */ for (v = 0; v < count; v++) { for (i = 0; i < vertex_element_count; i++) { - vbi = r300->velems->velem[i].vertex_buffer_index; - - OUT_CB_TABLE(&map[vbi][offset[i] + stride[vbi] * v], size[i]); + OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]); } } - END_CB; + END_CS; /* Unmap buffers. 
*/ for (i = 0; i < vertex_element_count; i++) { vbi = r300->velems->velem[i].vertex_buffer_index; - if (map[vbi]) { + if (transfer[vbi]) { vbuf = &r300->vertex_buffer[vbi]; pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]); - map[vbi] = NULL; + transfer[vbi] = NULL; } } } @@ -475,7 +473,7 @@ static void r300_emit_draw_elements(struct r300_context *r300, (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); OUT_CS_BUF_RELOC(indexBuffer, count_dwords, - r300_buffer(indexBuffer)->domain, 0, 0); + r300_buffer(indexBuffer)->domain, 0); END_CS; } @@ -499,6 +497,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, unsigned short_count; int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ boolean translate = FALSE; + unsigned new_offset; if (r300->skip_rendering) { return; @@ -508,6 +507,12 @@ static void r300_draw_range_elements(struct pipe_context* pipe, return; } + /* Index buffer range checking. */ + if ((start + count) * indexSize > indexBuffer->width0) { + fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n"); + return; + } + /* Set up fallback for incompatible vertex layout if needed. 
*/ if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { r300_begin_vertex_translate(r300); @@ -522,18 +527,17 @@ static void r300_draw_range_elements(struct pipe_context* pipe, &start, count); r300_update_derived_state(r300); - r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count); + r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count, &new_offset); + start = new_offset; /* 15 dwords for emit_draw_elements */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, indexBuffer, 15, buffer_offset, indexBias, NULL); - u_upload_flush(r300->upload_vb); - u_upload_flush(r300->upload_ib); if (alt_num_verts || count <= 65535) { r300_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, mode, start, count); + minIndex, maxIndex, mode, start, count); } else { do { short_count = MIN2(count, 65534); @@ -865,13 +869,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render, unsigned dwords = 6; CS_LOCALS(r300); - (void) i; (void) ptr; r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, NULL, dwords, 0, 0, NULL); - DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); + DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count); /* Uncomment to dump all VBOs rendered through this interface. * Slow and noisy! @@ -914,6 +917,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, unsigned free_dwords; CS_LOCALS(r300); + DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count); /* Reserve at least 256 dwords. 
* @@ -924,7 +928,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, NULL, 256, 0, 0, &end_cs_dwords); while (count) { - free_dwords = r300->rws->get_cs_free_dwords(r300->rws); + free_dwords = r300->cs->ndw - r300->cs->cdw; short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2); @@ -1015,6 +1019,88 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300) * End of SW TCL functions * ***************************************************************************/ +/* If we used a quad to draw a rectangle, the pixels on the main diagonal + * would be computed and stored twice, which makes the clear/copy codepaths + * somewhat inefficient. Instead we use a rectangular point sprite. */ +static void r300_blitter_draw_rectangle(struct blitter_context *blitter, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2, + float depth, + enum blitter_attrib_type type, + const float attrib[4]) +{ + struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter)); + unsigned last_sprite_coord_enable = r300->sprite_coord_enable; + unsigned width = x2 - x1; + unsigned height = y2 - y1; + unsigned vertex_size = + type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4; + unsigned dwords = 13 + vertex_size + + (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0); + const float zeros[4] = {0, 0, 0, 0}; + CS_LOCALS(r300); + + if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) + r300->sprite_coord_enable = 1; + + r300_update_derived_state(r300); + + /* Mark some states we don't care about as non-dirty. */ + r300->clip_state.dirty = FALSE; + r300->viewport_state.dirty = FALSE; + + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); + + DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); + + BEGIN_CS(dwords); + /* Set up GA. */ + OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16)); + + if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) { + /* Set up the GA to generate texcoords. 
*/ + OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | + (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT)); + OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4); + OUT_CS_32F(attrib[0]); + OUT_CS_32F(attrib[3]); + OUT_CS_32F(attrib[2]); + OUT_CS_32F(attrib[1]); + } + + /* Set up VAP controls. */ + OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); + OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(1); + OUT_CS(0); + + /* Draw. */ + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) | + R300_VAP_VF_CNTL__PRIM_POINTS); + + OUT_CS_32F(x1 + width * 0.5f); + OUT_CS_32F(y1 + height * 0.5f); + OUT_CS_32F(depth); + OUT_CS_32F(1); + + if (vertex_size == 8) { + if (!attrib) + attrib = zeros; + OUT_CS_TABLE(attrib, 4); + } + END_CS; + + /* Restore the state. */ + r300->clip_state.dirty = TRUE; + r300->rs_state.dirty = TRUE; + r300->viewport_state.dirty = TRUE; + + r300->sprite_coord_enable = last_sprite_coord_enable; +} + static void r300_resource_resolve(struct pipe_context* pipe, struct pipe_resource* dest, struct pipe_subresource subdest, @@ -1022,33 +1108,35 @@ static void r300_resource_resolve(struct pipe_context* pipe, struct pipe_subresource subsrc) { struct r300_context* r300 = r300_context(pipe); - struct r300_surface* destsurf = r300_surface( - dest->screen->get_tex_surface(dest->screen, - dest, subdest.face, subdest.level, 0, 0)); + struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_surface* srcsurf = src->screen->get_tex_surface(src->screen, src, subsrc.face, subsrc.level, 0, 0); float color[] = {0, 0, 0, 0}; - CS_LOCALS(r300); DBG(r300, DBG_DRAW, "r300: Resolving resource...\n"); - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); - OUT_CS_RELOC(destsurf->buffer, destsurf->offset, 0, destsurf->domain, 0); - - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); - 
OUT_CS_RELOC(destsurf->buffer, destsurf->pitch, 0, destsurf->domain, 0); + /* Enable AA resolve. */ + aa->dest = r300_surface( + dest->screen->get_tex_surface(dest->screen, dest, subdest.face, + subdest.level, 0, 0)); - OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, + aa->aaresolve_ctl = R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | - R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE); + R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; + r300->aa_state.size = 12; + r300->aa_state.dirty = TRUE; + /* Resolve the surface. */ r300->context.clear_render_target(pipe, srcsurf, color, 0, 0, src->width0, src->height0); - OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x0); + /* Disable AA resolve. */ + aa->aaresolve_ctl = 0; + r300->aa_state.size = 4; + r300->aa_state.dirty = TRUE; pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL); - pipe_surface_reference((struct pipe_surface**)&destsurf, NULL); + pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL); } void r300_init_render_functions(struct r300_context *r300) @@ -1066,6 +1154,7 @@ void r300_init_render_functions(struct r300_context *r300) } r300->context.resource_resolve = r300_resource_resolve; + r300->blitter->draw_rectangle = r300_blitter_draw_rectangle; /* Plug in the two-sided stencil reference value fallback if needed. */ if (!r300->screen->caps.is_r500) diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c index d509ded3ec..9a6b4e12ff 100644 --- a/src/gallium/drivers/r300/r300_render_stencilref.c +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -64,12 +64,12 @@ static void r300_stencilref_begin(struct r300_context *r300) struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; /* Save state. 
*/ - sr->rs_cull_mode = rs->cull_mode; + sr->rs_cull_mode = rs->cb_main[rs->cull_mode_index]; sr->zb_stencilrefmask = dsa->stencil_ref_mask; sr->ref_value_front = r300->stencil_ref.ref_value[0]; /* We *cull* pixels, therefore no need to mask out the bits. */ - rs->cull_mode |= R300_CULL_BACK; + rs->cb_main[rs->cull_mode_index] |= R300_CULL_BACK; r300->rs_state.dirty = TRUE; } @@ -81,7 +81,7 @@ static void r300_stencilref_switch_side(struct r300_context *r300) struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - rs->cull_mode = sr->rs_cull_mode | R300_CULL_FRONT; + rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode | R300_CULL_FRONT; dsa->stencil_ref_mask = dsa->stencil_ref_bf; r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1]; @@ -97,7 +97,7 @@ static void r300_stencilref_end(struct r300_context *r300) struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; /* Restore state. */ - rs->cull_mode = sr->rs_cull_mode; + rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode; dsa->stencil_ref_mask = sr->zb_stencilrefmask; r300->stencil_ref.ref_value[0] = sr->ref_value_front; diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8f7c96b829..676430f5fe 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -30,6 +30,7 @@ #include "r300_screen_buffer.h" #include "r300_state_inlines.h" #include "r300_winsys.h" +#include "r300_public.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. 
@@ -114,6 +115,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_DEPTH_CLAMP: return 1; /* Unsupported features (boolean caps). */ @@ -206,6 +208,8 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 1; /* XXX guessed */ case PIPE_CAP_MAX_VS_PREDS: return is_r500 ? 4 : 0; /* XXX guessed. */ + case PIPE_CAP_GEOMETRY_SHADER4: + return 0; default: fprintf(stderr, "r300: Implementation error: Bad param %d\n", @@ -253,9 +257,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, uint32_t retval = 0; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; - boolean is_rv350 = r300_screen(screen)->caps.is_rv350; - boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM || - format == PIPE_FORMAT_S8_USCALED_Z24_UNORM; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || format == PIPE_FORMAT_R10G10B10X2_SNORM || format == PIPE_FORMAT_B10G10R10A2_UNORM || @@ -269,12 +270,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, format == PIPE_FORMAT_R16G16B16_FLOAT || format == PIPE_FORMAT_R16G16B16A16_FLOAT; - if (target >= PIPE_MAX_TEXTURE_TYPES) { - fprintf(stderr, "r300: Implementation error: Received bogus texture " - "target %d in %s\n", target, __FUNCTION__); - return FALSE; - } - + /* Check multisampling support. */ switch (sample_count) { case 0: case 1: @@ -295,8 +291,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, /* Check sampler format support. */ if ((usage & PIPE_BIND_SAMPLER_VIEW) && - /* Z24 cannot be sampled from on non-r5xx. */ - (is_r500 || !is_z24) && /* ATI1N is r5xx-only. */ (is_r500 || !is_ati1n) && /* ATI2N is supported on r4xx-r5xx. 
*/ @@ -329,7 +323,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, /* Check vertex buffer format support. */ if (usage & PIPE_BIND_VERTEX_BUFFER && /* Half float is supported on >= RV350. */ - (is_rv350 || !is_half_float) && + (is_r400 || is_r500 || !is_half_float) && r300_translate_vertex_data_type(format) != R300_INVALID_FORMAT) { retval |= PIPE_BIND_VERTEX_BUFFER; } @@ -348,6 +342,8 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) struct r300_screen* r300screen = r300_screen(pscreen); struct r300_winsys_screen *rws = r300_winsys_screen(pscreen); + util_mempool_destroy(&r300screen->pool_buffers); + if (rws) rws->destroy(rws); @@ -387,7 +383,7 @@ static int r300_fence_finish(struct pipe_screen *screen, return 0; /* 0 == success */ } -struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) +struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) { struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen); @@ -403,6 +399,10 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) r300_init_debug(r300screen); r300_parse_chipset(&r300screen->caps); + util_mempool_create(&r300screen->pool_buffers, + sizeof(struct r300_buffer), 64, + UTIL_MEMPOOL_SINGLETHREADED); + r300screen->rws = rws; r300screen->screen.winsys = (struct pipe_winsys*)rws; r300screen->screen.destroy = r300_destroy_screen; @@ -423,9 +423,3 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) return &r300screen->screen; } - -struct r300_winsys_screen * -r300_winsys_screen(struct pipe_screen *screen) -{ - return r300_screen(screen)->rws; -} diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 29cd5dbe26..18745b83a0 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -28,8 +28,12 @@ #include "r300_chipset.h" +#include "util/u_mempool.h" + #include <stdio.h> +struct r300_winsys_screen; + struct r300_screen { 
/* Parent class */ struct pipe_screen screen; @@ -39,16 +43,28 @@ struct r300_screen { /* Chipset capabilities */ struct r300_capabilities caps; + /* Memory pools. */ + struct util_mempool pool_buffers; + /** Combination of DBG_xxx flags */ unsigned debug; + + /* The number of created contexts to know whether we have multiple + * contexts or not. */ + int num_contexts; }; -/* Convenience cast wrapper. */ +/* Convenience cast wrappers. */ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { return (struct r300_screen*)screen; } +static INLINE struct r300_winsys_screen * +r300_winsys_screen(struct pipe_screen *screen) { + return r300_screen(screen)->rws; +} + /* Debug functionality. */ /** @@ -61,17 +77,20 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { * those changes. */ /*@{*/ -#define DBG_HELP (1 << 0) + /* Logging. */ +#define DBG_PSC (1 << 0) #define DBG_FP (1 << 1) #define DBG_VP (1 << 2) -/* The bit (1 << 3) is unused. */ +#define DBG_SWTCL (1 << 3) #define DBG_DRAW (1 << 4) #define DBG_TEX (1 << 5) #define DBG_TEXALLOC (1 << 6) #define DBG_RS (1 << 7) #define DBG_FALL (1 << 8) #define DBG_FB (1 << 9) +#define DBG_RS_BLOCK (1 << 10) +#define DBG_CBZB (1 << 11) /* Features. 
*/ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 7959e6a2f9..37a080ba48 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -43,7 +43,7 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context, if (r300_buffer_is_user_buffer(buf)) return PIPE_UNREFERENCED; - if (r300->rws->is_buffer_referenced(r300->rws, rbuf->buf, domain)) + if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->buf, domain)) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; return PIPE_UNREFERENCED; @@ -62,7 +62,8 @@ int r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned index_size, unsigned start, - unsigned count) + unsigned count, + unsigned *out_offset) { struct pipe_resource *upload_buffer = NULL; unsigned index_offset = start * index_size; @@ -79,7 +80,10 @@ int r300_upload_index_buffer(struct r300_context *r300, goto done; } *index_buffer = upload_buffer; - } + *out_offset = index_offset / index_size; + } else + *out_offset = start; + done: // if (upload_buffer) // pipe_resource_reference(&upload_buffer, NULL); @@ -119,31 +123,59 @@ int r300_upload_user_buffers(struct r300_context *r300) return ret; } -static void r300_winsys_buffer_destroy(struct r300_screen *r300screen, - struct r300_buffer *rbuf) +static void r300_buffer_destroy(struct pipe_screen *screen, + struct pipe_resource *buf) { + struct r300_screen *r300screen = r300_screen(screen); + struct r300_buffer *rbuf = r300_buffer(buf); struct r300_winsys_screen *rws = r300screen->rws; - if (rbuf->buf) { - rws->buffer_reference(rws, &rbuf->buf, NULL); - rbuf->buf = NULL; - } + if (rbuf->constant_buffer) + FREE(rbuf->constant_buffer); + + if (rbuf->buf) + rws->buffer_reference(rws, &rbuf->buf, NULL); + + util_mempool_free(&r300screen->pool_buffers, rbuf); } -static void 
r300_buffer_destroy(struct pipe_screen *screen, - struct pipe_resource *buf) +static struct pipe_transfer* +r300_default_get_transfer(struct pipe_context *context, + struct pipe_resource *resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) { - struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf = r300_buffer(buf); + struct r300_context *r300 = r300_context(context); + struct pipe_transfer *transfer = + util_mempool_malloc(&r300->pool_transfers); + + transfer->resource = resource; + transfer->sr = sr; + transfer->usage = usage; + transfer->box = *box; + transfer->stride = 0; + transfer->slice_stride = 0; + transfer->data = NULL; + + /* Note strides are zero, this is ok for buffers, but not for + * textures 2d & higher at least. + */ + return transfer; +} - r300_winsys_buffer_destroy(r300screen, rbuf); - FREE(rbuf); +static void r300_default_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct r300_context *r300 = r300_context(pipe); + util_mempool_free(&r300->pool_transfers, transfer); } static void * r300_buffer_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) { + struct r300_context *r300 = r300_context(pipe); struct r300_screen *r300screen = r300_screen(pipe->screen); struct r300_winsys_screen *rws = r300screen->rws; struct r300_buffer *rbuf = r300_buffer(transfer->resource); @@ -153,10 +185,8 @@ r300_buffer_transfer_map( struct pipe_context *pipe, if (rbuf->user_buffer) return (uint8_t *) rbuf->user_buffer + transfer->box.x; - - if (rbuf->b.b.bind & PIPE_BIND_CONSTANT_BUFFER) { - goto just_map; - } + if (rbuf->constant_buffer) + return (uint8_t *) rbuf->constant_buffer + transfer->box.x; /* check if the mapping is to a range we already flushed */ if (transfer->usage & PIPE_TRANSFER_DISCARD) { @@ -170,16 +200,18 @@ r300_buffer_transfer_map( struct pipe_context *pipe, rws->buffer_reference(rws, &rbuf->buf, NULL); rbuf->num_ranges = 0; - 
rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, 16, - rbuf->b.b.bind, - rbuf->domain, - rbuf->b.b.width0); + rbuf->buf = + r300screen->rws->buffer_create(r300screen->rws, + rbuf->b.b.width0, 16, + rbuf->b.b.bind, + rbuf->b.b.usage, + rbuf->domain); break; } } } -just_map: - map = rws->buffer_map(rws, rbuf->buf, transfer->usage); + + map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage); if (map == NULL) return NULL; @@ -204,9 +236,8 @@ static void r300_buffer_transfer_flush_region( struct pipe_context *pipe, if (rbuf->user_buffer) return; - - if (rbuf->b.b.bind & PIPE_BIND_CONSTANT_BUFFER) - return; + if (rbuf->constant_buffer) + return; /* mark the range as used */ for(i = 0; i < rbuf->num_ranges; ++i) { @@ -237,14 +268,14 @@ static void r300_buffer_transfer_unmap( struct pipe_context *pipe, struct u_resource_vtbl r300_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ - r300_buffer_destroy, /* resource_destroy */ - r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ - r300_buffer_transfer_map, /* transfer_map */ + r300_buffer_destroy, /* resource_destroy */ + r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */ + r300_default_get_transfer, /* get_transfer */ + r300_default_transfer_destroy, /* transfer_destroy */ + r300_buffer_transfer_map, /* transfer_map */ r300_buffer_transfer_flush_region, /* transfer_flush_region */ - r300_buffer_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + r300_buffer_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ }; struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, @@ -254,9 +285,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, struct r300_buffer *rbuf; unsigned alignment = 16; - rbuf = CALLOC_STRUCT(r300_buffer); - if (!rbuf) - goto error1; + 
rbuf = util_mempool_malloc(&r300screen->pool_buffers); rbuf->magic = R300_BUFFER_MAGIC; @@ -265,21 +294,29 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, pipe_reference_init(&rbuf->b.b.reference, 1); rbuf->b.b.screen = screen; rbuf->domain = R300_DOMAIN_GTT; + rbuf->num_ranges = 0; + rbuf->buf = NULL; + rbuf->constant_buffer = NULL; + rbuf->user_buffer = NULL; + + /* Alloc constant buffers in RAM. */ + if (templ->bind & PIPE_BIND_CONSTANT_BUFFER) { + rbuf->constant_buffer = MALLOC(templ->width0); + return &rbuf->b.b; + } - rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, - alignment, - rbuf->b.b.bind, - rbuf->domain, - rbuf->b.b.width0); + rbuf->buf = + r300screen->rws->buffer_create(r300screen->rws, + rbuf->b.b.width0, alignment, + rbuf->b.b.bind, rbuf->b.b.usage, + rbuf->domain); - if (!rbuf->buf) - goto error2; + if (!rbuf->buf) { + util_mempool_free(&r300screen->pool_buffers, rbuf); + return NULL; + } return &rbuf->b.b; -error2: - FREE(rbuf); -error1: - return NULL; } struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, @@ -287,28 +324,28 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, unsigned bytes, unsigned bind) { + struct r300_screen *r300screen = r300_screen(screen); struct r300_buffer *rbuf; - rbuf = CALLOC_STRUCT(r300_buffer); - if (!rbuf) - goto no_rbuf; + rbuf = util_mempool_malloc(&r300screen->pool_buffers); rbuf->magic = R300_BUFFER_MAGIC; pipe_reference_init(&rbuf->b.b.reference, 1); rbuf->b.vtbl = &r300_buffer_vtbl; rbuf->b.b.screen = screen; + rbuf->b.b.target = PIPE_BUFFER; rbuf->b.b.format = PIPE_FORMAT_R8_UNORM; rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE; rbuf->b.b.bind = bind; rbuf->b.b.width0 = bytes; rbuf->b.b.height0 = 1; rbuf->b.b.depth0 = 1; + rbuf->b.b.flags = 0; rbuf->domain = R300_DOMAIN_GTT; - + rbuf->num_ranges = 0; + rbuf->buf = NULL; + rbuf->constant_buffer = NULL; rbuf->user_buffer = ptr; return &rbuf->b.b; - -no_rbuf: - return NULL; } diff --git 
a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index ff35585870..cafa9f96f2 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -55,6 +55,7 @@ struct r300_buffer enum r300_buffer_domain domain; void *user_buffer; + void *constant_buffer; struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES]; unsigned num_ranges; }; @@ -67,7 +68,7 @@ int r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned index_size, unsigned start, - unsigned count); + unsigned count, unsigned *out_offset); struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ); @@ -97,23 +98,4 @@ static INLINE boolean r300_buffer_is_user_buffer(struct pipe_resource *buffer) return r300_buffer(buffer)->user_buffer ? true : false; } -static INLINE boolean r300_add_buffer(struct r300_winsys_screen *rws, - struct pipe_resource *buffer, - int rd, int wr) -{ - struct r300_buffer *buf = r300_buffer(buffer); - - if (!buf->buf) - return true; - - return rws->add_buffer(rws, buf->buf, rd, wr); -} - -static INLINE boolean r300_add_texture(struct r300_winsys_screen *rws, - struct r300_texture *tex, - int rd, int wr) -{ - return rws->add_buffer(rws, tex->buffer, rd, wr); -} - #endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index bc2b62ba54..3e221f2e02 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -23,6 +23,7 @@ #include "draw/draw_context.h" +#include "util/u_blitter.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_pack_color.h" @@ -428,14 +429,19 @@ static void r300_set_clip_state(struct pipe_context* pipe, clip->clip = *state; if (r300->screen->caps.has_tcl) { - BEGIN_CB(clip->cb, 29); - OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG, - (r300->screen->caps.is_r500 ? 
- R500_PVS_UCP_START : R300_PVS_UCP_START)); - OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); - OUT_CB_TABLE(state->ucp, 6 * 4); + r300->clip_state.size = 2 + !!state->nr * 3 + state->nr * 4; + + BEGIN_CB(clip->cb, r300->clip_state.size); + if (state->nr) { + OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG, + (r300->screen->caps.is_r500 ? + R500_PVS_UCP_START : R300_PVS_UCP_START)); + OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, state->nr * 4); + OUT_CB_TABLE(state->ucp, state->nr * 4); + } OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) | - R300_PS_UCP_MODE_CLIP_AS_TRIFAN); + R300_PS_UCP_MODE_CLIP_AS_TRIFAN | + (state->depth_clamp ? R300_CLIP_DISABLE : 0)); END_CB; r300->clip_state.dirty = TRUE; @@ -608,32 +614,43 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, r300->dsa_state.dirty = TRUE; } +static void r300_tex_set_tiling_flags(struct r300_context *r300, + struct r300_texture *tex, unsigned level) +{ + /* Check if the macrotile flag needs to be changed. + * Skip changing the flags otherwise. */ + if (tex->desc.macrotile[tex->surface_level] != + tex->desc.macrotile[level]) { + /* Tiling determines how DRM treats the buffer data. + * We must flush CS when changing it if the buffer is referenced. */ + if (r300->rws->cs_is_buffer_referenced(r300->cs, + tex->buffer, R300_REF_CS)) + r300->context.flush(&r300->context, 0, NULL); + + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, + tex->desc.microtile, tex->desc.macrotile[level], + tex->desc.stride_in_bytes[0]); + + tex->surface_level = level; + } +} + /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */ static void r300_fb_set_tiling_flags(struct r300_context *r300, - const struct pipe_framebuffer_state *old_state, - const struct pipe_framebuffer_state *new_state) + const struct pipe_framebuffer_state *state) { - struct r300_texture *tex; - unsigned i, level; + unsigned i; /* Set tiling flags for new surfaces. 
*/ - for (i = 0; i < new_state->nr_cbufs; i++) { - tex = r300_texture(new_state->cbufs[i]->texture); - level = new_state->cbufs[i]->level; - - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), - tex->microtile, - tex->mip_macrotile[level]); + for (i = 0; i < state->nr_cbufs; i++) { + r300_tex_set_tiling_flags(r300, + r300_texture(state->cbufs[i]->texture), + state->cbufs[i]->level); } - if (new_state->zsbuf) { - tex = r300_texture(new_state->zsbuf->texture); - level = new_state->zsbuf->level; - - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), - tex->microtile, - tex->mip_macrotile[level]); + if (state->zsbuf) { + r300_tex_set_tiling_flags(r300, + r300_texture(state->zsbuf->texture), + state->zsbuf->level); } } @@ -654,26 +671,49 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, surf->zslice, surf->face, surf->level, util_format_short_name(surf->format), - rtex->macrotile ? "YES" : " NO", rtex->microtile ? "YES" : " NO", - rtex->hwpitch[0], tex->width0, tex->height0, tex->depth0, + rtex->desc.macrotile[0] ? "YES" : " NO", + rtex->desc.microtile ? "YES" : " NO", + rtex->desc.stride_in_pixels[0], + tex->width0, tex->height0, tex->depth0, tex->last_level, util_format_short_name(tex->format)); } +void r300_mark_fb_state_dirty(struct r300_context *r300, + enum r300_fb_state_change change) +{ + struct pipe_framebuffer_state *state = r300->fb_state.state; + + /* What is marked as dirty depends on the enum r300_fb_state_change. */ + r300->gpu_flush.dirty = TRUE; + r300->fb_state.dirty = TRUE; + r300->hyperz_state.dirty = TRUE; + + if (change == R300_CHANGED_FB_STATE) { + r300->aa_state.dirty = TRUE; + r300->fb_state_pipelined.dirty = TRUE; + } + + /* Now compute the fb_state atom size. 
*/ + r300->fb_state.size = 2 + (8 * state->nr_cbufs); + + if (r300->cbzb_clear) + r300->fb_state.size += 10; + else if (state->zsbuf) + r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14; + + /* The size of the rest of atoms stays the same. */ +} + static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; - if (state->nr_cbufs > 4) { - fprintf(stderr, "r300: Implementation error: Too many MRTs in %s, " - "refusing to bind framebuffer state!\n", __FUNCTION__); - return; - } - if (r300->screen->caps.is_r500) { max_width = max_height = 4096; } else if (r300->screen->caps.is_r400) { @@ -692,8 +732,6 @@ static void draw_flush(r300->draw); } - r300->fb_state.dirty = TRUE; - /* If nr_cbufs is changed from zero to non-zero or vice versa... */ if (!!old_state->nr_cbufs != !!state->nr_cbufs) { r300->blend_state.dirty = TRUE; @@ -704,12 +742,11 @@ static void } /* The tiling flags are dependent on the surface miplevel, unfortunately. */ - r300_fb_set_tiling_flags(r300, r300->fb_state.state, state); + r300_fb_set_tiling_flags(r300, state); - memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); + util_assign_framebuffer_state(r300->fb_state.state, state); - r300->fb_state.size = (10 * state->nr_cbufs) + (2 * (4 - state->nr_cbufs)) + - (state->zsbuf ? 10 : 0) + 11; + r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); /* Polygon offset depends on the zbuffer bit depth. */ if (state->zsbuf && r300->polygon_offset_enabled) { @@ -728,6 +765,30 @@ static void } } + /* Set up AA config. 
*/ + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; + + switch (state->cbufs[0]->texture->nr_samples) { + case 2: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; + break; + case 3: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; + break; + case 4: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; + break; + case 6: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; + break; + } + } else { + aa->aa_config = 0; + } + } + if (DBG_ON(r300, DBG_FB)) { fprintf(stderr, "r300: set_framebuffer_state:\n"); for (i = 0; i < state->nr_cbufs; i++) { @@ -826,6 +887,27 @@ static void* r300_create_rs_state(struct pipe_context* pipe, struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state); int i; float psiz; + uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ + uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ + uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ + uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ + uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ + uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ + uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ + uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ + uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ + uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */ + + /* Specifies top of Raster pipe specific enable controls, + * i.e. 
texture coordinates stuffing for points, lines, triangles */ + uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */ + + /* Point sprites texture coordinates, 0: lower left, 1: upper right */ + float point_texcoord_left; /* R300_GA_POINT_S0: 0x4200 */ + float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */ + float point_texcoord_right; /* R300_GA_POINT_S1: 0x4208 */ + float point_texcoord_top = 0; /* R300_GA_POINT_T1: 0x420c */ + CB_LOCALS; /* Copy rasterizer state. */ rs->rs = *state; @@ -835,18 +917,18 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */ #ifdef PIPE_ARCH_LITTLE_ENDIAN - rs->vap_control_status = R300_VC_NO_SWAP; + vap_control_status = R300_VC_NO_SWAP; #else - rs->vap_control_status = R300_VC_32BIT_SWAP; + vap_control_status = R300_VC_32BIT_SWAP; #endif /* If no TCL engine is present, turn off the HW TCL. */ if (!r300_screen(pipe->screen)->caps.has_tcl) { - rs->vap_control_status |= R300_VAP_TCL_BYPASS; + vap_control_status |= R300_VAP_TCL_BYPASS; } /* Point size width and height. */ - rs->point_size = + point_size = pack_float_16_6x(state->point_size) | (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT); @@ -856,68 +938,70 @@ static void* r300_create_rs_state(struct pipe_context* pipe, * Clamp to [0, max FB size] */ psiz = pipe->screen->get_paramf(pipe->screen, PIPE_CAP_MAX_POINT_WIDTH); - rs->point_minmax = + point_minmax = pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT; } else { /* We cannot disable the point-size vertex output, * so clamp it. */ psiz = state->point_size; - rs->point_minmax = + point_minmax = (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) | (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT); } /* Line control. 
*/ - rs->line_control = pack_float_16_6x(state->line_width) | + line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; /* Enable polygon mode */ + polygon_mode = 0; if (state->fill_front != PIPE_POLYGON_MODE_FILL || state->fill_back != PIPE_POLYGON_MODE_FILL) { - rs->polygon_mode = R300_GA_POLY_MODE_DUAL; + polygon_mode = R300_GA_POLY_MODE_DUAL; } /* Front face */ if (state->front_ccw) - rs->cull_mode = R300_FRONT_FACE_CCW; + cull_mode = R300_FRONT_FACE_CCW; else - rs->cull_mode = R300_FRONT_FACE_CW; + cull_mode = R300_FRONT_FACE_CW; /* Polygon offset */ + polygon_offset_enable = 0; if (util_get_offset(state, state->fill_front)) { - rs->polygon_offset_enable |= R300_FRONT_ENABLE; + polygon_offset_enable |= R300_FRONT_ENABLE; } if (util_get_offset(state, state->fill_back)) { - rs->polygon_offset_enable |= R300_BACK_ENABLE; + polygon_offset_enable |= R300_BACK_ENABLE; } + rs->polygon_offset_enable = polygon_offset_enable != 0; + /* Polygon mode */ - if (rs->polygon_mode) { - rs->polygon_mode |= + if (polygon_mode) { + polygon_mode |= r300_translate_polygon_mode_front(state->fill_front); - rs->polygon_mode |= + polygon_mode |= r300_translate_polygon_mode_back(state->fill_back); } if (state->cull_face & PIPE_FACE_FRONT) { - rs->cull_mode |= R300_CULL_FRONT; + cull_mode |= R300_CULL_FRONT; } if (state->cull_face & PIPE_FACE_BACK) { - rs->cull_mode |= R300_CULL_BACK; - } - - if (rs->polygon_offset_enable) { - rs->depth_offset = state->offset_units; - rs->depth_scale = state->offset_scale; + cull_mode |= R300_CULL_BACK; } if (state->line_stipple_enable) { - rs->line_stipple_config = + line_stipple_config = R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE | (fui((float)state->line_stipple_factor) & R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK); /* XXX this might need to be scaled up */ - rs->line_stipple_value = state->line_stipple_pattern; + line_stipple_value = state->line_stipple_pattern; + } else { + line_stipple_config = 0; + 
line_stipple_value = 0; } if (state->flatshade) { @@ -926,35 +1010,78 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->color_control = R300_SHADE_MODEL_SMOOTH; } - rs->clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; /* Point sprites */ + stuffing_enable = 0; if (state->sprite_coord_enable) { - rs->stuffing_enable = R300_GB_POINT_STUFF_ENABLE; + stuffing_enable = R300_GB_POINT_STUFF_ENABLE; for (i = 0; i < 8; i++) { if (state->sprite_coord_enable & (1 << i)) - rs->stuffing_enable |= + stuffing_enable |= R300_GB_TEX_STR << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); } - rs->point_texcoord_left = 0.0f; - rs->point_texcoord_right = 1.0f; + point_texcoord_left = 0.0f; + point_texcoord_right = 1.0f; switch (state->sprite_coord_mode) { case PIPE_SPRITE_COORD_UPPER_LEFT: - rs->point_texcoord_top = 0.0f; - rs->point_texcoord_bottom = 1.0f; + point_texcoord_top = 0.0f; + point_texcoord_bottom = 1.0f; break; case PIPE_SPRITE_COORD_LOWER_LEFT: - rs->point_texcoord_top = 1.0f; - rs->point_texcoord_bottom = 0.0f; + point_texcoord_top = 1.0f; + point_texcoord_bottom = 0.0f; break; } } - if (state->gl_rasterization_rules) { - rs->multisample_position_0 = 0x66666666; - rs->multisample_position_1 = 0x6666666; + /* Build the main command buffer. 
*/ + BEGIN_CB(rs->cb_main, 25); + OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status); + OUT_CB_REG(R300_GA_POINT_SIZE, point_size); + OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2); + OUT_CB(point_minmax); + OUT_CB(line_control); + OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); + OUT_CB(polygon_offset_enable); + rs->cull_mode_index = 9; + OUT_CB(cull_mode); + OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config); + OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value); + OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode); + OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule); + OUT_CB_REG(R300_GB_ENABLE, stuffing_enable); + OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4); + OUT_CB_32F(point_texcoord_left); + OUT_CB_32F(point_texcoord_bottom); + OUT_CB_32F(point_texcoord_right); + OUT_CB_32F(point_texcoord_top); + END_CB; + + /* Build the two command buffers for polygon offset setup. */ + if (polygon_offset_enable) { + float scale = state->offset_scale * 12; + float offset = state->offset_units * 4; + + BEGIN_CB(rs->cb_poly_offset_zb16, 5); + OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + END_CB; + + offset = state->offset_units * 2; + + BEGIN_CB(rs->cb_poly_offset_zb24, 5); + OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + END_CB; } return (void*)rs; @@ -986,8 +1113,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) } UPDATE_STATE(state, r300->rs_state); - r300->rs_state.size = 25 + (r300->polygon_offset_enabled ? 5 : 0) + - (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0) ? 5 : 0); + r300->rs_state.size = 25 + (r300->polygon_offset_enabled ? 
5 : 0); if (last_sprite_coord_enable != r300->sprite_coord_enable || last_two_sided_color != r300->two_sided_color) { @@ -1056,7 +1182,7 @@ static void* lod_bias = CLAMP((int)(state->lod_bias * 32 + 1), -(1 << 9), (1 << 9) - 1); - sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT; + sampler->filter1 |= (lod_bias << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK; /* This is very high quality anisotropic filtering for R5xx. * It's good for benchmarking the performance of texturing but @@ -1170,7 +1296,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. */ texture = r300_texture(views[i]->texture); - if (texture->uses_pitch) { + if (texture->desc.is_npot) { r300->fs_rc_constant_state.dirty = TRUE; } @@ -1204,6 +1330,7 @@ r300_create_sampler_view(struct pipe_context *pipe, { struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); struct r300_texture *tex = r300_texture(texture); + boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; if (view) { view->base = *templ; @@ -1219,8 +1346,9 @@ r300_create_sampler_view(struct pipe_context *pipe, view->format = tex->tx_format; view->format.format1 |= r300_translate_texformat(templ->format, - view->swizzle); - if (r300_screen(pipe->screen)->caps.is_r500) { + view->swizzle, + is_r500); + if (is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } } @@ -1544,7 +1672,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe, const struct pipe_shader_state* shader) { struct r300_context* r300 = r300_context(pipe); - struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader); /* Copy state directly into shader. 
*/ @@ -1621,8 +1748,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_constant_buffer *cbuf; - struct pipe_transfer *tr; - float *mapped; + uint32_t *mapped = r300_buffer(buf)->user_buffer; int max_size = 0, max_size_bytes = 0, clamped_size = 0; switch (shader) { @@ -1645,8 +1771,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, max_size_bytes = max_size * 4 * sizeof(float); if (buf == NULL || buf->width0 == 0 || - (mapped = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &tr)) == NULL) - { + (mapped = r300_buffer(buf)->constant_buffer) == NULL) { cbuf->count = 0; return; } @@ -1664,17 +1789,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, clamped_size = MIN2(buf->width0, max_size_bytes); cbuf->count = clamped_size / (4 * sizeof(float)); - - if (shader == PIPE_SHADER_FRAGMENT && !r300->screen->caps.is_r500) { - unsigned i,j; - - /* Convert constants to float24. */ - for (i = 0; i < cbuf->count; i++) - for (j = 0; j < 4; j++) - cbuf->constants[i][j] = pack_float24(mapped[i*4+j]); - } else { - memcpy(cbuf->constants, mapped, clamped_size); - } + cbuf->ptr = mapped; } if (shader == PIPE_SHADER_VERTEX) { @@ -1690,8 +1805,6 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, } else if (shader == PIPE_SHADER_FRAGMENT) { r300->fs_constants.dirty = TRUE; } - - pipe_buffer_unmap(pipe, buf, tr); } void r300_init_state_functions(struct r300_context* r300) diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 3aa8deb63c..a85db27064 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -102,7 +102,8 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) * they won't be rasterized. 
*/ gen_count = 0; for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) { - if (vs_outputs->generic[i] != ATTR_UNUSED) { + if (vs_outputs->generic[i] != ATTR_UNUSED && + !(r300->sprite_coord_enable & (1 << i))) { r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, vs_outputs->generic[i]); gen_count++; @@ -118,7 +119,7 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) /* WPOS. */ if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED && gen_count < 8) { - DBG(r300, DBG_DRAW, "draw_emit_attrib: WPOS, index: %i\n", + DBG(r300, DBG_SWTCL, "draw_emit_attrib: WPOS, index: %i\n", vs_outputs->wpos); r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, vs_outputs->wpos); @@ -140,18 +141,19 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300) /* For each Draw attribute, route it to the fragment shader according * to the vs_output_tab. */ attrib_count = vinfo->num_attribs; - DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); + DBG(r300, DBG_SWTCL, "r300: attrib count: %d\n", attrib_count); for (i = 0; i < attrib_count; i++) { - DBG(r300, DBG_DRAW, "r300: attrib: index %d, interp %d, emit %d," - " vs_output_tab %d\n", vinfo->attrib[i].src_index, - vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - vs_output_tab[i]); - - /* Make sure we have a proper destination for our attribute. */ - assert(vs_output_tab[i] != -1); + if (vs_output_tab[i] == -1) { + assert(0); + abort(); + } format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + DBG(r300, DBG_SWTCL, + "r300: swtcl_vertex_psc [%i] <- %s\n", + vs_output_tab[i], util_format_short_name(format)); + /* Obtain the type of data in this attribute. 
*/ type = r300_translate_vertex_data_type(format); if (type == R300_INVALID_FORMAT) { @@ -526,15 +528,9 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) struct r300_sampler_state *sampler; struct r300_sampler_view *view; struct r300_texture *tex; - unsigned min_level, max_level, i, size; + unsigned min_level, max_level, i, j, size; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); - unsigned char depth_swizzle[4] = { - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X - }; /* The KIL opcode fix, see below. */ if (!count && !r300->screen->caps.is_r500) @@ -561,14 +557,29 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* Assign a texture cache region. */ texstate->format.format1 |= view->texcache_region; - /* If compare mode is disabled, the sampler view swizzles - * are stored in the format. - * Otherwise, swizzles must be applied after the compare mode - * in the fragment shader. */ - if (util_format_is_depth_or_stencil(tex->b.b.format)) { + /* Depth textures are kinda special. */ + if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + unsigned char depth_swizzle[4]; + + if (!r300->screen->caps.is_r500 && + util_format_get_blocksizebits(tex->desc.b.b.format) == 32) { + /* X24x8 is sampled as Y16X16 on r3xx-r4xx. + * The depth here is at the Y component. */ + for (j = 0; j < 4; j++) + depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_Y; + } else { + for (j = 0; j < 4; j++) + depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_X; + } + + /* If compare mode is disabled, sampler view swizzles + * are stored in the format. + * Otherwise, the swizzles must be applied after the compare + * mode in the fragment shader. 
*/ if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) { texstate->format.format1 |= - r300_get_swizzle_combined(depth_swizzle, view->swizzle); + r300_get_swizzle_combined(depth_swizzle, + view->swizzle); } else { texstate->format.format1 |= r300_get_swizzle_combined(depth_swizzle, 0); @@ -576,12 +587,12 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } /* to emulate 1D textures through 2D ones correctly */ - if (tex->b.b.target == PIPE_TEXTURE_1D) { + if (tex->desc.b.b.target == PIPE_TEXTURE_1D) { texstate->filter0 &= ~R300_TX_WRAP_T_MASK; texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - if (tex->uses_pitch) { + if (tex->desc.is_npot) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. */ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; @@ -608,7 +619,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* determine min/max levels */ /* the MAX_MIP level is the largest (finest) one */ max_level = MIN3(sampler->max_lod + view->base.first_level, - tex->b.b.last_level, view->base.last_level); + tex->desc.b.b.last_level, view->base.last_level); min_level = MIN2(sampler->min_lod + view->base.first_level, max_level); texstate->format.format0 |= R300_TX_NUM_LEVELS(max_level); diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c deleted file mode 100644 index e67a0ae244..0000000000 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright 2009 Joakim Sindholt <opensource@zhasha.com> - * Corbin Simpson <MostAwesomeDude@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or 
sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "r300_context.h" -#include "r300_cs.h" -#include "r300_reg.h" -#include "r300_screen.h" -#include "r300_state_invariant.h" - -/* Calculate and emit invariant state. This is data that the 3D engine - * will probably want at the beginning of every CS, but it's not currently - * handled by any CSO setup, and in addition it doesn't really change much. - * - * Note that eventually this should be empty, but it's useful for development - * and general unduplication of code. */ -void r300_emit_invariant_state(struct r300_context* r300, - unsigned size, void* state) -{ - CS_LOCALS(r300); - - BEGIN_CS(12 + (r300->screen->caps.has_tcl ? 
2 : 0)); - - /*** Graphics Backend (GB) ***/ - /* Source of fog depth */ - OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); - - /*** Fog (FG) ***/ - OUT_CS_REG(R300_FG_FOG_BLEND, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); - - /*** VAP ***/ - /* Sign/normalize control */ - OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); - /* TCL-only stuff */ - if (r300->screen->caps.has_tcl) { - /* Amount of time to wait for vertex fetches in PVS */ - OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); - } - - END_CS; - - /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(38 + (r300->screen->caps.has_tcl ? 7 : 0) + - (r300->screen->caps.is_rv350 ? 4 : 0) + - (r300->screen->caps.is_r400 ? 2 : 0)); - - if (r300->screen->caps.has_tcl) { - /*Flushing PVS is required before the VAP_GB registers can be changed*/ - OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0); - OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - } - /* XXX line tex stuffing */ - OUT_CS_REG_SEQ(R300_GA_LINE_S0, 1); - OUT_CS_32F(0.0); - OUT_CS_REG_SEQ(R300_GA_LINE_S1, 1); - OUT_CS_32F(1.0); - OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 | - (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT)); - /* XXX this big chunk should be refactored into rs_state */ - OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000); - OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000); - OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001); - OUT_CS_REG(R300_GA_OFFSET, 0x00000000); - OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412); - OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000); - OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000); - OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); - OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000); - OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); - OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); - OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - - if (r300->screen->caps.is_rv350) { - 
OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); - } - - OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); - OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000); - if (r300->screen->caps.is_r400) - OUT_CS_REG(R400_US_CODE_BANK, 0); - END_CS; -} diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h deleted file mode 100644 index 83d031c7fe..0000000000 --- a/src/gallium/drivers/r300/r300_state_invariant.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef R300_STATE_INVARIANT_H -#define R300_STATE_INVARIANT_H - -struct r300_context; - -void r300_emit_invariant_state(struct r300_context* r300, - unsigned size, void* state); - -#endif /* R300_STATE_INVARIANT_H */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index ddb6600056..fcdca5605e 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -26,6 +26,7 @@ #include "r300_context.h" #include "r300_reg.h" +#include "r300_texture_desc.h" #include "r300_transfer.h" #include "r300_screen.h" #include "r300_winsys.h" @@ -36,12 +37,6 @@ #include "util/u_memory.h" #include "pipe/p_screen.h" -#include "state_tracker/drm_api.h" - -enum r300_dim { - DIM_WIDTH = 0, - DIM_HEIGHT = 1 -}; unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view) @@ -110,7 +105,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, * The FORMAT specifies how the texture sampler will treat the texture, and * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */ uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view) + const unsigned char *swizzle_view, + boolean is_r500) { uint32_t result = 0; const struct util_format_description *desc; @@ -135,7 +131,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, return R300_TX_FORMAT_X16; case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return R500_TX_FORMAT_Y8X24; + if (is_r500) + return R500_TX_FORMAT_Y8X24; + else + return R300_TX_FORMAT_Y16X16; default: return ~0; /* Unsupported. 
*/ } @@ -538,26 +537,27 @@ boolean r300_is_zs_format_supported(enum pipe_format format) boolean r300_is_sampler_format_supported(enum pipe_format format) { - return r300_translate_texformat(format, 0) != ~0; + return r300_translate_texformat(format, 0, TRUE) != ~0; } static void r300_texture_setup_immutable_state(struct r300_screen* screen, struct r300_texture* tex) { struct r300_texture_format_state* f = &tex->tx_format; - struct pipe_resource *pt = &tex->b.b; + struct pipe_resource *pt = &tex->desc.b.b; boolean is_r500 = screen->caps.is_r500; /* Set sampler state. */ f->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); - if (tex->uses_pitch) { + if (tex->desc.uses_stride_addressing) { /* rectangles love this */ f->format0 |= R300_TX_PITCH_EN; - f->format2 = (tex->hwpitch[0] - 1) & 0x1fff; + f->format2 = (tex->desc.stride_in_pixels[0] - 1) & 0x1fff; } else { - /* power of two textures (3D, mipmaps, and no pitch) */ + /* Power of two textures (3D, mipmaps, and no pitch), + * also NPOT textures with a width being POT. */ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); } @@ -580,8 +580,8 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, } } - f->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) | - R300_TXO_MICRO_TILE(tex->microtile); + f->tile_config = R300_TXO_MACRO_TILE(tex->desc.macrotile[0]) | + R300_TXO_MICRO_TILE(tex->desc.microtile); } static void r300_texture_setup_fb_state(struct r300_screen* screen, @@ -590,23 +590,23 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, unsigned i; /* Set framebuffer state. 
*/ - if (util_format_is_depth_or_stencil(tex->b.b.format)) { - for (i = 0; i <= tex->b.b.last_level; i++) { + if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + for (i = 0; i <= tex->desc.b.b.last_level; i++) { tex->fb_state.pitch[i] = - tex->hwpitch[i] | - R300_DEPTHMACROTILE(tex->mip_macrotile[i]) | - R300_DEPTHMICROTILE(tex->microtile); + tex->desc.stride_in_pixels[i] | + R300_DEPTHMACROTILE(tex->desc.macrotile[i]) | + R300_DEPTHMICROTILE(tex->desc.microtile); } - tex->fb_state.format = r300_translate_zsformat(tex->b.b.format); + tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format); } else { - for (i = 0; i <= tex->b.b.last_level; i++) { + for (i = 0; i <= tex->desc.b.b.last_level; i++) { tex->fb_state.pitch[i] = - tex->hwpitch[i] | - r300_translate_colorformat(tex->b.b.format) | - R300_COLOR_TILE(tex->mip_macrotile[i]) | - R300_COLOR_MICROTILE(tex->microtile); + tex->desc.stride_in_pixels[i] | + r300_translate_colorformat(tex->desc.b.b.format) | + R300_COLOR_TILE(tex->desc.macrotile[i]) | + R300_COLOR_MICROTILE(tex->desc.microtile); } - tex->fb_state.format = r300_translate_out_fmt(tex->b.b.format); + tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format); } } @@ -626,282 +626,6 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen, r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex)); } -unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, - unsigned zslice, unsigned face) -{ - unsigned offset = tex->offset[level]; - - switch (tex->b.b.target) { - case PIPE_TEXTURE_3D: - assert(face == 0); - return offset + zslice * tex->layer_size[level]; - - case PIPE_TEXTURE_CUBE: - assert(zslice == 0); - return offset + face * tex->layer_size[level]; - - default: - assert(zslice == 0 && face == 0); - return offset; - } -} - -/* Returns the number of pixels that the texture should be aligned to - * in the given dimension. 
*/ -static unsigned r300_get_pixel_alignment(struct r300_texture *tex, - enum r300_buffer_tiling macrotile, - enum r300_dim dim) -{ - static const unsigned table[2][5][3][2] = - { - { - /* Macro: linear linear linear - Micro: linear tiled square-tiled */ - {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ - {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ - {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ - {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ - {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ - }, - { - /* Macro: tiled tiled tiled - Micro: linear tiled square-tiled */ - {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ - {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ - {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ - {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ - {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ - } - }; - static const unsigned aa_block[2] = {4, 8}; - unsigned res = 0; - unsigned pixsize = util_format_get_blocksize(tex->b.b.format); - - assert(macrotile <= R300_BUFFER_TILED); - assert(tex->microtile <= R300_BUFFER_SQUARETILED); - assert(pixsize <= 16); - assert(dim <= DIM_HEIGHT); - - if (tex->b.b.nr_samples > 1) { - /* Multisampled textures have their own alignment scheme. */ - if (pixsize == 4) - res = aa_block[dim]; - } else { - /* Standard alignment. */ - res = table[macrotile][util_logbase2(pixsize)][tex->microtile][dim]; - } - - assert(res); - return res; -} - -/* Return true if macrotiling should be enabled on the miplevel. */ -static boolean r300_texture_macro_switch(struct r300_texture *tex, - unsigned level, - boolean rv350_mode, - enum r300_dim dim) -{ - unsigned tile, texdim; - - tile = r300_get_pixel_alignment(tex, R300_BUFFER_TILED, dim); - if (dim == DIM_WIDTH) { - texdim = u_minify(tex->b.b.width0, level); - } else { - texdim = u_minify(tex->b.b.height0, level); - } - - /* See TX_FILTER1_n.MACRO_SWITCH. 
*/ - if (rv350_mode) { - return texdim >= tile; - } else { - return texdim > tile; - } -} - -/** - * Return the stride, in bytes, of the texture images of the given texture - * at the given level. - */ -unsigned r300_texture_get_stride(struct r300_screen* screen, - struct r300_texture* tex, unsigned level) -{ - unsigned tile_width, width, stride; - - if (tex->stride_override) - return tex->stride_override; - - /* Check the level. */ - if (level > tex->b.b.last_level) { - SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", - __FUNCTION__, level, tex->b.b.last_level); - return 0; - } - - width = u_minify(tex->b.b.width0, level); - - if (util_format_is_plain(tex->b.b.format)) { - tile_width = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], - DIM_WIDTH); - width = align(width, tile_width); - - stride = util_format_get_stride(tex->b.b.format, width); - - /* Some IGPs need a minimum stride of 64 bytes, hmm... - * This doesn't seem to apply to tiled textures, according to r300c. */ - if (!tex->microtile && !tex->mip_macrotile[level] && - (screen->caps.family == CHIP_FAMILY_RS600 || - screen->caps.family == CHIP_FAMILY_RS690 || - screen->caps.family == CHIP_FAMILY_RS740)) { - return stride < 64 ? 64 : stride; - } - - /* The alignment to 32 bytes is sort of implied by the layout... */ - return stride; - } else { - return align(util_format_get_stride(tex->b.b.format, width), 32); - } -} - -static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, - unsigned level) -{ - unsigned height, tile_height; - - height = u_minify(tex->b.b.height0, level); - - if (util_format_is_plain(tex->b.b.format)) { - tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], - DIM_HEIGHT); - height = align(height, tile_height); - - /* This is needed for the kernel checker, unfortunately. 
*/ - height = util_next_power_of_two(height); - } - - return util_format_get_nblocksy(tex->b.b.format, height); -} - -static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_texture *tex) -{ - /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures - * incorrectly. This is a workaround to prevent CS from being rejected. */ - - unsigned i, size; - - if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - tex->b.b.target == PIPE_TEXTURE_3D && - tex->b.b.last_level > 0) { - size = 0; - - for (i = 0; i <= tex->b.b.last_level; i++) { - size += r300_texture_get_stride(screen, tex, i) * - r300_texture_get_nblocksy(tex, i); - } - - size *= tex->b.b.depth0; - tex->size = size; - } -} - -static void r300_setup_miptree(struct r300_screen* screen, - struct r300_texture* tex) -{ - struct pipe_resource* base = &tex->b.b; - unsigned stride, size, layer_size, nblocksy, i; - boolean rv350_mode = screen->caps.is_rv350; - - SCREEN_DBG(screen, DBG_TEXALLOC, - "r300: Making miptree for texture, format %s\n", - util_format_short_name(base->format)); - - for (i = 0; i <= base->last_level; i++) { - /* Let's see if this miplevel can be macrotiled. */ - tex->mip_macrotile[i] = - (tex->macrotile == R300_BUFFER_TILED && - r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? 
- R300_BUFFER_TILED : R300_BUFFER_LINEAR; - - stride = r300_texture_get_stride(screen, tex, i); - nblocksy = r300_texture_get_nblocksy(tex, i); - layer_size = stride * nblocksy; - - if (base->nr_samples) { - layer_size *= base->nr_samples; - } - - if (base->target == PIPE_TEXTURE_CUBE) - size = layer_size * 6; - else - size = layer_size * u_minify(base->depth0, i); - - tex->offset[i] = tex->size; - tex->size = tex->offset[i] + size; - tex->layer_size[i] = layer_size; - tex->pitch[i] = stride / util_format_get_blocksize(base->format); - tex->hwpitch[i] = - tex->pitch[i] * util_format_get_blockwidth(base->format); - - SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", - i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride, tex->size, - tex->mip_macrotile[i] ? "TRUE" : "FALSE"); - } -} - -static void r300_setup_flags(struct r300_texture* tex) -{ - tex->uses_pitch = !util_is_power_of_two(tex->b.b.width0) || - !util_is_power_of_two(tex->b.b.height0) || - tex->stride_override; -} - -static void r300_setup_tiling(struct pipe_screen *screen, - struct r300_texture *tex) -{ - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - enum pipe_format format = tex->b.b.format; - boolean rv350_mode = r300_screen(screen)->caps.is_rv350; - boolean is_zb = util_format_is_depth_or_stencil(format); - boolean dbg_no_tiling = SCREEN_DBG_ON(r300_screen(screen), DBG_NO_TILING); - - if (!util_format_is_plain(format)) { - return; - } - - /* If height == 1, disable microtiling except for zbuffer. */ - if (!is_zb && (tex->b.b.height0 == 1 || dbg_no_tiling)) { - return; - } - - /* Set microtiling. 
*/ - switch (util_format_get_blocksize(format)) { - case 1: - case 4: - tex->microtile = R300_BUFFER_TILED; - break; - - case 2: - case 8: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - tex->microtile = R300_BUFFER_SQUARETILED; - } - break; - } - - if (dbg_no_tiling) { - return; - } - - /* Set macrotiling. */ - if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { - tex->macrotile = R300_BUFFER_TILED; - } -} - static unsigned r300_texture_is_referenced(struct pipe_context *context, struct pipe_resource *texture, unsigned face, unsigned level) @@ -909,7 +633,8 @@ static unsigned r300_texture_is_referenced(struct pipe_context *context, struct r300_context *r300 = r300_context(context); struct r300_texture *rtex = (struct r300_texture *)texture; - if (r300->rws->is_buffer_referenced(r300->rws, rtex->buffer, R300_REF_CS)) + if (r300->rws->cs_is_buffer_referenced(r300->cs, + rtex->buffer, R300_REF_CS)) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; return PIPE_UNREFERENCED; @@ -936,12 +661,11 @@ static boolean r300_texture_get_handle(struct pipe_screen* screen, return FALSE; } - whandle->stride = r300_texture_get_stride(r300_screen(screen), tex, 0); - - return rws->buffer_get_handle(rws, tex->buffer, whandle); + return rws->buffer_get_handle(rws, tex->buffer, + tex->desc.stride_in_bytes[0], whandle); } -struct u_resource_vtbl r300_texture_vtbl = +struct u_resource_vtbl r300_texture_vtbl = { r300_texture_get_handle, /* get_handle */ r300_texture_destroy, /* resource_destroy */ @@ -954,17 +678,69 @@ struct u_resource_vtbl r300_texture_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; -/* Create a new texture. */ -struct pipe_resource* r300_texture_create(struct pipe_screen* screen, - const struct pipe_resource* base) +/* The common texture constructor. 
*/ +static struct r300_texture* +r300_texture_create_object(struct r300_screen *rscreen, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size, + struct r300_winsys_buffer *buffer) { - struct r300_texture* tex = CALLOC_STRUCT(r300_texture); - struct r300_screen* rscreen = r300_screen(screen); - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - + struct r300_winsys_screen *rws = rscreen->rws; + struct r300_texture *tex = CALLOC_STRUCT(r300_texture); if (!tex) { + if (buffer) + rws->buffer_reference(rws, &buffer, NULL); + return NULL; + } + + /* Initialize the descriptor. */ + if (!r300_texture_desc_init(rscreen, &tex->desc, base, + microtile, macrotile, + stride_in_bytes_override, + max_buffer_size)) { + if (buffer) + rws->buffer_reference(rws, &buffer, NULL); + FREE(tex); return NULL; } + /* Initialize the hardware state. */ + r300_texture_setup_immutable_state(rscreen, tex); + r300_texture_setup_fb_state(rscreen, tex); + + tex->desc.b.vtbl = &r300_texture_vtbl; + pipe_reference_init(&tex->desc.b.b.reference, 1); + tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? + R300_DOMAIN_GTT : + R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + tex->buffer = buffer; + + /* Create the backing buffer if needed. */ + if (!tex->buffer) { + tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048, + base->bind, base->usage, tex->domain); + + if (!tex->buffer) { + FREE(tex); + return NULL; + } + } + + rws->buffer_set_tiling(rws, tex->buffer, + tex->desc.microtile, tex->desc.macrotile[0], + tex->desc.stride_in_bytes[0]); + + return tex; +} + +/* Create a new texture. */ +struct pipe_resource *r300_texture_create(struct pipe_screen *screen, + const struct pipe_resource *base) +{ + struct r300_screen *rscreen = r300_screen(screen); + enum r300_buffer_tiling microtile, macrotile; /* Refuse to create a texture with size 0. 
*/ if (!base->width0 || @@ -974,58 +750,70 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, fprintf(stderr, "r300: texture_create: " "Got invalid texture dimensions: %ix%ix%i\n", base->width0, base->height0, base->depth0); - FREE(tex); return NULL; } - tex->b.b = *base; - tex->b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->b.b.reference, 1); - tex->b.b.screen = screen; + if ((base->flags & R300_RESOURCE_FLAG_TRANSFER) || + (base->bind & PIPE_BIND_SCANOUT)) { + microtile = R300_BUFFER_LINEAR; + macrotile = R300_BUFFER_LINEAR; + } else { + microtile = R300_BUFFER_SELECT_LAYOUT; + macrotile = R300_BUFFER_SELECT_LAYOUT; + } + + return (struct pipe_resource*) + r300_texture_create_object(rscreen, base, microtile, macrotile, + 0, 0, NULL); +} + +struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, + const struct pipe_resource *base, + struct winsys_handle *whandle) +{ + struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; + struct r300_screen *rscreen = r300_screen(screen); + struct r300_winsys_buffer *buffer; + enum r300_buffer_tiling microtile, macrotile; + unsigned stride, size; - r300_setup_flags(tex); - if (!(base->flags & R300_RESOURCE_FLAG_TRANSFER) && - !(base->bind & PIPE_BIND_SCANOUT)) { - r300_setup_tiling(screen, tex); + /* Support only 2D textures without mipmaps */ + if (base->target != PIPE_TEXTURE_2D || + base->depth0 != 1 || + base->last_level != 0) { + return NULL; } - r300_setup_miptree(rscreen, tex); - r300_texture_3d_fix_mipmapping(rscreen, tex); - r300_texture_setup_immutable_state(rscreen, tex); - r300_texture_setup_fb_state(rscreen, tex); - SCREEN_DBG(rscreen, DBG_TEX, - "r300: texture_create: Macro: %s, Micro: %s, Pitch: %i, " - "Dim: %ix%ix%i, LastLevel: %i, Size: %i, Format: %s\n", - tex->macrotile ? "YES" : " NO", - tex->microtile ? 
"YES" : " NO", - tex->hwpitch[0], - base->width0, base->height0, base->depth0, base->last_level, - tex->size, - util_format_short_name(base->format)); + buffer = rws->buffer_from_handle(rws, whandle, &stride, &size); + if (!buffer) + return NULL; - tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? R300_DOMAIN_GTT : - R300_DOMAIN_VRAM; + rws->buffer_get_tiling(rws, buffer, &microtile, &macrotile); - tex->buffer = rws->buffer_create(rws, 2048, base->bind, tex->domain, - tex->size); + /* Enforce a microtiled zbuffer. */ + if (util_format_is_depth_or_stencil(base->format) && + microtile == R300_BUFFER_LINEAR) { + switch (util_format_get_blocksize(base->format)) { + case 4: + microtile = R300_BUFFER_TILED; + break; - if (!tex->buffer) { - FREE(tex); - return NULL; + case 2: + if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) + microtile = R300_BUFFER_SQUARETILED; + break; + } } - rws->buffer_set_tiling(rws, tex->buffer, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), - tex->microtile, - tex->macrotile); - - return (struct pipe_resource*)tex; + return (struct pipe_resource*) + r300_texture_create_object(rscreen, base, microtile, macrotile, + stride, size, buffer); } /* Not required to implement u_resource_vtbl, consider moving to another file: */ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, - struct pipe_resource* texture, + struct pipe_resource* texture, unsigned face, unsigned level, unsigned zslice, @@ -1035,6 +823,8 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, struct r300_surface* surface = CALLOC_STRUCT(r300_surface); if (surface) { + uint32_t offset, tile_height; + pipe_reference_init(&surface->base.reference, 1); pipe_resource_reference(&surface->base.texture, texture); surface->base.format = texture->format; @@ -1046,10 +836,49 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, surface->base.level = level; surface->buffer = tex->buffer; + + /* Prefer VRAM if 
there are 
multiple domains to choose from. */ surface->domain = tex->domain; - surface->offset = r300_texture_get_offset(tex, level, zslice, face); + if (surface->domain & R300_DOMAIN_VRAM) + surface->domain &= ~R300_DOMAIN_GTT; + + surface->offset = r300_texture_get_offset(&tex->desc, + level, zslice, face); surface->pitch = tex->fb_state.pitch[level]; surface->format = tex->fb_state.format; + + /* Parameters for the CBZB clear. */ + surface->cbzb_allowed = tex->desc.cbzb_allowed[level]; + surface->cbzb_width = align(surface->base.width, 64); + + /* Height must be aligned to the size of a tile. */ + tile_height = r300_get_pixel_alignment(tex->desc.b.b.format, + tex->desc.b.b.nr_samples, + tex->desc.microtile, + tex->desc.macrotile[level], + DIM_HEIGHT); + + surface->cbzb_height = align((surface->base.height + 1) / 2, + tile_height); + + /* Offset must be aligned to 2K and must point at the beginning + * of a scanline. */ + offset = surface->offset + + tex->desc.stride_in_bytes[level] * surface->cbzb_height; + surface->cbzb_midpoint_offset = offset & ~2047; + + surface->cbzb_pitch = surface->pitch & 0x1ffffc; + + if (util_format_get_blocksizebits(surface->base.format) == 32) + surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; + else + surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z; + + SCREEN_DBG(r300_screen(screen), DBG_CBZB, + "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n", + surface->cbzb_width, surface->cbzb_height, + offset & 2047, + tex->desc.macrotile[level] ? 
"YES" : " NO"); } return &surface->base; @@ -1062,88 +891,3 @@ void r300_tex_surface_destroy(struct pipe_surface* s) pipe_resource_reference(&s->texture, NULL); FREE(s); } - -struct pipe_resource* -r300_texture_from_handle(struct pipe_screen* screen, - const struct pipe_resource* base, - struct winsys_handle *whandle) -{ - struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; - struct r300_screen* rscreen = r300_screen(screen); - struct r300_winsys_buffer *buffer; - struct r300_texture* tex; - boolean override_zb_flags; - - /* Support only 2D textures without mipmaps */ - if (base->target != PIPE_TEXTURE_2D || - base->depth0 != 1 || - base->last_level != 0) { - return NULL; - } - - buffer = rws->buffer_from_handle(rws, whandle->handle); - if (!buffer) { - return NULL; - } - - tex = CALLOC_STRUCT(r300_texture); - if (!tex) { - return NULL; - } - - tex->b.b = *base; - tex->b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->b.b.reference, 1); - tex->b.b.screen = screen; - tex->domain = R300_DOMAIN_VRAM; - - tex->stride_override = whandle->stride; - - /* one ref already taken */ - tex->buffer = buffer; - - rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile); - r300_setup_flags(tex); - SCREEN_DBG(rscreen, DBG_TEX, - "r300: texture_from_handle: Macro: %s, Micro: %s, " - "Pitch: % 4i, Dim: %ix%i, Format: %s\n", - tex->macrotile ? "YES" : " NO", - tex->microtile ? "YES" : " NO", - whandle->stride / util_format_get_blocksize(base->format), - base->width0, base->height0, - util_format_short_name(base->format)); - - /* Enforce microtiled zbuffer. 
*/ - override_zb_flags = util_format_is_depth_or_stencil(base->format) && - tex->microtile == R300_BUFFER_LINEAR; - - if (override_zb_flags) { - switch (util_format_get_blocksize(base->format)) { - case 4: - tex->microtile = R300_BUFFER_TILED; - break; - - case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - tex->microtile = R300_BUFFER_SQUARETILED; - break; - } - /* Pass through. */ - - default: - override_zb_flags = FALSE; - } - } - - r300_setup_miptree(rscreen, tex); - r300_texture_setup_immutable_state(rscreen, tex); - r300_texture_setup_fb_state(rscreen, tex); - - if (override_zb_flags) { - rws->buffer_set_tiling(rws, tex->buffer, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), - tex->microtile, - tex->macrotile); - } - return (struct pipe_resource*)tex; -} diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 99e7694254..a4524320fd 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -35,16 +35,11 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view); uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view); + const unsigned char *swizzle_view, + boolean is_r500); uint32_t r500_tx_format_msb_bit(enum pipe_format format); -unsigned r300_texture_get_stride(struct r300_screen* screen, - struct r300_texture* tex, unsigned level); - -unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, - unsigned zslice, unsigned face); - void r300_texture_reinterpret_format(struct pipe_screen *screen, struct pipe_resource *tex, enum pipe_format new_format); diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c new file mode 100644 index 0000000000..343089bf2c --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -0,0 +1,465 @@ +/* + * Copyright 2008 Corbin Simpson 
<MostAwesomeDude@gmail.com> + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_texture_desc.h" + +#include "r300_context.h" +#include "r300_winsys.h" + +#include "util/u_format.h" + +/* Returns the number of pixels that the texture should be aligned to + * in the given dimension. 
*/ +unsigned r300_get_pixel_alignment(enum pipe_format format, + unsigned num_samples, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + enum r300_dim dim) +{ + static const unsigned table[2][5][3][2] = + { + { + /* Macro: linear linear linear + Micro: linear tiled square-tiled */ + {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ + {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + }, + { + /* Macro: tiled tiled tiled + Micro: linear tiled square-tiled */ + {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ + {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ + {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ + {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ + {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + } + }; + static const unsigned aa_block[2] = {4, 8}; + unsigned tile = 0; + unsigned pixsize = util_format_get_blocksize(format); + + assert(macrotile <= R300_BUFFER_TILED); + assert(microtile <= R300_BUFFER_SQUARETILED); + assert(pixsize <= 16); + assert(dim <= DIM_HEIGHT); + + if (num_samples > 1) { + /* Multisampled textures have their own alignment scheme. */ + if (pixsize == 4) + tile = aa_block[dim]; + /* XXX FP16 AA. */ + } else { + /* Standard alignment. */ + tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; + } + + assert(tile); + return tile; +} + +/* Return true if macrotiling should be enabled on the miplevel. 
*/ +static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, + unsigned level, + boolean rv350_mode, + enum r300_dim dim) +{ + unsigned tile, texdim; + + tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, + desc->microtile, R300_BUFFER_TILED, dim); + if (dim == DIM_WIDTH) { + texdim = u_minify(desc->b.b.width0, level); + } else { + texdim = u_minify(desc->b.b.height0, level); + } + + /* See TX_FILTER1_n.MACRO_SWITCH. */ + if (rv350_mode) { + return texdim >= tile; + } else { + return texdim > tile; + } +} + +/** + * Return the stride, in bytes, of the texture image of the given texture + * at the given level. + */ +static unsigned r300_texture_get_stride(struct r300_screen *screen, + struct r300_texture_desc *desc, + unsigned level) +{ + unsigned tile_width, width, stride; + + if (desc->stride_in_bytes_override) + return desc->stride_in_bytes_override; + + /* Check the level. */ + if (level > desc->b.b.last_level) { + SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", + __FUNCTION__, level, desc->b.b.last_level); + return 0; + } + + width = u_minify(desc->b.b.width0, level); + + if (util_format_is_plain(desc->b.b.format)) { + tile_width = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_WIDTH); + width = align(width, tile_width); + + stride = util_format_get_stride(desc->b.b.format, width); + + /* Some IGPs need a minimum stride of 64 bytes, hmm... + * This doesn't seem to apply to tiled textures, according to r300c. */ + if (!desc->microtile && !desc->macrotile[level] && + (screen->caps.family == CHIP_FAMILY_RS600 || + screen->caps.family == CHIP_FAMILY_RS690 || + screen->caps.family == CHIP_FAMILY_RS740)) { + return stride < 64 ? 64 : stride; + } + + /* The alignment to 32 bytes is sort of implied by the layout... 
*/ + return stride; + } else { + return align(util_format_get_stride(desc->b.b.format, width), 32); + } +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, + unsigned level, + boolean *out_aligned_for_cbzb) +{ + unsigned height, tile_height; + + height = u_minify(desc->b.b.height0, level); + + if (util_format_is_plain(desc->b.b.format)) { + tile_height = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_HEIGHT); + height = align(height, tile_height); + + /* This is needed for the kernel checker, unfortunately. */ + if ((desc->b.b.target != PIPE_TEXTURE_1D && + desc->b.b.target != PIPE_TEXTURE_2D) || + desc->b.b.last_level != 0) { + height = util_next_power_of_two(height); + } + + /* See if the CBZB clear can be used on the buffer, + * taking the texture size into account. */ + if (out_aligned_for_cbzb) { + if (desc->macrotile[level]) { + /* When clearing, the layer (width*height) is horizontally split + * into two, and the upper and lower halves are cleared by the CB + * and ZB units, respectively. Therefore, the number of macrotiles + * in the Y direction must be even. */ + + /* Align the height so that there is an even number of macrotiles. + * Do so for 3 or more macrotiles in the Y direction. */ + if (level == 0 && desc->b.b.last_level == 0 && + (desc->b.b.target == PIPE_TEXTURE_1D || + desc->b.b.target == PIPE_TEXTURE_2D) && + height >= tile_height * 3) { + height = align(height, tile_height * 2); + } + + *out_aligned_for_cbzb = height % (tile_height * 2) == 0; + } else { + *out_aligned_for_cbzb = FALSE; + } + } + } + + return util_format_get_nblocksy(desc->b.b.format, height); +} + +static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures + * incorrectly. This is a workaround to prevent CS from being rejected. 
*/ + + unsigned i, size; + + if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && + desc->b.b.target == PIPE_TEXTURE_3D && + desc->b.b.last_level > 0) { + size = 0; + + for (i = 0; i <= desc->b.b.last_level; i++) { + size += desc->stride_in_bytes[i] * + r300_texture_get_nblocksy(desc, i, FALSE); + } + + size *= desc->b.b.depth0; + desc->size_in_bytes = size; + } +} + +/* Get a width in pixels from a stride in bytes. */ +static unsigned stride_to_width(enum pipe_format format, + unsigned stride_in_bytes) +{ + return (stride_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); +} + +static void r300_setup_miptree(struct r300_screen *screen, + struct r300_texture_desc *desc, + boolean align_for_cbzb) +{ + struct pipe_resource *base = &desc->b.b; + unsigned stride, size, layer_size, nblocksy, i; + boolean rv350_mode = screen->caps.is_rv350; + boolean aligned_for_cbzb; + + desc->size_in_bytes = 0; + + SCREEN_DBG(screen, DBG_TEXALLOC, + "r300: Making miptree for texture, format %s\n", + util_format_short_name(base->format)); + + for (i = 0; i <= base->last_level; i++) { + /* Let's see if this miplevel can be macrotiled. */ + desc->macrotile[i] = + (desc->macrotile[0] == R300_BUFFER_TILED && + r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ? + R300_BUFFER_TILED : R300_BUFFER_LINEAR; + + stride = r300_texture_get_stride(screen, desc, i); + + /* Compute the number of blocks in Y, see if the CBZB clear can be + * used on the texture. 
*/ + aligned_for_cbzb = FALSE; + if (align_for_cbzb && desc->cbzb_allowed[i]) + nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb); + else + nblocksy = r300_texture_get_nblocksy(desc, i, NULL); + + layer_size = stride * nblocksy; + + if (base->nr_samples) { + layer_size *= base->nr_samples; + } + + if (base->target == PIPE_TEXTURE_CUBE) + size = layer_size * 6; + else + size = layer_size * u_minify(base->depth0, i); + + desc->offset_in_bytes[i] = desc->size_in_bytes; + desc->size_in_bytes = desc->offset_in_bytes[i] + size; + desc->layer_size_in_bytes[i] = layer_size; + desc->stride_in_bytes[i] = stride; + desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride); + desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb; + + SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", + i, u_minify(base->width0, i), u_minify(base->height0, i), + u_minify(base->depth0, i), stride, desc->size_in_bytes, + desc->macrotile[i] ? "TRUE" : "FALSE"); + } +} + +static void r300_setup_flags(struct r300_texture_desc *desc) +{ + desc->uses_stride_addressing = + !util_is_power_of_two(desc->b.b.width0) || + !util_is_power_of_two(desc->b.b.height0) || + (desc->stride_in_bytes_override && + stride_to_width(desc->b.b.format, + desc->stride_in_bytes_override) != desc->b.b.width0); + + desc->is_npot = + desc->uses_stride_addressing || + !util_is_power_of_two(desc->b.b.height0); +} + +static void r300_setup_cbzb_flags(struct r300_screen *rscreen, + struct r300_texture_desc *desc) +{ + unsigned i, bpp; + boolean first_level_valid; + + bpp = util_format_get_blocksizebits(desc->b.b.format); + + /* 1) The texture must be point-sampled, + * 2) The depth must be 16 or 32 bits. + * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage + * with certain texture sizes. Macrotiling ensures the alignment. 
*/ + first_level_valid = desc->b.b.nr_samples <= 1 && + (bpp == 16 || bpp == 32) && + desc->macrotile[0]; + + for (i = 0; i <= desc->b.b.last_level; i++) + desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; +} + +static void r300_setup_tiling(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + struct r300_winsys_screen *rws = screen->rws; + enum pipe_format format = desc->b.b.format; + boolean rv350_mode = screen->caps.is_rv350; + boolean is_zb = util_format_is_depth_or_stencil(format); + boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); + + if (!util_format_is_plain(format)) { + return; + } + + /* If height == 1, disable microtiling except for zbuffer. */ + if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) { + return; + } + + /* Set microtiling. */ + switch (util_format_get_blocksize(format)) { + case 1: + case 4: + desc->microtile = R300_BUFFER_TILED; + break; + + case 2: + case 8: + if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { + desc->microtile = R300_BUFFER_SQUARETILED; + } + break; + } + + if (dbg_no_tiling) { + return; + } + + /* Set macrotiling. */ + if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) { + desc->macrotile[0] = R300_BUFFER_TILED; + } +} + +static void r300_tex_print_info(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const char *func) +{ + fprintf(stderr, + "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " + "LastLevel: %i, Size: %i, Format: %s\n", + func, + desc->macrotile[0] ? "YES" : " NO", + desc->microtile ? 
"YES" : " NO", + desc->stride_in_pixels[0], + desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0, + desc->b.b.last_level, desc->size_in_bytes, + util_format_short_name(desc->b.b.format)); +} + +boolean r300_texture_desc_init(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size) +{ + desc->b.b = *base; + desc->b.b.screen = &rscreen->screen; + + desc->stride_in_bytes_override = stride_in_bytes_override; + + if (microtile == R300_BUFFER_SELECT_LAYOUT || + macrotile == R300_BUFFER_SELECT_LAYOUT) { + r300_setup_tiling(rscreen, desc); + } else { + desc->microtile = microtile; + desc->macrotile[0] = macrotile; + assert(desc->b.b.last_level == 0); + } + + r300_setup_flags(desc); + r300_setup_cbzb_flags(rscreen, desc); + + /* Setup the miptree description. */ + r300_setup_miptree(rscreen, desc, TRUE); + /* If the required buffer size is larger the given max size, + * try again without the alignment for the CBZB clear. */ + if (max_buffer_size && desc->size_in_bytes > max_buffer_size) { + r300_setup_miptree(rscreen, desc, FALSE); + } + + r300_texture_3d_fix_mipmapping(rscreen, desc); + + if (max_buffer_size) { + /* Make sure the buffer we got is large enough. */ + if (desc->size_in_bytes > max_buffer_size) { + fprintf(stderr, "r300: texture_from_handle: The buffer is not " + "large enough. 
Got: %i, Need: %i, Info:\n", + max_buffer_size, desc->size_in_bytes); + r300_tex_print_info(rscreen, desc, "texture_from_handle"); + return FALSE; + } + + desc->buffer_size_in_bytes = max_buffer_size; + } else { + desc->buffer_size_in_bytes = desc->size_in_bytes; + } + + if (SCREEN_DBG_ON(rscreen, DBG_TEX)) + r300_tex_print_info(rscreen, desc, "texture_from_handle"); + + return TRUE; +} + +unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + unsigned level, unsigned zslice, + unsigned face) +{ + unsigned offset = desc->offset_in_bytes[level]; + + switch (desc->b.b.target) { + case PIPE_TEXTURE_3D: + assert(face == 0); + return offset + zslice * desc->layer_size_in_bytes[level]; + + case PIPE_TEXTURE_CUBE: + assert(zslice == 0); + return offset + face * desc->layer_size_in_bytes[level]; + + default: + assert(zslice == 0 && face == 0); + return offset; + } +} diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h new file mode 100644 index 0000000000..95de66f654 --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_TEXTURE_DESC_H +#define R300_TEXTURE_DESC_H + +#include "r300_defines.h" + +struct pipe_resource; +struct r300_screen; +struct r300_texture_desc; +struct r300_texture; + +enum r300_dim { + DIM_WIDTH = 0, + DIM_HEIGHT = 1 +}; + +unsigned r300_get_pixel_alignment(enum pipe_format format, + unsigned num_samples, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + enum r300_dim dim); + +boolean r300_texture_desc_init(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size); + +unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + unsigned level, unsigned zslice, + unsigned face); + +#endif diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 5394e04f72..51b2c55550 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -71,7 +71,7 @@ static unsigned translate_opcode(unsigned opcode) case TGSI_OPCODE_COS: return RC_OPCODE_COS; case TGSI_OPCODE_DDX: return RC_OPCODE_DDX; case TGSI_OPCODE_DDY: return RC_OPCODE_DDY; - /* case TGSI_OPCODE_KILP: return RC_OPCODE_KILP; */ + case TGSI_OPCODE_KILP: return RC_OPCODE_KILP; /* case TGSI_OPCODE_PK2H: return RC_OPCODE_PK2H; */ /* case TGSI_OPCODE_PK2US: return RC_OPCODE_PK2US; */ /* case TGSI_OPCODE_PK4B: 
return RC_OPCODE_PK4B; */ diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index d41f258836..e9333b35ef 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -22,7 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_transfer.h" -#include "r300_texture.h" +#include "r300_texture_desc.h" #include "r300_screen_buffer.h" #include "util/u_memory.h" @@ -35,8 +35,8 @@ struct r300_transfer { /* Offset from start of buffer. */ unsigned offset; - /* Detiled texture. */ - struct r300_texture *detiled_texture; + /* Linear texture. */ + struct r300_texture *linear_texture; }; /* Convenience cast wrapper. */ @@ -57,7 +57,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, subdst.face = 0; subdst.level = 0; - ctx->resource_copy_region(ctx, &r300transfer->detiled_texture->b.b, subdst, + ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, subdst, 0, 0, 0, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, @@ -77,9 +77,11 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, ctx->resource_copy_region(ctx, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, - &r300transfer->detiled_texture->b.b, subsrc, + &r300transfer->linear_texture->desc.b.b, subsrc, 0, 0, 0, transfer->box.width, transfer->box.height); + + ctx->flush(ctx, 0, NULL); } struct pipe_transfer* @@ -89,19 +91,21 @@ r300_texture_get_transfer(struct pipe_context *ctx, unsigned usage, const struct pipe_box *box) { + struct r300_context *r300 = r300_context(ctx); struct r300_texture *tex = r300_texture(texture); - struct r300_screen *r300screen = r300_screen(ctx->screen); struct r300_transfer *trans; struct pipe_resource base; boolean referenced_cs, referenced_hw, blittable; - referenced_cs = r300screen->rws->is_buffer_referenced( - r300screen->rws, tex->buffer, R300_REF_CS); + referenced_cs = + 
r300->rws->cs_is_buffer_referenced(r300->cs, + tex->buffer, R300_REF_CS); if (referenced_cs) { referenced_hw = TRUE; } else { - referenced_hw = r300screen->rws->is_buffer_referenced( - r300screen->rws, tex->buffer, R300_REF_HW); + referenced_hw = + r300->rws->cs_is_buffer_referenced(r300->cs, + tex->buffer, R300_REF_HW); } blittable = ctx->screen->is_format_supported( @@ -119,7 +123,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* If the texture is tiled, we must create a temporary detiled texture * for this transfer. * Also make write transfers pipelined. */ - if (tex->microtile || tex->macrotile || + if (tex->desc.microtile || tex->desc.macrotile[sr.level] || ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { base.target = PIPE_TEXTURE_2D; base.format = texture->format; @@ -144,23 +148,23 @@ r300_texture_get_transfer(struct pipe_context *ctx, } /* Create the temporary texture. */ - trans->detiled_texture = r300_texture( + trans->linear_texture = r300_texture( ctx->screen->resource_create(ctx->screen, &base)); - if (!trans->detiled_texture) { + if (!trans->linear_texture) { /* Oh crap, the thing can't create the texture. * Let's flush and try again. */ ctx->flush(ctx, 0, NULL); - trans->detiled_texture = r300_texture( + trans->linear_texture = r300_texture( ctx->screen->resource_create(ctx->screen, &base)); - if (!trans->detiled_texture) { + if (!trans->linear_texture) { /* For linear textures, it's safe to fallback to * an unpipelined transfer. */ - if (!tex->microtile && !tex->macrotile) { + if (!tex->desc.microtile && !tex->desc.macrotile[sr.level]) { goto unpipelined; } @@ -172,8 +176,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, } } - assert(!trans->detiled_texture->microtile && - !trans->detiled_texture->macrotile); + assert(!trans->linear_texture->desc.microtile && + !trans->linear_texture->desc.macrotile[0]); /* Set the stride. 
* @@ -183,7 +187,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, * right thing internally. */ trans->transfer.stride = - r300_texture_get_stride(r300screen, trans->detiled_texture, 0); + trans->linear_texture->desc.stride_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is @@ -198,11 +202,11 @@ r300_texture_get_transfer(struct pipe_context *ctx, unpipelined: /* Unpipelined transfer. */ - trans->transfer.stride = - r300_texture_get_stride(r300screen, tex, sr.level); - trans->offset = r300_texture_get_offset(tex, sr.level, box->z, sr.face); + trans->transfer.stride = tex->desc.stride_in_bytes[sr.level]; + trans->offset = r300_texture_get_offset(&tex->desc, + sr.level, box->z, sr.face); - if (referenced_cs && (usage & PIPE_TRANSFER_READ)) + if (referenced_cs) ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); return &trans->transfer; } @@ -214,13 +218,13 @@ void r300_texture_transfer_destroy(struct pipe_context *ctx, { struct r300_transfer *r300transfer = r300_transfer(trans); - if (r300transfer->detiled_texture) { + if (r300transfer->linear_texture) { if (trans->usage & PIPE_TRANSFER_WRITE) { r300_copy_into_tiled_texture(ctx, r300transfer); } pipe_resource_reference( - (struct pipe_resource**)&r300transfer->detiled_texture, NULL); + (struct pipe_resource**)&r300transfer->linear_texture, NULL); } pipe_resource_reference(&trans->resource, NULL); FREE(trans); @@ -229,21 +233,23 @@ void r300_texture_transfer_destroy(struct pipe_context *ctx, void* r300_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer) { + struct r300_context *r300 = r300_context(ctx); struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = r300_texture(transfer->resource); char *map; - enum pipe_format format = tex->b.b.format; + enum pipe_format format = tex->desc.b.b.format; - if 
(r300transfer->detiled_texture) { + if (r300transfer->linear_texture) { /* The detiled texture is of the same size as the region being mapped * (no offset needed). */ return rws->buffer_map(rws, - r300transfer->detiled_texture->buffer, + r300transfer->linear_texture->buffer, + r300->cs, transfer->usage); } else { /* Tiling is disabled. */ - map = rws->buffer_map(rws, tex->buffer, + map = rws->buffer_map(rws, tex->buffer, r300->cs, transfer->usage); if (!map) { @@ -263,8 +269,8 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = r300_texture(transfer->resource); - if (r300transfer->detiled_texture) { - rws->buffer_unmap(rws, r300transfer->detiled_texture->buffer); + if (r300transfer->linear_texture) { + rws->buffer_unmap(rws, r300transfer->linear_texture->buffer); } else { rws->buffer_unmap(rws, tex->buffer); } diff --git a/src/gallium/drivers/r300/r300_vs_draw.c b/src/gallium/drivers/r300/r300_vs_draw.c index d64040b891..2939963c35 100644 --- a/src/gallium/drivers/r300/r300_vs_draw.c +++ b/src/gallium/drivers/r300/r300_vs_draw.c @@ -185,7 +185,7 @@ static void transform_decl(struct tgsi_transform_context *ctx, if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) { insert_output(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0, TGSI_INTERPOLATE_LINEAR); - vsctx->color_used[2] = TRUE; + vsctx->bcolor_used[0] = TRUE; } /* One more case is handled in insert_trailing_bcolor. 
*/ break; diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 77c1c13ef9..ff11546a64 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2010 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,17 +24,25 @@ #ifndef R300_WINSYS_H #define R300_WINSYS_H -/* The public interface header for the r300 pipe driver. - * Any winsys hosting this pipe needs to implement r300_winsys and then - * call r300_create_screen to start things. */ +/* The public winsys interface header for the r300 pipe driver. + * Any winsys hosting this pipe needs to implement r300_winsys_screen and then + * call r300_screen_create to start things. */ #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "r300_defines.h" +struct r300_winsys_screen; + struct r300_winsys_buffer; +struct r300_winsys_cs { + uint32_t *ptr; /* Pointer to the beginning of the CS. */ + unsigned cdw; /* Number of used dwords. */ + unsigned ndw; /* Size of the CS in dwords. */ +}; + enum r300_value_id { R300_VID_PCI_ID, R300_VID_GB_PIPES, @@ -48,121 +57,251 @@ enum r300_reference_domain { /* bitfield */ }; struct r300_winsys_screen { + /** + * Destroy this winsys. + * + * \param ws The winsys this function is called from. + */ void (*destroy)(struct r300_winsys_screen *ws); - + /** + * Query a system value from a winsys. + * + * \param ws The winsys this function is called from. + * \param vid One of the R300_VID_* enums. + */ + uint32_t (*get_value)(struct r300_winsys_screen *ws, + enum r300_value_id vid); + + /************************************************************************** * Buffer management. Buffer attributes are mostly fixed over its lifetime. 
* * Remember that gallium gets to choose the interface it needs, and the * window systems must then implement that interface (rather than the * other way around...). + *************************************************************************/ + + /** + * Create a buffer object. * - * usage is a bitmask of R300_WINSYS_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This - * usage argument is only an optimization hint, not a guarantee, therefore - * proper behavior must be observed in all circumstances. - * - * alignment indicates the client's alignment requirements, eg for - * SSE instructions. + * \param ws The winsys this function is called from. + * \param size The size to allocate. + * \param alignment An alignment of the buffer in memory. + * \param bind A bitmask of the PIPE_BIND_* flags. + * \param usage A bitmask of the PIPE_USAGE_* flags. + * \param domain A bitmask of the R300_DOMAIN_* flags. + * \return The created buffer object. */ struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws, - unsigned alignment, - unsigned usage, - enum r300_buffer_domain domain, - unsigned size); + unsigned size, + unsigned alignment, + unsigned bind, + unsigned usage, + enum r300_buffer_domain domain); /** - * Map the entire data store of a buffer object into the client's address. - * flags is bitmask of R300_WINSYS_BUFFER_USAGE_CPU_READ/WRITE flags. + * Reference a buffer object (assign with reference counting). + * + * \param ws The winsys this function is called from. + * \param pdst A destination pointer to set the source buffer to. + * \param src A source buffer object. 
*/ - void *(*buffer_map)( struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, - unsigned usage); + void (*buffer_reference)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer **pdst, + struct r300_winsys_buffer *src); - void (*buffer_unmap)( struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf ); + /** + * Map the entire data store of a buffer object into the client's address + * space. + * + * \param ws The winsys this function is called from. + * \param buf A winsys buffer object to map. + * \param cs A command stream to flush if the buffer is referenced by it. + * \param usage A bitmask of the PIPE_TRANSFER_* flags. + * \return The pointer at the beginning of the buffer. + */ + void *(*buffer_map)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + struct r300_winsys_cs *cs, + enum pipe_transfer_usage usage); - void (*buffer_destroy)( struct r300_winsys_buffer *buf ); + /** + * Unmap a buffer object from the client's address space. + * + * \param ws The winsys this function is called from. + * \param buf A winsys buffer object to unmap. + */ + void (*buffer_unmap)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf); + /** + * Wait for a buffer object until it is not used by a GPU. This is + * equivalent to a fence placed after the last command using the buffer, + * and synchronizing to the fence. + * + * \param ws The winsys this function is called from. + * \param buf A winsys buffer object to wait for. + */ + void (*buffer_wait)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf); - void (*buffer_reference)(struct r300_winsys_screen *rws, - struct r300_winsys_buffer **pdst, - struct r300_winsys_buffer *src); + /** + * Return tiling flags describing a memory layout of a buffer object. + * + * \param ws The winsys this function is called from. + * \param buf A winsys buffer object to get the flags from. + * \param macrotile A pointer to the return value of the macrotile flag.
+ * \param microtile A pointer to the return value of the microtile flag. + * + * \note microtile and macrotile are not bitmasks! + */ + void (*buffer_get_tiling)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + enum r300_buffer_tiling *microtile, + enum r300_buffer_tiling *macrotile); - void (*buffer_wait)(struct r300_winsys_screen *rws, - struct r300_winsys_buffer *buf); + /** + * Set tiling flags describing a memory layout of a buffer object. + * + * \param ws The winsys this function is called from. + * \param buf A winsys buffer object to set the flags for. + * \param macrotile A macrotile flag. + * \param microtile A microtile flag. + * \param stride A stride of the buffer in bytes, for texturing. + * + * \note microtile and macrotile are not bitmasks! + */ + void (*buffer_set_tiling)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride); - /* Add a pipe_resource to the list of buffer objects to validate. */ - boolean (*add_buffer)(struct r300_winsys_screen *winsys, - struct r300_winsys_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + /** + * Get a winsys buffer from a winsys handle. The internal structure + * of the handle is platform-specific and only a winsys should access it. + * + * \param ws The winsys this function is called from. + * \param whandle A winsys handle pointer as was received from a state + * tracker. + * \param stride The returned buffer stride in bytes. + * \param size The returned buffer size. + */ + struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws, + struct winsys_handle *whandle, + unsigned *stride, + unsigned *size); + /** + * Get a winsys handle from a winsys buffer. The internal structure + * of the handle is platform-specific and only a winsys should access it. + * + * \param ws The winsys this function is called from.
+ * \param buf A winsys buffer object to get the handle from. + * \param whandle A winsys handle pointer. + * \param stride A stride of the buffer in bytes, for texturing. + * \return TRUE on success. + */ + boolean (*buffer_get_handle)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + unsigned stride, + struct winsys_handle *whandle); - /* Revalidate all currently setup pipe_buffers. - * Returns TRUE if a flush is required. */ - boolean (*validate)(struct r300_winsys_screen* winsys); + /************************************************************************** + * Command submission. + * + * Each pipe context should create its own command stream and submit + * commands independently of other contexts. + *************************************************************************/ - /* Return the number of free dwords in CS. */ - unsigned (*get_cs_free_dwords)(struct r300_winsys_screen *winsys); + /** + * Create a command stream. + * + * \param ws The winsys this function is called from. + */ + struct r300_winsys_cs *(*cs_create)(struct r300_winsys_screen *ws); - /* Return the pointer to the first free dword in CS and assume a pipe - * driver wants to fill "count" dwords. */ - uint32_t *(*get_cs_pointer)(struct r300_winsys_screen *winsys, - unsigned count); + /** + * Destroy a command stream. + * + * \param cs A command stream to destroy. + */ + void (*cs_destroy)(struct r300_winsys_cs *cs); - /* Write a dword to the command buffer. */ - void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword); + /** + * Add a buffer object to the list of buffers to validate. + * + * \param cs A command stream to add buffer for validation against. + * \param buf A winsys buffer to validate. + * \param rd A read domain containing a bitmask + * of the R300_DOMAIN_* flags. + * \param wd A write domain containing a bitmask + * of the R300_DOMAIN_* flags. 
+ */ + void (*cs_add_buffer)(struct r300_winsys_cs *cs, + struct r300_winsys_buffer *buf, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd); - /* Write a table of dwords to the command buffer. */ - void (*write_cs_table)(struct r300_winsys_screen* winsys, - const void *dwords, unsigned count); + /** + * Revalidate all currently set up winsys buffers. + * Returns TRUE if a flush is required. + * + * \param cs A command stream to validate. + */ + boolean (*cs_validate)(struct r300_winsys_cs *cs); - /* Write a relocated dword to the command buffer. */ - void (*write_cs_reloc)(struct r300_winsys_screen *winsys, + /** + * Write a relocated dword to a command buffer. + * + * \param cs A command stream the relocation is written to. + * \param buf A winsys buffer to write the relocation for. + * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags. + * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. + */ + void (*cs_write_reloc)(struct r300_winsys_cs *cs, struct r300_winsys_buffer *buf, enum r300_buffer_domain rd, - enum r300_buffer_domain wd, - uint32_t flags); - - /* Flush the CS. */ - void (*flush_cs)(struct r300_winsys_screen* winsys); - - /* winsys flush - callback from winsys when flush required */ - void (*set_flush_cb)(struct r300_winsys_screen *winsys, - void (*flush_cb)(void *), void *data); - - void (*reset_bos)(struct r300_winsys_screen *winsys); - - void (*buffer_get_tiling)(struct r300_winsys_screen *winsys, - struct r300_winsys_buffer *buffer, - enum r300_buffer_tiling *microtiled, - enum r300_buffer_tiling *macrotiled); + enum r300_buffer_domain wd); - void (*buffer_set_tiling)(struct r300_winsys_screen *winsys, - struct r300_winsys_buffer *buffer, - uint32_t pitch, - enum r300_buffer_tiling microtiled, - enum r300_buffer_tiling macrotiled); - - uint32_t (*get_value)(struct r300_winsys_screen *winsys, - enum r300_value_id vid); + /** + * Flush a command stream. 
+ * + * \param cs A command stream to flush. + */ + void (*cs_flush)(struct r300_winsys_cs *cs); - struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *winsys, - unsigned handle); + /** + * Set a flush callback which is called from winsys when flush is + * required. + * + * \param cs A command stream to set the callback for. + * \param flush A flush callback function associated with the command stream. + * \param user A user pointer that will be passed to the flush callback. + */ + void (*cs_set_flush)(struct r300_winsys_cs *cs, + void (*flush)(void *), + void *user); - boolean (*buffer_get_handle)(struct r300_winsys_screen *winsys, - struct r300_winsys_buffer *buffer, - struct winsys_handle *whandle); + /** + * Reset the list of buffer objects to validate, usually called + * prior to adding buffer objects for validation. + * + * \param cs A command stream to reset buffers for. + */ + void (*cs_reset_buffers)(struct r300_winsys_cs *cs); - boolean (*is_buffer_referenced)(struct r300_winsys_screen *winsys, - struct r300_winsys_buffer *buffer, - enum r300_reference_domain domain); + /** + * Return TRUE if a buffer is referenced by a command stream or by hardware + * (i.e. is busy), based on the domain parameter. + * + * \param cs A command stream. + * \param buf A winsys buffer. + * \param domain A bitmask of the R300_REF_* enums. + */ + boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs, + struct r300_winsys_buffer *buf, + enum r300_reference_domain domain); }; -struct r300_winsys_screen * -r300_winsys_screen(struct pipe_screen *screen); - -/* Creates a new r300 screen. 
*/ -struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws); - #endif /* R300_WINSYS_H */ diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index aae31a6a6e..8f1e1366b5 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -18,10 +18,7 @@ C_SOURCES = \ r600_state.c \ r600_texture.c \ r600_shader.c \ - r600_compiler.c \ - r600_compiler_tgsi.c \ - r600_compiler_dump.c \ - r600_compiler_r600.c \ - r600_compiler_r700.c + r600_asm.c \ + r700_asm.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 26e2f1941c..99c8644e02 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -27,11 +27,8 @@ r600 = env.ConvenienceLibrary( 'r600_state.c', 'r600_texture.c', 'r600_shader.c', - 'r600_compiler.c', - 'r600_compiler_tgsi.c', - 'r600_compiler_dump.c', - 'r600_compiler_r600.c', - 'r600_compiler_r700.c' + 'r600_asm.c', + 'r700_asm.c', ]) Export('r600') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c new file mode 100644 index 0000000000..e678a2fdf2 --- /dev/null +++ b/src/gallium/drivers/r600/r600_asm.c @@ -0,0 +1,468 @@ +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "r600_asm.h" +#include "r600_context.h" +#include "util/u_memory.h" +#include "r600_sq.h" +#include <stdio.h> +#include <errno.h> + +int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); + +static struct r600_bc_cf *r600_bc_cf(void) +{ + struct r600_bc_cf *cf = CALLOC_STRUCT(r600_bc_cf); + + if (cf == NULL) + return NULL; + LIST_INITHEAD(&cf->list); + LIST_INITHEAD(&cf->alu); + LIST_INITHEAD(&cf->vtx); + LIST_INITHEAD(&cf->tex); + return cf; +} + +static struct r600_bc_alu *r600_bc_alu(void) +{ + struct r600_bc_alu *alu = CALLOC_STRUCT(r600_bc_alu); + + if (alu == NULL) + return NULL; + LIST_INITHEAD(&alu->list); + return alu; +} + +static struct r600_bc_vtx *r600_bc_vtx(void) +{ + struct r600_bc_vtx *vtx = CALLOC_STRUCT(r600_bc_vtx); + + if (vtx == NULL) + return NULL; + LIST_INITHEAD(&vtx->list); + return vtx; +} + +static struct r600_bc_tex *r600_bc_tex(void) +{ + struct r600_bc_tex *tex = CALLOC_STRUCT(r600_bc_tex); + + if (tex == NULL) + return NULL; + LIST_INITHEAD(&tex->list); + return tex; +} + +int r600_bc_init(struct r600_bc *bc, enum radeon_family family) +{ + LIST_INITHEAD(&bc->cf); + bc->family = family; + return 0; +} + +static int r600_bc_add_cf(struct r600_bc *bc) +{ + struct r600_bc_cf *cf = r600_bc_cf(); + + if (cf == NULL) + return -ENOMEM; + LIST_ADDTAIL(&cf->list, &bc->cf); + if (bc->cf_last) + cf->id = bc->cf_last->id + 2; + bc->cf_last = cf; + bc->ncf++; + bc->ndw += 2; + return 0; +} + +int 
r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) +{ + int r; + + r = r600_bc_add_cf(bc); + if (r) + return r; + bc->cf_last->inst = output->inst; + memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output)); + return 0; +} + +int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) +{ + struct r600_bc_alu *nalu = r600_bc_alu(); + struct r600_bc_alu *lalu; + int i, r; + + if (nalu == NULL) + return -ENOMEM; + memcpy(nalu, alu, sizeof(struct r600_bc_alu)); + nalu->nliteral = 0; + + /* cf can contains only alu or only vtx or only tex */ + if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) { + r = r600_bc_add_cf(bc); + if (r) { + free(nalu); + return r; + } + bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3; + } + /* number of gpr == the last gpr used in any alu */ + for (i = 0; i < 3; i++) { + if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) { + bc->ngpr = alu->src[i].sel + 1; + } + /* compute how many literal are needed + * either 2 or 4 literals + */ + if (alu->src[i].sel == 253) { + if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) { + nalu->nliteral = (alu->src[i].chan + 2) & 0x6; + } + } + } + if (!LIST_IS_EMPTY(&bc->cf_last->alu)) { + lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); + if (!lalu->last && lalu->nliteral > nalu->nliteral) { + nalu->nliteral = lalu->nliteral; + } + } + if (alu->dst.sel >= bc->ngpr) { + bc->ngpr = alu->dst.sel + 1; + } + LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu); + /* each alu use 2 dwords */ + bc->cf_last->ndw += 2; + bc->ndw += 2; + return 0; +} + +int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) +{ + struct r600_bc_alu *alu; + + if (bc->cf_last == NULL) { + R600_ERR("no last CF\n"); + return -EINVAL; + } + if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + return 0; + } + if (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || + LIST_IS_EMPTY(&bc->cf_last->alu)) { + 
R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); + return -EINVAL; + } + alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); + if (!alu->last || !alu->nliteral) { + return 0; + } + memcpy(alu->value, value, 4 * 4); + bc->cf_last->ndw += alu->nliteral; + bc->ndw += alu->nliteral; + return 0; +} + +int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) +{ + struct r600_bc_vtx *nvtx = r600_bc_vtx(); + int r; + + if (nvtx == NULL) + return -ENOMEM; + memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx)); + + /* cf can contains only alu or only vtx or only tex */ + if (bc->cf_last == NULL || + (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)) { + r = r600_bc_add_cf(bc); + if (r) { + free(nvtx); + return r; + } + bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; + } + LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx); + /* each fetch use 4 dwords */ + bc->cf_last->ndw += 4; + bc->ndw += 4; + return 0; +} + +int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) +{ + struct r600_bc_tex *ntex = r600_bc_tex(); + int r; + + if (ntex == NULL) + return -ENOMEM; + memcpy(ntex, tex, sizeof(struct r600_bc_tex)); + + /* cf can contains only alu or only vtx or only tex */ + if (bc->cf_last == NULL || + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + r = r600_bc_add_cf(bc); + if (r) { + free(ntex); + return r; + } + bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX; + } + LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex); + /* each texture fetch use 4 dwords */ + bc->cf_last->ndw += 4; + bc->ndw += 4; + return 0; +} + +static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) +{ + bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | + S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | + S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | + S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); + bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | + 
S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | + S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | + S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) | + S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) | + S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); + bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); + bc->bytecode[id++] = 0; + return 0; +} + +static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsigned id) +{ + bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | + S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | + S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | + S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); + bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) | + S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) | + S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) | + S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) | + S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) | + S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) | + S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) | + S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) | + S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) | + S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) | + S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w); + bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) | + S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) | + S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) | + S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) | + S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) | + S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) | + S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) | + S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w); + bc->bytecode[id++] = 0; + return 0; +} + +int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +{ + unsigned i; + + /* don't replace gpr by pv or ps for destination register */ + if (alu->is_op3) { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = 
S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | + S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | + S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | + S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } else { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | + S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | + S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } + if (alu->last) { + for (i = 0; i < alu->nliteral; i++) { + bc->bytecode[id++] = alu->value[i]; + } + } + return 0; +} + +int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) +{ + unsigned id = cf->id; + + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1); + bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | + S_SQ_CF_ALU_WORD1_BARRIER(1) | + S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); + break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); + bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | + 
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | + S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | + S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | + S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | + S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) | + S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); + break; + default: + R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); + return -EINVAL; + } + return 0; +} + +int r600_bc_build(struct r600_bc *bc) +{ + struct r600_bc_cf *cf; + struct r600_bc_alu *alu; + struct r600_bc_vtx *vtx; + struct r600_bc_tex *tex; + unsigned addr; + int r; + + + /* first path compute addr of each CF block */ + /* addr start after all the CF instructions */ + addr = bc->cf_last->id + 2; + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + /* fetch node need to be 16 bytes aligned*/ + addr += 3; + addr &= 0xFFFFFFFCUL; + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + break; + default: + R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); + return -EINVAL; + } + cf->addr = addr; + addr += cf->ndw; + bc->ndw = cf->addr + cf->ndw; + } + free(bc->bytecode); + bc->bytecode = calloc(1, bc->ndw * 4); + if (bc->bytecode == NULL) + return -ENOMEM; + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + addr = cf->addr; + r = r600_bc_cf_build(bc, cf); + if (r) + return r; + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + 
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + switch (bc->family) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + r = r600_bc_alu_build(bc, alu, addr); + break; + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + r = r700_bc_alu_build(bc, alu, addr); + break; + default: + R600_ERR("unknown family %d\n", bc->family); + return -EINVAL; + } + if (r) + return r; + addr += 2; + if (alu->last) { + addr += alu->nliteral; + } + } + break; + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + r = r600_bc_vtx_build(bc, vtx, addr); + if (r) + return r; + addr += 4; + } + break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: + LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { + r = r600_bc_tex_build(bc, tex, addr); + if (r) + return r; + addr += 4; + } + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + break; + default: + R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); + return -EINVAL; + } + } + return 0; +} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h new file mode 100644 index 0000000000..88fb957440 --- /dev/null +++ b/src/gallium/drivers/r600/r600_asm.h @@ -0,0 +1,141 @@ +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + 
* paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef R600_ASM_H +#define R600_ASM_H + +#include "radeon.h" +#include "util/u_double_list.h" + +struct r600_bc_alu_src { + unsigned sel; + unsigned chan; + unsigned neg; + unsigned abs; +}; + +struct r600_bc_alu_dst { + unsigned sel; + unsigned chan; + unsigned clamp; + unsigned write; +}; + +struct r600_bc_alu { + struct list_head list; + struct r600_bc_alu_src src[3]; + struct r600_bc_alu_dst dst; + unsigned inst; + unsigned last; + unsigned is_op3; + unsigned nliteral; + u32 value[4]; +}; + +struct r600_bc_tex { + struct list_head list; + unsigned inst; + unsigned resource_id; + unsigned src_gpr; + unsigned src_rel; + unsigned dst_gpr; + unsigned dst_rel; + unsigned dst_sel_x; + unsigned dst_sel_y; + unsigned dst_sel_z; + unsigned dst_sel_w; + unsigned lod_bias; + unsigned coord_type_x; + unsigned coord_type_y; + unsigned coord_type_z; + unsigned coord_type_w; + unsigned offset_x; + unsigned offset_y; + unsigned offset_z; + unsigned sampler_id; + unsigned src_sel_x; + unsigned src_sel_y; + unsigned src_sel_z; + unsigned src_sel_w; +}; + +struct r600_bc_vtx { + struct list_head list; + unsigned inst; + unsigned fetch_type; + unsigned buffer_id; + unsigned src_gpr; + unsigned src_sel_x; + unsigned mega_fetch_count; + unsigned dst_gpr; + unsigned dst_sel_x; + unsigned dst_sel_y; + unsigned dst_sel_z; + unsigned dst_sel_w; +}; + +struct r600_bc_output { + unsigned array_base; + 
unsigned type; + unsigned end_of_program; + unsigned inst; + unsigned elem_size; + unsigned gpr; + unsigned swizzle_x; + unsigned swizzle_y; + unsigned swizzle_z; + unsigned swizzle_w; + unsigned barrier; +}; + +struct r600_bc_cf { + struct list_head list; + unsigned inst; + unsigned addr; + unsigned ndw; + unsigned id; + struct list_head alu; + struct list_head tex; + struct list_head vtx; + struct r600_bc_output output; +}; + +struct r600_bc { + enum radeon_family family; + struct list_head cf; + struct r600_bc_cf *cf_last; + unsigned ndw; + unsigned ncf; + unsigned ngpr; + unsigned nresource; + u32 *bytecode; +}; + +int r600_bc_init(struct r600_bc *bc, enum radeon_family family); +int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); +int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); +int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); +int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); +int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); +int r600_bc_build(struct r600_bc *bc); + +#endif diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 272f4dd673..bc6e336ba7 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -29,7 +29,7 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> -#include "state_tracker/drm_api.h" +#include "state_tracker/drm_driver.h" #include "r600_screen.h" #include "r600_context.h" diff --git a/src/gallium/drivers/r600/r600_compiler.c b/src/gallium/drivers/r600/r600_compiler.c deleted file mode 100644 index f1be2bbdf4..0000000000 --- a/src/gallium/drivers/r600/r600_compiler.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the 
"Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include <stdlib.h> -#include <string.h> -#include <stdint.h> -#include <stdio.h> -#include <errno.h> -#include "r600_compiler.h" - -struct c_vector *c_vector_new(void) -{ - struct c_vector *v = calloc(1, sizeof(struct c_vector)); - - if (v == NULL) { - return NULL; - } - c_list_init(v); - return v; -} - -static unsigned c_opcode_is_alu(unsigned opcode) -{ - switch (opcode) { - case C_OPCODE_MOV: - case C_OPCODE_MUL: - case C_OPCODE_MAD: - case C_OPCODE_ARL: - case C_OPCODE_LIT: - case C_OPCODE_RCP: - case C_OPCODE_RSQ: - case C_OPCODE_EXP: - case C_OPCODE_LOG: - case C_OPCODE_ADD: - case C_OPCODE_DP3: - case C_OPCODE_DP4: - case C_OPCODE_DST: - case C_OPCODE_MIN: - case C_OPCODE_MAX: - case C_OPCODE_SLT: - case C_OPCODE_SGE: - case C_OPCODE_SUB: - case C_OPCODE_LRP: - case C_OPCODE_CND: - case C_OPCODE_DP2A: - case C_OPCODE_FRC: - case C_OPCODE_CLAMP: - case C_OPCODE_FLR: - case C_OPCODE_ROUND: - case C_OPCODE_EX2: - case C_OPCODE_LG2: - case C_OPCODE_POW: - case C_OPCODE_XPD: - case 
C_OPCODE_ABS: - case C_OPCODE_RCC: - case C_OPCODE_DPH: - case C_OPCODE_COS: - case C_OPCODE_DDX: - case C_OPCODE_DDY: - case C_OPCODE_PK2H: - case C_OPCODE_PK2US: - case C_OPCODE_PK4B: - case C_OPCODE_PK4UB: - case C_OPCODE_RFL: - case C_OPCODE_SEQ: - case C_OPCODE_SFL: - case C_OPCODE_SGT: - case C_OPCODE_SIN: - case C_OPCODE_SLE: - case C_OPCODE_SNE: - case C_OPCODE_STR: - case C_OPCODE_UP2H: - case C_OPCODE_UP2US: - case C_OPCODE_UP4B: - case C_OPCODE_UP4UB: - case C_OPCODE_X2D: - case C_OPCODE_ARA: - case C_OPCODE_ARR: - case C_OPCODE_BRA: - case C_OPCODE_SSG: - case C_OPCODE_CMP: - case C_OPCODE_SCS: - case C_OPCODE_NRM: - case C_OPCODE_DIV: - case C_OPCODE_DP2: - case C_OPCODE_CEIL: - case C_OPCODE_I2F: - case C_OPCODE_NOT: - case C_OPCODE_TRUNC: - case C_OPCODE_SHL: - case C_OPCODE_AND: - case C_OPCODE_OR: - case C_OPCODE_MOD: - case C_OPCODE_XOR: - case C_OPCODE_SAD: - case C_OPCODE_NRM4: - case C_OPCODE_F2I: - case C_OPCODE_IDIV: - case C_OPCODE_IMAX: - case C_OPCODE_IMIN: - case C_OPCODE_INEG: - case C_OPCODE_ISGE: - case C_OPCODE_ISHR: - case C_OPCODE_ISLT: - case C_OPCODE_F2U: - case C_OPCODE_U2F: - case C_OPCODE_UADD: - case C_OPCODE_UDIV: - case C_OPCODE_UMAD: - case C_OPCODE_UMAX: - case C_OPCODE_UMIN: - case C_OPCODE_UMOD: - case C_OPCODE_UMUL: - case C_OPCODE_USEQ: - case C_OPCODE_USGE: - case C_OPCODE_USHR: - case C_OPCODE_USLT: - case C_OPCODE_USNE: - return 1; - case C_OPCODE_END: - case C_OPCODE_VFETCH: - case C_OPCODE_KILP: - case C_OPCODE_CAL: - case C_OPCODE_RET: - case C_OPCODE_TXB: - case C_OPCODE_TXL: - case C_OPCODE_BRK: - case C_OPCODE_IF: - case C_OPCODE_BGNFOR: - case C_OPCODE_REP: - case C_OPCODE_ELSE: - case C_OPCODE_ENDIF: - case C_OPCODE_ENDFOR: - case C_OPCODE_ENDREP: - case C_OPCODE_PUSHA: - case C_OPCODE_POPA: - case C_OPCODE_TXF: - case C_OPCODE_TXQ: - case C_OPCODE_CONT: - case C_OPCODE_EMIT: - case C_OPCODE_ENDPRIM: - case C_OPCODE_BGNLOOP: - case C_OPCODE_BGNSUB: - case C_OPCODE_ENDLOOP: - case C_OPCODE_ENDSUB: - case 
C_OPCODE_NOP: - case C_OPCODE_CALLNZ: - case C_OPCODE_IFC: - case C_OPCODE_BREAKC: - case C_OPCODE_KIL: - case C_OPCODE_TEX: - case C_OPCODE_TXD: - case C_OPCODE_TXP: - case C_OPCODE_SWITCH: - case C_OPCODE_CASE: - case C_OPCODE_DEFAULT: - case C_OPCODE_ENDSWITCH: - default: - return 0; - } -} - - -/* NEW */ -void c_node_init(struct c_node *node) -{ - memset(node, 0, sizeof(struct c_node)); - c_list_init(&node->predecessors); - c_list_init(&node->successors); - c_list_init(&node->childs); - c_list_init(&node->insts); - node->parent = NULL; -} - -static struct c_node_link *c_node_link_new(struct c_node *node) -{ - struct c_node_link *link; - - link = calloc(1, sizeof(struct c_node_link)); - if (link == NULL) - return NULL; - c_list_init(link); - link->node = node; - return link; -} - -int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor) -{ - struct c_node_link *pedge, *sedge; - - pedge = c_node_link_new(successor); - sedge = c_node_link_new(predecessor); - if (sedge == NULL || pedge == NULL) { - free(sedge); - free(pedge); - return -ENOMEM; - } - c_list_add_tail(pedge, &predecessor->successors); - c_list_add_tail(sedge, &successor->predecessors); - return 0; -} - -int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction) -{ - struct c_instruction *inst = calloc(1, sizeof(struct c_instruction)); - - if (inst == NULL) - return -ENOMEM; - memcpy(inst, instruction, sizeof(struct c_instruction)); - c_list_add(inst, &node->insts); - return 0; -} - -int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction) -{ - struct c_instruction *inst = calloc(1, sizeof(struct c_instruction)); - - if (inst == NULL) - return -ENOMEM; - memcpy(inst, instruction, sizeof(struct c_instruction)); - c_list_add_tail(inst, &node->insts); - return 0; -} - -struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor) -{ - struct c_node *node = calloc(1, sizeof(struct c_node)); - - 
if (node == NULL) - return NULL; - c_node_init(node); - if (c_node_cfg_link(predecessor, node)) { - free(node); - return NULL; - } - c_list_add_tail(node, &shader->nodes); - return node; -} - -int c_shader_init(struct c_shader *shader, unsigned type) -{ - unsigned i; - int r; - - shader->type = type; - for (i = 0; i < C_FILE_COUNT; i++) { - shader->files[i].nvectors = 0; - c_list_init(&shader->files[i].vectors); - } - c_list_init(&shader->nodes); - c_node_init(&shader->entry); - c_node_init(&shader->end); - shader->entry.opcode = C_OPCODE_ENTRY; - shader->end.opcode = C_OPCODE_END; - r = c_node_cfg_link(&shader->entry, &shader->end); - if (r) - return r; - return 0; -} - -struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid) -{ - struct c_vector *v = calloc(1, sizeof(struct c_vector)); - int i; - - if (v == NULL) { - return NULL; - } - for (i = 0; i < 4; i++) { - v->channel[i] = calloc(1, sizeof(struct c_channel)); - if (v->channel[i] == NULL) - goto out_err; - v->channel[i]->vindex = i; - v->channel[i]->vector = v; - } - v->file = file; - v->name = name; - v->sid = sid; - shader->files[v->file].nvectors++; - v->id = shader->nvectors++; - c_list_add_tail(v, &shader->files[v->file].vectors); - return v; -out_err: - for (i = 0; i < 4; i++) { - free(v->channel[i]); - } - free(v); - return NULL; -} - -static void c_node_remove_link(struct c_node_link *head, struct c_node *node) -{ - struct c_node_link *link, *tmp; - - c_list_for_each_safe(link, tmp, head) { - if (link->node == node) { - c_list_del(link); - free(link); - } - } -} - -static void c_node_destroy(struct c_node *node) -{ - struct c_instruction *i, *ni; - struct c_node_link *link, *tmp; - - c_list_for_each_safe(i, ni, &node->insts) { - c_list_del(i); - free(i); - } - if (node->parent) - c_node_remove_link(&node->parent->childs, node); - node->parent = NULL; - c_list_for_each_safe(link, tmp, &node->predecessors) { - c_node_remove_link(&link->node->successors, 
node); - c_list_del(link); - free(link); - } - c_list_for_each_safe(link, tmp, &node->successors) { - c_node_remove_link(&link->node->predecessors, node); - c_list_del(link); - free(link); - } - c_list_for_each_safe(link, tmp, &node->childs) { - link->node->parent = NULL; - c_list_del(link); - free(link); - } -} - -void c_shader_destroy(struct c_shader *shader) -{ - struct c_node *n, *nn; - struct c_vector *v, *nv; - unsigned i; - - for (i = 0; i < C_FILE_COUNT; i++) { - shader->files[i].nvectors = 0; - c_list_for_each_safe(v, nv, &shader->files[i].vectors) { - c_list_del(v); - free(v->channel[0]); - free(v->channel[1]); - free(v->channel[2]); - free(v->channel[3]); - free(v); - } - } - c_list_for_each_safe(n, nn, &shader->nodes) { - c_list_del(n); - c_node_destroy(n); - } - memset(shader, 0, sizeof(struct c_shader)); -} - -static void c_shader_dfs_without_rec(struct c_node *entry, struct c_node *node) -{ - struct c_node_link *link; - - if (entry == node || entry->visited) - return; - entry->visited = 1; - c_list_for_each(link, &entry->successors) { - c_shader_dfs_without_rec(link->node, node); - } -} - -static void c_shader_dfs_without(struct c_shader *shader, struct c_node *node) -{ - struct c_node *n; - - shader->entry.visited = 0; - shader->end.visited = 0; - c_list_for_each(n, &shader->nodes) { - n->visited = 0; - } - c_shader_dfs_without_rec(&shader->entry, node); -} - -static int c_shader_build_dominator_tree_rec(struct c_shader *shader, struct c_node *node) -{ - struct c_node_link *link, *nlink; - unsigned found = 0; - int r; - - if (node->done) - return 0; - node->done = 1; - c_list_for_each(link, &node->predecessors) { - /* if we remove this predecessor can we reach the current node ? 
*/ - c_shader_dfs_without(shader, link->node); - if (node->visited == 0) { - /* we were unable to visit current node thus current - * predecessor is the immediate dominator of node, as - * their can be only one immediate dominator we break - */ - node->parent = link->node; - nlink = c_node_link_new(node); - if (nlink == NULL) - return -ENOMEM; - c_list_add_tail(nlink, &link->node->childs); - found = 1; - break; - } - } - /* this shouldn't happen there should at least be 1 denominator for each node */ - if (!found && node->opcode != C_OPCODE_ENTRY) { - fprintf(stderr, "invalid flow control graph node %p (%d) has no immediate dominator\n", - node, node->opcode); - return -EINVAL; - } - c_list_for_each(link, &node->predecessors) { - r = c_shader_build_dominator_tree_rec(shader, link->node); - if (r) - return r; - } - return 0; -} - -int c_shader_build_dominator_tree(struct c_shader *shader) -{ - struct c_node *node; - c_list_for_each(node, &shader->nodes) { - node->done = 0; - } - return c_shader_build_dominator_tree_rec(shader, &shader->end); -} diff --git a/src/gallium/drivers/r600/r600_compiler.h b/src/gallium/drivers/r600/r600_compiler.h deleted file mode 100644 index 3de19970c3..0000000000 --- a/src/gallium/drivers/r600/r600_compiler.h +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef R600_COMPILER_H -#define R600_COMPILER_H - -struct c_vector; - -/* operand are the basic source/destination of each operation */ -struct c_channel { - struct c_channel *next; - struct c_channel *prev; - unsigned vindex; /**< index in vector X,Y,Z,W (0,1,2,3) */ - unsigned value; /**< immediate value 32bits */ - struct c_vector *vector; /**< vector to which it belongs */ -}; - -/* in GPU world most of the time operand are grouped into vector - * of 4 component this structure is mostly and handler to group - * operand into a same vector - */ -struct c_vector { - struct c_vector *next; - struct c_vector *prev; - unsigned id; /**< vector uniq id */ - unsigned name; /**< semantic name */ - unsigned file; /**< operand file C_FILE_* */ - int sid; /**< semantic id */ - struct c_channel *channel[4]; /**< operands */ -}; - -#define c_list_init(e) do { (e)->next = e; (e)->prev = e; } while(0) -#define c_list_add(e, h) do { (e)->next = (h)->next; (e)->prev = h; (h)->next = e; (e)->next->prev = e; } while(0) -#define c_list_add_tail(e, h) do { (e)->next = h; (e)->prev = (h)->prev; (h)->prev = e; (e)->prev->next = e; } while(0) -#define c_list_del(e) do { (e)->next->prev = (e)->prev; (e)->prev->next = (e)->next; c_list_init(e); } while(0) -#define c_list_for_each(p, h) for (p = (h)->next; p != (h); p = p->next) -#define c_list_for_each_from(p, s, h) for (p = s; p != (h); p = p->next) -#define c_list_for_each_safe(p, n, h) for (p = (h)->next, n = p->next; p != (h); p = n, n = 
p->next) -#define c_list_empty(h) ((h)->next == h) - - -#define C_PROGRAM_TYPE_VS 0 -#define C_PROGRAM_TYPE_FS 1 -#define C_PROGRAM_TYPE_COUNT 2 - -#define C_NODE_FLAG_ALU 1 -#define C_NODE_FLAG_FETCH 2 - -#define C_SWIZZLE_X 0 -#define C_SWIZZLE_Y 1 -#define C_SWIZZLE_Z 2 -#define C_SWIZZLE_W 3 -#define C_SWIZZLE_0 4 -#define C_SWIZZLE_1 5 -#define C_SWIZZLE_D 6 - -#define C_FILE_NULL 0 -#define C_FILE_CONSTANT 1 -#define C_FILE_INPUT 2 -#define C_FILE_OUTPUT 3 -#define C_FILE_TEMPORARY 4 -#define C_FILE_SAMPLER 5 -#define C_FILE_ADDRESS 6 -#define C_FILE_IMMEDIATE 7 -#define C_FILE_LOOP 8 -#define C_FILE_PREDICATE 9 -#define C_FILE_SYSTEM_VALUE 10 -#define C_FILE_RESOURCE 11 -#define C_FILE_COUNT 12 - -#define C_SEMANTIC_POSITION 0 -#define C_SEMANTIC_COLOR 1 -#define C_SEMANTIC_BCOLOR 2 /**< back-face color */ -#define C_SEMANTIC_FOG 3 -#define C_SEMANTIC_PSIZE 4 -#define C_SEMANTIC_GENERIC 5 -#define C_SEMANTIC_NORMAL 6 -#define C_SEMANTIC_FACE 7 -#define C_SEMANTIC_EDGEFLAG 8 -#define C_SEMANTIC_PRIMID 9 -#define C_SEMANTIC_INSTANCEID 10 -#define C_SEMANTIC_VERTEXID 11 -#define C_SEMANTIC_COUNT 12 /**< number of semantic values */ - -#define C_OPCODE_NOP 0 -#define C_OPCODE_MOV 1 -#define C_OPCODE_LIT 2 -#define C_OPCODE_RCP 3 -#define C_OPCODE_RSQ 4 -#define C_OPCODE_EXP 5 -#define C_OPCODE_LOG 6 -#define C_OPCODE_MUL 7 -#define C_OPCODE_ADD 8 -#define C_OPCODE_DP3 9 -#define C_OPCODE_DP4 10 -#define C_OPCODE_DST 11 -#define C_OPCODE_MIN 12 -#define C_OPCODE_MAX 13 -#define C_OPCODE_SLT 14 -#define C_OPCODE_SGE 15 -#define C_OPCODE_MAD 16 -#define C_OPCODE_SUB 17 -#define C_OPCODE_LRP 18 -#define C_OPCODE_CND 19 -/* gap */ -#define C_OPCODE_DP2A 21 -/* gap */ -#define C_OPCODE_FRC 24 -#define C_OPCODE_CLAMP 25 -#define C_OPCODE_FLR 26 -#define C_OPCODE_ROUND 27 -#define C_OPCODE_EX2 28 -#define C_OPCODE_LG2 29 -#define C_OPCODE_POW 30 -#define C_OPCODE_XPD 31 -/* gap */ -#define C_OPCODE_ABS 33 -#define C_OPCODE_RCC 34 -#define C_OPCODE_DPH 35 -#define 
C_OPCODE_COS 36 -#define C_OPCODE_DDX 37 -#define C_OPCODE_DDY 38 -#define C_OPCODE_KILP 39 /* predicated kill */ -#define C_OPCODE_PK2H 40 -#define C_OPCODE_PK2US 41 -#define C_OPCODE_PK4B 42 -#define C_OPCODE_PK4UB 43 -#define C_OPCODE_RFL 44 -#define C_OPCODE_SEQ 45 -#define C_OPCODE_SFL 46 -#define C_OPCODE_SGT 47 -#define C_OPCODE_SIN 48 -#define C_OPCODE_SLE 49 -#define C_OPCODE_SNE 50 -#define C_OPCODE_STR 51 -#define C_OPCODE_TEX 52 -#define C_OPCODE_TXD 53 -#define C_OPCODE_TXP 54 -#define C_OPCODE_UP2H 55 -#define C_OPCODE_UP2US 56 -#define C_OPCODE_UP4B 57 -#define C_OPCODE_UP4UB 58 -#define C_OPCODE_X2D 59 -#define C_OPCODE_ARA 60 -#define C_OPCODE_ARR 61 -#define C_OPCODE_BRA 62 -#define C_OPCODE_CAL 63 -#define C_OPCODE_RET 64 -#define C_OPCODE_SSG 65 /* SGN */ -#define C_OPCODE_CMP 66 -#define C_OPCODE_SCS 67 -#define C_OPCODE_TXB 68 -#define C_OPCODE_NRM 69 -#define C_OPCODE_DIV 70 -#define C_OPCODE_DP2 71 -#define C_OPCODE_TXL 72 -#define C_OPCODE_BRK 73 -#define C_OPCODE_IF 74 -#define C_OPCODE_BGNFOR 75 -#define C_OPCODE_REP 76 -#define C_OPCODE_ELSE 77 -#define C_OPCODE_ENDIF 78 -#define C_OPCODE_ENDFOR 79 -#define C_OPCODE_ENDREP 80 -#define C_OPCODE_PUSHA 81 -#define C_OPCODE_POPA 82 -#define C_OPCODE_CEIL 83 -#define C_OPCODE_I2F 84 -#define C_OPCODE_NOT 85 -#define C_OPCODE_TRUNC 86 -#define C_OPCODE_SHL 87 -/* gap */ -#define C_OPCODE_AND 89 -#define C_OPCODE_OR 90 -#define C_OPCODE_MOD 91 -#define C_OPCODE_XOR 92 -#define C_OPCODE_SAD 93 -#define C_OPCODE_TXF 94 -#define C_OPCODE_TXQ 95 -#define C_OPCODE_CONT 96 -#define C_OPCODE_EMIT 97 -#define C_OPCODE_ENDPRIM 98 -#define C_OPCODE_BGNLOOP 99 -#define C_OPCODE_BGNSUB 100 -#define C_OPCODE_ENDLOOP 101 -#define C_OPCODE_ENDSUB 102 -/* gap */ -#define C_OPCODE_NRM4 112 -#define C_OPCODE_CALLNZ 113 -#define C_OPCODE_IFC 114 -#define C_OPCODE_BREAKC 115 -#define C_OPCODE_KIL 116 /* conditional kill */ -#define C_OPCODE_END 117 /* aka HALT */ -/* gap */ -#define C_OPCODE_F2I 119 -#define 
C_OPCODE_IDIV 120 -#define C_OPCODE_IMAX 121 -#define C_OPCODE_IMIN 122 -#define C_OPCODE_INEG 123 -#define C_OPCODE_ISGE 124 -#define C_OPCODE_ISHR 125 -#define C_OPCODE_ISLT 126 -#define C_OPCODE_F2U 127 -#define C_OPCODE_U2F 128 -#define C_OPCODE_UADD 129 -#define C_OPCODE_UDIV 130 -#define C_OPCODE_UMAD 131 -#define C_OPCODE_UMAX 132 -#define C_OPCODE_UMIN 133 -#define C_OPCODE_UMOD 134 -#define C_OPCODE_UMUL 135 -#define C_OPCODE_USEQ 136 -#define C_OPCODE_USGE 137 -#define C_OPCODE_USHR 138 -#define C_OPCODE_USLT 139 -#define C_OPCODE_USNE 140 -#define C_OPCODE_SWITCH 141 -#define C_OPCODE_CASE 142 -#define C_OPCODE_DEFAULT 143 -#define C_OPCODE_ENDSWITCH 144 -#define C_OPCODE_VFETCH 145 -#define C_OPCODE_ENTRY 146 -#define C_OPCODE_ARL 147 -#define C_OPCODE_LAST 148 - -#define C_OPERAND_FLAG_ABS (1 << 0) -#define C_OPERAND_FLAG_NEG (1 << 1) - -struct c_operand { - struct c_vector *vector; - unsigned swizzle; - unsigned flag; -}; - -struct c_op { - unsigned ninput; - struct c_operand input[3]; - struct c_operand output; - unsigned opcode; -}; - -struct c_instruction { - struct c_instruction *next, *prev; - unsigned nop; - struct c_op op[5]; -}; - -struct c_node; - -struct c_node_link { - struct c_node_link *next; - struct c_node_link *prev; - struct c_node *node; -}; - -/** - * struct c_node - * - * @next: all node are in a double linked list, this point to - * next node - * @next: all node are in a double linked list, this point to - * previous node - * @predecessors: list of all predecessor nodes in the flow graph - * @successors: list of all sucessor nodes in the flow graph - * @parent: parent node in the depth first walk tree - * @childs: child nodes in the depth first walk tree - */ -struct c_node { - struct c_node *next, *prev; - struct c_node_link predecessors; - struct c_node_link successors; - struct c_node *parent; - struct c_node_link childs; - struct c_instruction insts; - unsigned opcode; - unsigned visited; - unsigned done; - void *backend; -}; 
- -struct c_file { - unsigned nvectors; - struct c_vector vectors; -}; - -struct c_shader { - unsigned nvectors; - struct c_file files[C_FILE_COUNT]; - struct c_node nodes; - struct c_node entry; - struct c_node end; - unsigned type; -}; - -int c_shader_init(struct c_shader *shader, unsigned type); -void c_shader_destroy(struct c_shader *shader); -struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid); -int c_shader_build_dominator_tree(struct c_shader *shader); -void c_shader_dump(struct c_shader *shader); - -void c_node_init(struct c_node *node); -int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction); -int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction); - -/* control flow graph functions */ -int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor); -struct c_node *c_node_cfg_new_after(struct c_node *predecessor); -struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor); - -struct c_vector *c_vector_new(void); - -#endif diff --git a/src/gallium/drivers/r600/r600_compiler_dump.c b/src/gallium/drivers/r600/r600_compiler_dump.c deleted file mode 100644 index 485032088c..0000000000 --- a/src/gallium/drivers/r600/r600_compiler_dump.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included 
in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include <stdlib.h> -#include <string.h> -#include <stdint.h> -#include <stdio.h> -#include "r600_compiler.h" - -static const char *c_file_swz[] = { - "x", - "y", - "z", - "w", - "0", - "1", - ".", -}; - -static const char *c_file_str[] = { - "NULL", - "CONSTANT", - "INPUT", - "OUTPUT", - "TEMPORARY", - "SAMPLER", - "ADDRESS", - "IMMEDIATE", - "LOOP", - "PREDICATE", - "SYSTEM_VALUE", -}; - -static const char *c_semantic_str[] = { - "POSITION", - "COLOR", - "BCOLOR", - "FOG", - "PSIZE", - "GENERIC", - "NORMAL", - "FACE", - "EDGEFLAG", - "PRIMID", - "INSTANCEID", -}; - -static const char *c_opcode_str[] = { - "ARL", - "MOV", - "LIT", - "RCP", - "RSQ", - "EXP", - "LOG", - "MUL", - "ADD", - "DP3", - "DP4", - "DST", - "MIN", - "MAX", - "SLT", - "SGE", - "MAD", - "SUB", - "LRP", - "CND", - "(INVALID)", - "DP2A", - "(INVALID)", - "(INVALID)", - "FRC", - "CLAMP", - "FLR", - "ROUND", - "EX2", - "LG2", - "POW", - "XPD", - "(INVALID)", - "ABS", - "RCC", - "DPH", - "COS", - "DDX", - "DDY", - "KILP", - "PK2H", - "PK2US", - "PK4B", - "PK4UB", - "RFL", - "SEQ", - "SFL", - "SGT", - "SIN", - "SLE", - "SNE", - "STR", - "TEX", - "TXD", - "TXP", - "UP2H", - "UP2US", - "UP4B", - "UP4UB", - "X2D", - "ARA", - "ARR", - "BRA", - "CAL", - "RET", - "SSG", - "CMP", - "SCS", - "TXB", - "NRM", - "DIV", - "DP2", - "TXL", - "BRK", - "IF", - "BGNFOR", - "REP", - "ELSE", - "ENDIF", - "ENDFOR", - "ENDREP", - "PUSHA", - "POPA", - "CEIL", - "I2F", - 
"NOT", - "TRUNC", - "SHL", - "(INVALID)", - "AND", - "OR", - "MOD", - "XOR", - "SAD", - "TXF", - "TXQ", - "CONT", - "EMIT", - "ENDPRIM", - "BGNLOOP", - "BGNSUB", - "ENDLOOP", - "ENDSUB", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "NOP", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "NRM4", - "CALLNZ", - "IFC", - "BREAKC", - "KIL", - "END", - "(INVALID)", - "F2I", - "IDIV", - "IMAX", - "IMIN", - "INEG", - "ISGE", - "ISHR", - "ISLT", - "F2U", - "U2F", - "UADD", - "UDIV", - "UMAD", - "UMAX", - "UMIN", - "UMOD", - "UMUL", - "USEQ", - "USGE", - "USHR", - "USLT", - "USNE", - "SWITCH", - "CASE", - "DEFAULT", - "ENDSWITCH", - "VFETCH", - "ENTRY", -}; - -static inline const char *c_get_name(const char *name[], unsigned i) -{ - return name[i]; -} - -static void pindent(unsigned indent) -{ - unsigned i; - for (i = 0; i < indent; i++) - fprintf(stderr, " "); -} - -static void c_node_dump(struct c_node *node, unsigned indent) -{ - struct c_instruction *i; - unsigned j, k; - - pindent(indent); fprintf(stderr, "# node %s\n", c_get_name(c_opcode_str, node->opcode)); - c_list_for_each(i, &node->insts) { - for (k = 0; k < i->nop; k++) { - pindent(indent); - fprintf(stderr, "%s", c_get_name(c_opcode_str, i->op[k].opcode)); - fprintf(stderr, " %s[%d][%s]", - c_get_name(c_file_str, i->op[k].output.vector->file), - i->op[k].output.vector->id, - c_get_name(c_file_swz, i->op[k].output.swizzle)); - for (j = 0; j < i->op[k].ninput; j++) { - fprintf(stderr, " %s[%d][%s]", - c_get_name(c_file_str, i->op[k].input[j].vector->file), - i->op[k].input[j].vector->id, - c_get_name(c_file_swz, i->op[k].input[j].swizzle)); - } - fprintf(stderr, ";\n"); - } - } -} - -static void c_shader_dump_rec(struct c_shader *shader, struct c_node *node, unsigned indent) -{ - struct c_node_link *link; - - c_node_dump(node, indent); - c_list_for_each(link, &node->childs) { - c_shader_dump_rec(shader, link->node, indent + 1); - } -} - -void c_shader_dump(struct c_shader *shader) -{ 
- c_shader_dump_rec(shader, &shader->entry, 0); -} diff --git a/src/gallium/drivers/r600/r600_compiler_r600.c b/src/gallium/drivers/r600/r600_compiler_r600.c deleted file mode 100644 index 14ea8ab6e8..0000000000 --- a/src/gallium/drivers/r600/r600_compiler_r600.c +++ /dev/null @@ -1,891 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#include <stdlib.h> -#include <string.h> -#include <stdint.h> -#include <stdio.h> -#include <errno.h> -#include <util/u_format.h> -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_sq.h" - - -struct r600_alu_instruction { - unsigned copcode; - enum r600_instruction instruction; -}; - -static int r600_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *node, - struct c_instruction *instruction); -struct r600_alu_instruction r600_alu_instruction[C_OPCODE_LAST]; -struct r600_instruction_info r600_instruction_info[]; - -int r600_shader_insert_fetch(struct c_shader *shader) -{ - struct c_vector *vi, *vr, *v, *nv; - struct c_instruction instruction; - int r; - - if (shader->type != C_PROGRAM_TYPE_VS) - return 0; - vi = c_shader_vector_new(shader, C_FILE_INPUT, C_SEMANTIC_VERTEXID, -1); - if (vi == NULL) - return -ENOMEM; - c_list_for_each_safe(v, nv, &shader->files[C_FILE_INPUT].vectors) { - if (v == vi) - continue; - vr = c_shader_vector_new(shader, C_FILE_RESOURCE, C_SEMANTIC_GENERIC, -1); - if (vr == NULL) - return -ENOMEM; - memset(&instruction, 0, sizeof(struct c_instruction)); - instruction.nop = 4; - instruction.op[0].opcode = C_OPCODE_VFETCH; - instruction.op[1].opcode = C_OPCODE_VFETCH; - instruction.op[2].opcode = C_OPCODE_VFETCH; - instruction.op[3].opcode = C_OPCODE_VFETCH; - instruction.op[0].ninput = 2; - instruction.op[1].ninput = 2; - instruction.op[2].ninput = 2; - instruction.op[3].ninput = 2; - instruction.op[0].output.vector = v; - instruction.op[1].output.vector = v; - instruction.op[2].output.vector = v; - instruction.op[3].output.vector = v; - instruction.op[0].input[0].vector = vi; - instruction.op[0].input[1].vector = vr; - instruction.op[1].input[0].vector = vi; - instruction.op[1].input[1].vector = vr; - instruction.op[2].input[0].vector = vi; - instruction.op[2].input[1].vector = vr; - instruction.op[3].input[0].vector = vi; - instruction.op[3].input[1].vector = vr; - 
instruction.op[0].output.swizzle = C_SWIZZLE_X; - instruction.op[1].output.swizzle = C_SWIZZLE_Y; - instruction.op[2].output.swizzle = C_SWIZZLE_Z; - instruction.op[3].output.swizzle = C_SWIZZLE_W; - r = c_node_add_new_instruction_head(&shader->entry, &instruction); - if (r) - return r; - c_list_del(v); - shader->files[C_FILE_INPUT].nvectors--; - c_list_add_tail(v, &shader->files[C_FILE_TEMPORARY].vectors); - shader->files[C_FILE_TEMPORARY].nvectors++; - v->file = C_FILE_TEMPORARY; - } - return 0; -} - -void r600_shader_cleanup(struct r600_shader *rshader) -{ - struct r600_shader_node *n, *nn; - struct r600_shader_vfetch *vf, *nvf; - struct r600_shader_alu *alu, *nalu; - int i; - - if (rshader == NULL) - return; - if (rshader->gpr) { - for (i = 0; i < rshader->nvector; i++) { - free(rshader->gpr[i]); - } - free(rshader->gpr); - rshader->gpr = NULL; - } - c_list_for_each_safe(n, nn, &rshader->nodes) { - c_list_del(n); - c_list_for_each_safe(vf, nvf, &n->vfetch) { - c_list_del(vf); - free(vf); - } - c_list_for_each_safe(alu, nalu, &n->alu) { - c_list_del(alu); - free(alu); - } - free(n); - } - free(rshader->bcode); - return; -} - -int r600_shader_vfetch_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - struct r600_shader_vfetch *vfetch, - unsigned *cid) -{ - unsigned id = *cid; - - vfetch->cf_addr = id; - rshader->bcode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vfetch->src[1].sel) | - S_SQ_VTX_WORD0_SRC_GPR(vfetch->src[0].sel) | - S_SQ_VTX_WORD0_SRC_SEL_X(vfetch->src[0].sel) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F); - rshader->bcode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vfetch->dst[0].chan) | - S_SQ_VTX_WORD1_DST_SEL_Y(vfetch->dst[1].chan) | - S_SQ_VTX_WORD1_DST_SEL_Z(vfetch->dst[2].chan) | - S_SQ_VTX_WORD1_DST_SEL_W(vfetch->dst[3].chan) | - S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) | - S_SQ_VTX_WORD1_GPR_DST_GPR(vfetch->dst[0].sel); - rshader->bcode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); - rshader->bcode[id++] = 0; - *cid = id; - return 0; -} - -int 
r600_shader_update(struct r600_shader *rshader, enum pipe_format *resource_format) -{ - struct r600_shader_node *rnode; - struct r600_shader_vfetch *vfetch; - unsigned i; - - memcpy(rshader->resource_format, resource_format, - rshader->nresource * sizeof(enum pipe_format)); - c_list_for_each(rnode, &rshader->nodes) { - c_list_for_each(vfetch, &rnode->vfetch) { - const struct util_format_description *desc; - i = vfetch->cf_addr + 1; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_X; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_Y; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_Z; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_W; - desc = util_format_description(resource_format[vfetch->src[1].sel]); - if (desc == NULL) { - fprintf(stderr, "%s unknown format %d\n", __func__, resource_format[vfetch->src[1].sel]); - continue; - } - /* WARNING so far TGSI swizzle match R600 ones */ - rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]); - rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]); - rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]); - rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]); - } - } - return 0; -} - -int r600_shader_register(struct r600_shader *rshader) -{ - struct c_vector *v, *nv; - unsigned tid, cid, rid, i; - - rshader->nvector = rshader->cshader.nvectors; - rshader->gpr = calloc(rshader->nvector, sizeof(void*)); - if (rshader->gpr == NULL) - return -ENOMEM; - tid = 0; - cid = 0; - rid = 0; - /* alloc input first */ - c_list_for_each(v, &rshader->cshader.files[C_FILE_INPUT].vectors) { - nv = c_vector_new(); - if (nv == NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = tid++; - rshader->gpr[v->id] = nv; - } - for (i = 0; i < C_FILE_COUNT; i++) { - if (i == C_FILE_INPUT || i == C_FILE_IMMEDIATE) - continue; - c_list_for_each(v, &rshader->cshader.files[i].vectors) { - switch (v->file) { - case C_FILE_OUTPUT: - case C_FILE_TEMPORARY: - nv = c_vector_new(); - if (nv == 
NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = tid++; - rshader->gpr[v->id] = nv; - break; - case C_FILE_CONSTANT: - nv = c_vector_new(); - if (nv == NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = (cid++) + 256; - rshader->gpr[v->id] = nv; - break; - case C_FILE_RESOURCE: - nv = c_vector_new(); - if (nv == NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = (rid++); - rshader->gpr[v->id] = nv; - break; - default: - fprintf(stderr, "%s:%d unsupported file %d\n", __func__, __LINE__, v->file); - return -EINVAL; - } - } - } - rshader->ngpr = tid; - rshader->nconstant = cid; - rshader->nresource = rid; - return 0; -} - -int r600_shader_find_gpr(struct r600_shader *rshader, struct c_vector *v, unsigned swizzle, - struct r600_shader_operand *operand) -{ - struct c_vector *tmp; - - /* Values [0,127] correspond to GPR[0..127]. - * Values [256,511] correspond to cfile constants c[0..255]. - * Other special values are shown in the list below. - * 248 SQ_ALU_SRC_0: special constant 0.0. - * 249 SQ_ALU_SRC_1: special constant 1.0 float. - * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. - * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. - * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. - * 253 SQ_ALU_SRC_LITERAL: literal constant. - * 254 SQ_ALU_SRC_PV: previous vector result. - * 255 SQ_ALU_SRC_PS: previous scalar result. 
- */ - operand->vector = v; - operand->sel = 248; - operand->chan = 0; - operand->neg = 0; - operand->abs = 0; - if (v == NULL) - return 0; - if (v->file == C_FILE_IMMEDIATE) { - operand->sel = 253; - } else { - tmp = rshader->gpr[v->id]; - if (tmp == NULL) { - fprintf(stderr, "%s %d unknown register\n", __FILE__, __LINE__); - return -EINVAL; - } - operand->sel = tmp->id; - } - operand->chan = swizzle; - switch (swizzle) { - case C_SWIZZLE_X: - case C_SWIZZLE_Y: - case C_SWIZZLE_Z: - case C_SWIZZLE_W: - break; - case C_SWIZZLE_0: - operand->sel = 248; - operand->chan = 0; - break; - case C_SWIZZLE_1: - operand->sel = 249; - operand->chan = 0; - break; - default: - fprintf(stderr, "%s %d invalid swizzle %d\n", __FILE__, __LINE__, swizzle); - return -EINVAL; - } - return 0; -} - -static struct r600_shader_node *r600_shader_new_node(struct r600_shader *rshader, struct c_node *node) -{ - struct r600_shader_node *rnode; - - rnode = CALLOC_STRUCT(r600_shader_node); - if (rnode == NULL) - return NULL; - rnode->node = node; - c_list_init(&rnode->vfetch); - c_list_init(&rnode->alu); - c_list_add_tail(rnode, &rshader->nodes); - return rnode; -} - -static int r600_shader_add_vfetch(struct r600_shader *rshader, - struct r600_shader_node *node, - struct c_instruction *instruction) -{ - struct r600_shader_vfetch *vfetch; - struct r600_shader_node *rnode; - int r; - - if (instruction == NULL) - return 0; - if (instruction->op[0].opcode != C_OPCODE_VFETCH) - return 0; - if (!c_list_empty(&node->alu)) { - rnode = r600_shader_new_node(rshader, node->node); - if (rnode == NULL) - return -ENOMEM; - node = rnode; - } - vfetch = calloc(1, sizeof(struct r600_shader_vfetch)); - if (vfetch == NULL) - return -ENOMEM; - r = r600_shader_find_gpr(rshader, instruction->op[0].output.vector, 0, &vfetch->dst[0]); - if (r) - return r; - r = r600_shader_find_gpr(rshader, instruction->op[0].input[0].vector, 0, &vfetch->src[0]); - if (r) - return r; - r = r600_shader_find_gpr(rshader, 
instruction->op[0].input[1].vector, 0, &vfetch->src[1]); - if (r) - return r; - vfetch->dst[0].chan = C_SWIZZLE_X; - vfetch->dst[1].chan = C_SWIZZLE_Y; - vfetch->dst[2].chan = C_SWIZZLE_Z; - vfetch->dst[3].chan = C_SWIZZLE_W; - c_list_add_tail(vfetch, &node->vfetch); - node->nslot += 2; - return 0; -} - -static int r600_node_translate(struct r600_shader *rshader, struct c_node *node) -{ - struct c_instruction *instruction; - struct r600_shader_node *rnode; - int r; - - rnode = r600_shader_new_node(rshader, node); - if (rnode == NULL) - return -ENOMEM; - c_list_for_each(instruction, &node->insts) { - switch (instruction->op[0].opcode) { - case C_OPCODE_VFETCH: - r = r600_shader_add_vfetch(rshader, rnode, instruction); - if (r) { - fprintf(stderr, "%s %d vfetch failed\n", __func__, __LINE__); - return r; - } - break; - default: - r = r600_shader_alu_translate(rshader, rnode, instruction); - if (r) { - fprintf(stderr, "%s %d alu failed\n", __func__, __LINE__); - return r; - } - break; - } - } - return 0; -} - -int r600_shader_translate_rec(struct r600_shader *rshader, struct c_node *node) -{ - struct c_node_link *link; - int r; - - if (node->opcode == C_OPCODE_END) - return 0; - r = r600_node_translate(rshader, node); - if (r) - return r; - c_list_for_each(link, &node->childs) { - r = r600_shader_translate_rec(rshader, link->node); - if (r) - return r; - } - return 0; -} - -static struct r600_shader_alu *r600_shader_insert_alu(struct r600_shader *rshader, struct r600_shader_node *node) -{ - struct r600_shader_alu *alu; - - alu = CALLOC_STRUCT(r600_shader_alu); - if (alu == NULL) - return NULL; - alu->alu[0].inst = INST_NOP; - alu->alu[1].inst = INST_NOP; - alu->alu[2].inst = INST_NOP; - alu->alu[3].inst = INST_NOP; - alu->alu[4].inst = INST_NOP; - alu->alu[0].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[1].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[2].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[3].opcode = 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[4].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - c_list_add_tail(alu, &node->alu); - return alu; -} - -static int r600_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *node, - struct c_instruction *instruction) -{ - struct r600_shader_node *rnode; - struct r600_shader_alu *alu; - int i, j, r, comp, litteral_lastcomp = -1; - - if (!c_list_empty(&node->vfetch)) { - rnode = r600_shader_new_node(rshader, node->node); - if (rnode == NULL) { - fprintf(stderr, "%s %d new node failed\n", __func__, __LINE__); - return -ENOMEM; - } - node = rnode; - } - - /* initialize alu */ - alu = r600_shader_insert_alu(rshader, node); - - /* check special operation like lit */ - - /* go through operation */ - for (i = 0; i < instruction->nop; i++) { - struct r600_alu_instruction *ainfo = &r600_alu_instruction[instruction->op[i].opcode]; - struct r600_instruction_info *iinfo = &r600_instruction_info[ainfo->instruction]; - unsigned comp; - - /* check that output is a valid component */ - comp = instruction->op[i].output.swizzle; - switch (comp) { - case C_SWIZZLE_X: - case C_SWIZZLE_Y: - case C_SWIZZLE_Z: - case C_SWIZZLE_W: - break; - case C_SWIZZLE_0: - case C_SWIZZLE_1: - default: - fprintf(stderr, "%s %d invalid output\n", __func__, __LINE__); - return -EINVAL; - } - alu->alu[comp].inst = ainfo->instruction; - alu->alu[comp].opcode = iinfo->opcode; - alu->alu[comp].is_op3 = iinfo->is_op3; - for (j = 0; j < instruction->op[i].ninput; j++) { - r = r600_shader_find_gpr(rshader, instruction->op[i].input[j].vector, - instruction->op[i].input[j].swizzle, &alu->alu[comp].src[j]); - if (r) { - fprintf(stderr, "%s %d register failed\n", __FILE__, __LINE__); - return r; - } - if (instruction->op[i].input[j].vector->file == C_FILE_IMMEDIATE) { - r = instruction->op[i].input[j].swizzle; - switch (r) { - case C_SWIZZLE_X: - case C_SWIZZLE_Y: - case C_SWIZZLE_Z: - case C_SWIZZLE_W: - break; - case C_SWIZZLE_0: - case 
C_SWIZZLE_1: - default: - fprintf(stderr, "%s %d invalid input\n", __func__, __LINE__); - return -EINVAL; - } - alu->literal[r] = instruction->op[i].input[j].vector->channel[r]->value; - if (r > litteral_lastcomp) { - litteral_lastcomp = r; - } - } - } - r = r600_shader_find_gpr(rshader, instruction->op[i].output.vector, - instruction->op[i].output.swizzle, &alu->alu[comp].dst); - if (r) { - fprintf(stderr, "%s %d register failed\n", __FILE__, __LINE__); - return r; - } - } - switch (litteral_lastcomp) { - case 0: - case 1: - alu->nliteral = 2; - break; - case 2: - case 3: - alu->nliteral = 4; - break; - case -1: - default: - break; - } -printf("nliteral: %d\n", alu->nliteral); - for (i = instruction->nop; i >= 0; i--) { - if (alu->alu[i].inst != INST_NOP) { - alu->alu[i].last = 1; - alu->nalu = i + 1; - break; - } - } - return 0; -} - -void r600_shader_node_place(struct r600_shader *rshader) -{ - struct r600_shader_node *node, *nnode; - struct r600_shader_alu *alu, *nalu; - struct r600_shader_vfetch *vfetch, *nvfetch; - unsigned cf_id = 0, cf_addr = 0; - - rshader->ncf = 0; - rshader->nslot = 0; - c_list_for_each_safe(node, nnode, &rshader->nodes) { - c_list_for_each_safe(alu, nalu, &node->alu) { - node->nslot += alu->nalu; - node->nslot += alu->nliteral >> 1; - } - node->nfetch = 0; - c_list_for_each_safe(vfetch, nvfetch, &node->vfetch) { - node->nslot += 2; - node->nfetch += 1; - } - if (!c_list_empty(&node->vfetch)) { - /* fetch node need to be 16 bytes aligned*/ - cf_addr += 1; - cf_addr &= 0xFFFFFFFEUL; - } - node->cf_id = cf_id; - node->cf_addr = cf_addr; - cf_id += 2; - cf_addr += node->nslot * 2; - rshader->ncf++; - } - rshader->nslot = cf_addr; - c_list_for_each_safe(node, nnode, &rshader->nodes) { - node->cf_addr += cf_id * 2; - } - rshader->ncf += rshader->cshader.files[C_FILE_OUTPUT].nvectors; - rshader->ndw = rshader->ncf * 2 + rshader->nslot * 2; -} - -int r600_shader_legalize(struct r600_shader *rshader) -{ - return 0; -} - - -static int 
r600_cshader_legalize_rec(struct c_shader *shader, struct c_node *node) -{ - struct c_node_link *link; - struct c_instruction *i; - struct c_operand operand; - unsigned k; - int r; - - c_list_for_each(i, &node->insts) { - for (k = 0; k < i->nop; k++) { - switch (i->op[k].opcode) { - case C_OPCODE_SLT: - i->op[k].opcode = C_OPCODE_SGT; - memcpy(&operand, &i->op[k].input[0], sizeof(struct c_operand)); - memcpy(&i->op[k].input[0], &i->op[k].input[1], sizeof(struct c_operand)); - memcpy(&i->op[k].input[1], &operand, sizeof(struct c_operand)); - break; - default: - break; - } - } - } - c_list_for_each(link, &node->childs) { - r = r600_cshader_legalize_rec(shader, link->node); - if (r) { - return r; - } - } - return 0; -} - -int r600_cshader_legalize(struct c_shader *shader) -{ - return r600_cshader_legalize_rec(shader, &shader->entry); -} - - -struct r600_instruction_info r600_instruction_info[] = { - {INST_ADD, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, 0, 0}, - {INST_MUL, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, 0, 0}, - {INST_MUL_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE, 0, 0}, - {INST_MAX, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, 0, 0}, - {INST_MIN, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, 0, 0}, - {INST_MAX_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_DX10, 0, 0}, - {INST_MIN_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_DX10, 0, 0}, - {INST_SETE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, 0, 0}, - {INST_SETGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, 0, 0}, - {INST_SETGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, 0, 0}, - {INST_SETNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, 0, 0}, - {INST_SETE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_DX10, 0, 0}, - {INST_SETGT_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_DX10, 0, 0}, - {INST_SETGE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_DX10, 0, 0}, - {INST_SETNE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_DX10, 0, 0}, - {INST_FRACT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, 0, 0}, - {INST_TRUNC, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, 0, 0}, - {INST_CEIL, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, 0, 0}, - {INST_RNDNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, 0, 0}, - {INST_FLOOR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, 0, 0}, - {INST_MOVA, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA, 0, 0}, - {INST_MOVA_FLOOR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR, 0, 0}, - {INST_MOVA_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, 0, 0}, - {INST_MOV, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, 0, 0}, - {INST_NOP, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 0, 0}, - {INST_PRED_SETGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT, 0, 0}, - {INST_PRED_SETGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT, 0, 0}, - {INST_PRED_SETE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE, 0, 0}, - {INST_PRED_SETGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT, 0, 0}, - {INST_PRED_SETGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE, 0, 0}, - {INST_PRED_SETNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE, 0, 0}, - {INST_PRED_SET_INV, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV, 0, 0}, - {INST_PRED_SET_POP, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP, 0, 0}, - {INST_PRED_SET_CLR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR, 0, 0}, - {INST_PRED_SET_RESTORE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE, 0, 0}, - {INST_PRED_SETE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH, 0, 0}, - {INST_PRED_SETGT_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH, 0, 0}, - {INST_PRED_SETGE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH, 0, 0}, - {INST_PRED_SETNE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH, 0, 0}, - {INST_KILLE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE, 0, 0}, - {INST_KILLGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, 0, 0}, - {INST_KILLGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE, 0, 0}, - {INST_KILLNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE, 0, 0}, - {INST_AND_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, 0, 0}, - {INST_OR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, 0, 0}, - {INST_XOR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, 0, 0}, - {INST_NOT_INT, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, 0, 0}, - {INST_ADD_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, 0, 0}, - {INST_SUB_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, 0, 0}, - {INST_MAX_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, 0, 0}, - {INST_MIN_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, 0, 0}, - {INST_MAX_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, 0, 0}, - {INST_MIN_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, 0, 0}, - {INST_SETE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, 0, 0}, - {INST_SETGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, 0, 0}, - {INST_SETGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, 0, 0}, - {INST_SETNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, 0, 0}, - {INST_SETGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, 0, 0}, - {INST_SETGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, 0, 0}, - {INST_KILLGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT, 0, 0}, - {INST_KILLGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT, 0, 0}, - {INST_PRED_SETE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT, 0, 0}, - {INST_PRED_SETGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT, 0, 0}, - {INST_PRED_SETGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT, 0, 0}, - {INST_PRED_SETNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT, 0, 0}, - {INST_KILLE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT, 0, 0}, - {INST_KILLGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT, 0, 0}, - {INST_KILLGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT, 0, 0}, - {INST_KILLNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT, 0, 0}, - {INST_PRED_SETE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT, 0, 0}, - {INST_PRED_SETGT_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT, 0, 0}, - {INST_PRED_SETGE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT, 0, 0}, - {INST_PRED_SETNE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT, 0, 0}, - {INST_PRED_SETLT_PUSH_INT, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT, 0, 0}, - {INST_PRED_SETLE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT, 0, 0}, - {INST_DOT4, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, 0, 0}, - {INST_DOT4_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE, 0, 0}, - {INST_CUBE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE, 0, 0}, - {INST_MAX4, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4, 0, 0}, - {INST_MOVA_GPR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT, 0, 0}, - {INST_EXP_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, 1, 0}, - {INST_LOG_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED, 1, 0}, - {INST_LOG_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, 1, 0}, - {INST_RECIP_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, 1, 0}, - {INST_RECIP_FF, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF, 1, 0}, - {INST_RECIP_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, 1, 0}, - {INST_RECIPSQRT_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED, 1, 0}, - {INST_RECIPSQRT_FF, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF, 1, 0}, - {INST_RECIPSQRT_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, 1, 0}, - {INST_SQRT_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE, 1, 0}, - {INST_FLT_TO_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, 1, 0}, - {INST_INT_TO_FLT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, 1, 0}, - {INST_UINT_TO_FLT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, 1, 0}, - {INST_SIN, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, 1, 0}, - {INST_COS, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, 1, 0}, - {INST_ASHR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, 1, 0}, - {INST_LSHR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, 1, 0}, - {INST_LSHL_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, 1, 0}, - {INST_MULLO_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, 1, 0}, - {INST_MULHI_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT, 1, 0}, - {INST_MULLO_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, 1, 0}, - {INST_MULHI_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT, 1, 0}, - {INST_RECIP_INT, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT, 1, 0}, - {INST_RECIP_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT, 1, 0}, - {INST_FLT_TO_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, 1, 0}, - {INST_MUL_LIT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT, 1, 1}, - {INST_MUL_LIT_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2, 1, 1}, - {INST_MUL_LIT_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4, 1, 1}, - {INST_MUL_LIT_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2, 1, 1}, - {INST_MULADD, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, 0, 1}, - {INST_MULADD_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M2, 0, 1}, - {INST_MULADD_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M4, 0, 1}, - {INST_MULADD_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_D2, 0, 1}, - {INST_MULADD_IEEE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE, 0, 1}, - {INST_MULADD_IEEE_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M2, 0, 1}, - {INST_MULADD_IEEE_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M4, 0, 1}, - {INST_MULADD_IEEE_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_D2, 0, 1}, - {INST_CNDE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE, 0, 1}, - {INST_CNDGT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT, 0, 1}, - {INST_CNDGE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE, 0, 1}, - {INST_CNDE_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT, 0, 1}, - {INST_CNDGT_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT, 0, 1}, - {INST_CNDGE_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT, 0, 1}, -}; - -struct r600_alu_instruction r600_alu_instruction[C_OPCODE_LAST] = { - {C_OPCODE_NOP, INST_NOP}, - {C_OPCODE_MOV, INST_MOV}, - {C_OPCODE_LIT, INST_NOP}, - {C_OPCODE_RCP, INST_RECIP_IEEE}, - {C_OPCODE_RSQ, INST_RECIPSQRT_IEEE}, - {C_OPCODE_EXP, INST_EXP_IEEE}, - {C_OPCODE_LOG, INST_LOG_IEEE}, - {C_OPCODE_MUL, INST_MUL}, - {C_OPCODE_ADD, INST_ADD}, - {C_OPCODE_DP3, INST_DOT4}, - {C_OPCODE_DP4, INST_DOT4}, - {C_OPCODE_DST, INST_NOP}, - {C_OPCODE_MIN, INST_MIN}, - {C_OPCODE_MAX, INST_MAX}, - {C_OPCODE_SLT, INST_NOP}, - {C_OPCODE_SGE, INST_NOP}, - {C_OPCODE_MAD, 
INST_MULADD}, - {C_OPCODE_SUB, INST_COUNT}, - {C_OPCODE_LRP, INST_NOP}, - {C_OPCODE_CND, INST_NOP}, - {20, INST_NOP}, - {C_OPCODE_DP2A, INST_NOP}, - {22, INST_NOP}, - {23, INST_NOP}, - {C_OPCODE_FRC, INST_NOP}, - {C_OPCODE_CLAMP, INST_NOP}, - {C_OPCODE_FLR, INST_NOP}, - {C_OPCODE_ROUND, INST_NOP}, - {C_OPCODE_EX2, INST_NOP}, - {C_OPCODE_LG2, INST_NOP}, - {C_OPCODE_POW, INST_NOP}, - {C_OPCODE_XPD, INST_NOP}, - {32, INST_NOP}, - {C_OPCODE_ABS, INST_COUNT}, - {C_OPCODE_RCC, INST_NOP}, - {C_OPCODE_DPH, INST_NOP}, - {C_OPCODE_COS, INST_COS}, - {C_OPCODE_DDX, INST_NOP}, - {C_OPCODE_DDY, INST_NOP}, - {C_OPCODE_KILP, INST_NOP}, - {C_OPCODE_PK2H, INST_NOP}, - {C_OPCODE_PK2US, INST_NOP}, - {C_OPCODE_PK4B, INST_NOP}, - {C_OPCODE_PK4UB, INST_NOP}, - {C_OPCODE_RFL, INST_NOP}, - {C_OPCODE_SEQ, INST_NOP}, - {C_OPCODE_SFL, INST_NOP}, - {C_OPCODE_SGT, INST_SETGT}, - {C_OPCODE_SIN, INST_SIN}, - {C_OPCODE_SLE, INST_NOP}, - {C_OPCODE_SNE, INST_NOP}, - {C_OPCODE_STR, INST_NOP}, - {C_OPCODE_TEX, INST_NOP}, - {C_OPCODE_TXD, INST_NOP}, - {C_OPCODE_TXP, INST_NOP}, - {C_OPCODE_UP2H, INST_NOP}, - {C_OPCODE_UP2US, INST_NOP}, - {C_OPCODE_UP4B, INST_NOP}, - {C_OPCODE_UP4UB, INST_NOP}, - {C_OPCODE_X2D, INST_NOP}, - {C_OPCODE_ARA, INST_NOP}, - {C_OPCODE_ARR, INST_NOP}, - {C_OPCODE_BRA, INST_NOP}, - {C_OPCODE_CAL, INST_NOP}, - {C_OPCODE_RET, INST_NOP}, - {C_OPCODE_SSG, INST_NOP}, - {C_OPCODE_CMP, INST_NOP}, - {C_OPCODE_SCS, INST_NOP}, - {C_OPCODE_TXB, INST_NOP}, - {C_OPCODE_NRM, INST_NOP}, - {C_OPCODE_DIV, INST_NOP}, - {C_OPCODE_DP2, INST_NOP}, - {C_OPCODE_TXL, INST_NOP}, - {C_OPCODE_BRK, INST_NOP}, - {C_OPCODE_IF, INST_NOP}, - {C_OPCODE_BGNFOR, INST_NOP}, - {C_OPCODE_REP, INST_NOP}, - {C_OPCODE_ELSE, INST_NOP}, - {C_OPCODE_ENDIF, INST_NOP}, - {C_OPCODE_ENDFOR, INST_NOP}, - {C_OPCODE_ENDREP, INST_NOP}, - {C_OPCODE_PUSHA, INST_NOP}, - {C_OPCODE_POPA, INST_NOP}, - {C_OPCODE_CEIL, INST_NOP}, - {C_OPCODE_I2F, INST_NOP}, - {C_OPCODE_NOT, INST_NOP}, - {C_OPCODE_TRUNC, INST_NOP}, - {C_OPCODE_SHL, 
INST_NOP}, - {88, INST_NOP}, - {C_OPCODE_AND, INST_NOP}, - {C_OPCODE_OR, INST_NOP}, - {C_OPCODE_MOD, INST_NOP}, - {C_OPCODE_XOR, INST_NOP}, - {C_OPCODE_SAD, INST_NOP}, - {C_OPCODE_TXF, INST_NOP}, - {C_OPCODE_TXQ, INST_NOP}, - {C_OPCODE_CONT, INST_NOP}, - {C_OPCODE_EMIT, INST_NOP}, - {C_OPCODE_ENDPRIM, INST_NOP}, - {C_OPCODE_BGNLOOP, INST_NOP}, - {C_OPCODE_BGNSUB, INST_NOP}, - {C_OPCODE_ENDLOOP, INST_NOP}, - {C_OPCODE_ENDSUB, INST_NOP}, - {103, INST_NOP}, - {104, INST_NOP}, - {105, INST_NOP}, - {106, INST_NOP}, - {107, INST_NOP}, - {108, INST_NOP}, - {109, INST_NOP}, - {110, INST_NOP}, - {111, INST_NOP}, - {C_OPCODE_NRM4, INST_NOP}, - {C_OPCODE_CALLNZ, INST_NOP}, - {C_OPCODE_IFC, INST_NOP}, - {C_OPCODE_BREAKC, INST_NOP}, - {C_OPCODE_KIL, INST_NOP}, - {C_OPCODE_END, INST_NOP}, - {118, INST_NOP}, - {C_OPCODE_F2I, INST_NOP}, - {C_OPCODE_IDIV, INST_NOP}, - {C_OPCODE_IMAX, INST_NOP}, - {C_OPCODE_IMIN, INST_NOP}, - {C_OPCODE_INEG, INST_NOP}, - {C_OPCODE_ISGE, INST_NOP}, - {C_OPCODE_ISHR, INST_NOP}, - {C_OPCODE_ISLT, INST_NOP}, - {C_OPCODE_F2U, INST_NOP}, - {C_OPCODE_U2F, INST_NOP}, - {C_OPCODE_UADD, INST_NOP}, - {C_OPCODE_UDIV, INST_NOP}, - {C_OPCODE_UMAD, INST_NOP}, - {C_OPCODE_UMAX, INST_NOP}, - {C_OPCODE_UMIN, INST_NOP}, - {C_OPCODE_UMOD, INST_NOP}, - {C_OPCODE_UMUL, INST_NOP}, - {C_OPCODE_USEQ, INST_NOP}, - {C_OPCODE_USGE, INST_NOP}, - {C_OPCODE_USHR, INST_NOP}, - {C_OPCODE_USLT, INST_NOP}, - {C_OPCODE_USNE, INST_NOP}, - {C_OPCODE_SWITCH, INST_NOP}, - {C_OPCODE_CASE, INST_NOP}, - {C_OPCODE_DEFAULT, INST_NOP}, - {C_OPCODE_ENDSWITCH, INST_NOP}, - {C_OPCODE_VFETCH, INST_NOP}, - {C_OPCODE_ENTRY, INST_NOP}, - {C_OPCODE_ARL, INST_NOP}, -}; diff --git a/src/gallium/drivers/r600/r600_compiler_r700.c b/src/gallium/drivers/r600/r600_compiler_r700.c deleted file mode 100644 index 809a57ae5c..0000000000 --- a/src/gallium/drivers/r600/r600_compiler_r700.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby 
granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#include <stdlib.h> -#include <string.h> -#include <stdint.h> -#include <stdio.h> -#include <errno.h> -#include "r600_context.h" -#include "r700_sq.h" - -static int r700_shader_cf_node_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - unsigned *cid) -{ - unsigned id = *cid; - - if (rnode->nfetch) { - rshader->bcode[id++] = S_SQ_CF_WORD0_ADDR(rnode->cf_addr >> 1); - rshader->bcode[id++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(rnode->nfetch - 1); - } else { - rshader->bcode[id++] = S_SQ_CF_ALU_WORD0_ADDR(rnode->cf_addr >> 1); - rshader->bcode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) | - S_SQ_CF_ALU_WORD1_BARRIER(1) | - S_SQ_CF_ALU_WORD1_COUNT(rnode->nslot - 1); - } - *cid = id; - return 0; -} - -static int r700_shader_cf_output_bytecode(struct r600_shader *rshader, - struct c_vector *v, - unsigned *cid, - unsigned end) -{ - struct r600_shader_operand out; - unsigned id = *cid; - int r; - - r = r600_shader_find_gpr(rshader, v, 0, &out); - if (r) - return r; - rshader->bcode[id + 0] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(out.sel) | - S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(3); - rshader->bcode[id + 1] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(0) | - S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(1) | - S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(2) | - S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(3) | - S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(1) | - S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE) | - S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end); - switch (v->name) { - case C_SEMANTIC_POSITION: - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(60) | - S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); - break; - case C_SEMANTIC_COLOR: - if (rshader->cshader.type == C_PROGRAM_TYPE_VS) { - rshader->output[rshader->noutput].gpr = out.sel; - rshader->output[rshader->noutput].sid = v->sid; - 
rshader->output[rshader->noutput].name = v->name; - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(rshader->noutput++) | - S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - } else { - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(0) | - S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); - } - break; - case C_SEMANTIC_GENERIC: - rshader->output[rshader->noutput].gpr = out.sel; - rshader->output[rshader->noutput].sid = v->sid; - rshader->output[rshader->noutput].name = v->name; - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(rshader->noutput++) | - S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - break; - default: - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - *cid = id + 2; - return 0; -} - -static int r700_shader_alu_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - struct r600_shader_inst *alu, - unsigned *cid) -{ - unsigned id = *cid; - - /* don't replace gpr by pv or ps for destination register */ - if (alu->is_op3) { - rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_LAST(alu->last); - rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | - S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | - S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | - S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | - S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | - S_SQ_ALU_WORD1_OP3_ALU_INST(alu->opcode) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); - } else { - rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - 
S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | - S_SQ_ALU_WORD0_LAST(alu->last); - rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | - S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | - S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | - S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | - S_SQ_ALU_WORD1_OP2_WRITE_MASK(1) | - S_SQ_ALU_WORD1_OP2_ALU_INST(alu->opcode) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); - } - *cid = id; - return 0; -} - -int r700_shader_translate(struct r600_shader *rshader) -{ - struct c_shader *shader = &rshader->cshader; - struct r600_shader_node *rnode; - struct r600_shader_vfetch *vfetch; - struct r600_shader_alu *alu; - struct c_vector *v; - unsigned id, i, end; - int r; - - r = r600_shader_register(rshader); - if (r) { - fprintf(stderr, "%s %d register allocation failed\n", __FILE__, __LINE__); - return r; - } - r = r600_shader_translate_rec(rshader, &shader->entry); - if (r) { - fprintf(stderr, "%s %d translation failed\n", __FILE__, __LINE__); - return r; - } - r = r600_shader_legalize(rshader); - if (r) { - fprintf(stderr, "%s %d legalize failed\n", __FILE__, __LINE__); - return r; - } - r600_shader_node_place(rshader); - rshader->bcode = malloc(rshader->ndw * 4); - if (rshader->bcode == NULL) - return -ENOMEM; - c_list_for_each(rnode, &rshader->nodes) { - id = rnode->cf_addr; - c_list_for_each(vfetch, &rnode->vfetch) { - r = r600_shader_vfetch_bytecode(rshader, rnode, vfetch, &id); - if (r) - return r; - } - c_list_for_each(alu, &rnode->alu) { - for (i = 0; i < alu->nalu; i++) { - r = r700_shader_alu_bytecode(rshader, rnode, &alu->alu[i], &id); - if (r) - return r; - } - for (i = 0; i < alu->nliteral; i++) { - rshader->bcode[id++] = alu->literal[i]; - } - } - } - id = 0; - c_list_for_each(rnode, &rshader->nodes) { - r = r700_shader_cf_node_bytecode(rshader, rnode, &id); - if (r) - return r; - } - c_list_for_each(v, &rshader->cshader.files[C_FILE_OUTPUT].vectors) { - end = 0; - if (v->next == &rshader->cshader.files[C_FILE_OUTPUT].vectors) - end = 
1; - r = r700_shader_cf_output_bytecode(rshader, v, &id, end); - if (r) - return r; - } - c_list_for_each(v, &rshader->cshader.files[C_FILE_INPUT].vectors) { - rshader->input[rshader->ninput].gpr = rshader->ninput; - rshader->input[rshader->ninput].sid = v->sid; - rshader->input[rshader->ninput].name = v->name; - rshader->ninput++; - } - return 0; -} diff --git a/src/gallium/drivers/r600/r600_compiler_tgsi.c b/src/gallium/drivers/r600/r600_compiler_tgsi.c deleted file mode 100644 index 172cf154a3..0000000000 --- a/src/gallium/drivers/r600/r600_compiler_tgsi.c +++ /dev/null @@ -1,730 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#include <stdlib.h> -#include <string.h> -#include <stdint.h> -#include <stdio.h> -#include <errno.h> -#include <tgsi/tgsi_parse.h> -#include <tgsi/tgsi_scan.h> -#include "r600_shader.h" -#include "r600_context.h" - -struct tgsi_shader { - struct c_vector **v[TGSI_FILE_COUNT]; - struct tgsi_shader_info info; - struct tgsi_parse_context parser; - const struct tgsi_token *tokens; - struct c_shader *shader; - struct c_node *node; -}; - -static unsigned tgsi_file_to_c_file(unsigned file); -static unsigned tgsi_sname_to_c_sname(unsigned sname); -static int tgsi_opcode_to_c_opcode(unsigned opcode, unsigned *copcode); - -static int tgsi_shader_init(struct tgsi_shader *ts, - const struct tgsi_token *tokens, - struct c_shader *shader) -{ - int i; - - ts->shader = shader; - ts->tokens = tokens; - tgsi_scan_shader(ts->tokens, &ts->info); - tgsi_parse_init(&ts->parser, ts->tokens); - /* initialize to NULL in case of error */ - for (i = 0; i < C_FILE_COUNT; i++) { - ts->v[i] = NULL; - } - for (i = 0; i < TGSI_FILE_COUNT; i++) { - if (ts->info.file_count[i] > 0) { - ts->v[i] = calloc(ts->info.file_count[i], sizeof(void*)); - if (ts->v[i] == NULL) { - fprintf(stderr, "%s:%d unsupported %d %d\n", __func__, __LINE__, i, ts->info.file_count[i]); - return -ENOMEM; - } - } - } - return 0; -} - -static void tgsi_shader_destroy(struct tgsi_shader *ts) -{ - int i; - - for (i = 0; i < TGSI_FILE_COUNT; i++) { - free(ts->v[i]); - } - tgsi_parse_free(&ts->parser); -} - -static int ntransform_declaration(struct tgsi_shader *ts) -{ - struct tgsi_full_declaration *fd = &ts->parser.FullToken.FullDeclaration; - struct c_vector *v; - unsigned file; - unsigned name; - int sid; - int i; - - if (fd->Declaration.Dimension) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - for (i = fd->Range.First ; i <= fd->Range.Last; i++) { - sid = i; - name = C_SEMANTIC_GENERIC; - file = tgsi_file_to_c_file(fd->Declaration.File); - if (file == TGSI_FILE_NULL) { - 
fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fd->Declaration.Semantic) { - name = tgsi_sname_to_c_sname(fd->Semantic.Name); - sid = fd->Semantic.Index; - } - v = c_shader_vector_new(ts->shader, file, name, sid); - if (v == NULL) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -ENOMEM; - } - ts->v[fd->Declaration.File][i] = v; - } - return 0; -} - -static int ntransform_immediate(struct tgsi_shader *ts) -{ - struct tgsi_full_immediate *fd = &ts->parser.FullToken.FullImmediate; - struct c_vector *v; - unsigned file; - unsigned name; - - if (fd->Immediate.DataType != TGSI_IMM_FLOAT32) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - name = C_SEMANTIC_GENERIC; - file = C_FILE_IMMEDIATE; - v = c_shader_vector_new(ts->shader, file, name, 0); - if (v == NULL) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -ENOMEM; - } - v->channel[0]->value = fd->u[0].Uint; - v->channel[1]->value = fd->u[1].Uint; - v->channel[2]->value = fd->u[2].Uint; - v->channel[3]->value = fd->u[3].Uint; - ts->v[TGSI_FILE_IMMEDIATE][0] = v; - return 0; -} - -static int ntransform_instruction(struct tgsi_shader *ts) -{ - struct tgsi_full_instruction *fi = &ts->parser.FullToken.FullInstruction; - struct c_shader *shader = ts->shader; - struct c_instruction instruction; - unsigned opcode; - int i, j, r; - - if (fi->Instruction.NumDstRegs > 1) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Saturate) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Predicate) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Label) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Texture) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return 
-EINVAL; - } - for (i = 0; i < fi->Instruction.NumSrcRegs; i++) { - if (fi->Src[i].Register.Indirect || - fi->Src[i].Register.Dimension || - fi->Src[i].Register.Absolute) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - } - for (i = 0; i < fi->Instruction.NumDstRegs; i++) { - if (fi->Dst[i].Register.Indirect || fi->Dst[i].Register.Dimension) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - } - r = tgsi_opcode_to_c_opcode(fi->Instruction.Opcode, &opcode); - if (r) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return r; - } - if (opcode == C_OPCODE_END) { - return c_node_cfg_link(ts->node, &shader->end); - } - /* FIXME add flow instruction handling */ - memset(&instruction, 0, sizeof(struct c_instruction)); - instruction.nop = 0; - for (j = 0; j < 4; j++) { - instruction.op[instruction.nop].opcode = opcode; - instruction.op[instruction.nop].ninput = fi->Instruction.NumSrcRegs; - for (i = 0; i < fi->Instruction.NumSrcRegs; i++) { - instruction.op[instruction.nop].input[i].vector = ts->v[fi->Src[i].Register.File][fi->Src[i].Register.Index]; - switch (j) { - case 0: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleX; - break; - case 1: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleY; - break; - case 2: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleZ; - break; - case 3: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleW; - break; - default: - return -EINVAL; - } - } - instruction.op[instruction.nop].output.vector = ts->v[fi->Dst[0].Register.File][fi->Dst[0].Register.Index]; - switch (j) { - case 0: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_X : C_SWIZZLE_D; - break; - case 1: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? 
C_SWIZZLE_Y : C_SWIZZLE_D; - break; - case 2: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_Z : C_SWIZZLE_D; - break; - case 3: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_W : C_SWIZZLE_D; - break; - default: - return -EINVAL; - } - instruction.nop++; - } - return c_node_add_new_instruction(ts->node, &instruction); -} - -int c_shader_from_tgsi(struct c_shader *shader, unsigned type, - const struct tgsi_token *tokens) -{ - struct tgsi_shader ts; - int r = 0; - - c_shader_init(shader, type); - r = tgsi_shader_init(&ts, tokens, shader); - if (r) - goto out_err; - ts.shader = shader; - ts.node = &shader->entry; - while (!tgsi_parse_end_of_tokens(&ts.parser)) { - tgsi_parse_token(&ts.parser); - switch (ts.parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - r = ntransform_immediate(&ts); - if (r) - goto out_err; - break; - case TGSI_TOKEN_TYPE_DECLARATION: - r = ntransform_declaration(&ts); - if (r) - goto out_err; - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - r = ntransform_instruction(&ts); - if (r) - goto out_err; - break; - default: - r = -EINVAL; - goto out_err; - } - } - tgsi_shader_destroy(&ts); - return 0; -out_err: - c_shader_destroy(shader); - tgsi_shader_destroy(&ts); - return r; -} - -static unsigned tgsi_file_to_c_file(unsigned file) -{ - switch (file) { - case TGSI_FILE_CONSTANT: - return C_FILE_CONSTANT; - case TGSI_FILE_INPUT: - return C_FILE_INPUT; - case TGSI_FILE_OUTPUT: - return C_FILE_OUTPUT; - case TGSI_FILE_TEMPORARY: - return C_FILE_TEMPORARY; - case TGSI_FILE_SAMPLER: - return C_FILE_SAMPLER; - case TGSI_FILE_ADDRESS: - return C_FILE_ADDRESS; - case TGSI_FILE_IMMEDIATE: - return C_FILE_IMMEDIATE; - case TGSI_FILE_PREDICATE: - return C_FILE_PREDICATE; - case TGSI_FILE_SYSTEM_VALUE: - return C_FILE_SYSTEM_VALUE; - case TGSI_FILE_NULL: - return C_FILE_NULL; - default: - fprintf(stderr, "%s:%d unsupported file %d\n", 
__func__, __LINE__, file); - return C_FILE_NULL; - } -} - -static unsigned tgsi_sname_to_c_sname(unsigned sname) -{ - switch (sname) { - case TGSI_SEMANTIC_POSITION: - return C_SEMANTIC_POSITION; - case TGSI_SEMANTIC_COLOR: - return C_SEMANTIC_COLOR; - case TGSI_SEMANTIC_BCOLOR: - return C_SEMANTIC_BCOLOR; - case TGSI_SEMANTIC_FOG: - return C_SEMANTIC_FOG; - case TGSI_SEMANTIC_PSIZE: - return C_SEMANTIC_PSIZE; - case TGSI_SEMANTIC_GENERIC: - return C_SEMANTIC_GENERIC; - case TGSI_SEMANTIC_NORMAL: - return C_SEMANTIC_NORMAL; - case TGSI_SEMANTIC_FACE: - return C_SEMANTIC_FACE; - case TGSI_SEMANTIC_EDGEFLAG: - return C_SEMANTIC_EDGEFLAG; - case TGSI_SEMANTIC_PRIMID: - return C_SEMANTIC_PRIMID; - case TGSI_SEMANTIC_INSTANCEID: - return C_SEMANTIC_INSTANCEID; - default: - return C_SEMANTIC_GENERIC; - } -} - -static int tgsi_opcode_to_c_opcode(unsigned opcode, unsigned *copcode) -{ - switch (opcode) { - case TGSI_OPCODE_MOV: - *copcode = C_OPCODE_MOV; - return 0; - case TGSI_OPCODE_MUL: - *copcode = C_OPCODE_MUL; - return 0; - case TGSI_OPCODE_MAD: - *copcode = C_OPCODE_MAD; - return 0; - case TGSI_OPCODE_END: - *copcode = C_OPCODE_END; - return 0; - case TGSI_OPCODE_ARL: - *copcode = C_OPCODE_ARL; - return 0; - case TGSI_OPCODE_LIT: - *copcode = C_OPCODE_LIT; - return 0; - case TGSI_OPCODE_RCP: - *copcode = C_OPCODE_RCP; - return 0; - case TGSI_OPCODE_RSQ: - *copcode = C_OPCODE_RSQ; - return 0; - case TGSI_OPCODE_EXP: - *copcode = C_OPCODE_EXP; - return 0; - case TGSI_OPCODE_LOG: - *copcode = C_OPCODE_LOG; - return 0; - case TGSI_OPCODE_ADD: - *copcode = C_OPCODE_ADD; - return 0; - case TGSI_OPCODE_DP3: - *copcode = C_OPCODE_DP3; - return 0; - case TGSI_OPCODE_DP4: - *copcode = C_OPCODE_DP4; - return 0; - case TGSI_OPCODE_DST: - *copcode = C_OPCODE_DST; - return 0; - case TGSI_OPCODE_MIN: - *copcode = C_OPCODE_MIN; - return 0; - case TGSI_OPCODE_MAX: - *copcode = C_OPCODE_MAX; - return 0; - case TGSI_OPCODE_SLT: - *copcode = C_OPCODE_SLT; - return 0; - case 
TGSI_OPCODE_SGE: - *copcode = C_OPCODE_SGE; - return 0; - case TGSI_OPCODE_SUB: - *copcode = C_OPCODE_SUB; - return 0; - case TGSI_OPCODE_LRP: - *copcode = C_OPCODE_LRP; - return 0; - case TGSI_OPCODE_CND: - *copcode = C_OPCODE_CND; - return 0; - case TGSI_OPCODE_DP2A: - *copcode = C_OPCODE_DP2A; - return 0; - case TGSI_OPCODE_FRC: - *copcode = C_OPCODE_FRC; - return 0; - case TGSI_OPCODE_CLAMP: - *copcode = C_OPCODE_CLAMP; - return 0; - case TGSI_OPCODE_FLR: - *copcode = C_OPCODE_FLR; - return 0; - case TGSI_OPCODE_ROUND: - *copcode = C_OPCODE_ROUND; - return 0; - case TGSI_OPCODE_EX2: - *copcode = C_OPCODE_EX2; - return 0; - case TGSI_OPCODE_LG2: - *copcode = C_OPCODE_LG2; - return 0; - case TGSI_OPCODE_POW: - *copcode = C_OPCODE_POW; - return 0; - case TGSI_OPCODE_XPD: - *copcode = C_OPCODE_XPD; - return 0; - case TGSI_OPCODE_ABS: - *copcode = C_OPCODE_ABS; - return 0; - case TGSI_OPCODE_RCC: - *copcode = C_OPCODE_RCC; - return 0; - case TGSI_OPCODE_DPH: - *copcode = C_OPCODE_DPH; - return 0; - case TGSI_OPCODE_COS: - *copcode = C_OPCODE_COS; - return 0; - case TGSI_OPCODE_DDX: - *copcode = C_OPCODE_DDX; - return 0; - case TGSI_OPCODE_DDY: - *copcode = C_OPCODE_DDY; - return 0; - case TGSI_OPCODE_KILP: - *copcode = C_OPCODE_KILP; - return 0; - case TGSI_OPCODE_PK2H: - *copcode = C_OPCODE_PK2H; - return 0; - case TGSI_OPCODE_PK2US: - *copcode = C_OPCODE_PK2US; - return 0; - case TGSI_OPCODE_PK4B: - *copcode = C_OPCODE_PK4B; - return 0; - case TGSI_OPCODE_PK4UB: - *copcode = C_OPCODE_PK4UB; - return 0; - case TGSI_OPCODE_RFL: - *copcode = C_OPCODE_RFL; - return 0; - case TGSI_OPCODE_SEQ: - *copcode = C_OPCODE_SEQ; - return 0; - case TGSI_OPCODE_SFL: - *copcode = C_OPCODE_SFL; - return 0; - case TGSI_OPCODE_SGT: - *copcode = C_OPCODE_SGT; - return 0; - case TGSI_OPCODE_SIN: - *copcode = C_OPCODE_SIN; - return 0; - case TGSI_OPCODE_SLE: - *copcode = C_OPCODE_SLE; - return 0; - case TGSI_OPCODE_SNE: - *copcode = C_OPCODE_SNE; - return 0; - case TGSI_OPCODE_STR: - 
*copcode = C_OPCODE_STR; - return 0; - case TGSI_OPCODE_TEX: - *copcode = C_OPCODE_TEX; - return 0; - case TGSI_OPCODE_TXD: - *copcode = C_OPCODE_TXD; - return 0; - case TGSI_OPCODE_TXP: - *copcode = C_OPCODE_TXP; - return 0; - case TGSI_OPCODE_UP2H: - *copcode = C_OPCODE_UP2H; - return 0; - case TGSI_OPCODE_UP2US: - *copcode = C_OPCODE_UP2US; - return 0; - case TGSI_OPCODE_UP4B: - *copcode = C_OPCODE_UP4B; - return 0; - case TGSI_OPCODE_UP4UB: - *copcode = C_OPCODE_UP4UB; - return 0; - case TGSI_OPCODE_X2D: - *copcode = C_OPCODE_X2D; - return 0; - case TGSI_OPCODE_ARA: - *copcode = C_OPCODE_ARA; - return 0; - case TGSI_OPCODE_ARR: - *copcode = C_OPCODE_ARR; - return 0; - case TGSI_OPCODE_BRA: - *copcode = C_OPCODE_BRA; - return 0; - case TGSI_OPCODE_CAL: - *copcode = C_OPCODE_CAL; - return 0; - case TGSI_OPCODE_RET: - *copcode = C_OPCODE_RET; - return 0; - case TGSI_OPCODE_SSG: - *copcode = C_OPCODE_SSG; - return 0; - case TGSI_OPCODE_CMP: - *copcode = C_OPCODE_CMP; - return 0; - case TGSI_OPCODE_SCS: - *copcode = C_OPCODE_SCS; - return 0; - case TGSI_OPCODE_TXB: - *copcode = C_OPCODE_TXB; - return 0; - case TGSI_OPCODE_NRM: - *copcode = C_OPCODE_NRM; - return 0; - case TGSI_OPCODE_DIV: - *copcode = C_OPCODE_DIV; - return 0; - case TGSI_OPCODE_DP2: - *copcode = C_OPCODE_DP2; - return 0; - case TGSI_OPCODE_TXL: - *copcode = C_OPCODE_TXL; - return 0; - case TGSI_OPCODE_BRK: - *copcode = C_OPCODE_BRK; - return 0; - case TGSI_OPCODE_IF: - *copcode = C_OPCODE_IF; - return 0; - case TGSI_OPCODE_ELSE: - *copcode = C_OPCODE_ELSE; - return 0; - case TGSI_OPCODE_ENDIF: - *copcode = C_OPCODE_ENDIF; - return 0; - case TGSI_OPCODE_PUSHA: - *copcode = C_OPCODE_PUSHA; - return 0; - case TGSI_OPCODE_POPA: - *copcode = C_OPCODE_POPA; - return 0; - case TGSI_OPCODE_CEIL: - *copcode = C_OPCODE_CEIL; - return 0; - case TGSI_OPCODE_I2F: - *copcode = C_OPCODE_I2F; - return 0; - case TGSI_OPCODE_NOT: - *copcode = C_OPCODE_NOT; - return 0; - case TGSI_OPCODE_TRUNC: - *copcode = 
C_OPCODE_TRUNC; - return 0; - case TGSI_OPCODE_SHL: - *copcode = C_OPCODE_SHL; - return 0; - case TGSI_OPCODE_AND: - *copcode = C_OPCODE_AND; - return 0; - case TGSI_OPCODE_OR: - *copcode = C_OPCODE_OR; - return 0; - case TGSI_OPCODE_MOD: - *copcode = C_OPCODE_MOD; - return 0; - case TGSI_OPCODE_XOR: - *copcode = C_OPCODE_XOR; - return 0; - case TGSI_OPCODE_SAD: - *copcode = C_OPCODE_SAD; - return 0; - case TGSI_OPCODE_TXF: - *copcode = C_OPCODE_TXF; - return 0; - case TGSI_OPCODE_TXQ: - *copcode = C_OPCODE_TXQ; - return 0; - case TGSI_OPCODE_CONT: - *copcode = C_OPCODE_CONT; - return 0; - case TGSI_OPCODE_EMIT: - *copcode = C_OPCODE_EMIT; - return 0; - case TGSI_OPCODE_ENDPRIM: - *copcode = C_OPCODE_ENDPRIM; - return 0; - case TGSI_OPCODE_BGNLOOP: - *copcode = C_OPCODE_BGNLOOP; - return 0; - case TGSI_OPCODE_BGNSUB: - *copcode = C_OPCODE_BGNSUB; - return 0; - case TGSI_OPCODE_ENDLOOP: - *copcode = C_OPCODE_ENDLOOP; - return 0; - case TGSI_OPCODE_ENDSUB: - *copcode = C_OPCODE_ENDSUB; - return 0; - case TGSI_OPCODE_NOP: - *copcode = C_OPCODE_NOP; - return 0; - case TGSI_OPCODE_NRM4: - *copcode = C_OPCODE_NRM4; - return 0; - case TGSI_OPCODE_CALLNZ: - *copcode = C_OPCODE_CALLNZ; - return 0; - case TGSI_OPCODE_IFC: - *copcode = C_OPCODE_IFC; - return 0; - case TGSI_OPCODE_BREAKC: - *copcode = C_OPCODE_BREAKC; - return 0; - case TGSI_OPCODE_KIL: - *copcode = C_OPCODE_KIL; - return 0; - case TGSI_OPCODE_F2I: - *copcode = C_OPCODE_F2I; - return 0; - case TGSI_OPCODE_IDIV: - *copcode = C_OPCODE_IDIV; - return 0; - case TGSI_OPCODE_IMAX: - *copcode = C_OPCODE_IMAX; - return 0; - case TGSI_OPCODE_IMIN: - *copcode = C_OPCODE_IMIN; - return 0; - case TGSI_OPCODE_INEG: - *copcode = C_OPCODE_INEG; - return 0; - case TGSI_OPCODE_ISGE: - *copcode = C_OPCODE_ISGE; - return 0; - case TGSI_OPCODE_ISHR: - *copcode = C_OPCODE_ISHR; - return 0; - case TGSI_OPCODE_ISLT: - *copcode = C_OPCODE_ISLT; - return 0; - case TGSI_OPCODE_F2U: - *copcode = C_OPCODE_F2U; - return 0; - case 
TGSI_OPCODE_U2F: - *copcode = C_OPCODE_U2F; - return 0; - case TGSI_OPCODE_UADD: - *copcode = C_OPCODE_UADD; - return 0; - case TGSI_OPCODE_UDIV: - *copcode = C_OPCODE_UDIV; - return 0; - case TGSI_OPCODE_UMAD: - *copcode = C_OPCODE_UMAD; - return 0; - case TGSI_OPCODE_UMAX: - *copcode = C_OPCODE_UMAX; - return 0; - case TGSI_OPCODE_UMIN: - *copcode = C_OPCODE_UMIN; - return 0; - case TGSI_OPCODE_UMOD: - *copcode = C_OPCODE_UMOD; - return 0; - case TGSI_OPCODE_UMUL: - *copcode = C_OPCODE_UMUL; - return 0; - case TGSI_OPCODE_USEQ: - *copcode = C_OPCODE_USEQ; - return 0; - case TGSI_OPCODE_USGE: - *copcode = C_OPCODE_USGE; - return 0; - case TGSI_OPCODE_USHR: - *copcode = C_OPCODE_USHR; - return 0; - case TGSI_OPCODE_USLT: - *copcode = C_OPCODE_USLT; - return 0; - case TGSI_OPCODE_USNE: - *copcode = C_OPCODE_USNE; - return 0; - case TGSI_OPCODE_SWITCH: - *copcode = C_OPCODE_SWITCH; - return 0; - case TGSI_OPCODE_CASE: - *copcode = C_OPCODE_CASE; - return 0; - case TGSI_OPCODE_DEFAULT: - *copcode = C_OPCODE_DEFAULT; - return 0; - case TGSI_OPCODE_ENDSWITCH: - *copcode = C_OPCODE_ENDSWITCH; - return 0; - default: - fprintf(stderr, "%s:%d unsupported opcode %d\n", __func__, __LINE__, opcode); - return -EINVAL; - } -} diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 0a7efe3bfb..05575b5767 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -32,6 +32,7 @@ #include "r600_resource.h" #include "r600_screen.h" #include "r600_context.h" +#include "r600d.h" static void r600_destroy_context(struct pipe_context *context) { @@ -62,6 +63,245 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, dc++; } +static void r600_init_config(struct r600_context *rctx) +{ + int ps_prio; + int vs_prio; + int gs_prio; + int es_prio; + int num_ps_gprs; + int num_vs_gprs; + int num_gs_gprs; + int num_es_gprs; + int num_temp_gprs; + int num_ps_threads; + int num_vs_threads; + int 
num_gs_threads; + int num_es_threads; + int num_ps_stack_entries; + int num_vs_stack_entries; + int num_gs_stack_entries; + int num_es_stack_entries; + enum radeon_family family; + + family = radeon_get_family(rctx->rw); + ps_prio = 0; + vs_prio = 1; + gs_prio = 2; + es_prio = 3; + switch (family) { + case CHIP_R600: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV630: + case CHIP_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + default: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV770: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + 
num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + } + printf("ps_prio : %d\n", ps_prio); + printf("vs_prio : %d\n", vs_prio); + printf("gs_prio : %d\n", gs_prio); + printf("es_prio : %d\n", es_prio); + printf("num_ps_gprs : %d\n", num_ps_gprs); + printf("num_vs_gprs : %d\n", num_vs_gprs); + printf("num_gs_gprs : %d\n", num_gs_gprs); + printf("num_es_gprs : %d\n", num_es_gprs); + printf("num_temp_gprs : %d\n", num_temp_gprs); + printf("num_ps_threads : %d\n", num_ps_threads); + printf("num_vs_threads : %d\n", num_vs_threads); + printf("num_gs_threads : %d\n", num_gs_threads); + printf("num_es_threads : %d\n", num_es_threads); + printf("num_ps_stack_entries : %d\n", num_ps_stack_entries); + printf("num_vs_stack_entries : %d\n", num_vs_stack_entries); + printf("num_gs_stack_entries : %d\n", num_gs_stack_entries); + printf("num_es_stack_entries : %d\n", num_es_stack_entries); + + rctx->config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); + + rctx->config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; + switch (family) { + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + case CHIP_RV710: + break; + default: + rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); + break; + } + rctx->config->states[R600_CONFIG__SQ_CONFIG] |= 
S_008C00_DX9_CONSTS(1); + rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); + rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); + rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); + rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); + rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); + + rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; + rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + + rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; + rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_ES_GPRS(num_es_gprs); + + rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; + rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); + rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); + rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); + rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); + + rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; + rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + + rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; + rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= 
S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + + rctx->config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; + rctx->config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; + rctx->config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; + rctx->config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; + rctx->config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; + rctx->config->states[R600_CONFIG__SX_MISC] = 0x00000000; + rctx->config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; + rctx->config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; + rctx->config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; 
+ rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; + rctx->config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; + rctx->config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; + rctx->config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; + rctx->config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; + radeon_state_pm4(rctx->config); +} + struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) { struct r600_context *rctx = CALLOC_STRUCT(r600_context); @@ -107,49 +347,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; radeon_state_pm4(rctx->cb_cntl); - rctx->config = radeon_state(rscreen->rw, R600_CONFIG_TYPE, R600_CONFIG); - rctx->config->states[R600_CONFIG__SQ_CONFIG] = 0xE400000C; - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0x403800C0; - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0x00003090; - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0x00800080; - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; - rctx->config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; - rctx->config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; - rctx->config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; - rctx->config->states[R600_CONFIG__SX_MISC] = 0x00000000; - rctx->config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - 
rctx->config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; - rctx->config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; - rctx->config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; - 
radeon_state_pm4(rctx->config); + r600_init_config(rctx); rctx->ctx = radeon_ctx(rscreen->rw); rctx->draw = radeon_draw(rscreen->rw); diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index f27ff58ed4..669aaec0b2 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -40,7 +40,6 @@ struct r600_vertex_elements_state }; struct r600_pipe_shader { - unsigned type; struct r600_shader shader; struct radeon_bo *bo; struct radeon_state *state; @@ -92,8 +91,10 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, - unsigned type, const struct tgsi_token *tokens); int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); +#define R600_ERR(fmt, args...) \ + fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) + #endif diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index e3175b627a..7241ab1c17 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -27,6 +27,7 @@ #include <errno.h> #include <util/u_inlines.h> #include "r600_screen.h" +#include "r600_context.h" #include "r600d.h" int r600_conv_pipe_format(unsigned pformat, unsigned *format) @@ -49,6 +50,12 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_R8G8B8A8_SSCALED: *format = V_0280A0_COLOR_8_8_8_8; return 0; + case PIPE_FORMAT_R32_FLOAT: + *format = V_0280A0_COLOR_32_FLOAT; + return 0; + case PIPE_FORMAT_R32G32_FLOAT: + *format = V_0280A0_COLOR_32_32_FLOAT; + return 0; case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: @@ -60,8 +67,6 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_R64G64_FLOAT: case 
PIPE_FORMAT_R64G64B64_FLOAT: case PIPE_FORMAT_R64G64B64A64_FLOAT: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R32_UNORM: case PIPE_FORMAT_R32G32_UNORM: case PIPE_FORMAT_R32G32B32_UNORM: @@ -111,7 +116,7 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_R32G32B32_FIXED: case PIPE_FORMAT_R32G32B32A32_FIXED: default: - fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, pformat); + R600_ERR("unsupported %d\n", pformat); return -EINVAL; } } diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h new file mode 100644 index 0000000000..1d89c9f9f6 --- /dev/null +++ b/src/gallium/drivers/r600/r600_public.h @@ -0,0 +1,9 @@ + +#ifndef R600_PUBLIC_H +#define R600_PUBLIC_H + +struct radeon; + +struct pipe_screen* r600_screen_create(struct radeon *rw); + +#endif diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index 1d83383fd9..dec6fa8d27 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -31,6 +31,7 @@ #include "r600_screen.h" #include "r600_texture.h" #include "r600_context.h" +#include "r600_public.h" #include <stdio.h> static const char* r600_get_vendor(struct pipe_screen* pscreen) @@ -40,7 +41,13 @@ static const char* r600_get_vendor(struct pipe_screen* pscreen) static const char* r600_get_name(struct pipe_screen* pscreen) { - return "R600/R700 (HD2XXX,HD3XXX,HD4XXX)"; + struct r600_screen *screen = r600_screen(pscreen); + enum radeon_family family = radeon_get_family(screen->rw); + + if (family >= CHIP_R600 && family < CHIP_RV770) + return "R600 (HD2XXX,HD3XXX)"; + else + return "R700 (HD4XXX)"; } static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) @@ -240,7 +247,7 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) FREE(rscreen); } -struct pipe_screen *radeon_create_screen(struct radeon *rw) +struct pipe_screen 
*r600_screen_create(struct radeon *rw) { struct r600_screen* rscreen; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6b29d33379..e5e6786fd0 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -19,40 +19,113 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse */ -#include <stdio.h> -#include <errno.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <tgsi/tgsi_dump.h> +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_dump.h" +#include "util/u_format.h" #include "r600_screen.h" #include "r600_context.h" +#include "r600_shader.h" +#include "r600_asm.h" +#include "r600_sq.h" #include "r600d.h" +#include <stdio.h> +#include <errno.h> + +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); + +static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) +{ + struct r600_context *rctx = r600_context(ctx); + const struct util_format_description *desc; + enum pipe_format resource_format[160]; + unsigned i, nresources = 0; + struct r600_bc *bc = &shader->bc; + struct r600_bc_cf *cf; + struct r600_bc_vtx *vtx; + + if (shader->processor_type != TGSI_PROCESSOR_VERTEX) + return 0; + for (i = 0; i < rctx->vertex_elements->count; i++) { + resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; + } + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + switch (cf->inst) { + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + desc = util_format_description(resource_format[vtx->buffer_id]); + if (desc == NULL) { + R600_ERR("unknown format %d\n", 
resource_format[vtx->buffer_id]); + return -EINVAL; + } + vtx->dst_sel_x = desc->swizzle[0]; + vtx->dst_sel_y = desc->swizzle[1]; + vtx->dst_sel_z = desc->swizzle[2]; + vtx->dst_sel_w = desc->swizzle[3]; + } + break; + default: + break; + } + } + return r600_bc_build(&shader->bc); +} + +struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, + const struct tgsi_token *tokens) +{ + struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader); + int r; + +fprintf(stderr, "--------------------------------------------------------------\n"); +tgsi_dump(tokens, 0); + if (rpshader == NULL) + return NULL; + rpshader->shader.family = radeon_get_family(rscreen->rw); + r = r600_shader_from_tgsi(tokens, &rpshader->shader); + if (r) { + R600_ERR("translation from TGSI failed !\n"); + goto out_err; + } + r = r600_bc_build(&rpshader->shader.bc); + if (r) { + R600_ERR("building bytecode failed !\n"); + goto out_err; + } +fprintf(stderr, "______________________________________________________________\n"); + return rpshader; +out_err: + free(rpshader); + return NULL; +} static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) { struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; - unsigned i, tmp; + unsigned i, j, tmp; rpshader->state = radeon_state_decref(rpshader->state); state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); if (state == NULL) return -ENOMEM; - for (i = 0; i < rshader->noutput; i += 4) { - tmp = rshader->output[i].sid; - tmp |= rshader->output[i + 1].sid << 8; - tmp |= rshader->output[i + 2].sid << 16; - tmp |= rshader->output[i + 3].sid << 24; - state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] = tmp; - } - state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1); - 
state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->ngpr); + for (i = 0; i < 10; i++) { + state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; + } + for (i = 0, j = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name != TGSI_SEMANTIC_POSITION) { + tmp = rshader->output[i].sid << ((j & 3) * 8); + state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + j / 4] |= tmp; + j++; + } + } + state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); + state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); rpshader->state = state; rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); rpshader->state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); @@ -81,7 +154,7 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | S_0286CC_PERSP_GRADIENT_ENA(1); state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->ngpr); + state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; rpshader->state = state; rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); @@ -100,21 +173,21 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *r /* copy new shader */ radeon_bo_decref(rscreen->rw, rpshader->bo); rpshader->bo = NULL; - rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->ndw * 4, + rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4, 4096, NULL); if (rpshader->bo == NULL) { return -ENOMEM; } radeon_bo_map(rscreen->rw, rpshader->bo); - memcpy(rpshader->bo->data, rshader->bcode, rshader->ndw * 4); + memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4); radeon_bo_unmap(rscreen->rw, rpshader->bo); 
/* build state */ rshader->flat_shade = rctx->flat_shade; - switch (rpshader->type) { - case C_PROGRAM_TYPE_VS: + switch (rshader->processor_type) { + case TGSI_PROCESSOR_VERTEX: r = r600_pipe_shader_vs(ctx, rpshader); break; - case C_PROGRAM_TYPE_FS: + case TGSI_PROCESSOR_FRAGMENT: r = r600_pipe_shader_ps(ctx, rpshader); break; default: @@ -124,100 +197,813 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *r return r; } -struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, unsigned type, const struct tgsi_token *tokens) +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) { - struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader); - struct r600_shader *rshader = &rpshader->shader; + struct r600_context *rctx = r600_context(ctx); int r; if (rpshader == NULL) - return NULL; - rpshader->type = type; - c_list_init(&rshader->nodes); - fprintf(stderr, "<<\n"); - tgsi_dump(tokens, 0); - fprintf(stderr, "--------------------------------------------------------------\n"); - r = c_shader_from_tgsi(&rshader->cshader, type, tokens); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + return -EINVAL; + /* there should be enough input */ + if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, rpshader->shader.bc.nresource); + return -EINVAL; } - r = r600_shader_insert_fetch(&rshader->cshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + r = r600_shader_update(ctx, &rpshader->shader); + if (r) + return r; + return r600_pipe_shader(ctx, rpshader); +} + +struct r600_shader_tgsi_instruction; + +struct r600_shader_ctx { + struct tgsi_shader_info info; + struct tgsi_parse_context parse; + const struct tgsi_token *tokens; 
+ unsigned type; + unsigned file_offset[TGSI_FILE_COUNT]; + unsigned temp_reg; + struct r600_shader_tgsi_instruction *inst_info; + struct r600_bc *bc; + struct r600_shader *shader; +}; + +struct r600_shader_tgsi_instruction { + unsigned tgsi_opcode; + unsigned is_op3; + unsigned r600_opcode; + int (*process)(struct r600_shader_ctx *ctx); +}; + +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]; + +static int tgsi_is_supported(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; + int j; + + if (i->Instruction.NumDstRegs > 1) { + R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); + return -EINVAL; } - r = c_shader_build_dominator_tree(&rshader->cshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + if (i->Instruction.Saturate) { + R600_ERR("staturate unsupported\n"); + return -EINVAL; } - c_shader_dump(&rshader->cshader); - r = r600_cshader_legalize(&rshader->cshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + if (i->Instruction.Predicate) { + R600_ERR("predicate unsupported\n"); + return -EINVAL; } - r = r700_shader_translate(rshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + if (i->Instruction.Label) { + R600_ERR("label unsupported\n"); + return -EINVAL; } -#if 1 -#if 0 - fprintf(stderr, "--------------------------------------------------------------\n"); - for (int i = 0; i < rshader->ndw; i++) { - fprintf(stderr, "0x%08X\n", rshader->bcode[i]); + for (j = 0; j < i->Instruction.NumSrcRegs; j++) { + if (i->Src[j].Register.Indirect || + i->Src[j].Register.Dimension || + i->Src[j].Register.Absolute) { + R600_ERR("unsupported src (indirect|dimension|absolute)\n"); + return -EINVAL; + } } -#endif - fprintf(stderr, 
">>\n\n"); -#endif - return rpshader; + for (j = 0; j < i->Instruction.NumDstRegs; j++) { + if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) { + R600_ERR("unsupported dst (indirect|dimension)\n"); + return -EINVAL; + } + } + return 0; } -void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +static int tgsi_declaration(struct r600_shader_ctx *ctx) { - struct r600_screen *rscreen = r600_screen(ctx->screen); + struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; + struct r600_bc_vtx vtx; + unsigned i; + int r; - if (rpshader == NULL) - return; - radeon_bo_decref(rscreen->rw, rpshader->bo); - rpshader->bo = NULL; - r600_shader_cleanup(&rpshader->shader); - FREE(rpshader); + switch (d->Declaration.File) { + case TGSI_FILE_INPUT: + i = ctx->shader->ninput++; + ctx->shader->input[i].name = d->Semantic.Name; + ctx->shader->input[i].sid = d->Semantic.Index; + ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + /* turn input into fetch */ + memset(&vtx, 0, sizeof(struct r600_bc_vtx)); + vtx.inst = 0; + vtx.fetch_type = 0; + vtx.buffer_id = i; + /* register containing the index into the buffer */ + vtx.src_gpr = 0; + vtx.src_sel_x = 0; + vtx.mega_fetch_count = 0x1F; + vtx.dst_gpr = ctx->shader->input[i].gpr; + vtx.dst_sel_x = 0; + vtx.dst_sel_y = 1; + vtx.dst_sel_z = 2; + vtx.dst_sel_w = 3; + r = r600_bc_add_vtx(ctx->bc, &vtx); + if (r) + return r; + } + break; + case TGSI_FILE_OUTPUT: + i = ctx->shader->noutput++; + ctx->shader->output[i].name = d->Semantic.Name; + ctx->shader->output[i].sid = d->Semantic.Index; + ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; + break; + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_SAMPLER: + break; + default: + R600_ERR("unsupported file %d declaration\n", d->Declaration.File); + return -EINVAL; + } + return 0; } -int r600_pipe_shader_update(struct pipe_context 
*ctx, struct r600_pipe_shader *rpshader) +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { - struct r600_context *rctx = r600_context(ctx); - struct r600_shader *rshader; - enum pipe_format resource_format[160]; - unsigned i, nresources = 0; - int r; + struct tgsi_full_immediate *immediate; + struct r600_shader_ctx ctx; + struct r600_bc_output output; + unsigned opcode; + int i, r = 0, pos0; + u32 value[4]; - if (rpshader == NULL) - return -EINVAL; - rshader = &rpshader->shader; - switch (rpshader->type) { - case C_PROGRAM_TYPE_VS: - for (i = 0; i < rctx->vertex_elements->count; i++) { - resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; + ctx.bc = &shader->bc; + ctx.shader = shader; + r = r600_bc_init(ctx.bc, shader->family); + if (r) + return r; + ctx.tokens = tokens; + tgsi_scan_shader(tokens, &ctx.info); + tgsi_parse_init(&ctx.parse, tokens); + ctx.type = ctx.parse.FullHeader.Processor.Processor; + shader->processor_type = ctx.type; + + /* register allocations */ + /* Values [0,127] correspond to GPR[0..127]. + * Values [256,511] correspond to cfile constants c[0..255]. + * Other special values are shown in the list below. + * 248 SQ_ALU_SRC_0: special constant 0.0. + * 249 SQ_ALU_SRC_1: special constant 1.0 float. + * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. + * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. + * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. + * 253 SQ_ALU_SRC_LITERAL: literal constant. + * 254 SQ_ALU_SRC_PV: previous vector result. + * 255 SQ_ALU_SRC_PS: previous scalar result. 
+ */ + for (i = 0; i < TGSI_FILE_COUNT; i++) { + ctx.file_offset[i] = 0; + } + if (ctx.type == TGSI_PROCESSOR_VERTEX) { + ctx.file_offset[TGSI_FILE_INPUT] = 1; + } + ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + + ctx.info.file_count[TGSI_FILE_INPUT]; + ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + + ctx.info.file_count[TGSI_FILE_OUTPUT]; + ctx.file_offset[TGSI_FILE_CONSTANT] = 256; + ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; + ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + + ctx.info.file_count[TGSI_FILE_TEMPORARY]; + + while (!tgsi_parse_end_of_tokens(&ctx.parse)) { + tgsi_parse_token(&ctx.parse); + switch (ctx.parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + immediate = &ctx.parse.FullToken.FullImmediate; + value[0] = immediate->u[0].Uint; + value[1] = immediate->u[1].Uint; + value[2] = immediate->u[2].Uint; + value[3] = immediate->u[3].Uint; + break; + case TGSI_TOKEN_TYPE_DECLARATION: + r = tgsi_declaration(&ctx); + if (r) + goto out_err; + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + r = tgsi_is_supported(&ctx); + if (r) + goto out_err; + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; + r = ctx.inst_info->process(&ctx); + if (r) + goto out_err; + r = r600_bc_add_literal(ctx.bc, value); + if (r) + goto out_err; + break; + default: + R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); + r = -EINVAL; + goto out_err; + } + } + /* export output */ + for (i = 0, pos0 = 0; i < shader->noutput; i++) { + memset(&output, 0, sizeof(struct r600_bc_output)); + output.gpr = shader->output[i].gpr; + output.elem_size = 3; + output.swizzle_x = 0; + output.swizzle_y = 1; + output.swizzle_z = 2; + output.swizzle_w = 3; + output.barrier = 1; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output.array_base = i - pos0; + output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; + switch 
(ctx.type == TGSI_PROCESSOR_VERTEX) { + case TGSI_PROCESSOR_VERTEX: + if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { + output.array_base = 60; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + /* position doesn't count in array_base */ + pos0 = 1; + } + break; + case TGSI_PROCESSOR_FRAGMENT: + if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { + output.array_base = 0; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else { + R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); + r = -EINVAL; + goto out_err; + } + break; + default: + R600_ERR("unsupported processor type %d\n", ctx.type); + r = -EINVAL; + goto out_err; } + if (i == (shader->noutput - 1)) { + output.end_of_program = 1; + } + r = r600_bc_add_output(ctx.bc, &output); + if (r) + goto out_err; + } + tgsi_parse_free(&ctx.parse); + return 0; +out_err: + tgsi_parse_free(&ctx.parse); + return r; +} + +static int tgsi_unsupported(struct r600_shader_ctx *ctx) +{ + R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); + return -EINVAL; +} + +static int tgsi_end(struct r600_shader_ctx *ctx) +{ + return 0; +} + +static int tgsi_src(struct r600_shader_ctx *ctx, + const struct tgsi_full_src_register *tgsi_src, + unsigned swizzle, + struct r600_bc_alu_src *r600_src) +{ + r600_src->sel = tgsi_src->Register.Index; + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { + r600_src->sel = 0; + } + r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; + switch (swizzle) { + case 0: + r600_src->chan = tgsi_src->Register.SwizzleX; break; - default: + case 1: + r600_src->chan = tgsi_src->Register.SwizzleY; break; - } - /* there should be enough input */ - if (nresources < rshader->nresource) + case 2: + r600_src->chan = tgsi_src->Register.SwizzleZ; + break; + case 3: + r600_src->chan = tgsi_src->Register.SwizzleW; + break; + default: return -EINVAL; - /* FIXME compare resources */ - r = r600_shader_update(rshader, resource_format); - if (r) + } 
+ return 0; +} + +static int tgsi_dst(struct r600_shader_ctx *ctx, + const struct tgsi_full_dst_register *tgsi_dst, + unsigned swizzle, + struct r600_bc_alu_dst *r600_dst) +{ + r600_dst->sel = tgsi_dst->Register.Index; + r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; + r600_dst->chan = swizzle; + r600_dst->write = 1; + return 0; +} + +static int tgsi_op2(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + } else { + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + if (r) + return r; + } + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + } + /* handle some special cases */ + switch (ctx->inst_info->tgsi_opcode) { + case TGSI_OPCODE_SUB: + alu.src[1].neg = 1; + break; + default: + break; + } + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + +static int tgsi_slt(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + } else { + alu.inst = ctx->inst_info->r600_opcode; + r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); + if (r) + return r; + r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[0]); + if (r) + return r; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + } + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + +static int tgsi_lit(struct 
r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + + int r; + + + if (inst->Dst[0].Register.WriteMask & (1 << 0)) + { + /* dst.x, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; /*1.0*/ + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + if (r) + return r; + if ((inst->Dst[0].Register.WriteMask & 0xe) == 0) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + + if (inst->Dst[0].Register.WriteMask & (1 << 1)) + { + /* dst.y = max(src.x, 0.0) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX; + r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[0]); + if (r) + return r; + alu.src[1].sel = 248; /*0.0*/ + alu.src[1].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (r) + return r; + if ((inst->Dst[0].Register.WriteMask & 0xa) == 0) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + if (inst->Dst[0].Register.WriteMask & (1 << 3)) + { + /* dst.w, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + if (r) + return r; + if ((inst->Dst[0].Register.WriteMask & 0x4) == 0) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + if (inst->Dst[0].Register.WriteMask & (1 << 2)) + { + /* dst.z = log(src.y) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED; + r = tgsi_src(ctx, &inst->Src[0], 1, &alu.src[0]); + if (r) + return r; + r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + if (r) + return r; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + int chan = alu.dst.chan; + int sel = alu.dst.sel; + + /* tmp.x = amd 
MUL_LIT(src.w, dst.z, src.x ) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; + r = tgsi_src(ctx, &inst->Src[0], 3, &alu.src[0]); + if (r) return r; - return r600_pipe_shader(ctx, rpshader); + alu.src[1].sel = sel; + alu.src[1].chan = chan; + r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[2]); + if (r) + return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.is_op3 = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.z = exp(tmp.x) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + if (r) + return r; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + +static int tgsi_trans(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (inst->Dst[0].Register.WriteMask & (1 << i)) { + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + if (r) + return r; + } + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } + return 0; } + +static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) +{ + struct r600_bc_alu alu; + int i, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + } else { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.src[0].sel = 
ctx->temp_reg; + alu.src[0].chan = i; + } + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + +static int tgsi_op3(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + /* do it in 2 step as op3 doesn't support writemask */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + if (r) + return r; + } + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + alu.dst.write = 1; + alu.is_op3 = 1; + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return tgsi_helper_copy(ctx, inst); +} + +static int tgsi_dp(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + if (r) + return r; + } + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + alu.dst.write = 1; + /* handle some special cases */ + switch (ctx->inst_info->tgsi_opcode) { + case TGSI_OPCODE_DP2: + if (i > 1) { + alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].chan = alu.src[1].chan = 0; + } + break; + case TGSI_OPCODE_DP3: + if (i > 2) { + alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].chan = alu.src[1].chan = 0; + } + break; + default: + break; + } + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return tgsi_helper_copy(ctx, inst); +} + +static int tgsi_tex(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction; + struct r600_bc_tex tex; + + memset(&tex, 0, sizeof(struct r600_bc_tex)); + tex.inst = ctx->inst_info->r600_opcode; + tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; + tex.sampler_id = tex.resource_id; + tex.src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; + tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Src[0].Register.Index; + tex.dst_sel_x = 0; + tex.dst_sel_y = 1; + tex.dst_sel_z = 2; + tex.dst_sel_w = 3; + tex.src_sel_x = 0; + tex.src_sel_y = 1; + tex.src_sel_z = 2; + tex.src_sel_w = 3; + return r600_bc_add_tex(ctx->bc, &tex); +} + +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { + {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, + {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, + {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans}, + {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, + {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, + {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, + {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt}, + {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MAD, 1, 
V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, + {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, + {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */ + {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, + {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex}, + {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */ + {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SCS, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SAD, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* conditional kill */ + {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ + /* gap */ + {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + 
{TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + 
{TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, +}; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 7d30ca79d1..23b6a83b9a 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -23,241 +23,23 @@ #ifndef R600_SHADER_H #define R600_SHADER_H -#include "r600_compiler.h" -#include "radeon.h" - -struct r600_shader_operand { - struct c_vector *vector; - unsigned sel; - unsigned chan; - unsigned neg; - unsigned abs; -}; - -struct r600_shader_vfetch { - struct r600_shader_vfetch *next; - struct r600_shader_vfetch *prev; - unsigned cf_addr; - struct r600_shader_operand src[2]; - struct r600_shader_operand dst[4]; -}; - -struct r600_shader_inst { - unsigned is_op3; - unsigned opcode; - unsigned inst; - struct r600_shader_operand src[3]; - struct r600_shader_operand dst; - unsigned last; -}; - -struct r600_shader_alu { - struct r600_shader_alu *next; - struct r600_shader_alu *prev; - unsigned nalu; - unsigned nliteral; - unsigned nconstant; - struct r600_shader_inst alu[5]; - u32 literal[4]; -}; - -struct r600_shader_node { - struct r600_shader_node *next; - struct r600_shader_node *prev; - unsigned cf_id; /**< cf index (in dw) in byte code */ - unsigned cf_addr; /**< instructions index (in dw) in byte code */ - unsigned nslot; /**< number of slot (2 dw) needed by this node */ - unsigned nfetch; - struct c_node *node; /**< compiler node from which this node originate */ - struct r600_shader_vfetch vfetch; /**< list of vfetch instructions */ - struct r600_shader_alu alu; /**< list of alu instructions */ -}; +#include "r600_asm.h" struct r600_shader_io { - unsigned name; - unsigned gpr; - int sid; + unsigned name; + unsigned gpr; + int sid; }; struct r600_shader { - unsigned stack_size; /**< stack size needed by this shader */ - unsigned ngpr; /**< number of GPR needed by 
this shader */ - unsigned nconstant; /**< number of constants used by this shader */ - unsigned nresource; /**< number of resources used by this shader */ - unsigned noutput; - unsigned ninput; - unsigned nvector; - unsigned ncf; /**< total number of cf clauses */ - unsigned nslot; /**< total number of slots (2 dw) */ - unsigned flat_shade; /**< are we flat shading */ - struct r600_shader_node nodes; /**< list of node */ - struct r600_shader_io input[32]; - struct r600_shader_io output[32]; - /* TODO replace GPR by some better register allocator */ - struct c_vector **gpr; - unsigned ndw; /**< bytes code size in dw */ - u32 *bcode; /**< bytes code */ - enum pipe_format resource_format[160]; /**< format of resource */ - struct c_shader cshader; + unsigned processor_type; + struct r600_bc bc; + boolean flat_shade; + unsigned ninput; + unsigned noutput; + struct r600_shader_io input[32]; + struct r600_shader_io output[32]; + enum radeon_family family; }; -void r600_shader_cleanup(struct r600_shader *rshader); -int r600_shader_register(struct r600_shader *rshader); -int r600_shader_node(struct r600_shader *shader); -void r600_shader_node_place(struct r600_shader *rshader); -int r600_shader_find_gpr(struct r600_shader *rshader, struct c_vector *v, unsigned swizzle, - struct r600_shader_operand *operand); -int r600_shader_vfetch_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - struct r600_shader_vfetch *vfetch, - unsigned *cid); -int r600_shader_update(struct r600_shader *rshader, - enum pipe_format *resource_format); -int r600_shader_legalize(struct r600_shader *rshader); -int r600_cshader_legalize(struct c_shader *shader); - -int r700_shader_translate(struct r600_shader *rshader); - -int c_shader_from_tgsi(struct c_shader *shader, unsigned type, - const struct tgsi_token *tokens); -int r600_shader_register(struct r600_shader *rshader); -int r600_shader_translate_rec(struct r600_shader *rshader, struct c_node *node); -int 
r700_shader_translate(struct r600_shader *rshader); -int r600_shader_insert_fetch(struct c_shader *shader); - -enum r600_instruction { - INST_ADD = 0, - INST_MUL = 1, - INST_MUL_IEEE = 2, - INST_MAX = 3, - INST_MIN = 4, - INST_MAX_DX10 = 5, - INST_MIN_DX10 = 6, - INST_SETE = 7, - INST_SETGT = 8, - INST_SETGE = 9, - INST_SETNE = 10, - INST_SETE_DX10 = 11, - INST_SETGT_DX10 = 12, - INST_SETGE_DX10 = 13, - INST_SETNE_DX10 = 14, - INST_FRACT = 15, - INST_TRUNC = 16, - INST_CEIL = 17, - INST_RNDNE = 18, - INST_FLOOR = 19, - INST_MOVA = 20, - INST_MOVA_FLOOR = 21, - INST_MOVA_INT = 22, - INST_MOV = 23, - INST_NOP = 24, - INST_PRED_SETGT_UINT = 25, - INST_PRED_SETGE_UINT = 26, - INST_PRED_SETE = 27, - INST_PRED_SETGT = 28, - INST_PRED_SETGE = 29, - INST_PRED_SETNE = 30, - INST_PRED_SET_INV = 31, - INST_PRED_SET_POP = 32, - INST_PRED_SET_CLR = 33, - INST_PRED_SET_RESTORE = 34, - INST_PRED_SETE_PUSH = 35, - INST_PRED_SETGT_PUSH = 36, - INST_PRED_SETGE_PUSH = 37, - INST_PRED_SETNE_PUSH = 38, - INST_KILLE = 39, - INST_KILLGT = 40, - INST_KILLGE = 41, - INST_KILLNE = 42, - INST_AND_INT = 43, - INST_OR_INT = 44, - INST_XOR_INT = 45, - INST_NOT_INT = 46, - INST_ADD_INT = 47, - INST_SUB_INT = 48, - INST_MAX_INT = 49, - INST_MIN_INT = 50, - INST_MAX_UINT = 51, - INST_MIN_UINT = 52, - INST_SETE_INT = 53, - INST_SETGT_INT = 54, - INST_SETGE_INT = 55, - INST_SETNE_INT = 56, - INST_SETGT_UINT = 57, - INST_SETGE_UINT = 58, - INST_KILLGT_UINT = 59, - INST_KILLGE_UINT = 60, - INST_PRED_SETE_INT = 61, - INST_PRED_SETGT_INT = 62, - INST_PRED_SETGE_INT = 63, - INST_PRED_SETNE_INT = 64, - INST_KILLE_INT = 65, - INST_KILLGT_INT = 66, - INST_KILLGE_INT = 67, - INST_KILLNE_INT = 68, - INST_PRED_SETE_PUSH_INT = 69, - INST_PRED_SETGT_PUSH_INT = 70, - INST_PRED_SETGE_PUSH_INT = 71, - INST_PRED_SETNE_PUSH_INT = 72, - INST_PRED_SETLT_PUSH_INT = 73, - INST_PRED_SETLE_PUSH_INT = 74, - INST_DOT4 = 75, - INST_DOT4_IEEE = 76, - INST_CUBE = 77, - INST_MAX4 = 78, - INST_MOVA_GPR_INT = 79, - INST_EXP_IEEE = 
80, - INST_LOG_CLAMPED = 81, - INST_LOG_IEEE = 82, - INST_RECIP_CLAMPED = 83, - INST_RECIP_FF = 84, - INST_RECIP_IEEE = 85, - INST_RECIPSQRT_CLAMPED = 86, - INST_RECIPSQRT_FF = 87, - INST_RECIPSQRT_IEEE = 88, - INST_SQRT_IEEE = 89, - INST_FLT_TO_INT = 90, - INST_INT_TO_FLT = 91, - INST_UINT_TO_FLT = 92, - INST_SIN = 93, - INST_COS = 94, - INST_ASHR_INT = 95, - INST_LSHR_INT = 96, - INST_LSHL_INT = 97, - INST_MULLO_INT = 98, - INST_MULHI_INT = 99, - INST_MULLO_UINT = 100, - INST_MULHI_UINT = 101, - INST_RECIP_INT = 102, - INST_RECIP_UINT = 103, - INST_FLT_TO_UINT = 104, - INST_MUL_LIT = 105, - INST_MUL_LIT_M2 = 106, - INST_MUL_LIT_M4 = 107, - INST_MUL_LIT_D2 = 108, - INST_MULADD = 109, - INST_MULADD_M2 = 110, - INST_MULADD_M4 = 111, - INST_MULADD_D2 = 112, - INST_MULADD_IEEE = 113, - INST_MULADD_IEEE_M2 = 114, - INST_MULADD_IEEE_M4 = 115, - INST_MULADD_IEEE_D2 = 116, - INST_CNDE = 117, - INST_CNDGT = 118, - INST_CNDGE = 119, - INST_CNDE_INT = 120, - INST_CNDGT_INT = 121, - INST_CNDGE_INT = 122, - INST_COUNT -}; - -struct r600_instruction_info { - enum r600_instruction instruction; - unsigned opcode; - unsigned is_trans; - unsigned is_op3; -}; - - #endif diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 71aa09719e..002660c654 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -87,9 +87,9 @@ #define G_SQ_CF_WORD1_BARRIER(x) (((x) >> 31) & 0x1) #define C_SQ_CF_WORD1_BARRIER 0x7FFFFFFF #define P_SQ_CF_ALU_WORD0 -#define S_SQ_CF_ALU_WORD0_ALU_ADDR(x) (((x) & 0x3FFFFF) << 0) -#define G_SQ_CF_ALU_WORD0_ALU_ADDR(x) (((x) >> 0) & 0x3FFFFF) -#define C_SQ_CF_ALU_WORD0_ALU_ADDR 0xFFC00000 +#define S_SQ_CF_ALU_WORD0_ADDR(x) (((x) & 0x3FFFFF) << 0) +#define G_SQ_CF_ALU_WORD0_ADDR(x) (((x) >> 0) & 0x3FFFFF) +#define C_SQ_CF_ALU_WORD0_ADDR 0xFFC00000 #define S_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) & 0xF) << 22) #define G_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) >> 22) & 0xF) #define 
C_SQ_CF_ALU_WORD0_KCACHE_BANK0 0xFC3FFFFF @@ -109,15 +109,15 @@ #define S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) & 0xFF) << 10) #define G_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) >> 10) & 0xFF) #define C_SQ_CF_ALU_WORD1_KCACHE_ADDR1 0xFFFC03FF -#define S_SQ_CF_ALU_WORD1_ALU_COUNT(x) (((x) & 0x7F) << 18) -#define G_SQ_CF_ALU_WORD1_ALU_COUNT(x) (((x) >> 18) & 0x7F) -#define C_SQ_CF_ALU_WORD1_ALU_COUNT 0xFE03FFFF +#define S_SQ_CF_ALU_WORD1_COUNT(x) (((x) & 0x7F) << 18) +#define G_SQ_CF_ALU_WORD1_COUNT(x) (((x) >> 18) & 0x7F) +#define C_SQ_CF_ALU_WORD1_COUNT 0xFE03FFFF #define S_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) & 0x1) << 25) #define G_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) >> 25) & 0x1) #define C_SQ_CF_ALU_WORD1_USES_WATERFALL 0xFDFFFFFF -#define S_SQ_CF_ALU_WORD1_CF_ALU_INST(x) (((x) & 0xF) << 26) -#define G_SQ_CF_ALU_WORD1_CF_ALU_INST(x) (((x) >> 26) & 0xF) -#define C_SQ_CF_ALU_WORD1_CF_ALU_INST 0xC3FFFFFF +#define S_SQ_CF_ALU_WORD1_CF_INST(x) (((x) & 0xF) << 26) +#define G_SQ_CF_ALU_WORD1_CF_INST(x) (((x) >> 26) & 0xF) +#define C_SQ_CF_ALU_WORD1_CF_INST 0xC3FFFFFF #define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU 0x00000008 #define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009 #define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER 0x0000000A @@ -546,6 +546,8 @@ #define S_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) & 0x1) << 28) #define G_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) >> 28) & 0x1) #define C_SQ_TEX_WORD1_COORD_TYPE_X 0xEFFFFFFF +#define V_SQ_TEX_WORD1_COORD_UNNORMALIZED 0x00000000 +#define V_SQ_TEX_WORD1_COORD_NORMALIZED 0x00000001 #define S_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) & 0x1) << 29) #define G_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) >> 29) & 0x1) #define C_SQ_TEX_WORD1_COORD_TYPE_Y 0xDFFFFFFF @@ -580,27 +582,5 @@ #define S_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) & 0x7) << 29) #define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7) #define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF -#define P_SQ_ALU_WORD1_OP2_V2 -#define S_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) & 0x1) << 0) -#define 
G_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) >> 0) & 0x1) -#define C_SQ_ALU_WORD1_OP2_V2_SRC0_ABS 0xFFFFFFFE -#define S_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) & 0x1) << 1) -#define G_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) >> 1) & 0x1) -#define C_SQ_ALU_WORD1_OP2_V2_SRC1_ABS 0xFFFFFFFD -#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) & 0x1) << 2) -#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) >> 2) & 0x1) -#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK 0xFFFFFFFB -#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) & 0x1) << 3) -#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) >> 3) & 0x1) -#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED 0xFFFFFFF7 -#define S_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) & 0x1) << 4) -#define G_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) >> 4) & 0x1) -#define C_SQ_ALU_WORD1_OP2_V2_WRITE_MASK 0xFFFFFFEF -#define S_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) & 0x3) << 5) -#define G_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) >> 5) & 0x3) -#define C_SQ_ALU_WORD1_OP2_V2_OMOD 0xFFFFFF9F -#define S_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) & 0x7FF) << 7) -#define G_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) >> 7) & 0x7FF) -#define C_SQ_ALU_WORD1_OP2_V2_ALU_INST 0xFFFC007F #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 4150f88785..84a13e4ef7 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -151,7 +151,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, static void *r600_create_fs_state(struct pipe_context *ctx, const struct pipe_shader_state *shader) { - return r600_pipe_shader_create(ctx, C_PROGRAM_TYPE_FS, shader->tokens); + return r600_pipe_shader_create(ctx, shader->tokens); } static void r600_bind_fs_state(struct pipe_context *ctx, void *state) @@ -164,7 +164,7 @@ static void r600_bind_fs_state(struct pipe_context *ctx, void *state) static void *r600_create_vs_state(struct pipe_context *ctx, const struct pipe_shader_state *shader) { - 
return r600_pipe_shader_create(ctx, C_PROGRAM_TYPE_VS, shader->tokens); + return r600_pipe_shader_create(ctx, shader->tokens); } static void r600_bind_vs_state(struct pipe_context *ctx, void *state) diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 7d94bbe510..903cfad80a 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -29,7 +29,7 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> -#include "state_tracker/drm_api.h" +#include "state_tracker/drm_driver.h" #include "r600_screen.h" #include "r600_texture.h" diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index d2c7248ff2..44834984c6 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -81,6 +81,81 @@ #define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) /* Registers */ +#define R_008C00_SQ_CONFIG 0x00008C00 +#define S_008C00_VC_ENABLE(x) (((x) & 0x1) << 0) +#define G_008C00_VC_ENABLE(x) (((x) >> 0) & 0x1) +#define C_008C00_VC_ENABLE(x) 0xFFFFFFFE +#define S_008C00_EXPORT_SRC_C(x) (((x) & 0x1) << 1) +#define G_008C00_EXPORT_SRC_C(x) (((x) >> 1) & 0x1) +#define C_008C00_EXPORT_SRC_C(x) 0xFFFFFFFD +#define S_008C00_DX9_CONSTS(x) (((x) & 0x1) << 2) +#define G_008C00_DX9_CONSTS(x) (((x) >> 2) & 0x1) +#define C_008C00_DX9_CONSTS(x) 0xFFFFFFFB +#define S_008C00_ALU_INST_PREFER_VECTOR(x) (((x) & 0x1) << 3) +#define G_008C00_ALU_INST_PREFER_VECTOR(x) (((x) >> 3) & 0x1) +#define C_008C00_ALU_INST_PREFER_VECTOR(x) 0xFFFFFFF7 +#define S_008C00_DX10_CLAMP(x) (((x) & 0x1) << 4) +#define G_008C00_DX10_CLAMP(x) (((x) >> 4) & 0x1) +#define C_008C00_DX10_CLAMP(x) 0xFFFFFFEF +#define S_008C00_CLAUSE_SEQ_PRIO(x) (((x) & 0x3) << 8) +#define G_008C00_CLAUSE_SEQ_PRIO(x) (((x) >> 8) & 0x3) +#define C_008C00_CLAUSE_SEQ_PRIO(x) 0xFFFFFCFF +#define S_008C00_PS_PRIO(x) (((x) & 0x3) << 24) +#define 
G_008C00_PS_PRIO(x) (((x) >> 24) & 0x3) +#define C_008C00_PS_PRIO(x) 0xFCFFFFFF +#define S_008C00_VS_PRIO(x) (((x) & 0x3) << 26) +#define G_008C00_VS_PRIO(x) (((x) >> 26) & 0x3) +#define C_008C00_VS_PRIO(x) 0xF3FFFFFF +#define S_008C00_GS_PRIO(x) (((x) & 0x3) << 28) +#define G_008C00_GS_PRIO(x) (((x) >> 28) & 0x3) +#define C_008C00_GS_PRIO(x) 0xCFFFFFFF +#define S_008C00_ES_PRIO(x) (((x) & 0x3) << 30) +#define G_008C00_ES_PRIO(x) (((x) >> 30) & 0x3) +#define C_008C00_ES_PRIO(x) 0x3FFFFFFF +#define R_008C04_SQ_GPR_RESOURCE_MGMT_1 0x00008C04 +#define S_008C04_NUM_PS_GPRS(x) (((x) & 0xFF) << 0) +#define G_008C04_NUM_PS_GPRS(x) (((x) >> 0) & 0xFF) +#define C_008C04_NUM_PS_GPRS(x) 0xFFFFFF00 +#define S_008C04_NUM_VS_GPRS(x) (((x) & 0xFF) << 16) +#define G_008C04_NUM_VS_GPRS(x) (((x) >> 16) & 0xFF) +#define C_008C04_NUM_VS_GPRS(x) 0xFF00FFFF +#define S_008C04_NUM_CLAUSE_TEMP_GPRS(x) (((x) & 0xF) << 28) +#define G_008C04_NUM_CLAUSE_TEMP_GPRS(x) (((x) >> 28) & 0xF) +#define C_008C04_NUM_CLAUSE_TEMP_GPRS(x) 0x0FFFFFFF +#define R_008C08_SQ_GPR_RESOURCE_MGMT_2 0x00008C08 +#define S_008C08_NUM_GS_GPRS(x) (((x) & 0xFF) << 0) +#define G_008C08_NUM_GS_GPRS(x) (((x) >> 0) & 0xFF) +#define C_008C08_NUM_GS_GPRS(x) 0xFFFFFF00 +#define S_008C08_NUM_ES_GPRS(x) (((x) & 0xFF) << 16) +#define G_008C08_NUM_ES_GPRS(x) (((x) >> 16) & 0xFF) +#define C_008C08_NUM_ES_GPRS(x) 0xFF00FFFF +#define R_008C0C_SQ_THREAD_RESOURCE_MGMT 0x00008C0C +#define S_008C0C_NUM_PS_THREADS(x) (((x) & 0xFF) << 0) +#define G_008C0C_NUM_PS_THREADS(x) (((x) >> 0) & 0xFF) +#define C_008C0C_NUM_PS_THREADS(x) 0xFFFFFF00 +#define S_008C0C_NUM_VS_THREADS(x) (((x) & 0xFF) << 8) +#define G_008C0C_NUM_VS_THREADS(x) (((x) >> 8) & 0xFF) +#define C_008C0C_NUM_VS_THREADS(x) 0xFFFF00FF +#define S_008C0C_NUM_GS_THREADS(x) (((x) & 0xFF) << 16) +#define G_008C0C_NUM_GS_THREADS(x) (((x) >> 16) & 0xFF) +#define C_008C0C_NUM_GS_THREADS(x) 0xFF00FFFF +#define S_008C0C_NUM_ES_THREADS(x) (((x) & 0xFF) << 24) +#define 
G_008C0C_NUM_ES_THREADS(x) (((x) >> 24) & 0xFF) +#define C_008C0C_NUM_ES_THREADS(x) 0x00FFFFFF +#define R_008C10_SQ_STACK_RESOURCE_MGMT_1 0x00008C10 +#define S_008C10_NUM_PS_STACK_ENTRIES(x) (((x) & 0xFFF) << 0) +#define G_008C10_NUM_PS_STACK_ENTRIES(x) (((x) >> 0) & 0xFFF) +#define C_008C10_NUM_PS_STACK_ENTRIES(x) 0xFFFFF000 +#define S_008C10_NUM_VS_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) +#define G_008C10_NUM_VS_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) +#define C_008C10_NUM_VS_STACK_ENTRIES(x) 0xF000FFFF +#define R_008C14_SQ_STACK_RESOURCE_MGMT_2 0x00008C14 +#define S_008C14_NUM_GS_STACK_ENTRIES(x) (((x) & 0xFFF) << 0) +#define G_008C14_NUM_GS_STACK_ENTRIES(x) (((x) >> 0) & 0xFFF) +#define C_008C14_NUM_GS_STACK_ENTRIES(x) 0xFFFFF000 +#define S_008C14_NUM_ES_STACK_ENTRIES(x) (((x) & 0xFFF) << 16) +#define G_008C14_NUM_ES_STACK_ENTRIES(x) (((x) >> 16) & 0xFFF) +#define C_008C14_NUM_ES_STACK_ENTRIES(x) 0xF000FFFF #define R_0280A0_CB_COLOR0_INFO 0x0280A0 #define S_0280A0_ENDIAN(x) (((x) & 0x3) << 0) #define G_0280A0_ENDIAN(x) (((x) >> 0) & 0x3) diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c new file mode 100644 index 0000000000..3532ba5b0c --- /dev/null +++ b/src/gallium/drivers/r600/r700_asm.c @@ -0,0 +1,70 @@ +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "r600_asm.h" +#include "r600_context.h" +#include "util/u_memory.h" +#include "r700_sq.h" +#include <stdio.h> +#include <errno.h> + +int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +{ + unsigned i; + + /* don't replace gpr by pv or ps for destination register */ + if (alu->is_op3) { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | + S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | + S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | + S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } else { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | + S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | + S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + 
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } + if (alu->last) { + for (i = 0; i < alu->nliteral; i++) { + bc->bytecode[id++] = alu->value[i]; + } + } + return 0; +} diff --git a/src/gallium/drivers/r600/r700_sq.h b/src/gallium/drivers/r600/r700_sq.h index 8266af6d1f..9a117aeb1d 100644 --- a/src/gallium/drivers/r600/r700_sq.h +++ b/src/gallium/drivers/r600/r700_sq.h @@ -583,27 +583,5 @@ #define S_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) & 0x7) << 29) #define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7) #define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF -#define P_SQ_ALU_WORD1_OP2_V2 -#define S_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) & 0x1) << 0) -#define G_SQ_ALU_WORD1_OP2_V2_SRC0_ABS(x) (((x) >> 0) & 0x1) -#define C_SQ_ALU_WORD1_OP2_V2_SRC0_ABS 0xFFFFFFFE -#define S_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) & 0x1) << 1) -#define G_SQ_ALU_WORD1_OP2_V2_SRC1_ABS(x) (((x) >> 1) & 0x1) -#define C_SQ_ALU_WORD1_OP2_V2_SRC1_ABS 0xFFFFFFFD -#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) & 0x1) << 2) -#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK(x) (((x) >> 2) & 0x1) -#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK 0xFFFFFFFB -#define S_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) & 0x1) << 3) -#define G_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED(x) (((x) >> 3) & 0x1) -#define C_SQ_ALU_WORD1_OP2_V2_UPDATE_PRED 0xFFFFFFF7 -#define S_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) & 0x1) << 4) -#define G_SQ_ALU_WORD1_OP2_V2_WRITE_MASK(x) (((x) >> 4) & 0x1) -#define C_SQ_ALU_WORD1_OP2_V2_WRITE_MASK 0xFFFFFFEF -#define S_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) & 0x3) << 5) -#define G_SQ_ALU_WORD1_OP2_V2_OMOD(x) (((x) >> 5) & 0x3) -#define C_SQ_ALU_WORD1_OP2_V2_OMOD 0xFFFFFF9F -#define S_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) & 0x7FF) << 7) -#define G_SQ_ALU_WORD1_OP2_V2_ALU_INST(x) (((x) >> 7) & 0x7FF) -#define C_SQ_ALU_WORD1_OP2_V2_ALU_INST 0xFFFC007F #endif diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index ec94b112d6..3a8405f9b4 
100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -28,8 +28,6 @@ typedef uint8_t u8; struct radeon; -struct pipe_screen *radeon_create_screen(struct radeon *rw); - enum radeon_family { CHIP_UNKNOWN, CHIP_R100, @@ -79,6 +77,8 @@ enum radeon_family { CHIP_LAST, }; +enum radeon_family radeon_get_family(struct radeon *rw); + /* * radeon object functions */ diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 00b167e256..e0dd5cf8c2 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -97,15 +97,7 @@ rbug_draw_block_locked(struct rbug_context *rb_pipe, int flag) /* wait for rbug to clear the blocked flag */ while (rb_pipe->draw_blocked & flag) { rb_pipe->draw_blocked |= flag; -#ifdef PIPE_THREAD_HAVE_CONDVAR pipe_condvar_wait(rb_pipe->draw_cond, rb_pipe->draw_mutex); -#else - pipe_mutex_unlock(rb_pipe->draw_mutex); -#ifdef PIPE_SUBSYSTEM_WINDOWS_USER - Sleep(1); -#endif - pipe_mutex_lock(rb_pipe->draw_mutex); -#endif } } diff --git a/src/gallium/drivers/rbug/rbug_core.c b/src/gallium/drivers/rbug/rbug_core.c index f1aab3869b..9dc663b079 100644 --- a/src/gallium/drivers/rbug/rbug_core.c +++ b/src/gallium/drivers/rbug/rbug_core.c @@ -407,9 +407,7 @@ rbug_context_draw_step(struct rbug_rbug *tr_rbug, struct rbug_header *header, ui } pipe_mutex_unlock(rb_context->draw_mutex); -#ifdef PIPE_THREAD_HAVE_CONDVAR pipe_condvar_broadcast(rb_context->draw_cond); -#endif pipe_mutex_unlock(rb_screen->list_mutex); @@ -442,9 +440,7 @@ rbug_context_draw_unblock(struct rbug_rbug *tr_rbug, struct rbug_header *header, rb_context->draw_blocker &= ~unblock->unblock; pipe_mutex_unlock(rb_context->draw_mutex); -#ifdef PIPE_THREAD_HAVE_CONDVAR pipe_condvar_broadcast(rb_context->draw_cond); -#endif pipe_mutex_unlock(rb_screen->list_mutex); @@ -476,9 +472,7 @@ rbug_context_draw_rule(struct rbug_rbug *tr_rbug, struct rbug_header *header, ui rb_context->draw_blocker 
|= RBUG_BLOCK_RULE; pipe_mutex_unlock(rb_context->draw_mutex); -#ifdef PIPE_THREAD_HAVE_CONDVAR pipe_condvar_broadcast(rb_context->draw_cond); -#endif pipe_mutex_unlock(rb_screen->list_mutex); diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 79daa68f3b..9e727c9381 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -47,43 +47,6 @@ - -/** - * Draw vertex arrays, with optional indexing. - * Basically, map the vertex buffers (and drawing surfaces), then hand off - * the drawing to the 'draw' module. - */ -static void -softpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); - - -void -softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - softpipe_draw_range_elements_instanced(pipe, - NULL, - 0, - 0, - 0, - 0xffffffff, - mode, - start, - count, - 0, - 1); -} - void softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode) { @@ -136,6 +99,93 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode) } +/** + * This function handles drawing indexed and non-indexed prims, + * instanced and non-instanced drawing, with or without min/max element + * indexes. + * All the other drawing functions are expressed in terms of this + * function. + * + * For non-indexed prims, indexBuffer should be NULL. + * For non-instanced drawing, instanceCount should be 1. + * When the min/max element indexes aren't known, minIndex should be 0 + * and maxIndex should be ~0. 
+ */ +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + struct softpipe_context *sp = softpipe_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + if (!softpipe_check_render_cond(sp)) + return; + + sp->reduced_api_prim = u_reduced_prim(mode); + + if (sp->dirty) { + softpipe_update_derived(sp); + } + + softpipe_map_transfers(sp); + + /* Map vertex buffers */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + void *buf = softpipe_resource(sp->vertex_buffer[i].buffer)->data; + draw_set_mapped_vertex_buffer(draw, i, buf); + } + + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes = softpipe_resource(indexBuffer)->data; + draw_set_mapped_element_buffer_range(draw, + indexSize, + indexBias, + minIndex, + maxIndex, + mapped_indexes); + } else { + /* no index/element buffer */ + draw_set_mapped_element_buffer_range(draw, + 0, 0, + start, + start + count - 1, + NULL); + } + + /* draw! */ + draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); + + /* unmap vertex/index buffers - will cause draw module to flush */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, 0, NULL); + } + + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) 
+ */ + draw_flush(draw); + + /* Note: leave drawing surfaces mapped */ + sp->dirty_render_cache = TRUE; +} + + void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, @@ -223,76 +273,20 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, instanceCount); } -static void -softpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) +void +softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) { - struct softpipe_context *sp = softpipe_context(pipe); - struct draw_context *draw = sp->draw; - unsigned i; - - if (!softpipe_check_render_cond(sp)) - return; - - sp->reduced_api_prim = u_reduced_prim(mode); - - if (sp->dirty) { - softpipe_update_derived(sp); - } - - softpipe_map_transfers(sp); - - /* Map vertex buffers */ - for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf = softpipe_resource(sp->vertex_buffer[i].buffer)->data; - draw_set_mapped_vertex_buffer(draw, i, buf); - } - - /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = softpipe_resource(indexBuffer)->data; - draw_set_mapped_element_buffer_range(draw, - indexSize, - indexBias, - minIndex, - maxIndex, - mapped_indexes); - } else { - /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, - 0, 0, - start, - start + count - 1, - NULL); - } - - /* draw! 
*/ - draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); - - /* unmap vertex/index buffers - will cause draw module to flush */ - for (i = 0; i < sp->num_vertex_buffers; i++) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - if (indexBuffer) { - draw_set_mapped_element_buffer(draw, 0, 0, NULL); - } - - /* - * TODO: Flush only when a user vertex/index buffer is present - * (or even better, modify draw module to do this - * internally when this condition is seen?) - */ - draw_flush(draw); - - /* Note: leave drawing surfaces mapped */ - sp->dirty_render_cache = TRUE; + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } + diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 00187febf0..6af1b2d061 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -208,7 +208,7 @@ logicop_quad(struct quad_stage *qs, res4[j] = ~0; break; default: - assert(0); + assert(0 && "invalid logicop mode"); } for (j = 0; j < 4; j++) { @@ -221,11 +221,18 @@ logicop_quad(struct quad_stage *qs, +/** + * Do blending for a 2x2 quad for one color buffer. + * \param quadColor the incoming quad colors + * \param dest the destination/framebuffer quad colors + * \param blend_index which set of blending terms to use + * \param has_dst_alpha does the dest color buffer have an alpha channel? 
+ */ static void blend_quad(struct quad_stage *qs, float (*quadColor)[4], float (*dest)[4], - unsigned cbuf, + unsigned blend_index, boolean has_dst_alpha) { static const float zero[4] = { 0, 0, 0, 0 }; @@ -236,7 +243,7 @@ blend_quad(struct quad_stage *qs, /* * Compute src/first term RGB */ - switch (softpipe->blend->rt[cbuf].rgb_src_factor) { + switch (softpipe->blend->rt[blend_index].rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: VEC4_COPY(source[0], quadColor[0]); /* R */ VEC4_COPY(source[1], quadColor[1]); /* G */ @@ -395,13 +402,13 @@ blend_quad(struct quad_stage *qs, assert(0); /* to do */ break; default: - assert(0); + assert(0 && "invalid rgb src factor"); } /* * Compute src/first term A */ - switch (softpipe->blend->rt[cbuf].alpha_src_factor) { + switch (softpipe->blend->rt[blend_index].alpha_src_factor) { case PIPE_BLENDFACTOR_ONE: VEC4_COPY(source[3], quadColor[3]); /* A */ break; @@ -469,14 +476,14 @@ blend_quad(struct quad_stage *qs, } break; default: - assert(0); + assert(0 && "invalid alpha src factor"); } /* * Compute dest/second term RGB */ - switch (softpipe->blend->rt[cbuf].rgb_dst_factor) { + switch (softpipe->blend->rt[blend_index].rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: /* dest = dest * 1 NO-OP, leave dest as-is */ break; @@ -625,13 +632,13 @@ blend_quad(struct quad_stage *qs, assert(0); break; default: - assert(0); + assert(0 && "invalid rgb dst factor"); } /* * Compute dest/second term A */ - switch (softpipe->blend->rt[cbuf].alpha_dst_factor) { + switch (softpipe->blend->rt[blend_index].alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: /* dest = dest * 1 NO-OP, leave dest as-is */ break; @@ -696,13 +703,13 @@ blend_quad(struct quad_stage *qs, } break; default: - assert(0); + assert(0 && "invalid alpha dst factor"); } /* * Combine RGB terms */ - switch (softpipe->blend->rt[cbuf].rgb_func) { + switch (softpipe->blend->rt[blend_index].rgb_func) { case PIPE_BLEND_ADD: VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ 
VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ @@ -729,13 +736,13 @@ blend_quad(struct quad_stage *qs, VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ break; default: - assert(0); + assert(0 && "invalid rgb blend func"); } /* * Combine A terms */ - switch (softpipe->blend->rt[cbuf].alpha_func) { + switch (softpipe->blend->rt[blend_index].alpha_func) { case PIPE_BLEND_ADD: VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ break; @@ -752,7 +759,7 @@ blend_quad(struct quad_stage *qs, VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ break; default: - assert(0); + assert(0 && "invalid alpha blend func"); } } @@ -822,7 +829,7 @@ blend_fallback(struct quad_stage *qs, logicop_quad( qs, quadColor, dest ); } else if (blend->rt[blend_buf].blend_enable) { - blend_quad( qs, quadColor, dest, cbuf, has_dst_alpha ); + blend_quad( qs, quadColor, dest, blend_buf, has_dst_alpha ); } if (blend->rt[blend_buf].colormask != 0xf) diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 72117c233e..5590d40892 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -82,7 +82,7 @@ get_depth_stencil_values( struct depth_data *data, data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; data->stencilVals[j] = tile->data.depth32[y][x] >> 24; } - break; + break; case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: for (j = 0; j < QUAD_SIZE; j++) { @@ -92,6 +92,14 @@ get_depth_stencil_values( struct depth_data *data, data->stencilVals[j] = tile->data.depth32[y][x] & 0xff; } break; + case PIPE_FORMAT_S8_USCALED: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = 0; + data->stencilVals[j] = tile->data.stencil8[y][x]; + } + break; default: assert(0); } @@ -227,6 +235,14 @@ write_depth_stencil_values( struct depth_data *data, 
tile->data.depth32[y][x] = data->bzzzz[j] << 8; } break; + case PIPE_FORMAT_S8_USCALED: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.stencil8[y][x] = data->stencilVals[j]; + } + break; + default: assert(0); } @@ -661,20 +677,6 @@ static unsigned mask_count[16] = -/** helper to get number of Z buffer bits */ -static unsigned -get_depth_bits(struct quad_stage *qs) -{ - struct pipe_surface *zsurf = qs->softpipe->framebuffer.zsbuf; - if (zsurf) - return util_format_get_component_bits(zsurf->format, - UTIL_FORMAT_COLORSPACE_ZS, 0); - else - return 0; -} - - - /** * General depth/stencil test function. Used when there's no fast-path. */ @@ -693,9 +695,8 @@ depth_test_quads_fallback(struct quad_stage *qs, nr = alpha_test_quads(qs, quads, nr); } - if (get_depth_bits(qs) > 0 && - (qs->softpipe->depth_stencil->depth.enabled || - qs->softpipe->depth_stencil->stencil[0].enabled)) { + if (qs->softpipe->depth_stencil->depth.enabled || + qs->softpipe->depth_stencil->stencil[0].enabled) { data.ps = qs->softpipe->framebuffer.zsbuf; data.format = data.ps->format; @@ -794,8 +795,7 @@ choose_depth_test(struct quad_stage *qs, boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; - boolean depth = (get_depth_bits(qs) > 0 && - qs->softpipe->depth_stencil->depth.enabled); + boolean depth = qs->softpipe->depth_stencil->depth.enabled; unsigned depthfunc = qs->softpipe->depth_stencil->depth.func; diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 907e94b59b..d240bcbf3b 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -109,7 +109,7 @@ shade_quads(struct quad_stage *qs, { struct softpipe_context *softpipe = qs->softpipe; struct tgsi_exec_machine *machine = softpipe->fs_machine; - unsigned i, pass = 0; + unsigned i, nr_quads = 0; for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { 
machine->Consts[i] = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT][i]; @@ -123,11 +123,11 @@ shade_quads(struct quad_stage *qs, if (/*do_coverage*/ 0) coverage_quad( qs, quads[i] ); - quads[pass++] = quads[i]; + quads[nr_quads++] = quads[i]; } - if (pass) - qs->next->run(qs->next, quads, pass); + if (nr_quads) + qs->next->run(qs->next, quads, nr_quads); } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index fc57d3eb61..93af6ee5b0 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -149,6 +149,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 0; + + case PIPE_CAP_GEOMETRY_SHADER4: + return 1; default: return 0; } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index ff83c66d8b..cf7ab81405 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -71,7 +71,7 @@ lerp(float a, float v0, float v1) /** - * Do 2D/biliner interpolation of float values. + * Do 2D/bilinear interpolation of float values. * v00, v10, v01 and v11 are typically four texture samples in a square/box. * a and b are the horizontal and vertical interpolants. 
* It's important that this function is inlined when compiled with diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index b3e1c49406..eb74f14a7b 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -63,19 +63,21 @@ sp_create_tex_tile_cache( struct pipe_context *pipe ) void sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc) { - uint pos; + if (tc) { + uint pos; - for (pos = 0; pos < NUM_ENTRIES; pos++) { - /*assert(tc->entries[pos].x < 0);*/ - } - if (tc->transfer) { - tc->pipe->transfer_destroy(tc->pipe, tc->transfer); - } - if (tc->tex_trans) { - tc->pipe->transfer_destroy(tc->pipe, tc->tex_trans); - } + for (pos = 0; pos < NUM_ENTRIES; pos++) { + /*assert(tc->entries[pos].x < 0);*/ + } + if (tc->transfer) { + tc->pipe->transfer_destroy(tc->pipe, tc->transfer); + } + if (tc->tex_trans) { + tc->pipe->transfer_destroy(tc->pipe, tc->tex_trans); + } - FREE( tc ); + FREE( tc ); + } } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index f4db6f6ef0..bf33fd9417 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -115,16 +115,18 @@ sp_create_tile_cache( struct pipe_context *pipe ) void sp_destroy_tile_cache(struct softpipe_tile_cache *tc) { - uint pos; + if (tc) { + uint pos; - for (pos = 0; pos < NUM_ENTRIES; pos++) { - /*assert(tc->entries[pos].x < 0);*/ - } - if (tc->transfer) { - tc->pipe->transfer_destroy(tc->pipe, tc->transfer); - } + for (pos = 0; pos < NUM_ENTRIES; pos++) { + /*assert(tc->entries[pos].x < 0);*/ + } + if (tc->transfer) { + tc->pipe->transfer_destroy(tc->pipe, tc->transfer); + } - FREE( tc ); + FREE( tc ); + } } @@ -284,7 +286,11 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) assert(pt->resource); /* clear the scratch tile to the clear value */ - clear_tile(&tc->tile, 
pt->resource->format, tc->clear_val); + if (tc->depth_stencil) { + clear_tile(&tc->tile, pt->resource->format, tc->clear_val); + } else { + clear_tile_rgba(&tc->tile, pt->resource->format, tc->clear_color); + } /* push the tile to all positions marked as clear */ for (y = 0; y < h; y += TILE_SIZE) { @@ -292,11 +298,18 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) union tile_address addr = tile_address(x, y); if (is_clear_flag_set(tc->clear_flags, addr)) { - pipe_put_tile_raw(tc->pipe, - pt, - x, y, TILE_SIZE, TILE_SIZE, - tc->tile.data.color32, 0/*STRIDE*/); - + /* write the scratch tile to the surface */ + if (tc->depth_stencil) { + pipe_put_tile_raw(tc->pipe, + pt, + x, y, TILE_SIZE, TILE_SIZE, + tc->tile.data.any, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(tc->pipe, pt, + x, y, TILE_SIZE, TILE_SIZE, + (float *) tc->tile.data.color); + } numCleared++; } } diff --git a/src/gallium/drivers/svga/svga_public.h b/src/gallium/drivers/svga/svga_public.h new file mode 100644 index 0000000000..ded2e2482a --- /dev/null +++ b/src/gallium/drivers/svga/svga_public.h @@ -0,0 +1,42 @@ +/********************************************************** + * Copyright 2010 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * VMware SVGA public interface. Used by targets to create a stack. + * + * @author Jakob Bornecrantz Fonseca <jakob@vmware.com> + */ + +#ifndef SVGA_PUBLIC_H_ +#define SVGA_PUBLIC_H_ + +struct pipe_screen; +struct svga_winsys_screen; + +struct pipe_screen * +svga_screen_create(struct svga_winsys_screen *sws); + +#endif /* SVGA_PUBLIC_H_ */ diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 54d9faeb72..077ff9a2cf 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -29,6 +29,7 @@ #include "util/u_math.h" #include "svga_winsys.h" +#include "svga_public.h" #include "svga_context.h" #include "svga_screen.h" #include "svga_resource_texture.h" diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index a2dcc84f7d..5e4bdeff2e 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -288,9 +288,6 @@ struct svga_winsys_screen }; -struct pipe_screen * -svga_screen_create(struct svga_winsys_screen *sws); - struct svga_winsys_screen * svga_winsys_screen(struct pipe_screen *screen); diff --git a/src/gallium/drivers/trace/Makefile b/src/gallium/drivers/trace/Makefile index 1b0c087a2a..99e5fb81c2 100644 --- a/src/gallium/drivers/trace/Makefile +++ b/src/gallium/drivers/trace/Makefile @@ -8,7 +8,6 @@ C_SOURCES = \ tr_dump.c \ 
tr_dump_state.c \ tr_screen.c \ - tr_drm.c \ tr_texture.c include ../../Makefile.template diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript index 0dc43a9ec4..06b0c4863a 100644 --- a/src/gallium/drivers/trace/SConscript +++ b/src/gallium/drivers/trace/SConscript @@ -6,7 +6,6 @@ trace = env.ConvenienceLibrary( target = 'trace', source = [ 'tr_context.c', - 'tr_drm.c', 'tr_dump.c', 'tr_dump_state.c', 'tr_screen.c', diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c deleted file mode 100644 index e685033212..0000000000 --- a/src/gallium/drivers/trace/tr_drm.c +++ /dev/null @@ -1,101 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include "state_tracker/drm_api.h" - -#include "util/u_memory.h" -#include "rbug/rbug_public.h" -#include "tr_drm.h" -#include "tr_screen.h" -#include "tr_public.h" - -struct trace_drm_api -{ - struct drm_api base; - - struct drm_api *api; -}; - -static INLINE struct trace_drm_api * -trace_drm_api(struct drm_api *_api) -{ - return (struct trace_drm_api *)_api; -} - -static struct pipe_screen * -trace_drm_create_screen(struct drm_api *_api, int fd) -{ - struct trace_drm_api *tr_api = trace_drm_api(_api); - struct drm_api *api = tr_api->api; - struct pipe_screen *screen; - - /* TODO trace call */ - - screen = api->create_screen(api, fd); - - return trace_screen_create(rbug_screen_create(screen)); -} - -static void -trace_drm_destroy(struct drm_api *_api) -{ - struct trace_drm_api *tr_api = trace_drm_api(_api); - struct drm_api *api = tr_api->api; - - if (api->destroy) - api->destroy(api); - - FREE(tr_api); -} - -struct drm_api * -trace_drm_create(struct drm_api *api) -{ - struct trace_drm_api *tr_api; - - if (!api) - goto error; - - if (!trace_enabled() && !rbug_enabled()) - goto error; - - tr_api = CALLOC_STRUCT(trace_drm_api); - - if (!tr_api) - goto error; - - tr_api->base.name = api->name; - tr_api->base.driver_name = api->driver_name; - tr_api->base.create_screen = trace_drm_create_screen; - tr_api->base.destroy = trace_drm_destroy; - tr_api->api = api; - - return &tr_api->base; - -error: - return api; -} diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index a14486a5fb..0358c14e24 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -60,6 +60,11 @@ #include <stdbool.h> +#ifdef __cplusplus +extern "C" { +#endif + + #if !defined(__HAIKU__) && !defined(__USE_MISC) typedef unsigned int uint; typedef unsigned short ushort; @@ -184,6 +189,25 @@ typedef unsigned char boolean; #endif + +#if 
defined(__GNUC__) + +#define PIPE_READ_WRITE_BARRIER() __asm__("":::"memory") + +#elif defined(_MSC_VER) + +void _ReadWriteBarrier(void); +#pragma intrinsic(_ReadWriteBarrier) +#define PIPE_READ_WRITE_BARRIER() _ReadWriteBarrier() + +#else + +#warning "Unsupported compiler" +#define PIPE_READ_WRITE_BARRIER() /* */ + +#endif + + /* You should use these macros to mark if blocks where the if condition * is either likely to be true, or unlikely to be true. * @@ -224,4 +248,10 @@ typedef unsigned char boolean; #define unlikely(x) !!(x) #endif + +#if defined(__cplusplus) +} +#endif + + #endif /* P_COMPILER_H */ diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index b81702a4fa..74a1fa2978 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -92,6 +92,11 @@ #else #define PIPE_ARCH_SSE #endif +#if defined(PIPE_CC_GCC) && !defined(__SSSE3__) +/* #warning SSE3 support requires -msse3 compiler options */ +#else +#define PIPE_ARCH_SSSE3 +#endif #endif #if defined(__PPC__) @@ -146,6 +151,11 @@ #define PIPE_OS_UNIX #endif +#if defined(__GNU__) +#define PIPE_OS_HURD +#define PIPE_OS_UNIX +#endif + #if defined(__sun) #define PIPE_OS_SOLARIS #define PIPE_OS_UNIX diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 3b87d998ce..00aa2076ed 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -489,7 +489,10 @@ enum pipe_cap { PIPE_CAP_MAX_VS_CONSTS, PIPE_CAP_MAX_VS_TEMPS, PIPE_CAP_MAX_VS_ADDRS, - PIPE_CAP_MAX_VS_PREDS + PIPE_CAP_MAX_VS_PREDS, + + PIPE_CAP_GEOMETRY_SHADER4, + PIPE_CAP_DEPTH_CLAMP }; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 6231f06ec7..301fe2b74f 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -61,8 +61,8 @@ extern "C" { #define PIPE_MAX_SAMPLERS 16 #define PIPE_MAX_VERTEX_SAMPLERS 16 #define PIPE_MAX_GEOMETRY_SAMPLERS 
16 -#define PIPE_MAX_SHADER_INPUTS 16 -#define PIPE_MAX_SHADER_OUTPUTS 16 +#define PIPE_MAX_SHADER_INPUTS 32 +#define PIPE_MAX_SHADER_OUTPUTS 32 #define PIPE_MAX_TEXTURE_LEVELS 16 #define PIPE_MAX_SO_BUFFERS 4 @@ -155,6 +155,7 @@ struct pipe_clip_state { float ucp[PIPE_MAX_CLIP_PLANES][4]; unsigned nr; + unsigned depth_clamp:1; }; @@ -419,7 +420,7 @@ struct pipe_vertex_element /** Which vertex_buffer (as given to pipe->set_vertex_buffer()) does * this attribute live in? */ - unsigned vertex_buffer_index:8; + unsigned vertex_buffer_index; enum pipe_format src_format; }; diff --git a/src/gallium/include/state_tracker/drm_api.h b/src/gallium/include/state_tracker/drm_api.h deleted file mode 100644 index 4572c7e042..0000000000 --- a/src/gallium/include/state_tracker/drm_api.h +++ /dev/null @@ -1,57 +0,0 @@ - -#ifndef _DRM_API_H_ -#define _DRM_API_H_ - -#include "pipe/p_compiler.h" - -struct pipe_screen; -struct pipe_winsys; -struct pipe_context; -struct pipe_resource; - -#define DRM_API_HANDLE_TYPE_SHARED 0 -#define DRM_API_HANDLE_TYPE_KMS 1 - -/** - * For use with pipe_screen::{texture_from_handle|texture_get_handle}. - */ -struct winsys_handle -{ - /** - * Unused for texture_from_handle, always - * DRM_API_HANDLE_TYPE_SHARED. Input to texture_get_handle, - * use TEXTURE_USAGE to select handle for kms or ipc. - */ - unsigned type; - /** - * Input to texture_from_handle. - * Output for texture_get_handle. - */ - unsigned handle; - /** - * Input to texture_from_handle. - * Output for texture_get_handle. - */ - unsigned stride; -}; - -struct drm_api -{ - void (*destroy)(struct drm_api *api); - - const char *name; - - /** - * Kernel driver name, as accepted by drmOpenByName. - */ - const char *driver_name; - - /** - * Create a pipe srcreen. 
- */ - struct pipe_screen* (*create_screen)(struct drm_api *api, int drm_fd); -}; - -extern struct drm_api * drm_api_create(void); - -#endif diff --git a/src/gallium/include/state_tracker/drm_driver.h b/src/gallium/include/state_tracker/drm_driver.h new file mode 100644 index 0000000000..d94c1e6a7c --- /dev/null +++ b/src/gallium/include/state_tracker/drm_driver.h @@ -0,0 +1,71 @@ + +#ifndef _DRM_DRIVER_H_ +#define _DRM_DRIVER_H_ + +#include "pipe/p_compiler.h" + +struct pipe_screen; +struct pipe_winsys; +struct pipe_context; +struct pipe_resource; + +#define DRM_API_HANDLE_TYPE_SHARED 0 +#define DRM_API_HANDLE_TYPE_KMS 1 + +/** + * For use with pipe_screen::{texture_from_handle|texture_get_handle}. + */ +struct winsys_handle +{ + /** + * Unused for texture_from_handle, always + * DRM_API_HANDLE_TYPE_SHARED. Input to texture_get_handle, + * use TEXTURE_USAGE to select handle for kms or ipc. + */ + unsigned type; + /** + * Input to texture_from_handle. + * Output for texture_get_handle. + */ + unsigned handle; + /** + * Input to texture_from_handle. + * Output for texture_get_handle. + */ + unsigned stride; +}; + +struct drm_driver_descriptor +{ + /** + * Identifying sufix/prefix of the binary, used by egl. + */ + const char *name; + + /** + * Kernel driver name, as accepted by drmOpenByName. + */ + const char *driver_name; + + /** + * Create a pipe srcreen. + * + * This function does any wrapping of the screen. + * For example wrapping trace or rbug debugging drivers around it. + */ + struct pipe_screen* (*create_screen)(int drm_fd); +}; + +extern struct drm_driver_descriptor driver_descriptor; + +/** + * Instantiate a drm_driver_descriptor struct. 
+ */ +#define DRM_DRIVER_DESCRIPTOR(name_str, driver_name_str, func) \ +struct drm_driver_descriptor driver_descriptor = { \ + .name = name_str, \ + .driver_name = driver_name_str, \ + .create_screen = func, \ +}; + +#endif diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h index 621bdae5c8..1142461188 100644 --- a/src/gallium/include/state_tracker/st_api.h +++ b/src/gallium/include/state_tracker/st_api.h @@ -369,12 +369,6 @@ struct st_api st_proc_t (*get_proc_address)(struct st_api *stapi, const char *procname); /** - * Return true if the visual is supported by the state tracker. - */ - boolean (*is_visual_supported)(struct st_api *stapi, - const struct st_visual *visual); - - /** * Create a rendering context. */ struct st_context_iface *(*create_context)(struct st_api *stapi, diff --git a/src/gallium/state_trackers/dri/common/dri_screen.h b/src/gallium/state_trackers/dri/common/dri_screen.h index 9ff925d4be..087ae8d2a4 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.h +++ b/src/gallium/state_trackers/dri/common/dri_screen.h @@ -39,7 +39,6 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "state_tracker/st_api.h" -#include "state_tracker/drm_api.h" struct dri_context; struct dri_drawable; @@ -75,7 +74,6 @@ struct dri_screen enum st_attachment_type statt); /* gallium */ - struct drm_api *api; boolean d_depth_bits_last; boolean sd_depth_bits_last; boolean auto_fake_front; diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index f4cc8d77eb..5c6573fa69 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -33,7 +33,7 @@ #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_debug.h" -#include "state_tracker/drm_api.h" +#include "state_tracker/drm_driver.h" #include "dri_screen.h" #include "dri_context.h" @@ -484,11 +484,11 @@ static const __DRIextension 
*dri_screen_extensions[] = { &driReadDrawableExtension, &driCopySubBufferExtension.base, &driSwapControlExtension.base, - &driFrameTrackingExtension.base, &driMediaStreamCounterExtension.base, &dri2TexBufferExtension.base, &dri2FlushExtension.base, &dri2ImageExtension.base, + &dri2ConfigQueryExtension.base, NULL }; @@ -508,7 +508,6 @@ dri2_init_screen(__DRIscreen * sPriv) if (!screen) return NULL; - screen->api = drm_api_create(); screen->sPriv = sPriv; screen->fd = sPriv->fd; screen->lookup_egl_image = dri2_lookup_egl_image; @@ -518,7 +517,7 @@ dri2_init_screen(__DRIscreen * sPriv) sPriv->private = (void *)screen; sPriv->extensions = dri_screen_extensions; - pscreen = screen->api->create_screen(screen->api, screen->fd); + pscreen = driver_descriptor.create_screen(screen->fd); /* dri_init_screen_helper checks pscreen for us */ configs = dri_init_screen_helper(screen, pscreen, 32); diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index dcf645593f..23e99aa0ad 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -255,7 +255,6 @@ drisw_init_screen(__DRIscreen * sPriv) if (!screen) return NULL; - screen->api = NULL; /* not needed */ screen->sPriv = sPriv; screen->fd = -1; screen->allocate_textures = drisw_allocate_textures; diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile index fec178ffb3..9e9e479e7e 100644 --- a/src/gallium/state_trackers/egl/Makefile +++ b/src/gallium/state_trackers/egl/Makefile @@ -5,14 +5,12 @@ common_INCLUDES = \ -I. 
\ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/egl/main \ -I$(TOP)/include common_SOURCES = $(wildcard common/*.c) common_OBJECTS = $(common_SOURCES:.c=.o) - x11_INCLUDES = \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/glx \ @@ -31,30 +29,37 @@ kms_SOURCES = $(wildcard kms/*.c) kms_OBJECTS = $(kms_SOURCES:.c=.o) -fbdev_INCLUDES = -I$(TOP)/src/gallium/winsys/sw -I$(TOP)/src/gallium/drivers +fbdev_INCLUDES = -I$(TOP)/src/gallium/winsys/sw fbdev_SOURCES = $(wildcard fbdev/*.c) fbdev_OBJECTS = $(fbdev_SOURCES:.c=.o) ALL_INCLUDES = $(common_INCLUDES) $(x11_INCLUDES) $(kms_INCLUDES) $(fbdev_INCLUDES) ALL_SOURCES = $(common_SOURCES) $(x11_SOURCES) $(kms_SOURCES) $(fbdev_SOURCES) -ALL_OBJECTS = $(common_OBJECTS) $(x11_OBJECTS) $(kms_OBJECTS) $(fbdev_OBJECTS) - -##### TARGETS ##### -EGL_PLATFORMS_MODS = $(foreach plat, $(EGL_PLATFORMS), libegl$(plat).a) +EGL_OBJECTS = $(common_OBJECTS) +EGL_CPPFLAGS = $(common_INCLUDES) + +# add backends +ifneq ($(findstring x11, $(EGL_PLATFORMS)),) +EGL_OBJECTS += $(x11_OBJECTS) +EGL_CPPFLAGS += -DHAVE_X11_BACKEND +endif +ifneq ($(findstring kms, $(EGL_PLATFORMS)),) +EGL_OBJECTS += $(kms_OBJECTS) +EGL_CPPFLAGS += -DHAVE_KMS_BACKEND +endif +ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),) +EGL_OBJECTS += $(fbdev_OBJECTS) +EGL_CPPFLAGS += -DHAVE_FBDEV_BACKEND +endif -default: depend $(EGL_PLATFORMS_MODS) - - -libeglx11.a: $(x11_OBJECTS) $(common_OBJECTS) Makefile - $(MKLIB) -o eglx11 -static $(x11_OBJECTS) $(common_OBJECTS) +##### TARGETS ##### -libeglkms.a: $(kms_OBJECTS) $(common_OBJECTS) Makefile - $(MKLIB) -o eglkms -static $(kms_OBJECTS) $(common_OBJECTS) +default: depend libegl.a -libeglfbdev.a: $(fbdev_OBJECTS) $(common_OBJECTS) Makefile - $(MKLIB) -o eglfbdev -static $(fbdev_OBJECTS) $(common_OBJECTS) +libegl.a: $(EGL_OBJECTS) Makefile + $(MKLIB) -o egl -static $(EGL_OBJECTS) depend: rm -f depend @@ -62,8 +67,8 @@ depend: $(MKDEP) $(MKDEP_OPTIONS) $(ALL_INCLUDES) 
$(ALL_SOURCES) 2> /dev/null clean: - rm -f $(ALL_OBJECTS) - rm -f $(EGL_PLATFORMS_MODS) + rm -f libegl.a + rm -f $(EGL_OBJECTS) rm -f depend depend.bak # Dummy target @@ -72,16 +77,20 @@ install: ##### RULES ##### +define egl-cc +$(CC) -c $(common_INCLUDES) $($(1)_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ +endef + $(common_OBJECTS): %.o: %.c - $(CC) -c $(common_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ + $(CC) -c $(EGL_CPPFLAGS) $(DEFINES) $(CFLAGS) $< -o $@ $(x11_OBJECTS): %.o: %.c - $(CC) -c $(common_INCLUDES) $(x11_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ + $(call egl-cc,x11) $(kms_OBJECTS): %.o: %.c - $(CC) -c $(common_INCLUDES) $(kms_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ + $(call egl-cc,kms) $(fbdev_OBJECTS): %.o: %.c - $(CC) -c $(common_INCLUDES) $(fbdev_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ + $(call egl-cc,fbdev) sinclude depend diff --git a/src/gallium/state_trackers/egl/SConscript b/src/gallium/state_trackers/egl/SConscript index c4d01d6b28..e71aec35b7 100644 --- a/src/gallium/state_trackers/egl/SConscript +++ b/src/gallium/state_trackers/egl/SConscript @@ -12,6 +12,9 @@ if 'egl' in env['statetrackers']: '#/src/gallium/winsys/sw', '.', ]) + env.Append(CPPDEFINES = [ + 'HAVE_GDI_BACKEND', + ]) common_sources = [ 'common/egl_g3d.c', diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index 361cc7960b..b6321e6b43 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -35,71 +35,57 @@ #include "egl_g3d.h" #include "egl_g3d_api.h" #include "egl_g3d_st.h" +#include "egl_g3d_loader.h" #include "native.h" /** - * Initialize the state trackers. + * Get the native platform. 
*/ -static void -egl_g3d_init_st(_EGLDriver *drv) +static const struct native_platform * +egl_g3d_get_platform(_EGLDriver *drv, _EGLPlatformType plat) { struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); - EGLint i; - /* already initialized */ - if (gdrv->api_mask) - return; + if (!gdrv->platforms[plat]) { + const char *plat_name = NULL; + const struct native_platform *nplat = NULL; - egl_g3d_init_st_apis(gdrv->stapis); - for (i = 0; i < ST_API_COUNT; i++) { - if (gdrv->stapis[i]) - gdrv->api_mask |= egl_g3d_st_api_bit(i); - } + switch (plat) { + case _EGL_PLATFORM_WINDOWS: + plat_name = "Windows"; +#ifdef HAVE_GDI_BACKEND + nplat = native_get_gdi_platform(); +#endif + break; + case _EGL_PLATFORM_X11: + plat_name = "X11"; +#ifdef HAVE_X11_BACKEND + nplat = native_get_x11_platform(); +#endif + break; + case _EGL_PLATFORM_DRM: + plat_name = "DRM"; +#ifdef HAVE_KMS_BACKEND + nplat = native_get_kms_platform(); +#endif + break; + case _EGL_PLATFORM_FBDEV: + plat_name = "FBDEV"; +#ifdef HAVE_FBDEV_BACKEND + nplat = native_get_fbdev_platform(); +#endif + break; + default: + break; + } - if (gdrv->api_mask) - _eglLog(_EGL_DEBUG, "Driver API mask: 0x%x", gdrv->api_mask); - else - _eglLog(_EGL_WARNING, "No supported client API"); -} + if (!nplat) + _eglLog(_EGL_WARNING, "unsupported platform %s", plat_name); -/** - * Get the probe object of the display. - * - * Note that this function may be called before the display is initialized. 
- */ -static struct native_probe * -egl_g3d_get_probe(_EGLDriver *drv, _EGLDisplay *dpy) -{ - struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); - struct native_probe *nprobe; - - nprobe = (struct native_probe *) _eglGetProbeCache(gdrv->probe_key); - if (!nprobe || nprobe->display != dpy->NativeDisplay) { - if (nprobe) - nprobe->destroy(nprobe); - nprobe = native_create_probe(dpy->NativeDisplay); - _eglSetProbeCache(gdrv->probe_key, (void *) nprobe); + gdrv->platforms[plat] = nplat; } - return nprobe; -} - -/** - * Destroy the probe object of the display. The display may be NULL. - * - * Note that this function may be called before the display is initialized. - */ -static void -egl_g3d_destroy_probe(_EGLDriver *drv, _EGLDisplay *dpy) -{ - struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); - struct native_probe *nprobe; - - nprobe = (struct native_probe *) _eglGetProbeCache(gdrv->probe_key); - if (nprobe && (!dpy || nprobe->display == dpy->NativeDisplay)) { - nprobe->destroy(nprobe); - _eglSetProbeCache(gdrv->probe_key, NULL); - } + return gdrv->platforms[plat]; } #ifdef EGL_MESA_screen_surface @@ -268,11 +254,9 @@ egl_g3d_init_config(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, const struct native_config *nconf, enum pipe_format depth_stencil_format) { - struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); struct egl_g3d_config *gconf = egl_g3d_config(conf); EGLint buffer_mask, api_mask; EGLBoolean valid; - EGLint i; buffer_mask = 0x0; if (nconf->buffer_mask & (1 << NATIVE_ATTACHMENT_FRONT_LEFT)) @@ -293,14 +277,7 @@ egl_g3d_init_config(_EGLDriver *drv, _EGLDisplay *dpy, gconf->stvis.render_buffer = (buffer_mask & ST_ATTACHMENT_BACK_LEFT_MASK) ? 
ST_ATTACHMENT_BACK_LEFT : ST_ATTACHMENT_FRONT_LEFT; - api_mask = 0; - for (i = 0; i < ST_API_COUNT; i++) { - struct st_api *stapi = gdrv->stapis[i]; - if (stapi) { - if (stapi->is_visual_supported(stapi, &gconf->stvis)) - api_mask |= egl_g3d_st_api_bit(i); - } - } + api_mask = dpy->ClientAPIsMask; /* this is required by EGL, not by OpenGL ES */ if (nconf->window_bit && gconf->stvis.render_buffer != ST_ATTACHMENT_BACK_LEFT) @@ -425,31 +402,64 @@ egl_g3d_invalid_surface(struct native_display *ndpy, gctx->stctxi->notify_invalid_framebuffer(gctx->stctxi, gsurf->stfbi); } +static struct pipe_screen * +egl_g3d_new_drm_screen(struct native_display *ndpy, const char *name, int fd) +{ + _EGLDisplay *dpy = (_EGLDisplay *) ndpy->user_data; + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + return gdpy->loader->create_drm_screen(name, fd); +} + +static struct pipe_screen * +egl_g3d_new_sw_screen(struct native_display *ndpy, struct sw_winsys *ws) +{ + _EGLDisplay *dpy = (_EGLDisplay *) ndpy->user_data; + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + return gdpy->loader->create_sw_screen(ws); +} + static struct native_event_handler egl_g3d_native_event_handler = { - egl_g3d_invalid_surface + egl_g3d_invalid_surface, + egl_g3d_new_drm_screen, + egl_g3d_new_sw_screen }; +static void +egl_g3d_free_config(void *conf) +{ + struct egl_g3d_config *gconf = egl_g3d_config((_EGLConfig *) conf); + FREE(gconf); +} + +static void +egl_g3d_free_screen(void *scr) +{ + struct egl_g3d_screen *gscr = egl_g3d_screen((_EGLScreen *) scr); + FREE(gscr->native_modes); + FREE(gscr); +} + static EGLBoolean egl_g3d_terminate(_EGLDriver *drv, _EGLDisplay *dpy) { struct egl_g3d_display *gdpy = egl_g3d_display(dpy); - EGLint i; _eglReleaseDisplayResources(drv, dpy); - _eglCleanupDisplay(dpy); if (gdpy->pipe) gdpy->pipe->destroy(gdpy->pipe); + if (dpy->Configs) { + _eglDestroyArray(dpy->Configs, egl_g3d_free_config); + dpy->Configs = NULL; + } if (dpy->Screens) { - for (i = 0; i < 
dpy->NumScreens; i++) { - struct egl_g3d_screen *gscr = egl_g3d_screen(dpy->Screens[i]); - FREE(gscr->native_modes); - FREE(gscr); - } - FREE(dpy->Screens); + _eglDestroyArray(dpy->Screens, egl_g3d_free_screen); + dpy->Screens = NULL; } + _eglCleanupDisplay(dpy); + if (gdpy->smapi) egl_g3d_destroy_st_manager(gdpy->smapi); @@ -468,28 +478,36 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy, { struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); struct egl_g3d_display *gdpy; + const struct native_platform *nplat; - /* the probe object is unlikely to be needed again */ - egl_g3d_destroy_probe(drv, dpy); + nplat = egl_g3d_get_platform(drv, dpy->Platform); + if (!nplat) + return EGL_FALSE; gdpy = CALLOC_STRUCT(egl_g3d_display); if (!gdpy) { _eglError(EGL_BAD_ALLOC, "eglInitialize"); goto fail; } + gdpy->loader = gdrv->loader; dpy->DriverData = gdpy; - gdpy->native = native_create_display(dpy->NativeDisplay, - &egl_g3d_native_event_handler); + _eglLog(_EGL_INFO, "use %s for display %p", nplat->name, dpy->PlatformDisplay); + gdpy->native = nplat->create_display(dpy->PlatformDisplay, + &egl_g3d_native_event_handler, (void *) dpy); if (!gdpy->native) { _eglError(EGL_NOT_INITIALIZED, "eglInitialize(no usable display)"); goto fail; } - gdpy->native->user_data = (void *) dpy; - - egl_g3d_init_st(&gdrv->base); - dpy->ClientAPIsMask = gdrv->api_mask; + if (gdpy->loader->api_mask & (1 << ST_API_OPENGL)) + dpy->ClientAPIsMask |= EGL_OPENGL_BIT; + if (gdpy->loader->api_mask & (1 << ST_API_OPENGL_ES1)) + dpy->ClientAPIsMask |= EGL_OPENGL_ES_BIT; + if (gdpy->loader->api_mask & (1 << ST_API_OPENGL_ES2)) + dpy->ClientAPIsMask |= EGL_OPENGL_ES2_BIT; + if (gdpy->loader->api_mask & (1 << ST_API_OPENVG)) + dpy->ClientAPIsMask |= EGL_OPENVG_BIT; gdpy->smapi = egl_g3d_create_st_manager(dpy); if (!gdpy->smapi) { @@ -530,87 +548,51 @@ static _EGLProc egl_g3d_get_proc_address(_EGLDriver *drv, const char *procname) { struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); - _EGLProc proc; - 
EGLint i; - - /* in case this is called before a display is initialized */ - egl_g3d_init_st(&gdrv->base); + struct st_api *stapi = NULL; - for (i = 0; i < ST_API_COUNT; i++) { - struct st_api *stapi = gdrv->stapis[i]; - if (stapi) { - proc = (_EGLProc) stapi->get_proc_address(stapi, procname); - if (proc) - return proc; - } - } + if (procname && procname[0] == 'v' && procname[1] == 'g') + stapi = gdrv->loader->get_st_api(ST_API_OPENVG); + else if (procname && procname[0] == 'g' && procname[1] == 'l') + stapi = gdrv->loader->guess_gl_api(); - return (_EGLProc) NULL; + return (_EGLProc) ((stapi) ? + stapi->get_proc_address(stapi, procname) : NULL); } static EGLint egl_g3d_probe(_EGLDriver *drv, _EGLDisplay *dpy) { - struct native_probe *nprobe; - enum native_probe_result res; - EGLint score; - - nprobe = egl_g3d_get_probe(drv, dpy); - res = native_get_probe_result(nprobe); - - switch (res) { - case NATIVE_PROBE_UNKNOWN: - default: - score = 0; - break; - case NATIVE_PROBE_FALLBACK: - score = 40; - break; - case NATIVE_PROBE_SUPPORTED: - score = 50; - break; - case NATIVE_PROBE_EXACT: - score = 100; - break; - } - - return score; -} - -static void -egl_g3d_unload(_EGLDriver *drv) -{ - struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); - - egl_g3d_destroy_st_apis(); - egl_g3d_destroy_probe(drv, NULL); - FREE(gdrv); + return (egl_g3d_get_platform(drv, dpy->Platform)) ? 
90 : 0; } _EGLDriver * -_eglMain(const char *args) +egl_g3d_create_driver(const struct egl_g3d_loader *loader) { - static char driver_name[64]; struct egl_g3d_driver *gdrv; - util_snprintf(driver_name, sizeof(driver_name), - "Gallium/%s", native_get_name()); - gdrv = CALLOC_STRUCT(egl_g3d_driver); if (!gdrv) return NULL; + gdrv->loader = loader; + egl_g3d_init_driver_api(&gdrv->base); gdrv->base.API.Initialize = egl_g3d_initialize; gdrv->base.API.Terminate = egl_g3d_terminate; gdrv->base.API.GetProcAddress = egl_g3d_get_proc_address; - gdrv->base.Name = driver_name; gdrv->base.Probe = egl_g3d_probe; - gdrv->base.Unload = egl_g3d_unload; - /* the key is " EGL G3D" */ - gdrv->probe_key = 0x0E61063D; + /* to be filled by the caller */ + gdrv->base.Name = NULL; + gdrv->base.Unload = NULL; return &gdrv->base; } + +void +egl_g3d_destroy_driver(_EGLDriver *drv) +{ + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); + FREE(gdrv); +} diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.h b/src/gallium/state_trackers/egl/common/egl_g3d.h index d516d8fe03..ed2b0409bb 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.h +++ b/src/gallium/state_trackers/egl/common/egl_g3d.h @@ -41,18 +41,18 @@ #include "native.h" #include "egl_g3d_st.h" +#include "egl_g3d_loader.h" struct egl_g3d_driver { _EGLDriver base; - struct st_api *stapis[ST_API_COUNT]; - EGLint api_mask; - - EGLint probe_key; + const struct egl_g3d_loader *loader; + const struct native_platform *platforms[_EGL_NUM_PLATFORMS]; }; struct egl_g3d_display { struct native_display *native; + const struct egl_g3d_loader *loader; struct st_manager *smapi; struct pipe_context *pipe; }; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.c b/src/gallium/state_trackers/egl/common/egl_g3d_api.c index 255a1fb730..edac72a822 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_api.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.c @@ -35,6 +35,7 @@ #include "egl_g3d_api.h" #include 
"egl_g3d_image.h" #include "egl_g3d_st.h" +#include "egl_g3d_loader.h" #include "native.h" /** @@ -44,7 +45,6 @@ static struct st_api * egl_g3d_choose_st(_EGLDriver *drv, _EGLContext *ctx) { struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); - struct st_api *stapi; EGLint idx = -1; switch (ctx->ClientAPI) { @@ -73,8 +73,7 @@ egl_g3d_choose_st(_EGLDriver *drv, _EGLContext *ctx) break; } - stapi = (idx >= 0) ? gdrv->stapis[idx] : NULL; - return stapi; + return (idx >= 0) ? gdrv->loader->get_st_api(idx) : NULL; } static _EGLContext * @@ -774,13 +773,13 @@ egl_g3d_find_pixmap_config(_EGLDisplay *dpy, EGLNativePixmapType pix) struct egl_g3d_config *gconf; EGLint i; - for (i = 0; i < dpy->NumConfigs; i++) { - gconf = egl_g3d_config(dpy->Configs[i]); + for (i = 0; i < dpy->Configs->Size; i++) { + gconf = egl_g3d_config((_EGLConfig *) dpy->Configs->Elements[i]); if (gdpy->native->is_pixmap_supported(gdpy->native, pix, gconf->native)) break; } - return (i < dpy->NumConfigs) ? &gconf->base : NULL; + return (i < dpy->Configs->Size) ? 
&gconf->base : NULL; } void diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c index b1fe30a776..1e13cfcf7e 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c @@ -78,7 +78,7 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, gimg = CALLOC_STRUCT(egl_g3d_image); if (!gimg) { - _eglError(EGL_BAD_ALLOC, "eglCreatePbufferSurface"); + _eglError(EGL_BAD_ALLOC, "eglCreateEGLImageKHR"); return NULL; } diff --git a/src/gallium/state_trackers/egl/common/native_probe.h b/src/gallium/state_trackers/egl/common/egl_g3d_loader.h index aeed9f85dd..c9141f8ad4 100644 --- a/src/gallium/state_trackers/egl/common/native_probe.h +++ b/src/gallium/state_trackers/egl/common/egl_g3d_loader.h @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 7.8 + * Version: 7.9 * - * Copyright (C) 2009-2010 Chia-I Wu <olv@0xlab.org> + * Copyright (C) 2010 LunarG Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,48 +21,34 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> */ -#ifndef _NATIVE_PROBE_H_ -#define _NATIVE_PROBE_H_ +#ifndef _EGL_G3D_LOADER_H_ +#define _EGL_G3D_LOADER_H_ -#include "EGL/egl.h" /* for EGL native types */ +#include "pipe/p_compiler.h" +#include "state_tracker/st_api.h" +#include "egltypedefs.h" -/** - * Enumerations for probe results. - */ -enum native_probe_result { - NATIVE_PROBE_UNKNOWN, - NATIVE_PROBE_FALLBACK, - NATIVE_PROBE_SUPPORTED, - NATIVE_PROBE_EXACT, -}; +struct pipe_screen; +struct sw_winsys; -/** - * A probe object for display probe. 
- */ -struct native_probe { - int magic; - EGLNativeDisplayType display; - void *data; +struct egl_g3d_loader { + uint api_mask; + struct st_api *(*get_st_api)(enum st_api_type api); + struct st_api *(*guess_gl_api)(void); - void (*destroy)(struct native_probe *nprobe); + struct pipe_screen *(*create_drm_screen)(const char *name, int fd); + struct pipe_screen *(*create_sw_screen)(struct sw_winsys *ws); }; -/** - * Return a probe object for the given display. - * - * Note that the returned object may be cached and used by different native - * display modules. It allows fast probing when multiple modules probe the - * same display. - */ -struct native_probe * -native_create_probe(EGLNativeDisplayType dpy); +_EGLDriver * +egl_g3d_create_driver(const struct egl_g3d_loader *loader); -/** - * Probe the probe object. - */ -enum native_probe_result -native_get_probe_result(struct native_probe *nprobe); +void +egl_g3d_destroy_driver(_EGLDriver *drv); -#endif /* _NATIVE_PROBE_H_ */ +#endif /* _EGL_G3D_LOADER_H_ */ diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c index 683b74f62b..05cdb0d421 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_st.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c @@ -49,173 +49,6 @@ egl_g3d_st_manager(struct st_manager *smapi) return (struct egl_g3d_st_manager *) smapi; } -static struct egl_g3d_st_module { - const char *filename; - struct util_dl_library *lib; - struct st_api *stapi; -} egl_g3d_st_modules[ST_API_COUNT]; - -static EGLBoolean -egl_g3d_search_path_callback(const char *dir, size_t len, void *callback_data) -{ - struct egl_g3d_st_module *stmod = - (struct egl_g3d_st_module *) callback_data; - char path[1024]; - int ret; - - if (!len) { - stmod->lib = util_dl_open(stmod->filename); - return !(stmod->lib); - } - - ret = util_snprintf(path, sizeof(path), - "%.*s/%s", len, dir, stmod->filename); - if (ret > 0 && ret < sizeof(path)) - stmod->lib = 
util_dl_open(path); - - return !(stmod->lib); -} - -static boolean -egl_g3d_load_st_module(struct egl_g3d_st_module *stmod, - const char *filename, const char *procname) -{ - struct st_api *(*create_api)(void); - - stmod->filename = filename; - if (stmod->filename) - _eglSearchPathForEach(egl_g3d_search_path_callback, (void *) stmod); - else - stmod->lib = util_dl_open(NULL); - - if (stmod->lib) { - create_api = (struct st_api *(*)(void)) - util_dl_get_proc_address(stmod->lib, procname); - if (create_api) - stmod->stapi = create_api(); - - if (!stmod->stapi) { - util_dl_close(stmod->lib); - stmod->lib = NULL; - } - } - - if (stmod->stapi) { - return TRUE; - } - else { - stmod->filename = NULL; - return FALSE; - } -} - -#ifdef PIPE_OS_WINDOWS -#define ST_MODULE_SUFFIX ".dll" -#else -#define ST_MODULE_SUFFIX ".so" -#endif - -void -egl_g3d_init_st_apis(struct st_api *stapis[ST_API_COUNT]) -{ - const char *skip_checks[ST_API_COUNT], *symbols[ST_API_COUNT]; - const char *filenames[ST_API_COUNT][4]; - struct util_dl_library *self; - int num_needed = 0, api; - - self = util_dl_open(NULL); - - /* collect the necessary data for loading modules */ - for (api = 0; api < ST_API_COUNT; api++) { - int count = 0; - - switch (api) { - case ST_API_OPENGL: - skip_checks[api] = "glColor4d"; - symbols[api] = ST_CREATE_OPENGL_SYMBOL; - filenames[api][count++] = "api_GL" ST_MODULE_SUFFIX; - break; - case ST_API_OPENGL_ES1: - skip_checks[api] = "glColor4x"; - symbols[api] = ST_CREATE_OPENGL_ES1_SYMBOL; - filenames[api][count++] = "api_GLESv1_CM" ST_MODULE_SUFFIX; - filenames[api][count++] = "api_GL" ST_MODULE_SUFFIX; - break; - case ST_API_OPENGL_ES2: - skip_checks[api] = "glShaderBinary"; - symbols[api] = ST_CREATE_OPENGL_ES2_SYMBOL; - filenames[api][count++] = "api_GLESv2" ST_MODULE_SUFFIX; - filenames[api][count++] = "api_GL" ST_MODULE_SUFFIX; - break; - case ST_API_OPENVG: - skip_checks[api] = "vgClear"; - symbols[api] = ST_CREATE_OPENVG_SYMBOL; - filenames[api][count++]= 
"api_OpenVG" ST_MODULE_SUFFIX; - break; - default: - assert(!"Unknown API Type\n"); - skip_checks[api] = NULL; - symbols[api] = NULL; - break; - } - filenames[api][count++]= NULL; - assert(count < Elements(filenames[api])); - - /* heuristicically decide if the module is needed */ - if (!self || !skip_checks[api] || - util_dl_get_proc_address(self, skip_checks[api])) { - /* unset so the module is not skipped */ - skip_checks[api] = NULL; - num_needed++; - } - } - /* mark all moudles needed if we wrongly decided that none is needed */ - if (!num_needed) - memset(skip_checks, 0, sizeof(skip_checks)); - - if (self) - util_dl_close(self); - - for (api = 0; api < ST_API_COUNT; api++) { - struct egl_g3d_st_module *stmod = &egl_g3d_st_modules[api]; - const char **p; - - /* skip the module */ - if (skip_checks[api]) - continue; - - /* try all filenames, including NULL */ - for (p = filenames[api]; *p; p++) { - if (egl_g3d_load_st_module(stmod, *p, symbols[api])) - break; - } - if (!stmod->stapi) - egl_g3d_load_st_module(stmod, NULL, symbols[api]); - - stapis[api] = stmod->stapi; - } -} - -void -egl_g3d_destroy_st_apis(void) -{ - int api; - - for (api = 0; api < ST_API_COUNT; api++) { - struct egl_g3d_st_module *stmod = &egl_g3d_st_modules[api]; - - if (stmod->stapi) { - stmod->stapi->destroy(stmod->stapi); - stmod->stapi = NULL; - } - if (stmod->lib) { - util_dl_close(stmod->lib); - stmod->lib = NULL; - } - stmod->filename = NULL; - } -} - static boolean egl_g3d_st_manager_get_egl_image(struct st_manager *smapi, struct st_context_iface *stctx, diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.h b/src/gallium/state_trackers/egl/common/egl_g3d_st.h index ee53799b02..aa25cc042d 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_st.h +++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.h @@ -33,12 +33,6 @@ #include "state_tracker/st_api.h" #include "egltypedefs.h" -void -egl_g3d_init_st_apis(struct st_api *stapis[ST_API_COUNT]); - -void 
-egl_g3d_destroy_st_apis(void); - struct st_manager * egl_g3d_create_st_manager(_EGLDisplay *dpy); diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h index 3f60348c48..9f34c517ef 100644 --- a/src/gallium/state_trackers/egl/common/native.h +++ b/src/gallium/state_trackers/egl/common/native.h @@ -32,8 +32,8 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" #include "pipe/p_state.h" +#include "state_tracker/sw_winsys.h" -#include "native_probe.h" #include "native_modeset.h" /** @@ -196,6 +196,11 @@ struct native_event_handler { void (*invalid_surface)(struct native_display *ndpy, struct native_surface *nsurf, unsigned int seq_num); + + struct pipe_screen *(*new_drm_screen)(struct native_display *ndpy, + const char *name, int fd); + struct pipe_screen *(*new_sw_screen)(struct native_display *ndpy, + struct sw_winsys *ws); }; /** @@ -207,11 +212,24 @@ native_attachment_mask_test(uint mask, enum native_attachment att) return !!(mask & (1 << att)); } -const char * -native_get_name(void); +struct native_platform { + const char *name; + + struct native_display *(*create_display)(void *dpy, + struct native_event_handler *handler, + void *user_data); +}; + +const struct native_platform * +native_get_gdi_platform(void); + +const struct native_platform * +native_get_x11_platform(void); + +const struct native_platform * +native_get_kms_platform(void); -struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *handler); +const struct native_platform * +native_get_fbdev_platform(void); #endif /* _NATIVE_H_ */ diff --git a/src/gallium/state_trackers/egl/common/native_helper.c b/src/gallium/state_trackers/egl/common/native_helper.c index 206817ed66..7832b2b693 100644 --- a/src/gallium/state_trackers/egl/common/native_helper.c +++ b/src/gallium/state_trackers/egl/common/native_helper.c @@ -31,9 +31,6 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" #include 
"pipe/p_state.h" -#include "softpipe/sp_public.h" -#include "llvmpipe/lp_public.h" -#include "target-helpers/wrap_screen.h" #include "native_helper.h" @@ -236,18 +233,3 @@ resource_surface_present(struct resource_surface *rsurf, return TRUE; } - -struct pipe_screen * -native_create_sw_screen(struct sw_winsys *ws) -{ - struct pipe_screen *screen = NULL; - -#if defined(GALLIUM_LLVMPIPE) - if (!screen && !debug_get_bool_option("GALLIUM_NO_LLVM", FALSE)) - screen = llvmpipe_create_screen(ws); -#endif - if (!screen) - screen = softpipe_create_screen(ws); - - return (screen) ? gallium_wrap_screen(screen) : NULL; -} diff --git a/src/gallium/state_trackers/egl/common/native_helper.h b/src/gallium/state_trackers/egl/common/native_helper.h index bdb9629466..d1569ac3ea 100644 --- a/src/gallium/state_trackers/egl/common/native_helper.h +++ b/src/gallium/state_trackers/egl/common/native_helper.h @@ -69,6 +69,3 @@ boolean resource_surface_present(struct resource_surface *rsurf, enum native_attachment which, void *winsys_drawable_handle); - -struct pipe_screen * -native_create_sw_screen(struct sw_winsys *ws); diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c index d70b7c6eb9..e459402076 100644 --- a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c +++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c @@ -386,8 +386,10 @@ fbdev_display_init(struct native_display *ndpy) return FALSE; ws = fbdev_create_sw_winsys(fbdpy->fd, fbdpy->config.color_format); - if (ws) - fbdpy->base.screen = native_create_sw_screen(ws); + if (ws) { + fbdpy->base.screen = + fbdpy->event_handler->new_sw_screen(&fbdpy->base, ws); + } if (fbdpy->base.screen) { if (!fbdpy->base.screen->is_format_supported(fbdpy->base.screen, @@ -402,7 +404,8 @@ fbdev_display_init(struct native_display *ndpy) } static struct native_display * -fbdev_display_create(int fd, struct native_event_handler *event_handler) +fbdev_display_create(int fd, struct 
native_event_handler *event_handler, + void *user_data) { struct fbdev_display *fbdpy; @@ -412,6 +415,7 @@ fbdev_display_create(int fd, struct native_event_handler *event_handler) fbdpy->fd = fd; fbdpy->event_handler = event_handler; + fbdpy->base.user_data = user_data; if (!fbdev_display_init(&fbdpy->base)) { FREE(fbdpy); @@ -427,44 +431,37 @@ fbdev_display_create(int fd, struct native_event_handler *event_handler) return &fbdpy->base; } -struct native_probe * -native_create_probe(EGLNativeDisplayType dpy) -{ - return NULL; -} - -enum native_probe_result -native_get_probe_result(struct native_probe *nprobe) -{ - return NATIVE_PROBE_UNKNOWN; -} - -const char * -native_get_name(void) -{ - return "FBDEV"; -} - -struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler) +static struct native_display * +native_create_display(void *dpy, struct native_event_handler *event_handler, + void *user_data) { struct native_display *ndpy; int fd; /* well, this makes fd 0 being ignored */ - if (dpy == EGL_DEFAULT_DISPLAY) { + if (!dpy) { fd = open("/dev/fb0", O_RDWR); } else { - fd = dup((int) pointer_to_intptr((void *) dpy)); + fd = dup((int) pointer_to_intptr(dpy)); } if (fd < 0) return NULL; - ndpy = fbdev_display_create(fd, event_handler); + ndpy = fbdev_display_create(fd, event_handler, user_data); if (!ndpy) close(fd); return ndpy; } + +static const struct native_platform fbdev_platform = { + "FBDEV", /* name */ + native_create_display +}; + +const struct native_platform * +native_get_fbdev_platform(void) +{ + return &fbdev_platform; +} diff --git a/src/gallium/state_trackers/egl/gdi/native_gdi.c b/src/gallium/state_trackers/egl/gdi/native_gdi.c index 1791d198d5..91701e5b7d 100644 --- a/src/gallium/state_trackers/egl/gdi/native_gdi.c +++ b/src/gallium/state_trackers/egl/gdi/native_gdi.c @@ -343,10 +343,11 @@ gdi_display_destroy(struct native_display *ndpy) } static struct native_display * -gdi_create_display(HDC 
hDC, struct pipe_screen *screen, - struct native_event_handler *event_handler) +gdi_create_display(HDC hDC, struct native_event_handler *event_handler, + void *user_data) { struct gdi_display *gdpy; + struct sw_winsys *winsys; gdpy = CALLOC_STRUCT(gdi_display); if (!gdpy) @@ -354,8 +355,21 @@ gdi_create_display(HDC hDC, struct pipe_screen *screen, gdpy->hDC = hDC; gdpy->event_handler = event_handler; + gdpy->base.user_data = user_data; - gdpy->base.screen = screen; + winsys = gdi_create_sw_winsys(); + if (!winsys) { + FREE(gdpy); + return NULL; + } + + gdpy->base.screen = gdpy->event_handler->new_sw_screen(&gdpy->base, winsys); + if (!gdpy->base.screen) { + if (winsys->destroy) + winsys->destroy(winsys); + FREE(gdpy); + return NULL; + } gdpy->base.destroy = gdi_display_destroy; gdpy->base.get_param = gdi_display_get_param; @@ -366,41 +380,20 @@ gdi_create_display(HDC hDC, struct pipe_screen *screen, return &gdpy->base; } -struct native_probe * -native_create_probe(EGLNativeDisplayType dpy) -{ - return NULL; -} - -enum native_probe_result -native_get_probe_result(struct native_probe *nprobe) +static struct native_display * +native_create_display(void *dpy, struct native_event_handler *event_handler, + void *user_data) { - return NATIVE_PROBE_UNKNOWN; + return gdi_create_display((HDC) dpy, event_handler, user_data); } -const char * -native_get_name(void) -{ - return "GDI"; -} +static const struct native_platform gdi_platform = { + "GDI", /* name */ + native_create_display +}; -struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler) +const struct native_platform * +native_get_gdi_platform(void) { - struct sw_winsys *winsys; - struct pipe_screen *screen; - - winsys = gdi_create_sw_winsys(); - if (!winsys) - return NULL; - - screen = native_create_sw_screen(winsys); - if (!screen) { - if (winsys->destroy) - winsys->destroy(winsys); - return NULL; - } - - return gdi_create_display((HDC) dpy, screen, 
event_handler); + return &gdi_platform; } diff --git a/src/gallium/state_trackers/egl/kms/native_kms.c b/src/gallium/state_trackers/egl/kms/native_kms.c index bfb4a9d258..d4e8fbc913 100644 --- a/src/gallium/state_trackers/egl/kms/native_kms.c +++ b/src/gallium/state_trackers/egl/kms/native_kms.c @@ -23,6 +23,10 @@ * DEALINGS IN THE SOFTWARE. */ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + #include "pipe/p_screen.h" #include "pipe/p_context.h" #include "util/u_debug.h" @@ -655,10 +659,8 @@ kms_display_destroy(struct native_display *ndpy) kdpy->base.screen->destroy(kdpy->base.screen); if (kdpy->fd >= 0) - drmClose(kdpy->fd); + close(kdpy->fd); - if (kdpy->api && kdpy->api->destroy) - kdpy->api->destroy(kdpy->api); FREE(kdpy); } @@ -669,50 +671,23 @@ static boolean kms_display_init_screen(struct native_display *ndpy) { struct kms_display *kdpy = kms_display(ndpy); - int fd; - - fd = kdpy->fd; - if (fd >= 0) { - drmVersionPtr version = drmGetVersion(fd); - if (!version || strcmp(version->name, kdpy->api->driver_name)) { - if (version) { - _eglLog(_EGL_WARNING, "unknown driver name %s", version->name); - drmFreeVersion(version); - } - else { - _eglLog(_EGL_WARNING, "invalid fd %d", fd); - } - - return FALSE; - } - - drmFreeVersion(version); - } - else { - fd = drmOpen(kdpy->api->driver_name, NULL); - } + drmVersionPtr version; - if (fd < 0) { - _eglLog(_EGL_WARNING, "failed to open DRM device"); + version = drmGetVersion(kdpy->fd); + if (!version) { + _eglLog(_EGL_WARNING, "invalid fd %d", kdpy->fd); return FALSE; } -#if 0 - if (drmSetMaster(fd)) { - _eglLog(_EGL_WARNING, "failed to become DRM master"); - return FALSE; - } -#endif + kdpy->base.screen = kdpy->event_handler->new_drm_screen(&kdpy->base, + version->name, kdpy->fd);; + drmFreeVersion(version); - kdpy->base.screen = kdpy->api->create_screen(kdpy->api, fd); if (!kdpy->base.screen) { _eglLog(_EGL_WARNING, "failed to create DRM screen"); - drmClose(fd); return FALSE; } - kdpy->fd = fd; - 
return TRUE; } @@ -725,7 +700,7 @@ static struct native_display_modeset kms_display_modeset = { static struct native_display * kms_create_display(int fd, struct native_event_handler *event_handler, - struct drm_api *api) + void *user_data) { struct kms_display *kdpy; @@ -733,16 +708,10 @@ kms_create_display(int fd, struct native_event_handler *event_handler, if (!kdpy) return NULL; + kdpy->fd = fd; kdpy->event_handler = event_handler; + kdpy->base.user_data = user_data; - kdpy->api = api; - if (!kdpy->api) { - _eglLog(_EGL_WARNING, "failed to create DRM API"); - FREE(kdpy); - return NULL; - } - - kdpy->fd = fd; if (!kms_display_init_screen(&kdpy->base)) { kms_display_destroy(&kdpy->base); return NULL; @@ -778,53 +747,31 @@ kms_create_display(int fd, struct native_event_handler *event_handler, return &kdpy->base; } -struct native_probe * -native_create_probe(EGLNativeDisplayType dpy) -{ - return NULL; -} - -enum native_probe_result -native_get_probe_result(struct native_probe *nprobe) -{ - return NATIVE_PROBE_UNKNOWN; -} - -/* the api is destroyed with the native display */ -static struct drm_api *drm_api; - -const char * -native_get_name(void) +static struct native_display * +native_create_display(void *dpy, struct native_event_handler *event_handler, + void *user_data) { - static char kms_name[32]; - - if (!drm_api) - drm_api = drm_api_create(); + int fd; - if (drm_api) - util_snprintf(kms_name, sizeof(kms_name), "KMS/%s", drm_api->name); - else - util_snprintf(kms_name, sizeof(kms_name), "KMS"); + if (dpy) { + fd = dup((int) pointer_to_intptr(dpy)); + } + else { + fd = open("/dev/dri/card0", O_RDWR); + } + if (fd < 0) + return NULL; - return kms_name; + return kms_create_display(fd, event_handler, user_data); } -struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler) -{ - struct native_display *ndpy = NULL; - int fd; - - if (!drm_api) - drm_api = drm_api_create(); - - if (drm_api) { - /* well, this 
makes fd 0 being ignored */ - fd = (dpy != EGL_DEFAULT_DISPLAY) ? - (int) pointer_to_intptr((void *) dpy) : -1; - ndpy = kms_create_display(fd, event_handler, drm_api); - } +static const struct native_platform kms_platform = { + "KMS", /* name */ + native_create_display +}; - return ndpy; +const struct native_platform * +native_get_kms_platform(void) +{ + return &kms_platform; } diff --git a/src/gallium/state_trackers/egl/kms/native_kms.h b/src/gallium/state_trackers/egl/kms/native_kms.h index d69c8d38c8..cd8e4ff0b2 100644 --- a/src/gallium/state_trackers/egl/kms/native_kms.h +++ b/src/gallium/state_trackers/egl/kms/native_kms.h @@ -32,7 +32,7 @@ #include "pipe/p_compiler.h" #include "util/u_format.h" #include "pipe/p_state.h" -#include "state_tracker/drm_api.h" +#include "state_tracker/drm_driver.h" #include "common/native.h" #include "common/native_helper.h" @@ -53,7 +53,6 @@ struct kms_display { struct native_event_handler *event_handler; int fd; - struct drm_api *api; drmModeResPtr resources; struct kms_config *config; diff --git a/src/gallium/state_trackers/egl/x11/glxinit.c b/src/gallium/state_trackers/egl/x11/glxinit.c index 1ed2afd345..809a0987e5 100644 --- a/src/gallium/state_trackers/egl/x11/glxinit.c +++ b/src/gallium/state_trackers/egl/x11/glxinit.c @@ -16,6 +16,8 @@ #include "glxinit.h" +#ifdef GLX_DIRECT_RENDERING + typedef struct GLXGenericGetString { CARD8 reqType; @@ -183,9 +185,11 @@ FreeScreenConfigs(__GLXdisplayPrivate * priv) GLint i, screens; /* Free screen configuration information */ - psc = priv->screenConfigs; screens = ScreenCount(priv->dpy); - for (i = 0; i < screens; i++, psc++) { + for (i = 0; i < screens; i++) { + psc = priv->screenConfigs[i]; + if (!psc) + continue; if (psc->configs) { _gl_context_modes_destroy(psc->configs); psc->configs = NULL; /* NOTE: just for paranoia */ @@ -502,15 +506,15 @@ createConfigsFromProperties(Display * dpy, int nvisuals, int nprops, } static GLboolean -getVisualConfigs(Display * dpy, 
__GLXdisplayPrivate * priv, int screen) +getVisualConfigs(__GLXscreenConfigs *psc, + __GLXdisplayPrivate *priv, int screen) { xGLXGetVisualConfigsReq *req; - __GLXscreenConfigs *psc; xGLXGetVisualConfigsReply reply; + Display *dpy = priv->dpy; LockDisplay(dpy); - psc = priv->screenConfigs + screen; psc->visuals = NULL; GetReq(GLXGetVisualConfigs, req); req->reqType = priv->majorOpcode; @@ -531,15 +535,14 @@ getVisualConfigs(Display * dpy, __GLXdisplayPrivate * priv, int screen) } static GLboolean -getFBConfigs(Display * dpy, __GLXdisplayPrivate * priv, int screen) +getFBConfigs(__GLXscreenConfigs *psc, __GLXdisplayPrivate *priv, int screen) { xGLXGetFBConfigsReq *fb_req; xGLXGetFBConfigsSGIXReq *sgi_req; xGLXVendorPrivateWithReplyReq *vpreq; xGLXGetFBConfigsReply reply; - __GLXscreenConfigs *psc; + Display *dpy = priv->dpy; - psc = priv->screenConfigs + screen; psc->serverGLXexts = __glXQueryServerString(dpy, priv->majorOpcode, screen, GLX_EXTENSIONS); @@ -578,6 +581,32 @@ getFBConfigs(Display * dpy, __GLXdisplayPrivate * priv, int screen) return psc->configs != NULL; } +_X_HIDDEN Bool +glx_screen_init(__GLXscreenConfigs *psc, + int screen, __GLXdisplayPrivate * priv) +{ + /* Initialize per screen dynamic client GLX extensions */ + psc->ext_list_first_time = GL_TRUE; + psc->scr = screen; + psc->dpy = priv->dpy; + + getVisualConfigs(psc, priv, screen); + getFBConfigs(psc, priv, screen); + + return GL_TRUE; +} + +static __GLXscreenConfigs * +createIndirectScreen() +{ + __GLXscreenConfigs *psc; + + psc = Xmalloc(sizeof *psc); + memset(psc, 0, sizeof *psc); + + return psc; +} + static GLboolean AllocAndFetchScreenConfigs(Display * dpy, __GLXdisplayPrivate * priv) { @@ -588,12 +617,10 @@ AllocAndFetchScreenConfigs(Display * dpy, __GLXdisplayPrivate * priv) ** First allocate memory for the array of per screen configs. 
*/ screens = ScreenCount(dpy); - psc = (__GLXscreenConfigs *) Xmalloc(screens * sizeof(__GLXscreenConfigs)); - if (!psc) { + priv->screenConfigs = Xmalloc(screens * sizeof *priv->screenConfigs); + if (!priv->screenConfigs) { return GL_FALSE; } - memset(psc, 0, screens * sizeof(__GLXscreenConfigs)); - priv->screenConfigs = psc; priv->serverGLXversion = __glXQueryServerString(dpy, priv->majorOpcode, 0, GLX_VERSION); @@ -602,11 +629,12 @@ AllocAndFetchScreenConfigs(Display * dpy, __GLXdisplayPrivate * priv) return GL_FALSE; } - for (i = 0; i < screens; i++, psc++) { - getFBConfigs(dpy, priv, i); - getVisualConfigs(dpy, priv, i); - psc->scr = i; - psc->dpy = dpy; + for (i = 0; i < screens; i++) { + psc = createIndirectScreen(); + if (!psc) + return GL_FALSE; + glx_screen_init(psc, i, priv); + priv->screenConfigs[i] = psc; } SyncHandle(); @@ -680,3 +708,5 @@ __glXInitialize(Display * dpy) return dpyPriv; } + +#endif /* GLX_DIRECT_RENDERING */ diff --git a/src/gallium/state_trackers/egl/x11/native_dri2.c b/src/gallium/state_trackers/egl/x11/native_dri2.c index 3f802dd713..1be1e42468 100644 --- a/src/gallium/state_trackers/egl/x11/native_dri2.c +++ b/src/gallium/state_trackers/egl/x11/native_dri2.c @@ -32,12 +32,14 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "state_tracker/drm_api.h" +#include "state_tracker/drm_driver.h" #include "egllog.h" #include "native_x11.h" #include "x11_screen.h" +#ifdef GLX_DIRECT_RENDERING + enum dri2_surface_type { DRI2_SURFACE_TYPE_WINDOW, DRI2_SURFACE_TYPE_PIXMAP, @@ -50,7 +52,6 @@ struct dri2_display { struct native_event_handler *event_handler; - struct drm_api *api; struct x11_screen *xscr; int xscr_number; const char *dri_driver; @@ -662,8 +663,6 @@ dri2_display_destroy(struct native_display *ndpy) x11_screen_destroy(dri2dpy->xscr); if (dri2dpy->own_dpy) XCloseDisplay(dri2dpy->dpy); - if (dri2dpy->api && dri2dpy->api->destroy) - dri2dpy->api->destroy(dri2dpy->api); FREE(dri2dpy); } @@ 
-695,7 +694,6 @@ static boolean dri2_display_init_screen(struct native_display *ndpy) { struct dri2_display *dri2dpy = dri2_display(ndpy); - const char *driver = dri2dpy->api->name; int fd; if (!x11_screen_support(dri2dpy->xscr, X11_SCREEN_EXTENSION_DRI2) || @@ -706,19 +704,15 @@ dri2_display_init_screen(struct native_display *ndpy) dri2dpy->dri_driver = x11_screen_probe_dri2(dri2dpy->xscr, &dri2dpy->dri_major, &dri2dpy->dri_minor); - if (!dri2dpy->dri_driver || !driver || - strcmp(dri2dpy->dri_driver, driver) != 0) { - _eglLog(_EGL_WARNING, "Driver mismatch: %s != %s", - dri2dpy->dri_driver, dri2dpy->api->name); - return FALSE; - } fd = x11_screen_enable_dri2(dri2dpy->xscr, dri2_display_invalidate_buffers, &dri2dpy->base); if (fd < 0) return FALSE; - dri2dpy->base.screen = dri2dpy->api->create_screen(dri2dpy->api, fd); + dri2dpy->base.screen = + dri2dpy->event_handler->new_drm_screen(&dri2dpy->base, + dri2dpy->dri_driver, fd); if (!dri2dpy->base.screen) { _eglLog(_EGL_WARNING, "failed to create DRM screen"); return FALSE; @@ -741,9 +735,9 @@ dri2_display_hash_table_compare(void *key1, void *key2) } struct native_display * -x11_create_dri2_display(EGLNativeDisplayType dpy, +x11_create_dri2_display(Display *dpy, struct native_event_handler *event_handler, - struct drm_api *api) + void *user_data) { struct dri2_display *dri2dpy; @@ -752,7 +746,7 @@ x11_create_dri2_display(EGLNativeDisplayType dpy, return NULL; dri2dpy->event_handler = event_handler; - dri2dpy->api = api; + dri2dpy->base.user_data = user_data; dri2dpy->dpy = dpy; if (!dri2dpy->dpy) { @@ -792,3 +786,15 @@ x11_create_dri2_display(EGLNativeDisplayType dpy, return &dri2dpy->base; } + +#else /* GLX_DIRECT_RENDERING */ + +struct native_display * +x11_create_dri2_display(Display *dpy, + struct native_event_handler *event_handler, + void *user_data) +{ + return NULL; +} + +#endif /* GLX_DIRECT_RENDERING */ diff --git a/src/gallium/state_trackers/egl/x11/native_x11.c 
b/src/gallium/state_trackers/egl/x11/native_x11.c index b6d51bbf9f..37c8b01541 100644 --- a/src/gallium/state_trackers/egl/x11/native_x11.c +++ b/src/gallium/state_trackers/egl/x11/native_x11.c @@ -23,130 +23,44 @@ * DEALINGS IN THE SOFTWARE. */ -#include <string.h> #include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_string.h" -#include "state_tracker/drm_api.h" #include "egllog.h" #include "native_x11.h" -#include "x11_screen.h" -#define X11_PROBE_MAGIC 0x11980BE /* "X11PROBE" */ - -static struct drm_api *api; - -static void -x11_probe_destroy(struct native_probe *nprobe) -{ - if (nprobe->data) - FREE(nprobe->data); - FREE(nprobe); -} - -struct native_probe * -native_create_probe(EGLNativeDisplayType dpy) -{ - struct native_probe *nprobe; - struct x11_screen *xscr; - int scr; - const char *driver_name = NULL; - Display *xdpy; - - nprobe = CALLOC_STRUCT(native_probe); - if (!nprobe) - return NULL; - - xdpy = dpy; - if (!xdpy) { - xdpy = XOpenDisplay(NULL); - if (!xdpy) { - FREE(nprobe); - return NULL; - } - } - - scr = DefaultScreen(xdpy); - xscr = x11_screen_create(xdpy, scr); - if (xscr) { - if (x11_screen_support(xscr, X11_SCREEN_EXTENSION_DRI2)) { - driver_name = x11_screen_probe_dri2(xscr, NULL, NULL); - if (driver_name) - nprobe->data = strdup(driver_name); - } - - x11_screen_destroy(xscr); - } - - if (xdpy != dpy) - XCloseDisplay(xdpy); - - nprobe->magic = X11_PROBE_MAGIC; - nprobe->display = dpy; - - nprobe->destroy = x11_probe_destroy; - - return nprobe; -} - -enum native_probe_result -native_get_probe_result(struct native_probe *nprobe) -{ - if (!nprobe || nprobe->magic != X11_PROBE_MAGIC) - return NATIVE_PROBE_UNKNOWN; - - if (!api) - api = drm_api_create(); - - /* this is a software driver */ - if (!api) - return NATIVE_PROBE_SUPPORTED; - - /* the display does not support DRI2 or the driver mismatches */ - if (!nprobe->data || strcmp(api->name, (const char *) nprobe->data) != 0) - return NATIVE_PROBE_FALLBACK; - - return 
NATIVE_PROBE_EXACT; -} - -const char * -native_get_name(void) -{ - static char x11_name[32]; - - if (!api) - api = drm_api_create(); - - if (api) - util_snprintf(x11_name, sizeof(x11_name), "X11/%s", api->name); - else - util_snprintf(x11_name, sizeof(x11_name), "X11"); - - return x11_name; -} - -struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler) +static struct native_display * +native_create_display(void *dpy, struct native_event_handler *event_handler, + void *user_data) { struct native_display *ndpy = NULL; boolean force_sw; - if (!api) - api = drm_api_create(); - force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE); - if (api && !force_sw) { - ndpy = x11_create_dri2_display(dpy, event_handler, api); + if (!force_sw) { + ndpy = x11_create_dri2_display((Display *) dpy, + event_handler, user_data); } if (!ndpy) { EGLint level = (force_sw) ? _EGL_INFO : _EGL_WARNING; _eglLog(level, "use software fallback"); - ndpy = x11_create_ximage_display(dpy, event_handler); + ndpy = x11_create_ximage_display((Display *) dpy, + event_handler, user_data); } return ndpy; } + +static const struct native_platform x11_platform = { + "X11", /* name */ + native_create_display +}; + +const struct native_platform * +native_get_x11_platform(void) +{ + return &x11_platform; +} diff --git a/src/gallium/state_trackers/egl/x11/native_x11.h b/src/gallium/state_trackers/egl/x11/native_x11.h index 1678403b45..0b47837e1b 100644 --- a/src/gallium/state_trackers/egl/x11/native_x11.h +++ b/src/gallium/state_trackers/egl/x11/native_x11.h @@ -26,16 +26,16 @@ #ifndef _NATIVE_X11_H_ #define _NATIVE_X11_H_ -#include "state_tracker/drm_api.h" #include "common/native.h" struct native_display * -x11_create_ximage_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler); +x11_create_ximage_display(Display *dpy, + struct native_event_handler *event_handler, + void *user_data); struct native_display * 
-x11_create_dri2_display(EGLNativeDisplayType dpy, +x11_create_dri2_display(Display *dpy, struct native_event_handler *event_handler, - struct drm_api *api); + void *user_data); #endif /* _NATIVE_X11_H_ */ diff --git a/src/gallium/state_trackers/egl/x11/native_ximage.c b/src/gallium/state_trackers/egl/x11/native_ximage.c index 45679fc9b4..4b32f6e36e 100644 --- a/src/gallium/state_trackers/egl/x11/native_ximage.c +++ b/src/gallium/state_trackers/egl/x11/native_ximage.c @@ -441,8 +441,9 @@ ximage_display_destroy(struct native_display *ndpy) } struct native_display * -x11_create_ximage_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler) +x11_create_ximage_display(Display *dpy, + struct native_event_handler *event_handler, + void *user_data) { struct ximage_display *xdpy; struct sw_winsys *winsys = NULL; @@ -462,6 +463,7 @@ x11_create_ximage_display(EGLNativeDisplayType dpy, } xdpy->event_handler = event_handler; + xdpy->base.user_data = user_data; xdpy->xscr_number = DefaultScreen(xdpy->dpy); xdpy->xscr = x11_screen_create(xdpy->dpy, xdpy->xscr_number); @@ -472,7 +474,8 @@ x11_create_ximage_display(EGLNativeDisplayType dpy, if (!winsys) goto fail; - xdpy->base.screen = native_create_sw_screen(winsys); + xdpy->base.screen = + xdpy->event_handler->new_sw_screen(&xdpy->base, winsys); if (!xdpy->base.screen) goto fail; diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.c b/src/gallium/state_trackers/egl/x11/x11_screen.c index 6bdff26ec0..bc6482ab15 100644 --- a/src/gallium/state_trackers/egl/x11/x11_screen.c +++ b/src/gallium/state_trackers/egl/x11/x11_screen.c @@ -39,8 +39,10 @@ #include "glxinit.h" struct x11_screen { +#ifdef GLX_DIRECT_RENDERING /* dummy base class */ struct __GLXDRIdisplayRec base; +#endif Display *dpy; int number; @@ -103,15 +105,19 @@ x11_screen_destroy(struct x11_screen *xscr) if (xscr->dri_device) Xfree(xscr->dri_device); +#ifdef GLX_DIRECT_RENDERING /* xscr->glx_dpy will be destroyed with the X display */ if 
(xscr->glx_dpy) xscr->glx_dpy->dri2Display = NULL; +#endif if (xscr->visuals) XFree(xscr->visuals); FREE(xscr); } +#ifdef GLX_DIRECT_RENDERING + static boolean x11_screen_init_dri2(struct x11_screen *xscr) { @@ -133,6 +139,8 @@ x11_screen_init_glx(struct x11_screen *xscr) return (xscr->glx_dpy != NULL); } +#endif /* GLX_DIRECT_RENDERING */ + /** * Return true if the screen supports the extension. */ @@ -145,12 +153,14 @@ x11_screen_support(struct x11_screen *xscr, enum x11_screen_extension ext) case X11_SCREEN_EXTENSION_XSHM: supported = XShmQueryExtension(xscr->dpy); break; +#ifdef GLX_DIRECT_RENDERING case X11_SCREEN_EXTENSION_GLX: supported = x11_screen_init_glx(xscr); break; case X11_SCREEN_EXTENSION_DRI2: supported = x11_screen_init_dri2(xscr); break; +#endif default: break; } @@ -177,13 +187,46 @@ x11_screen_get_visuals(struct x11_screen *xscr, int *num_visuals) } /** + * Return the depth of a drawable. + * + * Unlike other drawable functions, the drawable needs not be a DRI2 drawable. + */ +uint +x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable) +{ + unsigned int depth; + + if (drawable != xscr->last_drawable) { + Window root; + int x, y; + unsigned int w, h, border; + Status ok; + + ok = XGetGeometry(xscr->dpy, drawable, &root, + &x, &y, &w, &h, &border, &depth); + if (!ok) + depth = 0; + + xscr->last_drawable = drawable; + xscr->last_depth = depth; + } + else { + depth = xscr->last_depth; + } + + return depth; +} + +#ifdef GLX_DIRECT_RENDERING + +/** * Return the GLX fbconfigs. */ const __GLcontextModes * x11_screen_get_glx_configs(struct x11_screen *xscr) { return (x11_screen_init_glx(xscr)) - ? xscr->glx_dpy->screenConfigs[xscr->number].configs + ? xscr->glx_dpy->screenConfigs[xscr->number]->configs : NULL; } @@ -194,7 +237,7 @@ const __GLcontextModes * x11_screen_get_glx_visuals(struct x11_screen *xscr) { return (x11_screen_init_glx(xscr)) - ? xscr->glx_dpy->screenConfigs[xscr->number].visuals + ? 
xscr->glx_dpy->screenConfigs[xscr->number]->visuals : NULL; } @@ -335,37 +378,6 @@ x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable, } /** - * Return the depth of a drawable. - * - * Unlike other drawable functions, the drawable needs not be a DRI2 drawable. - */ -uint -x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable) -{ - unsigned int depth; - - if (drawable != xscr->last_drawable) { - Window root; - int x, y; - unsigned int w, h, border; - Status ok; - - ok = XGetGeometry(xscr->dpy, drawable, &root, - &x, &y, &w, &h, &border, &depth); - if (!ok) - depth = 0; - - xscr->last_drawable = drawable; - xscr->last_depth = depth; - } - else { - depth = xscr->last_depth; - } - - return depth; -} - -/** * Create a mode list of the given size. */ __GLcontextModes * @@ -432,3 +444,5 @@ dri2InvalidateBuffers(Display *dpy, XID drawable) xscr->dri_invalidate_buffers(xscr, drawable, xscr->dri_user_data); } + +#endif /* GLX_DIRECT_RENDERING */ diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.h b/src/gallium/state_trackers/egl/x11/x11_screen.h index a3c5ee1491..bc0ef69ec6 100644 --- a/src/gallium/state_trackers/egl/x11/x11_screen.h +++ b/src/gallium/state_trackers/egl/x11/x11_screen.h @@ -67,20 +67,18 @@ x11_screen_support(struct x11_screen *xscr, enum x11_screen_extension ext); const XVisualInfo * x11_screen_get_visuals(struct x11_screen *xscr, int *num_visuals); +uint +x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable); + +#ifdef GLX_DIRECT_RENDERING + +/* GLX */ const __GLcontextModes * x11_screen_get_glx_configs(struct x11_screen *xscr); const __GLcontextModes * x11_screen_get_glx_visuals(struct x11_screen *xscr); -const char * -x11_screen_probe_dri2(struct x11_screen *xscr, int *major, int *minor); - -int -x11_screen_enable_dri2(struct x11_screen *xscr, - x11_drawable_invalidate_buffers invalidate_buffers, - void *user_data); - __GLcontextModes * x11_context_modes_create(unsigned count); @@ -90,6 +88,15 @@ 
x11_context_modes_destroy(__GLcontextModes *modes); unsigned x11_context_modes_count(const __GLcontextModes *modes); +/* DRI2 */ +const char * +x11_screen_probe_dri2(struct x11_screen *xscr, int *major, int *minor); + +int +x11_screen_enable_dri2(struct x11_screen *xscr, + x11_drawable_invalidate_buffers invalidate_buffers, + void *user_data); + void x11_drawable_enable_dri2(struct x11_screen *xscr, Drawable drawable, boolean on); @@ -104,7 +111,6 @@ x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable, int *width, int *height, unsigned int *attachments, boolean with_format, int num_ins, int *num_outs); -uint -x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable); +#endif /* GLX_DIRECT_RENDERING */ #endif /* _X11_SCREEN_H_ */ diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c index 7c421dfcd4..c12dc71b86 100644 --- a/src/gallium/state_trackers/vega/image.c +++ b/src/gallium/state_trackers/vega/image.c @@ -355,7 +355,7 @@ void image_destroy(struct vg_image *img) } pipe_sampler_view_reference(&img->sampler_view, NULL); - free(img); + FREE(img); } void image_clear(struct vg_image *img, diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c index 6d627b0e8d..ef28ebd740 100644 --- a/src/gallium/state_trackers/vega/mask.c +++ b/src/gallium/state_trackers/vega/mask.c @@ -520,7 +520,7 @@ void mask_layer_destroy(struct vg_mask_layer *layer) vg_context_remove_object(ctx, VG_OBJECT_MASK, layer); pipe_resource_release(&layer->texture); - free(layer); + FREE(layer); } void mask_layer_fill(struct vg_mask_layer *layer, diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c index 05540e8275..2c0eb6b23d 100644 --- a/src/gallium/state_trackers/vega/paint.c +++ b/src/gallium/state_trackers/vega/paint.c @@ -236,7 +236,7 @@ void paint_destroy(struct vg_paint *paint) free(paint->gradient.ramp_stopsi); free(paint->gradient.ramp_stops); - 
free(paint); + FREE(paint); } void paint_set_color(struct vg_paint *paint, diff --git a/src/gallium/state_trackers/vega/path.c b/src/gallium/state_trackers/vega/path.c index 4fc23a7a27..05f8b0d997 100644 --- a/src/gallium/state_trackers/vega/path.c +++ b/src/gallium/state_trackers/vega/path.c @@ -218,7 +218,7 @@ void path_destroy(struct path *p) if (p->stroked.path) path_destroy(p->stroked.path); - free(p); + FREE(p); } VGbitfield path_capabilities(struct path *p) diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c index c40ea8675e..8c023044c4 100644 --- a/src/gallium/state_trackers/vega/renderer.c +++ b/src/gallium/state_trackers/vega/renderer.c @@ -203,7 +203,7 @@ void renderer_destroy(struct renderer *ctx) ctx->fs = NULL; } #endif - free(ctx); + FREE(ctx); } void renderer_draw_quad(struct renderer *r, diff --git a/src/gallium/state_trackers/vega/shader.c b/src/gallium/state_trackers/vega/shader.c index 6eef94ce76..eab1349639 100644 --- a/src/gallium/state_trackers/vega/shader.c +++ b/src/gallium/state_trackers/vega/shader.c @@ -68,7 +68,7 @@ struct shader * shader_create(struct vg_context *ctx) void shader_destroy(struct shader *shader) { - free(shader); + FREE(shader); } void shader_set_masking(struct shader *shader, VGboolean set) diff --git a/src/gallium/state_trackers/vega/shaders_cache.c b/src/gallium/state_trackers/vega/shaders_cache.c index f43fe6ee4c..53e6bfcf16 100644 --- a/src/gallium/state_trackers/vega/shaders_cache.c +++ b/src/gallium/state_trackers/vega/shaders_cache.c @@ -381,7 +381,7 @@ void shaders_cache_destroy(struct shaders_cache *sc) } cso_hash_delete(sc->hash); - free(sc); + FREE(sc); } void * shaders_cache_fill(struct shaders_cache *sc, @@ -410,7 +410,7 @@ struct vg_shader * shader_create_from_text(struct pipe_context *pipe, const char *txt, int num_tokens, int type) { - struct vg_shader *shader = (struct vg_shader *)malloc( + struct vg_shader *shader = (struct vg_shader *)MALLOC( 
sizeof(struct vg_shader)); struct tgsi_token *tokens = tokens_from_assembly(txt, num_tokens); struct pipe_shader_state state; @@ -435,6 +435,6 @@ void vg_shader_destroy(struct vg_context *ctx, struct vg_shader *shader) cso_delete_fragment_shader(ctx->cso_context, shader->driver); else cso_delete_vertex_shader(ctx->cso_context, shader->driver); - free(shader->tokens); - free(shader); + FREE(shader->tokens); + FREE(shader); } diff --git a/src/gallium/state_trackers/vega/vg_context.c b/src/gallium/state_trackers/vega/vg_context.c index f02db8949d..5cb2590602 100644 --- a/src/gallium/state_trackers/vega/vg_context.c +++ b/src/gallium/state_trackers/vega/vg_context.c @@ -65,6 +65,32 @@ static void init_clear(struct vg_context *st) st->clear.fs = util_make_fragment_passthrough_shader(pipe); } + +/** + * A depth/stencil rb will be needed regardless of what the visual says. + */ +static boolean +choose_depth_stencil_format(struct vg_context *ctx) +{ + struct pipe_screen *screen = ctx->pipe->screen; + enum pipe_format formats[] = { + PIPE_FORMAT_Z24_UNORM_S8_USCALED, + PIPE_FORMAT_S8_USCALED_Z24_UNORM, + PIPE_FORMAT_NONE + }; + enum pipe_format *fmt; + + for (fmt = formats; *fmt != PIPE_FORMAT_NONE; fmt++) { + if (screen->is_format_supported(screen, *fmt, + PIPE_TEXTURE_2D, 0, PIPE_BIND_DEPTH_STENCIL, 0)) + break; + } + + ctx->ds_format = *fmt; + + return (ctx->ds_format != PIPE_FORMAT_NONE); +} + void vg_set_current_context(struct vg_context *ctx) { _vg_context = ctx; @@ -81,6 +107,10 @@ struct vg_context * vg_create_context(struct pipe_context *pipe, ctx = CALLOC_STRUCT(vg_context); ctx->pipe = pipe; + if (!choose_depth_stencil_format(ctx)) { + FREE(ctx); + return NULL; + } ctx->dispatch = api_create_dispatch(); @@ -191,7 +221,7 @@ void vg_destroy_context(struct vg_context *ctx) api_destroy_dispatch(ctx->dispatch); - free(ctx); + FREE(ctx); } void vg_init_object(struct vg_object *obj, struct vg_context *ctx, enum vg_object_type type) diff --git 
a/src/gallium/state_trackers/vega/vg_context.h b/src/gallium/state_trackers/vega/vg_context.h index 7b59ad512a..80a6c07c69 100644 --- a/src/gallium/state_trackers/vega/vg_context.h +++ b/src/gallium/state_trackers/vega/vg_context.h @@ -94,6 +94,7 @@ struct vg_context struct mapi_table *dispatch; struct pipe_context *pipe; + enum pipe_format ds_format; struct { struct vg_state vg; diff --git a/src/gallium/state_trackers/vega/vg_manager.c b/src/gallium/state_trackers/vega/vg_manager.c index 3b04816df0..c2aa98b231 100644 --- a/src/gallium/state_trackers/vega/vg_manager.c +++ b/src/gallium/state_trackers/vega/vg_manager.c @@ -388,7 +388,7 @@ destroy_renderbuffer(struct st_renderbuffer *strb) { pipe_surface_reference(&strb->surface, NULL); pipe_resource_reference(&strb->texture, NULL); - free(strb); + FREE(strb); } /** @@ -448,11 +448,10 @@ vg_context_bind_framebuffers(struct st_context_iface *stctxi, /* free the existing fb */ if (!stdrawi || stfb->strb_att != strb_att || - stfb->strb->format != stdrawi->visual->color_format || - stfb->dsrb->format != stdrawi->visual->depth_stencil_format) { + stfb->strb->format != stdrawi->visual->color_format) { destroy_renderbuffer(stfb->strb); destroy_renderbuffer(stfb->dsrb); - free(stfb); + FREE(stfb); ctx->draw_buffer = NULL; } @@ -472,14 +471,14 @@ vg_context_bind_framebuffers(struct st_context_iface *stctxi, stfb->strb = create_renderbuffer(stdrawi->visual->color_format); if (!stfb->strb) { - free(stfb); + FREE(stfb); return FALSE; } - stfb->dsrb = create_renderbuffer(stdrawi->visual->depth_stencil_format); + stfb->dsrb = create_renderbuffer(ctx->ds_format); if (!stfb->dsrb) { - free(stfb->strb); - free(stfb); + FREE(stfb->strb); + FREE(stfb); return FALSE; } @@ -517,14 +516,6 @@ vg_api_get_current(struct st_api *stapi) return (ctx) ? 
&ctx->iface : NULL; } -static boolean -vg_api_is_visual_supported(struct st_api *stapi, - const struct st_visual *visual) -{ - /* the impl requires a depth/stencil buffer */ - return util_format_is_depth_and_stencil(visual->depth_stencil_format); -} - static st_proc_t vg_api_get_proc_address(struct st_api *stapi, const char *procname) { @@ -539,7 +530,6 @@ vg_api_destroy(struct st_api *stapi) static const struct st_api vg_api = { vg_api_destroy, vg_api_get_proc_address, - vg_api_is_visual_supported, vg_api_create_context, vg_api_make_current, vg_api_get_current, diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index f1a07bd863..26a907f205 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -50,6 +50,7 @@ #include <X11/extensions/dpms.h> #endif +#include "state_tracker/drm_driver.h" #include "util/u_inlines.h" #include "util/u_rect.h" @@ -93,7 +94,8 @@ crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, struct crtc_private *crtcp = crtc->driver_private; drmModeCrtcPtr drm_crtc = crtcp->drm_crtc; drmModeModeInfo drm_mode; - int i, ret, connector_id; + int i, ret; + unsigned int connector_id; for (i = 0; i < config->num_output; output = NULL, i++) { output = config->output[i]; diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index 4e01bd1030..704aed6a82 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -42,6 +42,8 @@ #include "util/u_format.h" +#include "state_tracker/drm_driver.h" + /* Make all the #if cases in the code esier to read */ #ifndef DRI2INFOREC_VERSION #define DRI2INFOREC_VERSION 1 diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index a7e57634ae..e10ff2f950 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ 
-51,6 +51,7 @@ #include <pciaccess.h> +#include "state_tracker/drm_driver.h" #include "pipe/p_context.h" #include "xorg_tracker.h" #include "xorg_winsys.h" @@ -189,6 +190,7 @@ drv_crtc_resize(ScrnInfoPtr pScrn, int width, int height) { xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); modesettingPtr ms = modesettingPTR(pScrn); + CustomizerPtr cust = ms->cust; ScreenPtr pScreen = pScrn->pScreen; int old_width, old_height; PixmapPtr rootPixmap; @@ -197,6 +199,16 @@ drv_crtc_resize(ScrnInfoPtr pScrn, int width, int height) if (width == pScrn->virtualX && height == pScrn->virtualY) return TRUE; + if (cust && cust->winsys_check_fb_size && + !cust->winsys_check_fb_size(cust, width*pScrn->bitsPerPixel / 8, + height)) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, + "Requested framebuffer size %dx%dx%d will not fit " + "in display memory.\n", + width, height, pScrn->bitsPerPixel); + return FALSE; + } + old_width = pScrn->virtualX; old_height = pScrn->virtualY; pScrn->virtualX = width; @@ -269,18 +281,13 @@ drv_init_drm(ScrnInfoPtr pScrn) ); - ms->api = drm_api_create(); - ms->fd = drmOpen(ms->api ? ms->api->driver_name : NULL, BusID); + ms->fd = drmOpen(driver_descriptor.driver_name, BusID); + ms->isMaster = TRUE; xfree(BusID); if (ms->fd >= 0) return TRUE; - if (ms->api && ms->api->destroy) - ms->api->destroy(ms->api); - - ms->api = NULL; - return FALSE; } @@ -288,21 +295,6 @@ drv_init_drm(ScrnInfoPtr pScrn) } static Bool -drv_close_drm(ScrnInfoPtr pScrn) -{ - modesettingPtr ms = modesettingPTR(pScrn); - - if (ms->api && ms->api->destroy) - ms->api->destroy(ms->api); - ms->api = NULL; - - drmClose(ms->fd); - ms->fd = -1; - - return TRUE; -} - -static Bool drv_init_resource_management(ScrnInfoPtr pScrn) { modesettingPtr ms = modesettingPTR(pScrn); @@ -316,18 +308,11 @@ drv_init_resource_management(ScrnInfoPtr pScrn) if (ms->screen || ms->kms) return TRUE; - if (ms->api) { - ms->screen = ms->no3D ? 
NULL : - ms->api->create_screen(ms->api, ms->fd); - - if (ms->screen) - return TRUE; - - if (ms->api->destroy) - ms->api->destroy(ms->api); + if (!ms->no3D) + ms->screen = driver_descriptor.create_screen(ms->fd); - ms->api = NULL; - } + if (ms->screen) + return TRUE; #ifdef HAVE_LIBKMS if (!kms_create(ms->fd, &ms->kms)) @@ -337,25 +322,6 @@ drv_init_resource_management(ScrnInfoPtr pScrn) return FALSE; } -static Bool -drv_close_resource_management(ScrnInfoPtr pScrn) -{ - modesettingPtr ms = modesettingPTR(pScrn); - - if (ms->screen) { - assert(ms->ctx == NULL); - ms->screen->destroy(ms->screen); - } - ms->screen = NULL; - -#ifdef HAVE_LIBKMS - if (ms->kms) - kms_destroy(&ms->kms); -#endif - - return TRUE; -} - static void drv_cleanup_fences(ScrnInfoPtr pScrn) { @@ -380,8 +346,8 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) rgb defaultWeight = { 0, 0, 0 }; EntityInfoPtr pEnt; EntPtr msEnt = NULL; - int max_width, max_height; CustomizerPtr cust; + Bool use3D; if (pScrn->numEntities != 1) return FALSE; @@ -433,10 +399,22 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) } ms->fd = -1; - ms->api = NULL; if (!drv_init_drm(pScrn)) return FALSE; + use3D = cust ? !cust->no_3d : TRUE; + ms->from_3D = xf86GetOptValBool(ms->Options, OPTION_3D_ACCEL, + &use3D) ? 
+ X_CONFIG : X_PROBED; + + ms->no3D = !use3D; + + if (!drv_init_resource_management(pScrn)) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Could not init " + "Gallium3D or libKMS."); + return FALSE; + } + pScrn->monitor = pScrn->confScreen->monitor; pScrn->progClock = TRUE; pScrn->rgbBits = 8; @@ -475,9 +453,36 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) xf86CrtcConfigInit(pScrn, &crtc_config_funcs); xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); - max_width = 2048; /* A very low default */ - max_height = 2048; /* see screen_init */ - xf86CrtcSetSizeRange(pScrn, 320, 200, max_width, max_height); + /* get max width and height */ + { + drmModeResPtr res; + int max_width, max_height; + + res = drmModeGetResources(ms->fd); + max_width = res->max_width; + max_height = res->max_height; + + if (ms->screen) { + int max; + + max = ms->screen->get_param(ms->screen, + PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + max = 1 << (max - 1); + max_width = max < max_width ? max : max_width; + max_height = max < max_height ? 
max : max_height; + } + + xf86CrtcSetSizeRange(pScrn, res->min_width, + res->min_height, max_width, max_height); + xf86DrvMsg(pScrn->scrnIndex, X_PROBED, + "Min width %d, Max Width %d.\n", + res->min_width, max_width); + xf86DrvMsg(pScrn->scrnIndex, X_PROBED, + "Min height %d, Max Height %d.\n", + res->min_height, max_height); + drmModeFreeResources(res); + } + if (xf86ReturnOptValBool(ms->Options, OPTION_SW_CURSOR, FALSE)) { ms->SWCursor = TRUE; @@ -486,6 +491,9 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) xorg_crtc_init(pScrn); xorg_output_init(pScrn); + if (cust && cust->winsys_pre_init && !cust->winsys_pre_init(cust, ms->fd)) + return FALSE; + if (!xf86InitialConfiguration(pScrn, TRUE)) { xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "No valid modes.\n"); return FALSE; @@ -640,59 +648,45 @@ drv_create_screen_resources(ScreenPtr pScreen) } static Bool +drv_set_master(ScrnInfoPtr pScrn) +{ + modesettingPtr ms = modesettingPTR(pScrn); + + if (!ms->isMaster && drmSetMaster(ms->fd) != 0) { + if (errno == EINVAL) { + xf86DrvMsg(pScrn->scrnIndex, X_WARNING, + "drmSetMaster failed: 2.6.29 or newer kernel required for " + "multi-server DRI\n"); + } else { + xf86DrvMsg(pScrn->scrnIndex, X_WARNING, + "drmSetMaster failed: %s\n", strerror(errno)); + } + return FALSE; + } + + ms->isMaster = TRUE; + return TRUE; +} + + +static Bool drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) { ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; modesettingPtr ms = modesettingPTR(pScrn); - unsigned max_width, max_height; VisualPtr visual; CustomizerPtr cust = ms->cust; MessageType from_st; MessageType from_dt; - MessageType from_3D; - Bool use3D; - if (!drv_init_drm(pScrn)) { - FatalError("Could not init DRM"); + if (!drv_set_master(pScrn)) return FALSE; - } - - use3D = cust ? !cust->no_3d : TRUE; - from_3D = xf86GetOptValBool(ms->Options, OPTION_3D_ACCEL, - &use3D) ? 
- X_CONFIG : X_PROBED; - - ms->no3D = !use3D; - - if (!drv_init_resource_management(pScrn)) { - FatalError("Could not init resource management (!pipe_screen && !libkms)"); - return FALSE; - } if (!drv_init_front_buffer_functions(pScrn)) { FatalError("Could not init front buffer manager"); return FALSE; } - /* get max width and height */ - { - drmModeResPtr res; - res = drmModeGetResources(ms->fd); - max_width = res->max_width; - max_height = res->max_height; - drmModeFreeResources(res); - } - - if (ms->screen) { - int max; - max = ms->screen->get_param(ms->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); - max = 1 << (max - 1); - max_width = max < max_width ? max : max_width; - max_height = max < max_height ? max : max_height; - } - - xf86CrtcSetSizeRange(pScrn, 1, 1, max_width, max_height); - pScrn->pScreen = pScreen; /* HW dependent - FIXME */ @@ -745,7 +739,7 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) ms->debug_fallback = xf86ReturnOptValBool(ms->Options, OPTION_DEBUG_FALLBACK, ms->accelerate_2d); if (cust && cust->winsys_screen_init) - cust->winsys_screen_init(cust, ms->fd); + cust->winsys_screen_init(cust); ms->swapThrottling = cust ? cust->swap_throttling : TRUE; from_st = xf86GetOptValBool(ms->Options, OPTION_THROTTLE_SWAP, @@ -776,7 +770,7 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Fallback debugging is %s\n", ms->debug_fallback ? "enabled" : "disabled"); #ifdef DRI2 - xf86DrvMsg(pScrn->scrnIndex, from_3D, "3D Acceleration is %s\n", + xf86DrvMsg(pScrn->scrnIndex, ms->from_3D, "3D Acceleration is %s\n", ms->screen ? 
"enabled" : "disabled"); #else xf86DrvMsg(pScrn->scrnIndex, X_INFO, "3D Acceleration is disabled\n"); @@ -879,6 +873,7 @@ drv_leave_vt(int scrnIndex, int flags) xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "drmDropMaster failed: %s\n", strerror(errno)); + ms->isMaster = FALSE; pScrn->vtSema = FALSE; } @@ -892,16 +887,8 @@ drv_enter_vt(int scrnIndex, int flags) modesettingPtr ms = modesettingPTR(pScrn); CustomizerPtr cust = ms->cust; - if (drmSetMaster(ms->fd)) { - if (errno == EINVAL) { - xf86DrvMsg(pScrn->scrnIndex, X_WARNING, - "drmSetMaster failed: 2.6.29 or newer kernel required for " - "multi-server DRI\n"); - } else { - xf86DrvMsg(pScrn->scrnIndex, X_WARNING, - "drmSetMaster failed: %s\n", strerror(errno)); - } - } + if (!drv_set_master(pScrn)) + return FALSE; if (!ms->create_front_buffer(pScrn)) return FALSE; @@ -969,12 +956,9 @@ drv_close_screen(int scrnIndex, ScreenPtr pScreen) drv_leave_vt(scrnIndex, 0); } - drv_close_resource_management(pScrn); - - drv_close_drm(pScrn); - pScrn->vtSema = FALSE; pScreen->CloseScreen = ms->CloseScreen; + return (*pScreen->CloseScreen) (scrnIndex, pScreen); } diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index bd84668300..6b2c80fbca 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -881,7 +881,6 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, if (priv->tex) { struct pipe_subresource subdst, subsrc; - struct pipe_surface *src_surf; subdst.face = 0; subdst.level = 0; diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c index 3e5e6bd6a6..fe1aab3ab9 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c +++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c @@ -642,7 +642,7 @@ void xorg_shaders_destroy(struct xorg_shaders *sc) cache_destroy(sc->r->cso, sc->fs_hash, PIPE_SHADER_FRAGMENT); - free(sc); + FREE(sc); } static INLINE void * diff 
--git a/src/gallium/state_trackers/xorg/xorg_output.c b/src/gallium/state_trackers/xorg/xorg_output.c index 056098f76b..61206ed751 100644 --- a/src/gallium/state_trackers/xorg/xorg_output.c +++ b/src/gallium/state_trackers/xorg/xorg_output.c @@ -162,6 +162,15 @@ output_get_modes(xf86OutputPtr output) static int output_mode_valid(xf86OutputPtr output, DisplayModePtr pMode) { + modesettingPtr ms = modesettingPTR(output->scrn); + CustomizerPtr cust = ms->cust; + + if (cust && cust->winsys_check_fb_size && + !cust->winsys_check_fb_size(cust, pMode->HDisplay * + output->scrn->bitsPerPixel / 8, + pMode->VDisplay)) + return MODE_BAD; + return MODE_OK; } diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h index df56ad1b15..be1a9fda48 100644 --- a/src/gallium/state_trackers/xorg/xorg_tracker.h +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h @@ -49,7 +49,6 @@ #include "pipe/p_screen.h" #include "util/u_inlines.h" #include "util/u_debug.h" -#include "state_tracker/drm_api.h" #define DRV_ERROR(msg) xf86DrvMsg(pScrn->scrnIndex, X_ERROR, msg); @@ -77,13 +76,17 @@ typedef struct _CustomizerRec Bool dirty_throttling; Bool swap_throttling; Bool no_3d; - Bool (*winsys_screen_init)(struct _CustomizerRec *cust, int fd); + Bool (*winsys_pre_init) (struct _CustomizerRec *cust, int fd); + Bool (*winsys_screen_init)(struct _CustomizerRec *cust); Bool (*winsys_screen_close)(struct _CustomizerRec *cust); Bool (*winsys_enter_vt)(struct _CustomizerRec *cust); Bool (*winsys_leave_vt)(struct _CustomizerRec *cust); void (*winsys_context_throttle)(struct _CustomizerRec *cust, struct pipe_context *pipe, enum xorg_throttling_reason reason); + Bool (*winsys_check_fb_size) (struct _CustomizerRec *cust, + unsigned long pitch, + unsigned long height); } CustomizerRec, *CustomizerPtr; typedef struct _modesettingRec @@ -106,6 +109,8 @@ typedef struct _modesettingRec Bool dirtyThrottling; CloseScreenProcPtr CloseScreen; Bool no3D; + Bool 
from_3D; + Bool isMaster; /* Broken-out options. */ OptionInfoPtr Options; @@ -125,7 +130,6 @@ typedef struct _modesettingRec struct kms_bo *root_bo; /* gallium */ - struct drm_api *api; struct pipe_screen *screen; struct pipe_context *ctx; boolean d_depth_bits_last; diff --git a/src/gallium/targets/Makefile.egl b/src/gallium/targets/Makefile.egl deleted file mode 100644 index 315856014b..0000000000 --- a/src/gallium/targets/Makefile.egl +++ /dev/null @@ -1,108 +0,0 @@ -# src/gallium/winsys/drm/Makefile.egl - -# The driver Makefile should define -# -# EGL_DRIVER_NAME, the name of the driver -# EGL_DRIVER_SOURCES, the sources of the driver -# EGL_DRIVER_LIBS, extra libraries needed by the driver -# EGL_DRIVER_PIPES, the pipe drivers of the driver -# -# before including this file. - -EGL_DRIVER_OBJECTS = $(EGL_DRIVER_SOURCES:.c=.o) - -common_LIBS = -ldrm -lm -ldl - -# ximage backend calls gallium_wrap_screen, which requires libidentity.a and -# libtrace.a -x11_ST = $(TOP)/src/gallium/state_trackers/egl/libeglx11.a \ - $(TOP)/src/gallium/winsys/sw/xlib/libws_xlib.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/drivers/identity/libidentity.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a - -x11_LIBS = $(common_LIBS) -lX11 -lXext -lXfixes - -kms_ST = $(TOP)/src/gallium/state_trackers/egl/libeglkms.a -kms_LIBS = $(common_LIBS) - -fbdev_ST = \ - $(TOP)/src/gallium/state_trackers/egl/libeglfbdev.a \ - $(TOP)/src/gallium/winsys/sw/fbdev/libfbdev.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/drivers/identity/libidentity.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a -fbdev_LIBS = $(common_LIBS) - -ifeq ($(MESA_LLVM),1) -x11_ST += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a -x11_LIBS += $(LLVM_LIBS) -fbdev_ST += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a -fbdev_LIBS += $(LLVM_LIBS) -LDFLAGS += 
$(LLVM_LDFLAGS) -endif - -### Include directories -INCLUDES = \ - -I$(TOP)/include \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/winsys \ - -I$(TOP)/src/egl/main \ - $(LIBDRM_CFLAGS) - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(EGL_DRIVER_DEFINES) $< -o $@ - - -##### TARGETS ##### - -ifeq ($(EGL_DRIVER_NAME),swrast) -EGL_PLATFORMS := $(filter-out kms, $(EGL_PLATFORMS)) -else -EGL_PLATFORMS := $(filter-out fbdev, $(EGL_PLATFORMS)) -endif - -EGL_PLATFORM_DRIVERS = $(foreach plat, $(EGL_PLATFORMS), egl_$(plat)_$(EGL_DRIVER_NAME).so) - -EGL_PLATFORM_LIBS = $(foreach drv, $(EGL_PLATFORM_DRIVERS), $(TOP)/$(LIB_DIR)/egl/$(drv)) - -default: $(EGL_PLATFORM_LIBS) - -$(EGL_PLATFORM_LIBS): $(TOP)/$(LIB_DIR)/egl/%.so: %.so - @$(INSTALL) -d $(TOP)/$(LIB_DIR)/egl - $(INSTALL) $< $(TOP)/$(LIB_DIR)/egl - -define mklib-egl -$(MKLIB) -o $@ -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \ - $(MKLIB_OPTIONS) $(EGL_DRIVER_OBJECTS) \ - -Wl,--start-group $($(1)_ST) $(EGL_DRIVER_PIPES) \ - $(GALLIUM_AUXILIARIES) -Wl,--end-group \ - $($(1)_LIBS) $(EGL_DRIVER_LIBS) -L$(TOP)/$(LIB_DIR) -l$(EGL_LIB) -endef - -egl_x11_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(x11_ST) $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) Makefile - $(call mklib-egl,x11) - -egl_kms_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(kms_ST) $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) Makefile - $(call mklib-egl,kms) - -egl_fbdev_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(fbdev_ST) $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) Makefile - $(call mklib-egl,fbdev) - -clean: - -rm -f $(EGL_DRIVER_OBJECTS) - -rm -f $(EGL_PLATFORM_DRIVERS) - -install: $(EGL_PLATFORM_LIBS) - $(INSTALL) -d $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR) - for lib in $(EGL_PLATFORM_LIBS); do \ - $(MINSTALL) -m 755 "$$lib" $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR); \ - done - -depend: diff --git a/src/gallium/targets/Makefile.xorg b/src/gallium/targets/Makefile.xorg 
index 4237f944e0..762c905985 100644 --- a/src/gallium/targets/Makefile.xorg +++ b/src/gallium/targets/Makefile.xorg @@ -9,7 +9,8 @@ # Optional defines: # DRIVER_INCLUDES are appended to the list of includes directories. # DRIVER_DEFINES is not used for makedepend, but for compilation. -# DRIVER_LINKS are flags given to the linker +# DRIVER_PIPES are pipe drivers and modules that the driver depends on. +# DRIVER_LINKS are flags given to the linker. ### Basic defines ### @@ -27,13 +28,21 @@ INCLUDES = \ LIBNAME_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET) +ifeq ($(MESA_LLVM),1) +LD = g++ +LDFLAGS += $(LLVM_LDFLAGS) +USE_CXX=1 +DRIVER_PIPES += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +DRIVER_LINKS += $(LLVM_LIBS) -lm -ldl +endif + ##### TARGETS ##### default: depend $(TOP)/$(LIB_DIR)/gallium $(LIBNAME) $(LIBNAME_STAGING) -$(LIBNAME): $(OBJECTS) Makefile $(LIBS) - $(MKLIB) -noprefix -o $@ $(OBJECTS) $(DRIVER_LINKS) +$(LIBNAME): $(OBJECTS) Makefile ../Makefile.xorg $(LIBS) $(DRIVER_PIPES) + $(MKLIB) -noprefix -o $@ $(LDFLAGS) $(OBJECTS) $(DRIVER_PIPES) $(GALLIUM_AUXILIARIES) $(DRIVER_LINKS) depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURCES) rm -f depend diff --git a/src/gallium/targets/SConscript b/src/gallium/targets/SConscript index 97187030ab..f8276b1555 100644 --- a/src/gallium/targets/SConscript +++ b/src/gallium/targets/SConscript @@ -32,8 +32,7 @@ if 'xorg' in env['statetrackers']: if 'egl' in env['statetrackers']: SConscript([ - 'egl-swrast/SConscript', - 'egl-apis/SConscript', + 'egl-gdi/SConscript', ]) # Ideally all non-target directories would produce convenience diff --git a/src/gallium/targets/SConscript.dri b/src/gallium/targets/SConscript.dri index 74b53e5023..e5981c2461 100644 --- a/src/gallium/targets/SConscript.dri +++ b/src/gallium/targets/SConscript.dri @@ -13,6 +13,7 @@ drienv.Replace(CPPPATH = [ '#src/gallium/include', '#src/gallium/auxiliary', '#src/gallium/drivers', + '#src/gallium/winsys', '#src/mesa', 
'#src/mesa/main', '#src/mesa/glapi', diff --git a/src/gallium/targets/dri-i915/Makefile b/src/gallium/targets/dri-i915/Makefile index fdcfd08c22..9c10d71a4a 100644 --- a/src/gallium/targets/dri-i915/Makefile +++ b/src/gallium/targets/dri-i915/Makefile @@ -6,16 +6,25 @@ LIBNAME = i915_dri.so PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/i915/drm/libi915drm.a \ + $(TOP)/src/gallium/winsys/sw/wrapper/libwsw.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/drivers/identity/libidentity.a \ $(TOP)/src/gallium/drivers/i915/libi915.a C_SOURCES = \ + target.c \ $(COMMON_GALLIUM_SOURCES) \ $(DRIVER_SOURCES) +DRIVER_DEFINES = \ + -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD -DGALLIUM_SOFTPIPE + +ifeq ($(MESA_LLVM),1) +DRIVER_DEFINES += -DGALLIUM_LLVMPIPE +endif + include ../Makefile.dri DRI_LIB_DEPS += -ldrm_intel diff --git a/src/gallium/targets/dri-i915/SConscript b/src/gallium/targets/dri-i915/SConscript index 65c4239887..6f9336b5ac 100644 --- a/src/gallium/targets/dri-i915/SConscript +++ b/src/gallium/targets/dri-i915/SConscript @@ -8,10 +8,13 @@ env = drienv.Clone() env.ParseConfig('pkg-config --cflags --libs libdrm_intel') +env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE', 'GALLIUM_GALAHAD']) + env.Prepend(LIBS = [ st_dri, i915drm, i915, + galahad, trace, rbug, mesa, @@ -22,6 +25,6 @@ env.Prepend(LIBS = [ env.LoadableModule( target = 'i915_dri.so', - source = 'dummy.c', + source = 'target.c', SHLIBPREFIX = '', ) diff --git a/src/gallium/targets/dri-i915/dummy.c b/src/gallium/targets/dri-i915/dummy.c deleted file mode 100644 index e69de29bb2..0000000000 --- a/src/gallium/targets/dri-i915/dummy.c +++ /dev/null diff --git a/src/gallium/targets/dri-i915/target.c 
b/src/gallium/targets/dri-i915/target.c new file mode 100644 index 0000000000..5ae6ca367d --- /dev/null +++ b/src/gallium/targets/dri-i915/target.c @@ -0,0 +1,30 @@ + +#include "state_tracker/drm_driver.h" +#include "target-helpers/inline_wrapper_sw_helper.h" +#include "target-helpers/inline_debug_helper.h" +#include "i915/drm/i915_drm_public.h" +#include "i915/i915_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct i915_winsys *iws; + struct pipe_screen *screen; + + iws = i915_drm_winsys_create(fd); + if (!iws) + return NULL; + + screen = i915_screen_create(iws); + if (!screen) + return NULL; + + if (debug_get_bool_option("I915_SOFTWARE", FALSE)) + screen = sw_screen_wrap(screen); + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("i915", "i915", create_screen) diff --git a/src/gallium/targets/dri-i965/Makefile b/src/gallium/targets/dri-i965/Makefile index 13987c643e..4b50d04255 100644 --- a/src/gallium/targets/dri-i965/Makefile +++ b/src/gallium/targets/dri-i965/Makefile @@ -6,18 +6,25 @@ LIBNAME = i965_dri.so PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/i965/drm/libi965drm.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/winsys/sw/drm/libswdrm.a \ $(TOP)/src/gallium/winsys/sw/wrapper/libwsw.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/drivers/identity/libidentity.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/i965/libi965.a C_SOURCES = \ + target.c \ $(COMMON_GALLIUM_SOURCES) \ $(DRIVER_SOURCES) +DRIVER_DEFINES = \ + -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD -DGALLIUM_SOFTPIPE + +ifeq ($(MESA_LLVM),1) +DRIVER_DEFINES += -DGALLIUM_LLVMPIPE +endif + include ../Makefile.dri DRI_LIB_DEPS += -ldrm_intel diff --git 
a/src/gallium/targets/dri-i965/SConscript b/src/gallium/targets/dri-i965/SConscript index 13ac5a2d8e..684e3488f7 100644 --- a/src/gallium/targets/dri-i965/SConscript +++ b/src/gallium/targets/dri-i965/SConscript @@ -8,10 +8,15 @@ env = drienv.Clone() env.ParseConfig('pkg-config --cflags --libs libdrm_intel') +env.Append(CPPDEFINES = [ + 'GALLIUM_SOFTPIPE', + 'GALLIUM_RBUG', + 'GALLIUM_TRACE' +]) + env.Prepend(LIBS = [ st_dri, i965drm, - ws_drm, ws_wrapper, i965, trace, @@ -24,6 +29,6 @@ env.Prepend(LIBS = [ env.LoadableModule( target = 'i965_dri.so', - source = 'dummy.c', + source = 'target.c', SHLIBPREFIX = '', ) diff --git a/src/gallium/targets/dri-i965/dummy.c b/src/gallium/targets/dri-i965/dummy.c deleted file mode 100644 index e69de29bb2..0000000000 --- a/src/gallium/targets/dri-i965/dummy.c +++ /dev/null diff --git a/src/gallium/targets/dri-i965/target.c b/src/gallium/targets/dri-i965/target.c new file mode 100644 index 0000000000..ce97f82027 --- /dev/null +++ b/src/gallium/targets/dri-i965/target.c @@ -0,0 +1,30 @@ + +#include "target-helpers/inline_wrapper_sw_helper.h" +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "i965/drm/i965_drm_public.h" +#include "i965/brw_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct brw_winsys_screen *bws; + struct pipe_screen *screen; + + bws = i965_drm_winsys_screen_create(fd); + if (!bws) + return NULL; + + screen = brw_screen_create(bws); + if (!screen) + return NULL; + + if (debug_get_bool_option("BRW_SOFTPIPE", FALSE)) + screen = sw_screen_wrap(screen); + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("i915", "i965", create_screen) diff --git a/src/gallium/targets/dri-nouveau/Makefile b/src/gallium/targets/dri-nouveau/Makefile index 74d352c6a7..2f64f312b8 100644 --- a/src/gallium/targets/dri-nouveau/Makefile +++ b/src/gallium/targets/dri-nouveau/Makefile @@ -6,14 +6,20 @@ LIBNAME = nouveau_dri.so PIPE_DRIVERS 
= \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a C_SOURCES = \ + target.c \ $(COMMON_GALLIUM_SOURCES) \ $(DRIVER_SOURCES) +DRIVER_DEFINES = \ + -DGALLIUM_RBUG -DGALLIUM_TRACE + include ../Makefile.dri DRI_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs) diff --git a/src/gallium/targets/dri-nouveau/target.c b/src/gallium/targets/dri-nouveau/target.c new file mode 100644 index 0000000000..e725a4d9b7 --- /dev/null +++ b/src/gallium/targets/dri-nouveau/target.c @@ -0,0 +1,20 @@ + +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "nouveau/drm/nouveau_drm_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct pipe_screen *screen; + + screen = nouveau_drm_screen_create(fd); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("nouveau", "nouveau", create_screen) diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile index 0213200fbc..932303d194 100644 --- a/src/gallium/targets/dri-r600/Makefile +++ b/src/gallium/targets/dri-r600/Makefile @@ -12,9 +12,13 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/r600/libr600.a C_SOURCES = \ + target.c \ $(COMMON_GALLIUM_SOURCES) \ $(DRIVER_SOURCES) +DRIVER_DEFINES = \ + -DGALLIUM_RBUG -DGALLIUM_TRACE + include ../Makefile.dri DRI_LIB_DEPS += -ldrm_radeon diff --git a/src/gallium/targets/dri-r600/SConscript b/src/gallium/targets/dri-r600/SConscript index 6c23c050eb..97c5df01fe 100644 --- a/src/gallium/targets/dri-r600/SConscript +++ b/src/gallium/targets/dri-r600/SConscript @@ -8,6 +8,8 @@ env = drienv.Clone() env.ParseConfig('pkg-config --cflags --libs libdrm_radeon') 
+env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE']) + env.Prepend(LIBS = [ st_dri, r600drm, @@ -22,6 +24,6 @@ env.Prepend(LIBS = [ env.SharedLibrary( target ='r600_dri.so', - source = 'dummy.c', + source = 'target.c', SHLIBPREFIX = '', ) diff --git a/src/gallium/targets/dri-r600/dummy.c b/src/gallium/targets/dri-r600/dummy.c deleted file mode 100644 index e69de29bb2..0000000000 --- a/src/gallium/targets/dri-r600/dummy.c +++ /dev/null diff --git a/src/gallium/targets/dri-r600/target.c b/src/gallium/targets/dri-r600/target.c new file mode 100644 index 0000000000..a01f4ed49f --- /dev/null +++ b/src/gallium/targets/dri-r600/target.c @@ -0,0 +1,26 @@ + +#include "state_tracker/drm_driver.h" +#include "target-helpers/inline_debug_helper.h" +#include "r600/drm/r600_drm_public.h" +#include "r600/r600_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct radeon *rw; + struct pipe_screen *screen; + + rw = r600_drm_winsys_create(fd); + if (!rw) + return NULL; + + screen = r600_screen_create(rw); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen) diff --git a/src/gallium/targets/dri-radeong/Makefile b/src/gallium/targets/dri-radeong/Makefile index 8ef24c0821..3f9ec36166 100644 --- a/src/gallium/targets/dri-radeong/Makefile +++ b/src/gallium/targets/dri-radeong/Makefile @@ -7,14 +7,19 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/r300/libr300.a C_SOURCES = \ + target.c \ $(COMMON_GALLIUM_SOURCES) \ $(DRIVER_SOURCES) +DRIVER_DEFINES = \ + -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD + include ../Makefile.dri DRI_LIB_DEPS += -ldrm_radeon diff 
--git a/src/gallium/targets/dri-radeong/SConscript b/src/gallium/targets/dri-radeong/SConscript index 4c6cfb84eb..1402c3bd12 100644 --- a/src/gallium/targets/dri-radeong/SConscript +++ b/src/gallium/targets/dri-radeong/SConscript @@ -8,10 +8,13 @@ env = drienv.Clone() env.ParseConfig('pkg-config --cflags --libs libdrm_radeon') +env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE', 'GALLIUM_GALAHAD']) + env.Prepend(LIBS = [ st_dri, radeonwinsys, r300, + galahad, trace, rbug, mesa, @@ -22,6 +25,6 @@ env.Prepend(LIBS = [ env.SharedLibrary( target ='radeon_dri.so', - source = 'dummy.c', + source = 'target.c', SHLIBPREFIX = '', ) diff --git a/src/gallium/targets/dri-radeong/dummy.c b/src/gallium/targets/dri-radeong/dummy.c deleted file mode 100644 index e69de29bb2..0000000000 --- a/src/gallium/targets/dri-radeong/dummy.c +++ /dev/null diff --git a/src/gallium/targets/dri-radeong/target.c b/src/gallium/targets/dri-radeong/target.c new file mode 100644 index 0000000000..5a0a8dc573 --- /dev/null +++ b/src/gallium/targets/dri-radeong/target.c @@ -0,0 +1,26 @@ + +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "radeon/drm/radeon_drm_public.h" +#include "r300/r300_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct r300_winsys_screen *sws; + struct pipe_screen *screen; + + sws = r300_drm_winsys_screen_create(fd); + if (!sws) + return NULL; + + screen = r300_screen_create(sws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("radeon", "radeon", create_screen) diff --git a/src/gallium/targets/dri-swrast/Makefile b/src/gallium/targets/dri-swrast/Makefile index 0a53eb56c4..948c45abe5 100644 --- a/src/gallium/targets/dri-swrast/Makefile +++ b/src/gallium/targets/dri-swrast/Makefile @@ -3,7 +3,9 @@ include $(TOP)/configs/current LIBNAME = swrastg_dri.so -DRIVER_DEFINES = -D__NOT_HAVE_DRM_H -DGALLIUM_SOFTPIPE +DRIVER_DEFINES = \ + 
-D__NOT_HAVE_DRM_H -DGALLIUM_SOFTPIPE \ + -DGALLIUM_RBUG -DGALLIUM_TRACE PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/sw/libdrisw.a \ diff --git a/src/gallium/targets/dri-swrast/SConscript b/src/gallium/targets/dri-swrast/SConscript index 679afab41c..d814347119 100644 --- a/src/gallium/targets/dri-swrast/SConscript +++ b/src/gallium/targets/dri-swrast/SConscript @@ -18,7 +18,11 @@ env.Prepend(LIBS = [ ]) if True: - env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE') + env.Append(CPPDEFINES = [ + 'GALLIUM_SOFTPIPE', + 'GALLIUM_RBUG', + 'GALLIUM_TRACE', + ]) env.Prepend(LIBS = [softpipe]) if env['llvm']: diff --git a/src/gallium/targets/dri-swrast/swrast_drm_api.c b/src/gallium/targets/dri-swrast/swrast_drm_api.c index 84142be80c..8d741c6343 100644 --- a/src/gallium/targets/dri-swrast/swrast_drm_api.c +++ b/src/gallium/targets/dri-swrast/swrast_drm_api.c @@ -28,61 +28,11 @@ #include "pipe/p_compiler.h" #include "util/u_memory.h" -#include "state_tracker/drm_api.h" -#include "state_tracker/sw_winsys.h" #include "dri_sw_winsys.h" -#include "trace/tr_public.h" -/* Copied from targets/libgl-xlib */ +#include "target-helpers/inline_debug_helper.h" +#include "target-helpers/inline_sw_helper.h" -#ifdef GALLIUM_SOFTPIPE -#include "softpipe/sp_public.h" -#endif - -#ifdef GALLIUM_LLVMPIPE -#include "llvmpipe/lp_public.h" -#endif - -#ifdef GALLIUM_CELL -#include "cell/ppu/cell_public.h" -#endif - -static struct pipe_screen * -swrast_create_screen(struct sw_winsys *winsys) -{ - const char *default_driver; - const char *driver; - struct pipe_screen *screen = NULL; - -#if defined(GALLIUM_CELL) - default_driver = "cell"; -#elif defined(GALLIUM_LLVMPIPE) - default_driver = "llvmpipe"; -#elif defined(GALLIUM_SOFTPIPE) - default_driver = "softpipe"; -#else - default_driver = ""; -#endif - - driver = debug_get_option("GALLIUM_DRIVER", default_driver); - -#if defined(GALLIUM_CELL) - if (screen == NULL && strcmp(driver, "cell") == 0) - screen = cell_create_screen( winsys ); -#endif - 
-#if defined(GALLIUM_LLVMPIPE) - if (screen == NULL && strcmp(driver, "llvmpipe") == 0) - screen = llvmpipe_create_screen( winsys ); -#endif - -#if defined(GALLIUM_SOFTPIPE) - if (screen == NULL) - screen = softpipe_create_screen( winsys ); -#endif - - return trace_screen_create(screen);; -} struct pipe_screen * drisw_create_screen(struct drisw_loader_funcs *lf) @@ -94,10 +44,12 @@ drisw_create_screen(struct drisw_loader_funcs *lf) if (winsys == NULL) return NULL; - screen = swrast_create_screen(winsys); + screen = sw_screen_create(winsys); if (!screen) goto fail; + screen = debug_screen_wrap(screen); + return screen; fail: diff --git a/src/gallium/targets/dri-vmwgfx/Makefile b/src/gallium/targets/dri-vmwgfx/Makefile index b5b679f3c7..97c703b373 100644 --- a/src/gallium/targets/dri-vmwgfx/Makefile +++ b/src/gallium/targets/dri-vmwgfx/Makefile @@ -11,8 +11,12 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/svga/libsvga.a C_SOURCES = \ + target.c \ $(COMMON_GALLIUM_SOURCES) +DRIVER_DEFINES = \ + -DGALLIUM_RBUG -DGALLIUM_TRACE + include ../Makefile.dri symlinks: diff --git a/src/gallium/targets/dri-vmwgfx/SConscript b/src/gallium/targets/dri-vmwgfx/SConscript index 09a0c254c3..7afabc7429 100644 --- a/src/gallium/targets/dri-vmwgfx/SConscript +++ b/src/gallium/targets/dri-vmwgfx/SConscript @@ -6,6 +6,8 @@ if not 'svga' in env['drivers']: env = drienv.Clone() +env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE']) + env.Prepend(LIBS = [ st_dri, svgadrm, @@ -20,6 +22,6 @@ env.Prepend(LIBS = [ env.LoadableModule( target = 'vmwgfx_dri.so', - source = 'dummy.c', + source = 'target.c', SHLIBPREFIX = '', ) diff --git a/src/gallium/targets/dri-vmwgfx/dummy.c b/src/gallium/targets/dri-vmwgfx/dummy.c deleted file mode 100644 index e69de29bb2..0000000000 --- a/src/gallium/targets/dri-vmwgfx/dummy.c +++ /dev/null diff --git a/src/gallium/targets/dri-vmwgfx/target.c b/src/gallium/targets/dri-vmwgfx/target.c new file mode 100644 index 0000000000..15089d6db2 --- /dev/null +++ 
b/src/gallium/targets/dri-vmwgfx/target.c @@ -0,0 +1,26 @@ + +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "svga/drm/svga_drm_public.h" +#include "svga/svga_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct svga_winsys_screen *sws; + struct pipe_screen *screen; + + sws = svga_drm_winsys_screen_create(fd); + if (!sws) + return NULL; + + screen = svga_screen_create(sws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("vmwgfx", "vmwgfx", create_screen) diff --git a/src/gallium/targets/egl-apis/Makefile b/src/gallium/targets/egl-apis/Makefile deleted file mode 100644 index 88915bfc5e..0000000000 --- a/src/gallium/targets/egl-apis/Makefile +++ /dev/null @@ -1,77 +0,0 @@ -# src/gallium/targets/egl-apis - -TOP = ../../../.. -include $(TOP)/configs/current - -OUTPUT_PREFIX := api_ -OUTPUT_PATH := $(TOP)/$(LIB_DIR)/egl - -OUTPUTS := $(addsuffix .so, $(EGL_CLIENT_APIS)) -OUTPUTS := $(addprefix $(OUTPUT_PATH)/$(OUTPUT_PREFIX), $(OUTPUTS)) - -# include dirs -GL_INCLUDES := -I$(TOP)/src/mesa -I$(TOP)/src/gallium/include -GLESv1_CM_INCLUDES := $(GL_INCLUDES) -GLESv2_INCLUDES := $(GL_INCLUDES) -OpenVG_INCLUDES := -I$(TOP)/src/gallium/state_trackers/vega -I$(TOP)/src/gallium/include - -# system libs -GL_SYS := -lpthread -lm -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) -GLESv1_CM_SYS := -lpthread -lm -L$(TOP)/$(LIB_DIR) -l$(GLESv1_CM_LIB) -GLESv2_SYS := -lpthread -lm -L$(TOP)/$(LIB_DIR) -l$(GLESv2_LIB) -OpenVG_SYS := -lm -L$(TOP)/$(LIB_DIR) -l$(VG_LIB) - -# $(LLVM_LIBS) will be discarded except for OpenGL, which creates a private -# draw context for selection/feedback mode. 
-ifeq ($(MESA_LLVM),1) -GL_SYS += $(LLVM_LIBS) -GLESv1_CM_SYS += $(LLVM_LIBS) -GLESv2_SYS += $(LLVM_LIBS) -OpenVG_SYS += $(LLVM_LIBS) -LDFLAGS += $(LLVM_LDFLAGS) -endif - -# project libs -GL_LIBS := $(TOP)/src/mesa/libmesagallium.a -GLESv1_CM_LIBS := $(TOP)/src/mesa/libes1gallium.a -GLESv2_LIBS := $(TOP)/src/mesa/libes2gallium.a -OpenVG_LIBS := $(TOP)/src/gallium/state_trackers/vega/libvega.a - -# objects -GL_OBJECTS := api_GL.o -GLESv1_CM_OBJECTS := api_GLESv1_CM.o -GLESv2_OBJECTS := api_GLESv2.o -OpenVG_OBJECTS := api_OpenVG.o - -default: $(OUTPUTS) - -api_%.o: api_%.c - $(CC) -c -o $@ $< $($*_INCLUDES) $(DEFINES) $(CFLAGS) - -define mklib -$(MKLIB) -o $(notdir $@) -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \ - -install $(OUTPUT_PATH) $(MKLIB_OPTIONS) \ - $($(1)_OBJECTS) $($(1)_LIBS) $(GALLIUM_AUXILIARIES) $($(1)_SYS) -endef - -$(OUTPUT_PATH)/$(OUTPUT_PREFIX)$(GL_LIB).so: $(GL_OBJECTS) $(GL_LIBS) - $(call mklib,GL) - -$(OUTPUT_PATH)/$(OUTPUT_PREFIX)$(GLESv1_CM_LIB).so: $(GLESv1_CM_OBJECTS) $(GLESv1_CM_LIBS) - $(call mklib,GLESv1_CM) - -$(OUTPUT_PATH)/$(OUTPUT_PREFIX)$(GLESv2_LIB).so: $(GLESv2_OBJECTS) $(GLESv2_LIBS) - $(call mklib,GLESv2) - -$(OUTPUT_PATH)/$(OUTPUT_PREFIX)$(VG_LIB).so: $(OpenVG_OBJECTS) $(OpenVG_LIBS) - $(call mklib,OpenVG) - -install: $(OUTPUTS) - $(INSTALL) -d $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR) - for out in $(OUTPUTS); do \ - $(MINSTALL) -m 755 "$$out" $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR); \ - done - -clean: - -rm -f $(OUTPUTS) - -rm -f *.o diff --git a/src/gallium/targets/egl-apis/SConscript b/src/gallium/targets/egl-apis/SConscript deleted file mode 100644 index 0ca3d1fb9e..0000000000 --- a/src/gallium/targets/egl-apis/SConscript +++ /dev/null @@ -1,33 +0,0 @@ -####################################################################### -# SConscript for egl-apis target - -Import('*') - -if env['platform'] == 'windows': - - env = env.Clone() - - env.Append(CPPPATH = [ - '#/src/gallium/state_trackers/vega', - ]) - - env.Append(LIBS = [ - 
'gdi32', - 'user32', - 'kernel32', - 'ws2_32', - ]) - - env['no_import_lib'] = 1 - - api_libs = { - 'OpenVG': vgapi + st_vega, - } - - for name in api_libs.keys(): - api = env.SharedLibrary( - target = 'api_' + name, - source = ['api_' + name + '.c'], - LIBS = api_libs[name] + gallium + env['LIBS'], - ) - env.InstallSharedLibrary(api) diff --git a/src/gallium/targets/egl-apis/api_GLESv2.c b/src/gallium/targets/egl-apis/api_GLESv2.c deleted file mode 100644 index 5c773aaf93..0000000000 --- a/src/gallium/targets/egl-apis/api_GLESv2.c +++ /dev/null @@ -1,8 +0,0 @@ -#include "state_tracker/st_gl_api.h" - -PUBLIC struct st_api * -st_api_create_OpenGL_ES2() -{ - /* linker magic creates different versions */ - return st_gl_api_create(); -} diff --git a/src/gallium/targets/egl-gdi/SConscript b/src/gallium/targets/egl-gdi/SConscript new file mode 100644 index 0000000000..8f8b28ef67 --- /dev/null +++ b/src/gallium/targets/egl-gdi/SConscript @@ -0,0 +1,47 @@ +####################################################################### +# SConscript for egl-gdi target + +Import('*') + +if env['platform'] == 'windows': + + env = env.Clone() + + env.Append(CPPPATH = [ + '#/src/gallium/state_trackers/egl', + '#/src/gallium/state_trackers/vega', + '#/src/egl/main', + '#/src/mesa', + ]) + + env.Append(CPPDEFINES = [ + 'FEATURE_VG=1', + 'GALLIUM_SOFTPIPE', + 'GALLIUM_RBUG', + 'GALLIUM_TRACE', + ]) + + env.Append(LIBS = [ + 'gdi32', + 'user32', + 'kernel32', + 'ws2_32', + ]) + + env['no_import_lib'] = 1 + + drivers = [softpipe] + if env['llvm']: + env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE') + drivers += [llvmpipe] + drivers += [identity, trace, rbug] + + apis = [vgapi, st_vega] + + egl_gallium = env.SharedLibrary( + target ='egl_gallium', + source = 'egl-static.c', + LIBS = st_egl_gdi + ws_gdi + drivers + apis + gallium + egl + env['LIBS'], + ) + + env.InstallSharedLibrary(egl_gallium) diff --git a/src/gallium/targets/egl-gdi/egl-static.c b/src/gallium/targets/egl-gdi/egl-static.c new 
file mode 100644 index 0000000000..ec2f865c31 --- /dev/null +++ b/src/gallium/targets/egl-gdi/egl-static.c @@ -0,0 +1,148 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 + * + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#include "common/egl_g3d_loader.h" +#include "state_tracker/st_gl_api.h" +#include "vg_api.h" +#include "target-helpers/inline_sw_helper.h" +#include "target-helpers/inline_debug_helper.h" +#include "egldriver.h" + +static uint +get_api_mask(void) +{ + uint api_mask = 0x0; + +#if FEATURE_GL + api_mask |= 1 << ST_API_OPENGL; +#endif +#if FEATURE_ES1 + api_mask |= 1 << ST_API_OPENGL_ES1; +#endif +#if FEATURE_ES2 + api_mask |= 1 << ST_API_OPENGL_ES2; +#endif +#if FEATURE_VG + api_mask |= 1 << ST_API_OPENVG; +#endif + + return api_mask; +} + +static struct st_api * +get_st_api(enum st_api_type api) +{ + struct st_api *stapi = NULL; + + switch (api) { +#if FEATURE_GL + case ST_API_OPENGL: + stapi = st_gl_api_create(); + break; +#endif +#if FEATURE_ES1 + case ST_API_OPENGL_ES1: + stapi = st_gl_api_create_es1(); + break; +#endif +#if FEATURE_ES2 + case ST_API_OPENGL_ES2: + stapi = st_gl_api_create_es2(); + break; +#endif +#if FEATURE_VG + case ST_API_OPENVG: + stapi = (struct st_api *) vg_api_get(); + break; +#endif + default: + break; + } + + return stapi; +} + +static struct st_api * +guess_gl_api(void) +{ + return NULL; +} + +static struct pipe_screen * +create_drm_screen(const char *name, int fd) +{ + return NULL; +} + +static struct pipe_screen * +create_sw_screen(struct sw_winsys *ws) +{ + struct pipe_screen *screen; + + screen = sw_screen_create(ws); + if (screen) + screen = debug_screen_wrap(screen); + + return screen; +} + +static void +init_loader(struct egl_g3d_loader *loader) +{ + if (loader->api_mask) + return; + + loader->api_mask = get_api_mask(); + loader->get_st_api = get_st_api; + loader->guess_gl_api = guess_gl_api; + loader->create_drm_screen = create_drm_screen; + loader->create_sw_screen = create_sw_screen; +} + +static void +egl_g3d_unload(_EGLDriver *drv) +{ + egl_g3d_destroy_driver(drv); +} + +static struct egl_g3d_loader loader; + +_EGLDriver * +_eglMain(const char *args) +{ + _EGLDriver 
*drv; + + init_loader(&loader); + drv = egl_g3d_create_driver(&loader); + if (drv) { + drv->Name = "Gallium"; + drv->Unload = egl_g3d_unload; + } + + return drv; +} diff --git a/src/gallium/targets/egl-i915/Makefile b/src/gallium/targets/egl-i915/Makefile deleted file mode 100644 index a4b41842ff..0000000000 --- a/src/gallium/targets/egl-i915/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -EGL_DRIVER_NAME = i915 -EGL_DRIVER_SOURCES = dummy.c -EGL_DRIVER_LIBS = -ldrm_intel - -EGL_DRIVER_PIPES = \ - $(TOP)/src/gallium/winsys/i915/drm/libi915drm.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/i915/libi915.a - -include ../Makefile.egl diff --git a/src/gallium/targets/egl-i915/dummy.c b/src/gallium/targets/egl-i915/dummy.c deleted file mode 100644 index 3181d0ba7e..0000000000 --- a/src/gallium/targets/egl-i915/dummy.c +++ /dev/null @@ -1,3 +0,0 @@ -/* A poor man's --whole-archive for EGL drivers */ -void *_eglMain(void *); -void *_eglWholeArchive = (void *) _eglMain; diff --git a/src/gallium/targets/egl-i965/Makefile b/src/gallium/targets/egl-i965/Makefile deleted file mode 100644 index d4730824a5..0000000000 --- a/src/gallium/targets/egl-i965/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -EGL_DRIVER_NAME = i965 -EGL_DRIVER_SOURCES = dummy.c -EGL_DRIVER_LIBS = -ldrm_intel - -EGL_DRIVER_PIPES = \ - $(TOP)/src/gallium/winsys/i965/drm/libi965drm.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/i965/libi965.a \ - $(TOP)/src/gallium/winsys/sw/drm/libswdrm.a \ - $(TOP)/src/gallium/winsys/sw/wrapper/libwsw.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a - -include ../Makefile.egl diff --git a/src/gallium/targets/egl-i965/dummy.c b/src/gallium/targets/egl-i965/dummy.c deleted file mode 100644 index 3181d0ba7e..0000000000 --- a/src/gallium/targets/egl-i965/dummy.c +++ /dev/null @@ -1,3 +0,0 @@ -/* A poor man's --whole-archive for EGL drivers */ -void *_eglMain(void *); -void *_eglWholeArchive = (void *) _eglMain; diff --git a/src/gallium/targets/egl-nouveau/Makefile b/src/gallium/targets/egl-nouveau/Makefile deleted file mode 100644 index e3fa8937e8..0000000000 --- a/src/gallium/targets/egl-nouveau/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -EGL_DRIVER_NAME = nouveau -EGL_DRIVER_SOURCES = dummy.c -EGL_DRIVER_LIBS = -ldrm_nouveau - -EGL_DRIVER_PIPES = \ - $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \ - $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ - $(TOP)/src/gallium/drivers/nv50/libnv50.a \ - $(TOP)/src/gallium/drivers/nouveau/libnouveau.a - -include ../Makefile.egl diff --git a/src/gallium/targets/egl-nouveau/dummy.c b/src/gallium/targets/egl-nouveau/dummy.c deleted file mode 100644 index 3181d0ba7e..0000000000 --- a/src/gallium/targets/egl-nouveau/dummy.c +++ /dev/null @@ -1,3 +0,0 @@ -/* A poor man's --whole-archive for EGL drivers */ -void *_eglMain(void *); -void *_eglWholeArchive = (void *) _eglMain; diff --git a/src/gallium/targets/egl-radeon/Makefile b/src/gallium/targets/egl-radeon/Makefile deleted file mode 100644 index 8fcca26826..0000000000 --- a/src/gallium/targets/egl-radeon/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -EGL_DRIVER_NAME = radeon -EGL_DRIVER_SOURCES = dummy.c -EGL_DRIVER_LIBS = -ldrm_radeon - -EGL_DRIVER_PIPES = \ - $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/r300/libr300.a - -include ../Makefile.egl diff --git a/src/gallium/targets/egl-radeon/dummy.c b/src/gallium/targets/egl-radeon/dummy.c deleted file mode 100644 index 3181d0ba7e..0000000000 --- a/src/gallium/targets/egl-radeon/dummy.c +++ /dev/null @@ -1,3 +0,0 @@ -/* A poor man's --whole-archive for EGL drivers */ -void *_eglMain(void *); -void *_eglWholeArchive = (void *) _eglMain; diff --git a/src/gallium/targets/egl-swrast/Makefile b/src/gallium/targets/egl-swrast/Makefile deleted file mode 100644 index 7d4f505498..0000000000 --- a/src/gallium/targets/egl-swrast/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -# Do propperly -CFLAGS+="-I$(TOP)/src/gallium/include" - -EGL_DRIVER_NAME = swrast -EGL_DRIVER_SOURCES = swrast_glue.c -EGL_DRIVER_LIBS = -EGL_DRIVER_PIPES = - -include ../Makefile.egl diff --git a/src/gallium/targets/egl-swrast/SConscript b/src/gallium/targets/egl-swrast/SConscript deleted file mode 100644 index 213e5b3e6c..0000000000 --- a/src/gallium/targets/egl-swrast/SConscript +++ /dev/null @@ -1,30 +0,0 @@ -####################################################################### -# SConscript for egl-swrast target - -Import('*') - -if env['platform'] == 'windows': - - env = env.Clone() - - env.Append(LIBS = [ - 'gdi32', - 'user32', - 'kernel32', - 'ws2_32', - ]) - - drivers = [softpipe] - if env['llvm']: - drivers += [llvmpipe] - drivers += [identity, trace, rbug] - - env['no_import_lib'] = 1 - - egl_gdi_swrast = env.SharedLibrary( - target ='egl_gdi_swrast', - source = 'swrast_glue.c', - LIBS = st_egl_gdi + ws_gdi + drivers + gallium + egl + env['LIBS'], - ) - - env.InstallSharedLibrary(egl_gdi_swrast) diff --git a/src/gallium/targets/egl-swrast/swrast_glue.c b/src/gallium/targets/egl-swrast/swrast_glue.c deleted file mode 100644 index defd11c687..0000000000 --- a/src/gallium/targets/egl-swrast/swrast_glue.c +++ /dev/null @@ -1,11 +0,0 @@ -#include "state_tracker/drm_api.h" - -struct drm_api * -drm_api_create() -{ - return NULL; -} - -/* A poor man's --whole-archive for EGL drivers */ -void *_eglMain(void *); -void *_eglWholeArchive = (void *) _eglMain; diff --git a/src/gallium/targets/egl-vmwgfx/Makefile b/src/gallium/targets/egl-vmwgfx/Makefile deleted file mode 100644 index a9f6874b98..0000000000 --- a/src/gallium/targets/egl-vmwgfx/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -TOP = ../../../.. 
-include $(TOP)/configs/current - -EGL_DRIVER_NAME = vmwgfx -EGL_DRIVER_SOURCES = dummy.c -EGL_DRIVER_LIBS = - -EGL_DRIVER_PIPES = \ - $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/svga/libsvga.a - -include ../Makefile.egl diff --git a/src/gallium/targets/egl-vmwgfx/dummy.c b/src/gallium/targets/egl-vmwgfx/dummy.c deleted file mode 100644 index 3181d0ba7e..0000000000 --- a/src/gallium/targets/egl-vmwgfx/dummy.c +++ /dev/null @@ -1,3 +0,0 @@ -/* A poor man's --whole-archive for EGL drivers */ -void *_eglMain(void *); -void *_eglWholeArchive = (void *) _eglMain; diff --git a/src/gallium/targets/egl/Makefile b/src/gallium/targets/egl/Makefile new file mode 100644 index 0000000000..1e4bb4d94c --- /dev/null +++ b/src/gallium/targets/egl/Makefile @@ -0,0 +1,229 @@ +# src/gallium/targets/egl/Makefile +# +# This is the Makefile for EGL Gallium driver package. The package consists of +# +# egl_gallium.so - EGL driver +# pipe_<HW>.so - pipe drivers +# st_<API>.so - client API state trackers +# +# The following variables are examined +# +# EGL_PLATFORMS - platforms to support +# GALLIUM_WINSYS_DIRS - pipe drivers to support +# EGL_CLIENT_APIS - state trackers to support +# + +TOP = ../../../.. 
+include $(TOP)/configs/current + +ST_PREFIX := st_ +PIPE_PREFIX := pipe_ + +common_CPPFLAGS := \ + -I$(TOP)/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/winsys +common_SYS := +common_LIBS := \ + $(TOP)/src/gallium/drivers/identity/libidentity.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a \ + $(GALLIUM_AUXILIARIES) + +# EGL driver +egl_CPPFLAGS := \ + -I$(TOP)/src/gallium/state_trackers/egl \ + -I$(TOP)/src/egl/main \ + -DPIPE_PREFIX=\"$(PIPE_PREFIX)\" -DST_PREFIX=\"$(ST_PREFIX)\" +egl_SYS := -lm $(DLOPEN_LIBS) -L$(TOP)/$(LIB_DIR) -lEGL +egl_LIBS := $(TOP)/src/gallium/state_trackers/egl/libegl.a + +ifneq ($(findstring x11, $(EGL_PLATFORMS)),) +egl_SYS += -lX11 -lXext -lXfixes +egl_LIBS += $(TOP)/src/gallium/winsys/sw/xlib/libws_xlib.a +endif +ifneq ($(findstring kms, $(EGL_PLATFORMS)),) +egl_SYS += -ldrm +endif +ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),) +egl_LIBS += $(TOP)/src/gallium/winsys/sw/fbdev/libfbdev.a +endif + +# EGL_RENDERABLE_TYPE is a compile time attribute +egl_CPPFLAGS += $(API_DEFINES) +ifneq ($(filter $(GL_LIB), $(EGL_CLIENT_APIS)),) +egl_CPPFLAGS += -DFEATURE_GL=1 +endif +ifneq ($(filter $(GLESv1_CM_LIB), $(EGL_CLIENT_APIS)),) +egl_CPPFLAGS += -DFEATURE_ES1=1 +endif +ifneq ($(filter $(GLESv2_LIB), $(EGL_CLIENT_APIS)),) +egl_CPPFLAGS += -DFEATURE_ES2=1 +endif +ifneq ($(filter $(VG_LIB), $(EGL_CLIENT_APIS)),) +egl_CPPFLAGS += -DFEATURE_VG=1 +endif +egl_CPPFLAGS := $(sort $(egl_CPPFLAGS)) + +# i915 pipe driver +i915_CPPFLAGS := +i915_SYS := -ldrm_intel +i915_LIBS := \ + $(TOP)/src/gallium/winsys/i915/drm/libi915drm.a \ + $(TOP)/src/gallium/drivers/i915/libi915.a + +# i965 pipe driver +i965_CPPFLAGS := +i965_SYS := -ldrm_intel +i965_LIBS := \ + $(TOP)/src/gallium/winsys/i965/drm/libi965drm.a \ + $(TOP)/src/gallium/drivers/i965/libi965.a + +# nouveau pipe driver +nouveau_CPPFLAGS := +nouveau_SYS := 
-ldrm_nouveau +nouveau_LIBS := \ + $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \ + $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ + $(TOP)/src/gallium/drivers/nv50/libnv50.a \ + $(TOP)/src/gallium/drivers/nouveau/libnouveau.a + +# radeon pipe driver +radeon_CPPFLAGS := +radeon_SYS := -ldrm -ldrm_radeon +radeon_LIBS := \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ + $(TOP)/src/gallium/drivers/r300/libr300.a + +# vmwgfx pipe driver +vmwgfx_CPPFLAGS := +vmwgfx_SYS := +vmwgfx_LIBS := \ + $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \ + $(TOP)/src/gallium/drivers/svga/libsvga.a + +# swrast (pseudo) pipe driver +swrast_CPPFLAGS := -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE +swrast_SYS := -lm +swrast_LIBS := $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a + +# LLVM +ifeq ($(MESA_LLVM),1) +common_SYS += $(LLVM_LIBS) +swrast_CPPFLAGS += -DGALLIUM_LLVMPIPE +swrast_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +LDFLAGS += $(LLVM_LDFLAGS) +endif + +# OpenGL state tracker +GL_CPPFLAGS := -I$(TOP)/src/mesa $(API_DEFINES) +GL_SYS := -lpthread -lm -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) +GL_LIBS := $(TOP)/src/mesa/libmesagallium.a + +# OpenGL ES 1.x state tracker +GLESv1_CM_CPPFLAGS := -I$(TOP)/src/mesa +GLESv1_CM_SYS := -lpthread -lm -L$(TOP)/$(LIB_DIR) -l$(GLESv1_CM_LIB) +GLESv1_CM_LIBS := $(TOP)/src/mesa/libes1gallium.a + +# OpenGL ES 2.x state tracker +GLESv2_CPPFLAGS := -I$(TOP)/src/mesa +GLESv2_SYS := -lpthread -lm -L$(TOP)/$(LIB_DIR) -l$(GLESv2_LIB) +GLESv2_LIBS := $(TOP)/src/mesa/libes2gallium.a + +# OpenVG state tracker +OpenVG_CPPFLAGS := -I$(TOP)/src/gallium/state_trackers/vega +OpenVG_SYS := -lm -L$(TOP)/$(LIB_DIR) -l$(VG_LIB) +OpenVG_LIBS := $(TOP)/src/gallium/state_trackers/vega/libvega.a + + +OUTPUT_PATH := $(TOP)/$(LIB_DIR)/egl + +# determine the outputs +ifneq ($(findstring i915/drm,$(GALLIUM_WINSYS_DIRS)),) +OUTPUTS += i915 +endif +ifneq ($(findstring i965/drm,$(GALLIUM_WINSYS_DIRS)),) +OUTPUTS += i965 +endif +ifneq 
($(findstring nouveau/drm,$(GALLIUM_WINSYS_DIRS)),) +OUTPUTS += nouveau +endif +ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),) +OUTPUTS += radeon +endif +ifneq ($(findstring svga/drm,$(GALLIUM_WINSYS_DIRS)),) +OUTPUTS += vmwgfx +endif +OUTPUTS += swrast +OUTPUTS := $(addprefix $(PIPE_PREFIX), $(OUTPUTS)) + +# EGL driver and state trackers +OUTPUTS += egl_gallium $(addprefix $(ST_PREFIX), $(EGL_CLIENT_APIS)) + +OUTPUTS := $(addsuffix .so, $(OUTPUTS)) +OUTPUTS := $(addprefix $(OUTPUT_PATH)/, $(OUTPUTS)) + +default: $(OUTPUTS) + +define mklib +$(MKLIB) -o $(notdir $@) -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \ + -install $(OUTPUT_PATH) $(MKLIB_OPTIONS) $< \ + -Wl,--start-group $(common_LIBS) $($(1)_LIBS) -Wl,--end-group \ + $(common_SYS) $($(1)_SYS) +endef + +# EGL driver +$(OUTPUT_PATH)/egl_gallium.so: egl.o $(egl_LIBS) + $(call mklib,egl) + +# pipe drivers +$(OUTPUT_PATH)/$(PIPE_PREFIX)i915.so: pipe_i915.o $(i915_LIBS) + $(call mklib,i915) + +$(OUTPUT_PATH)/$(PIPE_PREFIX)i965.so: pipe_i965.o $(i965_LIBS) + $(call mklib,i965) + +$(OUTPUT_PATH)/$(PIPE_PREFIX)nouveau.so: pipe_nouveau.o $(nouveau_LIBS) + $(call mklib,nouveau) + +$(OUTPUT_PATH)/$(PIPE_PREFIX)radeon.so: pipe_radeon.o $(radeon_LIBS) + $(call mklib,radeon) + +$(OUTPUT_PATH)/$(PIPE_PREFIX)vmwgfx.so: pipe_vmwgfx.o $(vmwgfx_LIBS) + $(call mklib,vmwgfx) + +$(OUTPUT_PATH)/$(PIPE_PREFIX)swrast.so: pipe_swrast.o $(swrast_LIBS) + $(call mklib,swrast) + +# state trackers +$(OUTPUT_PATH)/$(ST_PREFIX)$(GL_LIB).so: st_GL.o $(GL_LIBS) + $(call mklib,GL) + +$(OUTPUT_PATH)/$(ST_PREFIX)$(GLESv1_CM_LIB).so: st_GLESv1_CM.o $(GLESv1_CM_LIBS) + $(call mklib,GLESv1_CM) + +$(OUTPUT_PATH)/$(ST_PREFIX)$(GLESv2_LIB).so: st_GLESv2.o $(GLESv2_LIBS) + $(call mklib,GLESv2) + +$(OUTPUT_PATH)/$(ST_PREFIX)$(VG_LIB).so: st_OpenVG.o $(OpenVG_LIBS) + $(call mklib,OpenVG) + +egl.o: egl.c + $(CC) -c -o $@ $< $(common_CPPFLAGS) $(egl_CPPFLAGS) $(DEFINES) $(CFLAGS) + +pipe_%.o: pipe_%.c + $(CC) -c -o $@ $< $(common_CPPFLAGS) 
$($*_CPPFLAGS) $(DEFINES) $(CFLAGS) + +st_%.o: st_%.c + $(CC) -c -o $@ $< $(common_CPPFLAGS) $($*_CPPFLAGS) $(DEFINES) $(CFLAGS) + +install: $(OUTPUTS) + $(INSTALL) -d $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR) + for out in $(OUTPUTS); do \ + $(MINSTALL) -m 755 "$$out" $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR); \ + done + +clean: + rm -f *.o diff --git a/src/gallium/targets/egl/egl.c b/src/gallium/targets/egl/egl.c new file mode 100644 index 0000000000..d9d89485c3 --- /dev/null +++ b/src/gallium/targets/egl/egl.c @@ -0,0 +1,428 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 + * + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#include "util/u_debug.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_dl.h" +#include "egldriver.h" +#include "egllog.h" + +#include "state_tracker/st_api.h" +#include "state_tracker/drm_driver.h" +#include "common/egl_g3d_loader.h" + +struct egl_g3d_loader egl_g3d_loader; + +static struct st_module { + boolean initialized; + char *name; + struct util_dl_library *lib; + struct st_api *stapi; +} st_modules[ST_API_COUNT]; + +static struct pipe_module { + boolean initialized; + char *name; + struct util_dl_library *lib; + const struct drm_driver_descriptor *drmdd; + struct pipe_screen *(*swrast_create_screen)(struct sw_winsys *); +} pipe_modules[16]; + +static char * +loader_strdup(const char *s) +{ + size_t len = (s) ? strlen(s) : 0; + char *t = MALLOC(len + 1); + if (t) { + memcpy(t, s, len); + t[len] = '\0'; + } + return t; +} + +static EGLBoolean +dlopen_st_module_cb(const char *dir, size_t len, void *callback_data) +{ + struct st_module *stmod = + (struct st_module *) callback_data; + char path[1024]; + int ret; + + if (len) { + ret = util_snprintf(path, sizeof(path), + "%.*s/" ST_PREFIX "%s" UTIL_DL_EXT, len, dir, stmod->name); + } + else { + ret = util_snprintf(path, sizeof(path), + ST_PREFIX "%s" UTIL_DL_EXT, stmod->name); + } + + if (ret > 0 && ret < sizeof(path)) { + stmod->lib = util_dl_open(path); + if (stmod->lib) + _eglLog(_EGL_DEBUG, "loaded %s", path); + } + + return !(stmod->lib); +} + +static boolean +load_st_module(struct st_module *stmod, + const char *name, const char *procname) +{ + struct st_api *(*create_api)(void); + + stmod->name = loader_strdup(name); + if (stmod->name) + _eglSearchPathForEach(dlopen_st_module_cb, (void *) stmod); + else + stmod->lib = util_dl_open(NULL); + + if (stmod->lib) { + create_api = (struct st_api *(*)(void)) + util_dl_get_proc_address(stmod->lib, procname); + if (create_api) + stmod->stapi = create_api(); + + if (!stmod->stapi) { 
+ util_dl_close(stmod->lib); + stmod->lib = NULL; + } + } + + if (!stmod->stapi) { + FREE(stmod->name); + stmod->name = NULL; + } + + return (stmod->stapi != NULL); +} + +static EGLBoolean +dlopen_pipe_module_cb(const char *dir, size_t len, void *callback_data) +{ + struct pipe_module *pmod = (struct pipe_module *) callback_data; + char path[1024]; + int ret; + + if (len) { + ret = util_snprintf(path, sizeof(path), + "%.*s/" PIPE_PREFIX "%s" UTIL_DL_EXT, len, dir, pmod->name); + } + else { + ret = util_snprintf(path, sizeof(path), + PIPE_PREFIX "%s" UTIL_DL_EXT, pmod->name); + } + if (ret > 0 && ret < sizeof(path)) { + pmod->lib = util_dl_open(path); + if (pmod->lib) + _eglLog(_EGL_DEBUG, "loaded %s", path); + } + + return !(pmod->lib); +} + +static boolean +load_pipe_module(struct pipe_module *pmod, const char *name) +{ + pmod->name = loader_strdup(name); + if (!pmod->name) + return FALSE; + + _eglSearchPathForEach(dlopen_pipe_module_cb, (void *) pmod); + if (pmod->lib) { + pmod->drmdd = (const struct drm_driver_descriptor *) + util_dl_get_proc_address(pmod->lib, "driver_descriptor"); + if (pmod->drmdd) { + if (pmod->drmdd->driver_name) { + /* driver name mismatch */ + if (strcmp(pmod->drmdd->driver_name, pmod->name) != 0) + pmod->drmdd = NULL; + } + else { + /* swrast */ + pmod->swrast_create_screen = + (struct pipe_screen *(*)(struct sw_winsys *)) + util_dl_get_proc_address(pmod->lib, "swrast_create_screen"); + if (!pmod->swrast_create_screen) + pmod->drmdd = NULL; + } + } + + if (!pmod->drmdd) { + util_dl_close(pmod->lib); + pmod->lib = NULL; + } + } + + if (!pmod->drmdd) + pmod->name = NULL; + + return (pmod->drmdd != NULL); +} + +static struct st_api * +get_st_api(enum st_api_type api) +{ + struct st_module *stmod = &st_modules[api]; + const char *names[8], *symbol; + int i, count = 0; + + if (stmod->initialized) + return stmod->stapi; + + switch (api) { + case ST_API_OPENGL: + symbol = ST_CREATE_OPENGL_SYMBOL; + names[count++] = "GL"; + break; + case 
ST_API_OPENGL_ES1: + symbol = ST_CREATE_OPENGL_ES1_SYMBOL; + names[count++] = "GLESv1_CM"; + names[count++] = "GL"; + break; + case ST_API_OPENGL_ES2: + symbol = ST_CREATE_OPENGL_ES2_SYMBOL; + names[count++] = "GLESv2"; + names[count++] = "GL"; + break; + case ST_API_OPENVG: + symbol = ST_CREATE_OPENVG_SYMBOL; + names[count++] = "OpenVG"; + break; + default: + symbol = NULL; + assert(!"Unknown API Type\n"); + break; + } + + /* NULL means the process itself */ + names[count++] = NULL; + + for (i = 0; i < count; i++) { + if (load_st_module(stmod, names[i], symbol)) + break; + } + + if (!stmod->stapi) { + EGLint level = (egl_g3d_loader.api_mask & (1 << api)) ? + _EGL_WARNING : _EGL_DEBUG; + _eglLog(level, "unable to load " ST_PREFIX "%s" UTIL_DL_EXT, names[0]); + } + + stmod->initialized = TRUE; + + return stmod->stapi; +} + +static struct st_api * +guess_gl_api(void) +{ + struct st_api *stapi; + int gl_apis[] = { + ST_API_OPENGL, + ST_API_OPENGL_ES1, + ST_API_OPENGL_ES2, + -1 + }; + int i, api = -1; + + /* determine the api from the loaded libraries */ + for (i = 0; gl_apis[i] != -1; i++) { + if (st_modules[gl_apis[i]].stapi) { + api = gl_apis[i]; + break; + } + } + /* determine the api from the linked libraries */ + if (api == -1) { + struct util_dl_library *self = util_dl_open(NULL); + + if (self) { + if (util_dl_get_proc_address(self, "glColor4d")) + api = ST_API_OPENGL; + else if (util_dl_get_proc_address(self, "glColor4x")) + api = ST_API_OPENGL_ES1; + else if (util_dl_get_proc_address(self, "glShaderBinary")) + api = ST_API_OPENGL_ES2; + util_dl_close(self); + } + } + + stapi = (api != -1) ? 
get_st_api(api) : NULL; + if (!stapi) { + for (i = 0; gl_apis[i] != -1; i++) { + api = gl_apis[i]; + stapi = get_st_api(api); + if (stapi) + break; + } + } + + return stapi; +} + +static struct pipe_module * +get_pipe_module(const char *name) +{ + struct pipe_module *pmod = NULL; + int i; + + if (!name) + return NULL; + + for (i = 0; i < Elements(pipe_modules); i++) { + if (!pipe_modules[i].initialized || + strcmp(pipe_modules[i].name, name) == 0) { + pmod = &pipe_modules[i]; + break; + } + } + if (!pmod) + return NULL; + + if (!pmod->initialized) { + load_pipe_module(pmod, name); + pmod->initialized = TRUE; + } + + return pmod; +} + +static struct pipe_screen * +create_drm_screen(const char *name, int fd) +{ + struct pipe_module *pmod = get_pipe_module(name); + return (pmod && pmod->drmdd->create_screen) ? + pmod->drmdd->create_screen(fd) : NULL; +} + +static struct pipe_screen * +create_sw_screen(struct sw_winsys *ws) +{ + struct pipe_module *pmod = get_pipe_module("swrast"); + return (pmod && pmod->swrast_create_screen) ? + pmod->swrast_create_screen(ws) : NULL; +} + +static const struct egl_g3d_loader * +loader_init(void) +{ + uint api_mask = 0x0; + + /* TODO detect at runtime? 
*/ +#if FEATURE_GL + api_mask |= 1 << ST_API_OPENGL; +#endif +#if FEATURE_ES1 + api_mask |= 1 << ST_API_OPENGL_ES1; +#endif +#if FEATURE_ES2 + api_mask |= 1 << ST_API_OPENGL_ES2; +#endif +#if FEATURE_VG + api_mask |= 1 << ST_API_OPENVG; +#endif + + egl_g3d_loader.api_mask = api_mask; + egl_g3d_loader.get_st_api = get_st_api; + egl_g3d_loader.guess_gl_api = guess_gl_api; + egl_g3d_loader.create_drm_screen = create_drm_screen; + egl_g3d_loader.create_sw_screen = create_sw_screen; + + return &egl_g3d_loader; +} + +static void +loader_fini(void) +{ + int i; + + for (i = 0; i < ST_API_COUNT; i++) { + struct st_module *stmod = &st_modules[i]; + + if (stmod->stapi) { + stmod->stapi->destroy(stmod->stapi); + stmod->stapi = NULL; + } + if (stmod->lib) { + util_dl_close(stmod->lib); + stmod->lib = NULL; + } + if (stmod->name) { + FREE(stmod->name); + stmod->name = NULL; + } + stmod->initialized = FALSE; + } + for (i = 0; i < Elements(pipe_modules); i++) { + struct pipe_module *pmod = &pipe_modules[i]; + + if (!pmod->initialized) + break; + + pmod->drmdd = NULL; + pmod->swrast_create_screen = NULL; + if (pmod->lib) { + util_dl_close(pmod->lib); + pmod->lib = NULL; + } + if (pmod->name) { + FREE(pmod->name); + pmod->name = NULL; + } + pmod->initialized = FALSE; + } +} + +static void +egl_g3d_unload(_EGLDriver *drv) +{ + egl_g3d_destroy_driver(drv); + loader_fini(); +} + +_EGLDriver * +_eglMain(const char *args) +{ + const struct egl_g3d_loader *loader; + _EGLDriver *drv; + + loader = loader_init(); + drv = egl_g3d_create_driver(loader); + if (!drv) { + loader_fini(); + return NULL; + } + + drv->Name = "Gallium"; + drv->Unload = egl_g3d_unload; + + return drv; +} diff --git a/src/gallium/targets/egl/pipe_i915.c b/src/gallium/targets/egl/pipe_i915.c new file mode 100644 index 0000000000..758a921b48 --- /dev/null +++ b/src/gallium/targets/egl/pipe_i915.c @@ -0,0 +1,28 @@ + +#include "target-helpers/inline_wrapper_sw_helper.h" +#include "target-helpers/inline_debug_helper.h" 
+#include "state_tracker/drm_driver.h" +#include "i915/drm/i915_drm_public.h" +#include "i915/i915_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct i915_winsys *iws; + struct pipe_screen *screen; + + iws = i915_drm_winsys_create(fd); + if (!iws) + return NULL; + + screen = i915_screen_create(iws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +PUBLIC +DRM_DRIVER_DESCRIPTOR("i915", "i915", create_screen) diff --git a/src/gallium/targets/egl/pipe_i965.c b/src/gallium/targets/egl/pipe_i965.c new file mode 100644 index 0000000000..43bf646e82 --- /dev/null +++ b/src/gallium/targets/egl/pipe_i965.c @@ -0,0 +1,31 @@ + +#include "target-helpers/inline_wrapper_sw_helper.h" +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "i965/drm/i965_drm_public.h" +#include "i965/brw_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct brw_winsys_screen *bws; + struct pipe_screen *screen; + + bws = i965_drm_winsys_screen_create(fd); + if (!bws) + return NULL; + + screen = brw_screen_create(bws); + if (!screen) + return NULL; + + if (debug_get_bool_option("BRW_SOFTPIPE", FALSE)) + screen = sw_screen_wrap(screen); + + screen = debug_screen_wrap(screen); + + return screen; +} + +PUBLIC +DRM_DRIVER_DESCRIPTOR("i965", "i965", create_screen) diff --git a/src/gallium/targets/egl/pipe_nouveau.c b/src/gallium/targets/egl/pipe_nouveau.c new file mode 100644 index 0000000000..0c9081bc71 --- /dev/null +++ b/src/gallium/targets/egl/pipe_nouveau.c @@ -0,0 +1,21 @@ + +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "nouveau/drm/nouveau_drm_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct pipe_screen *screen; + + screen = nouveau_drm_screen_create(fd); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +PUBLIC +DRM_DRIVER_DESCRIPTOR("nouveau", 
"nouveau", create_screen) diff --git a/src/gallium/targets/egl/pipe_radeon.c b/src/gallium/targets/egl/pipe_radeon.c new file mode 100644 index 0000000000..35550bcb26 --- /dev/null +++ b/src/gallium/targets/egl/pipe_radeon.c @@ -0,0 +1,27 @@ + +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "radeon/drm/radeon_drm_public.h" +#include "r300/r300_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct r300_winsys_screen *sws; + struct pipe_screen *screen; + + sws = r300_drm_winsys_screen_create(fd); + if (!sws) + return NULL; + + screen = r300_screen_create(sws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +PUBLIC +DRM_DRIVER_DESCRIPTOR("radeon", "radeon", create_screen) diff --git a/src/gallium/targets/egl/pipe_swrast.c b/src/gallium/targets/egl/pipe_swrast.c new file mode 100644 index 0000000000..b2e3289c5d --- /dev/null +++ b/src/gallium/targets/egl/pipe_swrast.c @@ -0,0 +1,22 @@ + +#include "target-helpers/inline_sw_helper.h" +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" + +PUBLIC struct pipe_screen * +swrast_create_screen(struct sw_winsys *ws); + +PUBLIC +DRM_DRIVER_DESCRIPTOR("swrast", NULL, NULL) + +struct pipe_screen * +swrast_create_screen(struct sw_winsys *ws) +{ + struct pipe_screen *screen; + + screen = sw_screen_create(ws); + if (screen) + screen = debug_screen_wrap(screen); + + return screen; +} diff --git a/src/gallium/targets/egl/pipe_vmwgfx.c b/src/gallium/targets/egl/pipe_vmwgfx.c new file mode 100644 index 0000000000..22a28fa858 --- /dev/null +++ b/src/gallium/targets/egl/pipe_vmwgfx.c @@ -0,0 +1,27 @@ + +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "svga/drm/svga_drm_public.h" +#include "svga/svga_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct svga_winsys_screen *sws; + struct pipe_screen *screen; + + sws = 
svga_drm_winsys_screen_create(fd); + if (!sws) + return NULL; + + screen = svga_screen_create(sws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +PUBLIC +DRM_DRIVER_DESCRIPTOR("vmwgfx", "vmwgfx", create_screen) diff --git a/src/gallium/targets/egl/st_GL.c b/src/gallium/targets/egl/st_GL.c new file mode 100644 index 0000000000..676300b0cc --- /dev/null +++ b/src/gallium/targets/egl/st_GL.c @@ -0,0 +1,25 @@ +#include "state_tracker/st_gl_api.h" + +#if FEATURE_GL +PUBLIC struct st_api * +st_api_create_OpenGL(void) +{ + return st_gl_api_create(); +} +#endif + +#if FEATURE_ES1 +PUBLIC struct st_api * +st_api_create_OpenGL_ES1(void) +{ + return st_gl_api_create_es1(); +} +#endif + +#if FEATURE_ES2 +PUBLIC struct st_api * +st_api_create_OpenGL_ES2(void) +{ + return st_gl_api_create_es2(); +} +#endif diff --git a/src/gallium/targets/egl-apis/api_GL.c b/src/gallium/targets/egl/st_GLESv1_CM.c index 6d172745c0..0c8de8992f 100644 --- a/src/gallium/targets/egl-apis/api_GL.c +++ b/src/gallium/targets/egl/st_GLESv1_CM.c @@ -1,7 +1,7 @@ #include "state_tracker/st_gl_api.h" PUBLIC struct st_api * -st_api_create_OpenGL() +st_api_create_OpenGL_ES1(void) { - return st_gl_api_create(); + return st_gl_api_create_es1(); } diff --git a/src/gallium/targets/egl-apis/api_GLESv1_CM.c b/src/gallium/targets/egl/st_GLESv2.c index 825fdac215..87b3e65e23 100644 --- a/src/gallium/targets/egl-apis/api_GLESv1_CM.c +++ b/src/gallium/targets/egl/st_GLESv2.c @@ -1,7 +1,7 @@ #include "state_tracker/st_gl_api.h" PUBLIC struct st_api * -st_api_create_OpenGL_ES1() +st_api_create_OpenGL_ES2(void) { - return st_gl_api_create(); + return st_gl_api_create_es2(); } diff --git a/src/gallium/targets/egl-apis/api_OpenVG.c b/src/gallium/targets/egl/st_OpenVG.c index f85ebea8a1..e29a237479 100644 --- a/src/gallium/targets/egl-apis/api_OpenVG.c +++ b/src/gallium/targets/egl/st_OpenVG.c @@ -2,7 +2,7 @@ #include "vg_api.h" PUBLIC struct st_api * -st_api_create_OpenVG() 
+st_api_create_OpenVG(void) { return (struct st_api *) vg_api_get(); } diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile index b173ceb994..e745023ba5 100644 --- a/src/gallium/targets/libgl-xlib/Makefile +++ b/src/gallium/targets/libgl-xlib/Makefile @@ -97,7 +97,7 @@ tags: etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h clean: - -rm -f *.o + -rm -f *.o depend include depend diff --git a/src/gallium/targets/xorg-i915/Makefile b/src/gallium/targets/xorg-i915/Makefile index 18f07d6d8f..865240404c 100644 --- a/src/gallium/targets/xorg-i915/Makefile +++ b/src/gallium/targets/xorg-i915/Makefile @@ -4,19 +4,21 @@ include $(TOP)/configs/current LIBNAME = modesetting_drv.so C_SOURCES = \ + intel_target.c \ intel_xorg.c DRIVER_DEFINES = \ - -DHAVE_CONFIG_H + -DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD -DRIVER_LINKS = \ +DRIVER_PIPES = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/i915/drm/libi915drm.a \ $(TOP)/src/gallium/drivers/i915/libi915.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(GALLIUM_AUXILIARIES) \ + $(TOP)/src/gallium/drivers/rbug/librbug.a + +DRIVER_LINKS = \ $(shell pkg-config --libs libdrm libdrm_intel) include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-i915/intel_target.c b/src/gallium/targets/xorg-i915/intel_target.c new file mode 100644 index 0000000000..8c8ef7e02b --- /dev/null +++ b/src/gallium/targets/xorg-i915/intel_target.c @@ -0,0 +1,26 @@ + +#include "state_tracker/drm_driver.h" +#include "target-helpers/inline_debug_helper.h" +#include "i915/drm/i915_drm_public.h" +#include "i915/i915_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct i915_winsys *iws; + struct pipe_screen *screen; + + iws = i915_drm_winsys_create(fd); + if (!iws) + return NULL; + + 
screen = i915_screen_create(iws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("i915", "i915", create_screen) diff --git a/src/gallium/targets/xorg-i965/Makefile b/src/gallium/targets/xorg-i965/Makefile index 2b0c7d6fdf..494dce41c8 100644 --- a/src/gallium/targets/xorg-i965/Makefile +++ b/src/gallium/targets/xorg-i965/Makefile @@ -4,19 +4,23 @@ include $(TOP)/configs/current LIBNAME = i965g_drv.so C_SOURCES = \ + intel_target.c \ intel_xorg.c DRIVER_DEFINES = \ - -DHAVE_CONFIG_H + -DHAVE_CONFIG_H -DGALLIUM_SOFTPIPE \ + -DGALLIUM_RBUG -DGALLIUM_TRACE -DRIVER_LINKS = \ +DRIVER_PIPES = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/i965/drm/libi965drm.a \ $(TOP)/src/gallium/drivers/i965/libi965.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(GALLIUM_AUXILIARIES) \ + $(TOP)/src/gallium/winsys/sw/wrapper/libwsw.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a + +DRIVER_LINKS = \ $(shell pkg-config --libs libdrm libdrm_intel) include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-i965/intel_target.c b/src/gallium/targets/xorg-i965/intel_target.c new file mode 100644 index 0000000000..ce97f82027 --- /dev/null +++ b/src/gallium/targets/xorg-i965/intel_target.c @@ -0,0 +1,30 @@ + +#include "target-helpers/inline_wrapper_sw_helper.h" +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "i965/drm/i965_drm_public.h" +#include "i965/brw_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct brw_winsys_screen *bws; + struct pipe_screen *screen; + + bws = i965_drm_winsys_screen_create(fd); + if (!bws) + return NULL; + + screen = brw_screen_create(bws); + if (!screen) + return NULL; + + if (debug_get_bool_option("BRW_SOFTPIPE", FALSE)) + screen = sw_screen_wrap(screen); + + screen = 
debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("i915", "i965", create_screen) diff --git a/src/gallium/targets/xorg-nouveau/Makefile b/src/gallium/targets/xorg-nouveau/Makefile index f50872362f..2fcd9ffb7d 100644 --- a/src/gallium/targets/xorg-nouveau/Makefile +++ b/src/gallium/targets/xorg-nouveau/Makefile @@ -4,18 +4,22 @@ include $(TOP)/configs/current LIBNAME = modesetting_drv.so C_SOURCES = \ + nouveau_target.c \ nouveau_xorg.c DRIVER_DEFINES = \ - -DHAVE_CONFIG_H + -DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DRIVER_LINKS = \ +DRIVER_PIPES = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \ $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \ - $(GALLIUM_AUXILIARIES) \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a + +DRIVER_LINKS = \ $(shell pkg-config --libs libdrm libdrm_nouveau) include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-nouveau/nouveau_target.c b/src/gallium/targets/xorg-nouveau/nouveau_target.c new file mode 100644 index 0000000000..e725a4d9b7 --- /dev/null +++ b/src/gallium/targets/xorg-nouveau/nouveau_target.c @@ -0,0 +1,20 @@ + +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "nouveau/drm/nouveau_drm_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct pipe_screen *screen; + + screen = nouveau_drm_screen_create(fd); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("nouveau", "nouveau", create_screen) diff --git a/src/gallium/targets/xorg-radeon/Makefile b/src/gallium/targets/xorg-radeon/Makefile index a4951c4bba..d3bc356992 100644 --- a/src/gallium/targets/xorg-radeon/Makefile +++ b/src/gallium/targets/xorg-radeon/Makefile @@ -4,19 +4,21 @@ include $(TOP)/configs/current 
LIBNAME = radeon_drv.so C_SOURCES = \ + radeon_target.c \ radeon_xorg.c DRIVER_DEFINES = \ - -DHAVE_CONFIG_H + -DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD + +DRIVER_PIPES = \ + $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ + $(TOP)/src/gallium/drivers/r300/libr300.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a DRIVER_LINKS = \ - $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ - $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ - $(TOP)/src/gallium/drivers/r300/libr300.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(GALLIUM_AUXILIARIES) \ - $(shell pkg-config --libs libdrm libdrm_intel) + $(shell pkg-config --libs libdrm libdrm_radeon) include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-radeon/radeon_target.c b/src/gallium/targets/xorg-radeon/radeon_target.c new file mode 100644 index 0000000000..5a0a8dc573 --- /dev/null +++ b/src/gallium/targets/xorg-radeon/radeon_target.c @@ -0,0 +1,26 @@ + +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "radeon/drm/radeon_drm_public.h" +#include "r300/r300_public.h" + +static struct pipe_screen * +create_screen(int fd) +{ + struct r300_winsys_screen *sws; + struct pipe_screen *screen; + + sws = r300_drm_winsys_screen_create(fd); + if (!sws) + return NULL; + + screen = r300_screen_create(sws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("radeon", "radeon", create_screen) diff --git a/src/gallium/targets/xorg-vmwgfx/Makefile b/src/gallium/targets/xorg-vmwgfx/Makefile index c0ff999116..04a444f5e9 100644 --- a/src/gallium/targets/xorg-vmwgfx/Makefile +++ 
b/src/gallium/targets/xorg-vmwgfx/Makefile @@ -8,6 +8,7 @@ C_SOURCES = \ vmw_video.c \ vmw_ioctl.c \ vmw_ctrl.c \ + vmw_target.c \ vmw_screen.c DRIVER_INCLUDES = \ @@ -15,16 +16,18 @@ DRIVER_INCLUDES = \ DRIVER_DEFINES = \ -std=gnu99 \ + -DGALLIUM_RBUG \ + -DGALLIUM_TRACE \ -DHAVE_CONFIG_H -DRIVER_LINKS = \ +DRIVER_PIPES = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/svga/libsvga.a \ - $(GALLIUM_AUXILIARIES) \ - $(shell pkg-config --libs --silence-errors libkms) \ - $(shell pkg-config --libs libdrm) + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a + +DRIVER_LINKS = \ + $(shell pkg-config --libs libdrm libkms) include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c b/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c index 1cc8ddaac3..237b308ae3 100644 --- a/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c +++ b/src/gallium/targets/xorg-vmwgfx/vmw_ctrl.c @@ -32,9 +32,6 @@ * allows X clients to communicate with the driver. 
*/ - -#define NEED_REPLIES -#define NEED_EVENTS #include "dixstruct.h" #include "extnsionst.h" #include <X11/X.h> diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_driver.h b/src/gallium/targets/xorg-vmwgfx/vmw_driver.h index d6e3620cd3..8dfc9d2efb 100644 --- a/src/gallium/targets/xorg-vmwgfx/vmw_driver.h +++ b/src/gallium/targets/xorg-vmwgfx/vmw_driver.h @@ -59,6 +59,7 @@ struct vmw_customizer /* vmw_video.c */ void *video_priv; + uint64_t max_fb_size; }; static INLINE struct vmw_customizer * diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_screen.c b/src/gallium/targets/xorg-vmwgfx/vmw_screen.c index 64934d53f6..8173908f55 100644 --- a/src/gallium/targets/xorg-vmwgfx/vmw_screen.c +++ b/src/gallium/targets/xorg-vmwgfx/vmw_screen.c @@ -35,6 +35,7 @@ #include <pipe/p_context.h> #include "cursorstr.h" +#include "../../winsys/svga/drm/vmwgfx_drm.h" void vmw_winsys_screen_set_throttling(struct pipe_screen *screen, uint32_t throttle_us); @@ -111,13 +112,29 @@ vmw_context_no_throttle(CustomizerPtr cust, } static Bool -vmw_screen_init(CustomizerPtr cust, int fd) +vmw_check_fb_size(CustomizerPtr cust, + unsigned long pitch, + unsigned long height) +{ + struct vmw_customizer *vmw = vmw_customizer(cust); + + /** + * 1) Is there a pitch alignment? 
+ * 2) The 1024 byte pad is an arbitrary value to be on + */ + + return ((uint64_t) pitch * height + 1024ULL < vmw->max_fb_size); +} + +static Bool +vmw_pre_init(CustomizerPtr cust, int fd) { struct vmw_customizer *vmw = vmw_customizer(cust); drmVersionPtr ver; vmw->fd = fd; - ver = drmGetVersion(fd); + + ver = drmGetVersion(vmw->fd); if (ver == NULL || (ver->version_major == 1 && ver->version_minor < 1)) { cust->swap_throttling = TRUE; @@ -128,11 +145,34 @@ vmw_screen_init(CustomizerPtr cust, int fd) cust->dirty_throttling = FALSE; cust->winsys_context_throttle = vmw_context_throttle; debug_printf("%s: Enabling kernel throttling.\n", __func__); + + if (ver->version_major > 1 || + (ver->version_major == 1 && ver->version_minor >= 3)) { + struct drm_vmw_getparam_arg arg; + int ret; + + arg.param = DRM_VMW_PARAM_MAX_FB_SIZE; + ret = drmCommandWriteRead(fd, DRM_VMW_GET_PARAM, &arg, + sizeof(arg)); + if (!ret) { + vmw->max_fb_size = arg.value; + cust->winsys_check_fb_size = vmw_check_fb_size; + debug_printf("%s: Enabling fb size check.\n", __func__); + } + } } if (ver) drmFreeVersion(ver); + return TRUE; +} + +static Bool +vmw_screen_init(CustomizerPtr cust) +{ + struct vmw_customizer *vmw = vmw_customizer(cust); + vmw_screen_cursor_init(vmw); vmw_ctrl_ext_init(vmw); @@ -199,6 +239,7 @@ vmw_screen_pre_init(ScrnInfoPtr pScrn, int flags) cust = &vmw->base; + cust->winsys_pre_init = vmw_pre_init; cust->winsys_screen_init = vmw_screen_init; cust->winsys_screen_close = vmw_screen_close; cust->winsys_enter_vt = vmw_screen_enter_vt; diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_target.c b/src/gallium/targets/xorg-vmwgfx/vmw_target.c new file mode 100644 index 0000000000..15089d6db2 --- /dev/null +++ b/src/gallium/targets/xorg-vmwgfx/vmw_target.c @@ -0,0 +1,26 @@ + +#include "target-helpers/inline_debug_helper.h" +#include "state_tracker/drm_driver.h" +#include "svga/drm/svga_drm_public.h" +#include "svga/svga_public.h" + +static struct pipe_screen * +create_screen(int fd) 
+{ + struct svga_winsys_screen *sws; + struct pipe_screen *screen; + + sws = svga_drm_winsys_screen_create(fd); + if (!sws) + return NULL; + + screen = svga_screen_create(sws); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +} + +DRM_DRIVER_DESCRIPTOR("vmwgfx", "vmwgfx", create_screen) diff --git a/src/gallium/tests/graw/SConscript b/src/gallium/tests/graw/SConscript index a40d66d4a1..61121732e3 100644 --- a/src/gallium/tests/graw/SConscript +++ b/src/gallium/tests/graw/SConscript @@ -14,6 +14,7 @@ env.Prepend(LIBS = ['graw'] + gallium) progs = [ 'clear', 'tri', + 'tri-instanced', 'quad-tex', 'fs-test', 'vs-test', diff --git a/src/gallium/tests/graw/tri-instanced.c b/src/gallium/tests/graw/tri-instanced.c new file mode 100644 index 0000000000..30e205f143 --- /dev/null +++ b/src/gallium/tests/graw/tri-instanced.c @@ -0,0 +1,340 @@ +/* + * Test draw instancing. + */ + +#include <stdio.h> +#include <string.h> + +#include "state_tracker/graw.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" + +#include "util/u_debug.h" /* debug_dump_surface_bmp() */ +#include "util/u_memory.h" /* Offset() */ + + +enum pipe_format formats[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_NONE +}; + +static const int WIDTH = 300; +static const int HEIGHT = 300; + +static struct pipe_screen *screen = NULL; +static struct pipe_context *ctx = NULL; +static struct pipe_surface *surf = NULL; +static struct pipe_resource *indexBuffer = NULL; +static void *window = NULL; + +struct vertex { + float position[4]; + float color[4]; +}; + + +static int draw_elements = 0; + + +/** + * Vertex data. + * Each vertex has three attributes: position, color and translation. + * The translation attribute is a per-instance attribute. See + * "instance_divisor" below. 
+ */ +static struct vertex vertices[4] = +{ + { + { 0.0f, -0.3f, 0.0f, 1.0f }, /* pos */ + { 1.0f, 0.0f, 0.0f, 1.0f } /* color */ + }, + { + { -0.2f, 0.3f, 0.0f, 1.0f }, + { 0.0f, 1.0f, 0.0f, 1.0f } + }, + { + { 0.2f, 0.3f, 0.0f, 1.0f }, + { 0.0f, 0.0f, 1.0f, 1.0f } + } +}; + + +#define NUM_INST 5 + +static float inst_data[NUM_INST][4] = +{ + { -0.50f, 0.4f, 0.0f, 0.0f }, + { -0.25f, 0.1f, 0.0f, 0.0f }, + { 0.00f, 0.2f, 0.0f, 0.0f }, + { 0.25f, 0.1f, 0.0f, 0.0f }, + { 0.50f, 0.3f, 0.0f, 0.0f } +}; + + +static ushort indices[3] = { 0, 2, 1 }; + + +static void set_viewport( float x, float y, + float width, float height, + float near, float far) +{ + float z = far; + float half_width = (float)width / 2.0f; + float half_height = (float)height / 2.0f; + float half_depth = ((float)far - (float)near) / 2.0f; + struct pipe_viewport_state vp; + + vp.scale[0] = half_width; + vp.scale[1] = half_height; + vp.scale[2] = half_depth; + vp.scale[3] = 1.0f; + + vp.translate[0] = half_width + x; + vp.translate[1] = half_height + y; + vp.translate[2] = half_depth + z; + vp.translate[3] = 0.0f; + + ctx->set_viewport_state( ctx, &vp ); +} + + +static void set_vertices( void ) +{ + struct pipe_vertex_element ve[3]; + struct pipe_vertex_buffer vbuf[2]; + void *handle; + + memset(ve, 0, sizeof ve); + + /* pos */ + ve[0].src_offset = Offset(struct vertex, position); + ve[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[0].vertex_buffer_index = 0; + + /* color */ + ve[1].src_offset = Offset(struct vertex, color); + ve[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[1].vertex_buffer_index = 0; + + /* per-instance info */ + ve[2].src_offset = 0; + ve[2].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[2].vertex_buffer_index = 1; + ve[2].instance_divisor = 1; + + handle = ctx->create_vertex_elements_state(ctx, 3, ve); + ctx->bind_vertex_elements_state(ctx, handle); + + + /* vertex data */ + vbuf[0].stride = sizeof( struct vertex ); + vbuf[0].max_index = sizeof(vertices) / 
vbuf[0].stride; + vbuf[0].buffer_offset = 0; + vbuf[0].buffer = screen->user_buffer_create(screen, + vertices, + sizeof(vertices), + PIPE_BIND_VERTEX_BUFFER); + + /* instance data */ + vbuf[1].stride = sizeof( inst_data[0] ); + vbuf[1].max_index = sizeof(inst_data) / vbuf[1].stride; + vbuf[1].buffer_offset = 0; + vbuf[1].buffer = screen->user_buffer_create(screen, + inst_data, + sizeof(inst_data), + PIPE_BIND_VERTEX_BUFFER); + + + ctx->set_vertex_buffers(ctx, 2, vbuf); + + /* index data */ + indexBuffer = screen->user_buffer_create(screen, + indices, + sizeof(indices), + PIPE_BIND_VERTEX_BUFFER); + + +} + +static void set_vertex_shader( void ) +{ + void *handle; + const char *text = + "VERT\n" + "DCL IN[0]\n" + "DCL IN[1]\n" + "DCL IN[2]\n" + "DCL OUT[0], POSITION\n" + "DCL OUT[1], COLOR\n" + " 0: MOV OUT[1], IN[1]\n" + " 1: ADD OUT[0], IN[0], IN[2]\n" /* add instance pos to vertex pos */ + " 2: END\n"; + + handle = graw_parse_vertex_shader(ctx, text); + ctx->bind_vs_state(ctx, handle); +} + +static void set_fragment_shader( void ) +{ + void *handle; + const char *text = + "FRAG\n" + "DCL IN[0], COLOR, LINEAR\n" + "DCL OUT[0], COLOR\n" + " 0: MOV OUT[0], IN[0]\n" + " 1: END\n"; + + handle = graw_parse_fragment_shader(ctx, text); + ctx->bind_fs_state(ctx, handle); +} + + +static void draw( void ) +{ + float clear_color[4] = {1,0,1,1}; + + ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); + + /* draw NUM_INST triangles */ + if (draw_elements) + ctx->draw_elements_instanced(ctx, indexBuffer, 2, + 0, /* indexBias */ + PIPE_PRIM_TRIANGLES, + 0, 3, /* start, count */ + 0, NUM_INST); /* startInst, instCount */ + else + ctx->draw_arrays_instanced(ctx, PIPE_PRIM_TRIANGLES, 0, 3, 0, NUM_INST); + + ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); + +#if 0 + /* At the moment, libgraw leaks out/makes available some of the + * symbols from gallium/auxiliary, including these debug helpers. 
+ * Will eventually want to bless some of these paths, and lock the + * others down so they aren't accessible from test programs. + * + * This currently just happens to work on debug builds - a release + * build will probably fail to link here: + */ + debug_dump_surface_bmp(ctx, "result.bmp", surf); +#endif + + screen->flush_frontbuffer(screen, surf, window); +} + + +static void init( void ) +{ + struct pipe_framebuffer_state fb; + struct pipe_resource *tex, templat; + int i; + + /* It's hard to say whether window or screen should be created + * first. Different environments would prefer one or the other. + * + * Also, no easy way of querying supported formats if the screen + * cannot be created first. + */ + for (i = 0; + window == NULL && formats[i] != PIPE_FORMAT_NONE; + i++) { + + screen = graw_create_window_and_screen(0,0,300,300, + formats[i], + &window); + } + + ctx = screen->context_create(screen, NULL); + if (ctx == NULL) + exit(3); + + templat.target = PIPE_TEXTURE_2D; + templat.format = formats[i]; + templat.width0 = WIDTH; + templat.height0 = HEIGHT; + templat.depth0 = 1; + templat.last_level = 0; + templat.nr_samples = 1; + templat.bind = (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET); + + tex = screen->resource_create(screen, + &templat); + if (tex == NULL) + exit(4); + + surf = screen->get_tex_surface(screen, tex, 0, 0, 0, + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET); + if (surf == NULL) + exit(5); + + memset(&fb, 0, sizeof fb); + fb.nr_cbufs = 1; + fb.width = WIDTH; + fb.height = HEIGHT; + fb.cbufs[0] = surf; + + ctx->set_framebuffer_state(ctx, &fb); + + { + struct pipe_blend_state blend; + void *handle; + memset(&blend, 0, sizeof blend); + blend.rt[0].colormask = PIPE_MASK_RGBA; + handle = ctx->create_blend_state(ctx, &blend); + ctx->bind_blend_state(ctx, handle); + } + + { + struct pipe_depth_stencil_alpha_state depthstencil; + void *handle; + memset(&depthstencil, 0, sizeof depthstencil); + handle = 
ctx->create_depth_stencil_alpha_state(ctx, &depthstencil); + ctx->bind_depth_stencil_alpha_state(ctx, handle); + } + + { + struct pipe_rasterizer_state rasterizer; + void *handle; + memset(&rasterizer, 0, sizeof rasterizer); + rasterizer.cull_face = PIPE_FACE_NONE; + rasterizer.gl_rasterization_rules = 1; + handle = ctx->create_rasterizer_state(ctx, &rasterizer); + ctx->bind_rasterizer_state(ctx, handle); + } + + set_viewport(0, 0, WIDTH, HEIGHT, 30, 1000); + set_vertices(); + set_vertex_shader(); + set_fragment_shader(); +} + + +static void options(int argc, char *argv[]) +{ + int i; + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "-e") == 0) + draw_elements = 1; + } + if (draw_elements) + printf("Using pipe_context::draw_elements_instanced()\n"); + else + printf("Using pipe_context::draw_arrays_instanced()\n"); +} + + +int main( int argc, char *argv[] ) +{ + options(argc, argv); + + init(); + + graw_set_display_func( draw ); + graw_main_loop(); + return 0; +} diff --git a/src/gallium/tests/trivial/Makefile b/src/gallium/tests/trivial/Makefile index bfcbdd9712..2ed63419c7 100644 --- a/src/gallium/tests/trivial/Makefile +++ b/src/gallium/tests/trivial/Makefile @@ -12,7 +12,9 @@ INCLUDES = \ $(PROG_INCLUDES) LINKS = \ + $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ $(TOP)/src/gallium/winsys/sw/null/libws_null.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(GALLIUM_AUXILIARIES) \ @@ -26,6 +28,9 @@ OBJECTS = $(SOURCES:.c=.o) PROGS = $(OBJECTS:.o=) +PROG_DEFINES = \ + -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD + ##### TARGETS ##### default: $(PROGS) diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c index 93f24876cb..cf88edcdc5 100644 --- a/src/gallium/tests/trivial/quad-tex.c +++ b/src/gallium/tests/trivial/quad-tex.c @@ -58,18 +58,13 @@ /* util_make_[fragment|vertex]_passthrough_shader */ 
#include "util/u_simple_shaders.h" -/* softpipe software driver */ -#include "softpipe/sp_public.h" - +/* sw_screen_create: to get a software pipe driver */ +#include "target-helpers/inline_sw_helper.h" +/* debug_screen_wrap: to wrap with debug pipe drivers */ +#include "target-helpers/inline_debug_helper.h" /* null software winsys */ #include "sw/null/null_sw_winsys.h" -/* traceing support see src/gallium/drivers/trace/README for more info. */ -#if USE_TRACE -#include "trace/tr_screen.h" -#include "trace/tr_context.h" -#endif - struct program { struct pipe_screen *screen; @@ -98,10 +93,11 @@ struct program static void init_prog(struct program *p) { /* create the software rasterizer */ - p->screen = softpipe_create_screen(null_sw_create()); -#if USE_TRACE - p->screen = trace_screen_create(p->screen); -#endif + p->screen = sw_screen_create(null_sw_create()); + /* wrap the screen with any debugger */ + p->screen = debug_screen_wrap(p->screen); + + /* create the pipe driver context and cso context */ p->pipe = p->screen->context_create(p->screen, NULL); p->cso = cso_create_context(p->pipe); @@ -271,7 +267,7 @@ static void init_prog(struct program *p) } /* fragment shader */ - p->fs = util_make_fragment_tex_shader(p->pipe, TGSI_TEXTURE_2D); + p->fs = util_make_fragment_tex_shader(p->pipe, TGSI_TEXTURE_2D, TGSI_INTERPOLATE_LINEAR); } static void close_prog(struct program *p) diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c index 7823c27727..667a27b28a 100644 --- a/src/gallium/tests/trivial/tri.c +++ b/src/gallium/tests/trivial/tri.c @@ -56,18 +56,13 @@ /* util_make_[fragment|vertex]_passthrough_shader */ #include "util/u_simple_shaders.h" -/* softpipe software driver */ -#include "softpipe/sp_public.h" - +/* sw_screen_create: to get a software pipe driver */ +#include "target-helpers/inline_sw_helper.h" +/* debug_screen_wrap: to wrap with debug pipe drivers */ +#include "target-helpers/inline_debug_helper.h" /* null software winsys */ 
#include "sw/null/null_sw_winsys.h" -/* traceing support see src/gallium/drivers/trace/README for more info. */ -#if USE_TRACE -#include "trace/tr_screen.h" -#include "trace/tr_context.h" -#endif - struct program { struct pipe_screen *screen; @@ -93,10 +88,11 @@ struct program static void init_prog(struct program *p) { /* create the software rasterizer */ - p->screen = softpipe_create_screen(null_sw_create()); -#if USE_TRACE - p->screen = trace_screen_create(p->screen); -#endif + p->screen = sw_screen_create(null_sw_create()); + /* wrap the screen with any debugger */ + p->screen = debug_screen_wrap(p->screen); + + /* create the pipe driver context and cso context */ p->pipe = p->screen->context_create(p->screen, NULL); p->cso = cso_create_context(p->pipe); diff --git a/src/gallium/winsys/SConscript b/src/gallium/winsys/SConscript index 907ac90bf0..65b12287df 100644 --- a/src/gallium/winsys/SConscript +++ b/src/gallium/winsys/SConscript @@ -17,7 +17,6 @@ if 'gdi' in env['winsys']: if env['dri']: SConscript([ - 'sw/drm/SConscript', 'sw/dri/SConscript', ]) diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c index 102f59dc54..e50e7801c0 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c @@ -3,6 +3,7 @@ #include "util/u_memory.h" #include "i915_drm.h" +#include "i915/i915_debug.h" #define BATCH_RESERVED 16 @@ -151,7 +152,6 @@ i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch, struct i915_drm_batchbuffer *batch = i915_drm_batchbuffer(ibatch); unsigned used = 0; int ret = 0; - int i; assert(i915_winsys_batchbuffer_space(ibatch) >= 0); @@ -186,20 +186,28 @@ i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch, #endif /* Do the sending to HW */ - ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0); - assert(ret == 0); - - if (i915_drm_winsys(ibatch->iws)->dump_cmd) { - unsigned *ptr; - 
drm_intel_bo_map(batch->bo, FALSE); - ptr = (unsigned*)batch->bo->virtual; + if (i915_drm_winsys(ibatch->iws)->send_cmd) + ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0); + else + ret = 0; - debug_printf("%s:\n", __func__); - for (i = 0; i < used / 4; i++, ptr++) { - debug_printf("\t%08x: %08x\n", i*4, *ptr); - } - - drm_intel_bo_unmap(batch->bo); + if (ret != 0 || i915_drm_winsys(ibatch->iws)->dump_cmd) { +#ifdef INTEL_MAP_BATCHBUFFER +#ifdef INTEL_MAP_GTT + drm_intel_gem_bo_map_gtt(batch->bo); +#else + drm_intel_bo_map(batch->bo, 0); +#endif +#endif + i915_dump_batchbuffer(ibatch); + assert(ret == 0); +#ifdef INTEL_MAP_BATCHBUFFER +#ifdef INTEL_MAP_GTT + drm_intel_gem_bo_unmap_gtt(batch->bo); +#else + drm_intel_bo_unmap(batch->bo); +#endif +#endif } else { #ifdef INTEL_RUN_SYNC drm_intel_bo_map(batch->bo, FALSE); diff --git a/src/gallium/winsys/i915/drm/i915_drm_buffer.c b/src/gallium/winsys/i915/drm/i915_drm_buffer.c index 3bd85026b2..6b06e7ae99 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_buffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_buffer.c @@ -1,5 +1,5 @@ -#include "state_tracker/drm_api.h" +#include "state_tracker/drm_driver.h" #include "i915_drm_winsys.h" #include "util/u_memory.h" diff --git a/src/gallium/winsys/i915/drm/i915_drm_public.h b/src/gallium/winsys/i915/drm/i915_drm_public.h new file mode 100644 index 0000000000..b828d8d670 --- /dev/null +++ b/src/gallium/winsys/i915/drm/i915_drm_public.h @@ -0,0 +1,9 @@ + +#ifndef I915_DRM_PUBLIC_H +#define I915_DRM_PUBLIC_H + +struct i915_winsys; + +struct i915_winsys * i915_drm_winsys_create(int drmFD); + +#endif diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.c b/src/gallium/winsys/i915/drm/i915_drm_winsys.c index 5a6b45e6c9..179a84a704 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_winsys.c +++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.c @@ -1,14 +1,11 @@ #include <stdio.h> -#include "state_tracker/drm_api.h" +#include "state_tracker/drm_driver.h" #include 
"i915_drm_winsys.h" +#include "i915_drm_public.h" #include "util/u_memory.h" -#include "i915/i915_context.h" -#include "i915/i915_screen.h" - -#include "trace/tr_drm.h" /* * Helper functions @@ -48,8 +45,8 @@ i915_drm_winsys_destroy(struct i915_winsys *iws) FREE(idws); } -static struct pipe_screen * -i915_drm_create_screen(struct drm_api *api, int drmFD) +struct i915_winsys * +i915_drm_winsys_create(int drmFD) { struct i915_drm_winsys *idws; unsigned int deviceID; @@ -73,21 +70,8 @@ i915_drm_create_screen(struct drm_api *api, int drmFD) idws->pools.gem = drm_intel_bufmgr_gem_init(idws->fd, idws->max_batch_size); drm_intel_bufmgr_gem_enable_reuse(idws->pools.gem); - idws->dump_cmd = debug_get_bool_option("INTEL_DUMP_CMD", FALSE); - - return i915_screen_create(&idws->base); -} + idws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE); + idws->send_cmd = !debug_get_bool_option("I915_NO_HW", FALSE); -static struct drm_api i915_drm_api = -{ - .name = "i915", - .driver_name = "i915", - .create_screen = i915_drm_create_screen, - .destroy = NULL, -}; - -struct drm_api * -drm_api_create() -{ - return trace_drm_create(&i915_drm_api); + return &idws->base; } diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.h b/src/gallium/winsys/i915/drm/i915_drm_winsys.h index 99667bde4e..88a71f2424 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_winsys.h +++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.h @@ -18,6 +18,7 @@ struct i915_drm_winsys struct i915_winsys base; boolean dump_cmd; + boolean send_cmd; int fd; /**< Drm file discriptor */ @@ -34,7 +35,6 @@ i915_drm_winsys(struct i915_winsys *iws) return (struct i915_drm_winsys *)iws; } -struct i915_drm_winsys * i915_drm_winsys_create(int fd, unsigned pci_id); struct pipe_fence_handle * i915_drm_fence_create(drm_intel_bo *bo); void i915_drm_winsys_init_batchbuffer_functions(struct i915_drm_winsys *idws); diff --git a/src/gallium/winsys/i915/sw/i915_sw_public.h b/src/gallium/winsys/i915/sw/i915_sw_public.h new file mode 
100644 index 0000000000..e951a32917 --- /dev/null +++ b/src/gallium/winsys/i915/sw/i915_sw_public.h @@ -0,0 +1,9 @@ + +#ifndef I915_SW_PUBLIC_H +#define I915_SW_PUBLIC_H + +struct i915_winsys; + +struct i915_winsys * i915_sw_winsys_create(void); + +#endif diff --git a/src/gallium/winsys/i915/sw/i915_sw_winsys.c b/src/gallium/winsys/i915/sw/i915_sw_winsys.c index bb1c107c05..058ddc44aa 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_winsys.c +++ b/src/gallium/winsys/i915/sw/i915_sw_winsys.c @@ -1,5 +1,6 @@ #include "i915_sw_winsys.h" +#include "i915_sw_public.h" #include "util/u_memory.h" @@ -28,8 +29,8 @@ i915_sw_destroy(struct i915_winsys *iws) */ -struct pipe_screen * -i915_sw_create_screen() +struct i915_winsys * +i915_sw_winsys_create() { struct i915_sw_winsys *isws; unsigned int deviceID; @@ -51,6 +52,5 @@ i915_sw_create_screen() isws->dump_cmd = debug_get_bool_option("INTEL_DUMP_CMD", FALSE); - /* XXX so this will leak winsys:es */ - return i915_screen_create(&isws->base); + return &isws->base; } diff --git a/src/gallium/winsys/i915/sw/i915_sw_winsys.h b/src/gallium/winsys/i915/sw/i915_sw_winsys.h index b8aa9ef4ac..b7b43669f3 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_winsys.h +++ b/src/gallium/winsys/i915/sw/i915_sw_winsys.h @@ -25,7 +25,6 @@ i915_sw_winsys(struct i915_winsys *iws) return (struct i915_sw_winsys *)iws; } -struct pipe_screen* i915_sw_create_screen(void); struct pipe_fence_handle * i915_sw_fence_create(void); void i915_sw_winsys_init_batchbuffer_functions(struct i915_sw_winsys *idws); diff --git a/src/gallium/winsys/i965/drm/Makefile b/src/gallium/winsys/i965/drm/Makefile index bbb71e25d8..46f98d7a24 100644 --- a/src/gallium/winsys/i965/drm/Makefile +++ b/src/gallium/winsys/i965/drm/Makefile @@ -5,7 +5,7 @@ LIBNAME = i965drm C_SOURCES = \ i965_drm_buffer.c \ - i965_drm_api.c + i965_drm_winsys.c LIBRARY_INCLUDES = $(shell pkg-config libdrm --cflags-only-I) diff --git a/src/gallium/winsys/i965/drm/SConscript 
b/src/gallium/winsys/i965/drm/SConscript index abf9aac5c0..785be449f7 100644 --- a/src/gallium/winsys/i965/drm/SConscript +++ b/src/gallium/winsys/i965/drm/SConscript @@ -5,8 +5,8 @@ env = env.Clone() env.ParseConfig('pkg-config --cflags libdrm') i965drm_sources = [ - 'i965_drm_api.c', 'i965_drm_buffer.c', + 'i965_drm_winsys.c', ] i965drm = env.ConvenienceLibrary( diff --git a/src/gallium/winsys/i965/drm/i965_drm_buffer.c b/src/gallium/winsys/i965/drm/i965_drm_buffer.c index fb5e50ce81..ed62db60bb 100644 --- a/src/gallium/winsys/i965/drm/i965_drm_buffer.c +++ b/src/gallium/winsys/i965/drm/i965_drm_buffer.c @@ -1,5 +1,5 @@ -#include "state_tracker/drm_api.h" +#include "state_tracker/drm_driver.h" #include "i965_drm_winsys.h" #include "util/u_memory.h" #include "util/u_inlines.h" @@ -322,7 +322,7 @@ i965_libdrm_bo_subdata(struct brw_winsys_buffer *buffer, nr_reloc); if (BRW_DUMP) - brw_dump_data( idws->id, + brw_dump_data( idws->base.pci_id, data_type, buf->bo->offset + offset, data, size ); @@ -460,7 +460,7 @@ i965_libdrm_bo_flush_range(struct brw_winsys_buffer *buffer, offset, length); if (BRW_DUMP) - brw_dump_data( idws->id, + brw_dump_data( idws->base.pci_id, buf->data_type, buf->bo->offset + offset, (char*)buf->bo->virtual + offset, diff --git a/src/gallium/winsys/i965/drm/i965_drm_public.h b/src/gallium/winsys/i965/drm/i965_drm_public.h new file mode 100644 index 0000000000..2913b07974 --- /dev/null +++ b/src/gallium/winsys/i965/drm/i965_drm_public.h @@ -0,0 +1,9 @@ + +#ifndef I965_DRM_PUBLIC_H +#define I965_DRM_PUBLIC_H + +struct brw_winsys_screen; + +struct brw_winsys_screen * i965_drm_winsys_screen_create(int drmFD); + +#endif diff --git a/src/gallium/winsys/i965/drm/i965_drm_api.c b/src/gallium/winsys/i965/drm/i965_drm_winsys.c index 87ee8070b1..b08e622db9 100644 --- a/src/gallium/winsys/i965/drm/i965_drm_api.c +++ b/src/gallium/winsys/i965/drm/i965_drm_winsys.c @@ -1,17 +1,11 @@ #include <stdio.h> -#include "state_tracker/drm_api.h" +#include 
"state_tracker/drm_driver.h" #include "i965_drm_winsys.h" +#include "i965_drm_public.h" #include "util/u_memory.h" -#include "i965/brw_context.h" /* XXX: shouldn't be doing this */ -#include "i965/brw_screen.h" /* XXX: shouldn't be doing this */ - -#include "trace/tr_drm.h" - -#include "../../sw/drm/sw_drm_api.h" - /* * Helper functions */ @@ -52,11 +46,10 @@ i965_libdrm_winsys_destroy(struct brw_winsys_screen *iws) FREE(idws); } -static struct pipe_screen * -i965_libdrm_create_screen(struct drm_api *api, int drmFD) +struct brw_winsys_screen * +i965_drm_winsys_screen_create(int drmFD) { struct i965_libdrm_winsys *idws; - unsigned int deviceID; debug_printf("%s\n", __FUNCTION__); @@ -64,12 +57,11 @@ i965_libdrm_create_screen(struct drm_api *api, int drmFD) if (!idws) return NULL; - i965_libdrm_get_device_id(&deviceID); + i965_libdrm_get_device_id(&idws->base.pci_id); i965_libdrm_winsys_init_buffer_functions(idws); idws->fd = drmFD; - idws->id = deviceID; idws->base.destroy = i965_libdrm_winsys_destroy; @@ -78,27 +70,5 @@ i965_libdrm_create_screen(struct drm_api *api, int drmFD) idws->send_cmd = !debug_get_bool_option("BRW_NO_HW", FALSE); - return brw_create_screen(&idws->base, deviceID); -} - -struct drm_api i965_libdrm_api = -{ - .name = "i965", - .driver_name = "i915", - .create_screen = i965_libdrm_create_screen, - .destroy = NULL, -}; - -struct drm_api * -drm_api_create() -{ - struct drm_api *api = NULL; - - if (api == NULL && debug_get_bool_option("BRW_SOFTPIPE", FALSE)) - api = sw_drm_api_create(&i965_libdrm_api); - - if (api == NULL) - api = &i965_libdrm_api; - - return trace_drm_create(api); + return &idws->base; } diff --git a/src/gallium/winsys/i965/drm/i965_drm_winsys.h b/src/gallium/winsys/i965/drm/i965_drm_winsys.h index c6a7d4a8c5..82dbe61cc5 100644 --- a/src/gallium/winsys/i965/drm/i965_drm_winsys.h +++ b/src/gallium/winsys/i965/drm/i965_drm_winsys.h @@ -22,8 +22,6 @@ struct i965_libdrm_winsys boolean send_cmd; int fd; /**< Drm file discriptor */ - - 
unsigned id; }; static INLINE struct i965_libdrm_winsys * @@ -32,8 +30,6 @@ i965_libdrm_winsys(struct brw_winsys_screen *iws) return (struct i965_libdrm_winsys *)iws; } -struct i965_libdrm_winsys *i965_libdrm_winsys_create(int fd, unsigned pci_id); - void i965_libdrm_winsys_init_buffer_functions(struct i965_libdrm_winsys *idws); diff --git a/src/gallium/winsys/i965/xlib/xlib_i965.c b/src/gallium/winsys/i965/xlib/xlib_i965.c index 063e9f600b..baadd6e89c 100644 --- a/src/gallium/winsys/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/i965/xlib/xlib_i965.c @@ -395,6 +395,7 @@ xlib_create_brw_winsys_screen( void ) return NULL; ws->used = 0; + ws->base.pci_id = PCI_CHIP_GM45_GM; ws->base.destroy = xlib_brw_winsys_destroy; ws->base.bo_alloc = xlib_brw_bo_alloc; @@ -452,7 +453,7 @@ xlib_create_i965_screen( void ) if (winsys == NULL) return NULL; - screen = brw_create_screen(winsys, PCI_CHIP_GM45_GM); + screen = brw_create_screen(winsys); if (screen == NULL) goto fail; diff --git a/src/gallium/winsys/nouveau/drm/Makefile b/src/gallium/winsys/nouveau/drm/Makefile index 71029858f7..74a3c6a0d7 100644 --- a/src/gallium/winsys/nouveau/drm/Makefile +++ b/src/gallium/winsys/nouveau/drm/Makefile @@ -3,7 +3,7 @@ include $(TOP)/configs/current LIBNAME = nouveaudrm -C_SOURCES = nouveau_drm_api.c +C_SOURCES = nouveau_drm_winsys.c LIBRARY_INCLUDES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-I) LIBRARY_DEFINES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-other) diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_public.h b/src/gallium/winsys/nouveau/drm/nouveau_drm_public.h new file mode 100644 index 0000000000..67b7c4429d --- /dev/null +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_public.h @@ -0,0 +1,9 @@ + +#ifndef __NOUVEAU_DRM_PUBLIC_H__ +#define __NOUVEAU_DRM_PUBLIC_H__ + +struct pipe_screen; + +struct pipe_screen *nouveau_drm_screen_create(int drmFD); + +#endif diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_api.c 
b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c index 2f24431462..660dbd0c33 100644 --- a/src/gallium/winsys/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c @@ -4,7 +4,8 @@ #include "util/u_memory.h" #include "util/u_inlines.h" -#include "nouveau_drm_api.h" +#include "nouveau_drm_winsys.h" +#include "nouveau_drm_public.h" #include "nouveau_drmif.h" #include "nouveau_channel.h" @@ -22,8 +23,8 @@ nouveau_drm_destroy_winsys(struct pipe_winsys *s) FREE(nv_winsys); } -static struct pipe_screen * -nouveau_drm_create_screen(struct drm_api *api, int fd) +struct pipe_screen * +nouveau_drm_screen_create(int fd) { struct nouveau_winsys *nvws; struct pipe_winsys *ws; @@ -70,16 +71,3 @@ nouveau_drm_create_screen(struct drm_api *api, int fd) return nvws->pscreen; } - -static struct drm_api nouveau_drm_api_hooks = { - .name = "nouveau", - .driver_name = "nouveau", - .create_screen = nouveau_drm_create_screen, - .destroy = NULL, -}; - -struct drm_api * -drm_api_create() { - return &nouveau_drm_api_hooks; -} - diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_api.h b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.h index ba6305c17e..9e529ecad3 100644 --- a/src/gallium/winsys/nouveau/drm/nouveau_drm_api.h +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.h @@ -1,7 +1,5 @@ -#ifndef __NOUVEAU_DRM_API_H__ -#define __NOUVEAU_DRM_API_H__ - -#include "state_tracker/drm_api.h" +#ifndef __NOUVEAU_DRM_WINSYS_H__ +#define __NOUVEAU_DRM_WINSYS_H__ #include "util/u_simple_screen.h" diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 803049d58c..3d87a994c1 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -26,21 +26,18 @@ * Joakim Sindholt <opensource@zhasha.com> */ #include <sys/ioctl.h> -#include "trace/tr_drm.h" #include "util/u_inlines.h" #include "util/u_debug.h" -#include "state_tracker/drm_api.h" #include "radeon_priv.h" #include 
"r600_screen.h" #include "r600_texture.h" +#include "r600_public.h" +#include "r600_drm_public.h" +#include "state_tracker/drm_driver.h" -static struct pipe_screen *r600_drm_create_screen(struct drm_api* api, int drmfd) +struct radeon *r600_drm_winsys_create(int drmfd) { - struct radeon *rw = radeon_new(drmfd, 0); - - if (rw == NULL) - return NULL; - return radeon_create_screen(rw); + return radeon_new(drmfd, 0); } boolean r600_buffer_get_handle(struct radeon *rw, @@ -66,24 +63,3 @@ boolean r600_buffer_get_handle(struct radeon *rw, } return TRUE; } - -static void r600_drm_api_destroy(struct drm_api *api) -{ - return; -} - -struct drm_api drm_api_hooks = { - .name = "r600", - .driver_name = "r600", - .create_screen = r600_drm_create_screen, - .destroy = r600_drm_api_destroy, -}; - -struct drm_api* drm_api_create() -{ -#ifdef DEBUG - return trace_drm_create(&drm_api_hooks); -#else - return &drm_api_hooks; -#endif -} diff --git a/src/gallium/winsys/r600/drm/r600_drm_public.h b/src/gallium/winsys/r600/drm/r600_drm_public.h new file mode 100644 index 0000000000..84f2dce437 --- /dev/null +++ b/src/gallium/winsys/r600/drm/r600_drm_public.h @@ -0,0 +1,9 @@ + +#ifndef R600_DRM_PUBLIC_H +#define R600_DRM_PUBLIC_H + +struct radeon; + +struct radeon *r600_drm_winsys_create(int drmFD); + +#endif diff --git a/src/gallium/winsys/r600/drm/radeon.c b/src/gallium/winsys/r600/drm/radeon.c index f2113c5807..7e65669806 100644 --- a/src/gallium/winsys/r600/drm/radeon.c +++ b/src/gallium/winsys/r600/drm/radeon.c @@ -25,6 +25,11 @@ #include "radeon_drm.h" #include "r600d.h" +enum radeon_family radeon_get_family(struct radeon *radeon) +{ + return radeon->family; +} + static int radeon_get_device(struct radeon *radeon) { struct drm_radeon_info info; diff --git a/src/gallium/winsys/radeon/drm/radeon_buffer.h b/src/gallium/winsys/radeon/drm/radeon_buffer.h index 73cb6a579b..a8137d85e8 100644 --- a/src/gallium/winsys/radeon/drm/radeon_buffer.h +++ 
b/src/gallium/winsys/radeon/drm/radeon_buffer.h @@ -43,10 +43,9 @@ #include "radeon_winsys.h" -#define RADEON_USAGE_DOMAIN_GTT (1 << 29) -#define RADEON_USAGE_DOMAIN_VRAM (1 << 30) - -#define RADEON_MAX_BOS 24 +#define RADEON_PB_USAGE_VERTEX (1 << 28) +#define RADEON_PB_USAGE_DOMAIN_GTT (1 << 29) +#define RADEON_PB_USAGE_DOMAIN_VRAM (1 << 30) static INLINE struct pb_buffer * radeon_pb_buffer(struct r300_winsys_buffer *buffer) @@ -63,24 +62,26 @@ radeon_libdrm_winsys_buffer(struct pb_buffer *buffer) struct pb_manager * radeon_drm_bufmgr_create(struct radeon_libdrm_winsys *rws); -boolean radeon_drm_bufmgr_add_buffer(struct pb_buffer *_buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); - +void radeon_drm_bufmgr_add_buffer(struct r300_winsys_cs *cs, + struct r300_winsys_buffer *buf, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd); -void radeon_drm_bufmgr_write_reloc(struct pb_buffer *_buf, +void radeon_drm_bufmgr_write_reloc(struct r300_winsys_cs *cs, + struct r300_winsys_buffer *buf, enum r300_buffer_domain rd, - enum r300_buffer_domain wd, - uint32_t flags); + enum r300_buffer_domain wd); struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr, uint32_t handle); -void radeon_drm_bufmgr_get_tiling(struct pb_buffer *_buf, +void radeon_drm_bufmgr_get_tiling(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, enum r300_buffer_tiling *microtiled, enum r300_buffer_tiling *macrotiled); -void radeon_drm_bufmgr_set_tiling(struct pb_buffer *_buf, +void radeon_drm_bufmgr_set_tiling(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, enum r300_buffer_tiling microtiled, enum r300_buffer_tiling macrotiled, uint32_t pitch); @@ -90,9 +91,19 @@ void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr); boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, struct winsys_handle *whandle); -boolean radeon_drm_bufmgr_is_buffer_referenced(struct pb_buffer *_buf, +boolean 
radeon_drm_bufmgr_is_buffer_referenced(struct r300_winsys_cs *cs, + struct r300_winsys_buffer *buf, enum r300_reference_domain domain); -void radeon_drm_bufmgr_wait(struct pb_buffer *_buf); +void radeon_drm_bufmgr_wait(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf); + +void *radeon_drm_buffer_map(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + struct r300_winsys_cs *cs, + enum pipe_transfer_usage usage); + +void radeon_drm_buffer_unmap(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf); #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c index 59f1b10230..e9a276362f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm.c @@ -32,9 +32,9 @@ #include "radeon_drm.h" #include "radeon_r300.h" #include "radeon_buffer.h" +#include "radeon_drm_public.h" #include "r300_winsys.h" -#include "trace/tr_drm.h" #include "util/u_memory.h" @@ -153,7 +153,7 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) } /* Create a pipe_screen. 
*/ -struct pipe_screen* radeon_create_screen(struct drm_api* api, int drmFB) +struct r300_winsys_screen* r300_drm_winsys_screen_create(int drmFB) { struct radeon_libdrm_winsys* rws; boolean ret; @@ -171,22 +171,10 @@ struct pipe_screen* radeon_create_screen(struct drm_api* api, int drmFB) ret = radeon_setup_winsys(drmFB, rws); if (ret == FALSE) goto fail; - return r300_create_screen(&rws->base); + return &rws->base; } fail: FREE(rws); return NULL; } - -static struct drm_api radeon_drm_api_hooks = { - .name = "radeon", - .driver_name = "radeon", - .create_screen = radeon_create_screen, - .destroy = NULL, -}; - -struct drm_api* drm_api_create() -{ - return trace_drm_create(&radeon_drm_api_hooks); -} diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.h b/src/gallium/winsys/radeon/drm/radeon_drm.h index 3544c926d9..df6dd91ad5 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm.h @@ -30,12 +30,7 @@ #ifndef RADEON_DRM_H #define RADEON_DRM_H -#include "state_tracker/drm_api.h" - - -struct pipe_screen* radeon_create_screen(struct drm_api* api, int drmFB); - -void radeon_destroy_drm_api(struct drm_api* api); +#include "state_tracker/drm_driver.h" /* Guess at whether this chipset should use r300g. * diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index a4b6cff33d..017eac8464 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -20,10 +20,11 @@ struct radeon_drm_buffer { struct radeon_bo *bo; + /* The CS associated with the last buffer_map. 
*/ + struct radeon_libdrm_cs *cs; + boolean flinked; uint32_t flink; - uint32_t tileflags; - uint32_t pitch; struct radeon_drm_buffer *next, *prev; }; @@ -67,34 +68,53 @@ radeon_drm_buffer_destroy(struct pb_buffer *_buf) FREE(buf); } +static unsigned get_pb_usage_from_transfer_flags(enum pipe_transfer_usage usage) +{ + unsigned res = 0; + + if (usage & PIPE_TRANSFER_READ) + res |= PB_USAGE_CPU_READ; + + if (usage & PIPE_TRANSFER_WRITE) + res |= PB_USAGE_CPU_WRITE; + + if (usage & PIPE_TRANSFER_DONTBLOCK) + res |= PB_USAGE_DONTBLOCK; + + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) + res |= PB_USAGE_UNSYNCHRONIZED; + + return res; +} + static void * -radeon_drm_buffer_map(struct pb_buffer *_buf, +radeon_drm_buffer_map_internal(struct pb_buffer *_buf, unsigned flags) { struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf); + struct radeon_libdrm_cs *cs = buf->cs; int write = 0; - if (flags & PIPE_TRANSFER_DONTBLOCK) { - if ((_buf->base.usage & PIPE_BIND_VERTEX_BUFFER) || - (_buf->base.usage & PIPE_BIND_INDEX_BUFFER)) - if (radeon_bo_is_referenced_by_cs(buf->bo, buf->mgr->rws->cs)) + if (flags & PB_USAGE_DONTBLOCK) { + if (_buf->base.usage & RADEON_PB_USAGE_VERTEX) + if (cs && radeon_bo_is_referenced_by_cs(buf->bo, cs->cs)) return NULL; } if (buf->bo->ptr != NULL) return buf->bo->ptr; - if (flags & PIPE_TRANSFER_DONTBLOCK) { + if (flags & PB_USAGE_DONTBLOCK) { uint32_t domain; if (radeon_bo_is_busy(buf->bo, &domain)) return NULL; } - if (radeon_bo_is_referenced_by_cs(buf->bo, buf->mgr->rws->cs)) { - buf->mgr->rws->flush_cb(buf->mgr->rws->flush_data); + if (cs && radeon_bo_is_referenced_by_cs(buf->bo, cs->cs)) { + cs->flush_cs(cs->flush_data); } - if (flags & PIPE_TRANSFER_WRITE) { + if (flags & PB_USAGE_CPU_WRITE) { write = 1; } @@ -106,7 +126,7 @@ radeon_drm_buffer_map(struct pb_buffer *_buf, } static void -radeon_drm_buffer_unmap(struct pb_buffer *_buf) +radeon_drm_buffer_unmap_internal(struct pb_buffer *_buf) { (void)_buf; } @@ -138,8 +158,8 @@ 
radeon_drm_buffer_fence(struct pb_buffer *buf, const struct pb_vtbl radeon_drm_buffer_vtbl = { radeon_drm_buffer_destroy, - radeon_drm_buffer_map, - radeon_drm_buffer_unmap, + radeon_drm_buffer_map_internal, + radeon_drm_buffer_unmap_internal, radeon_drm_buffer_validate, radeon_drm_buffer_fence, radeon_drm_buffer_get_base_buffer, @@ -168,8 +188,8 @@ struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager pipe_reference_init(&buf->base.base.reference, 1); buf->base.base.alignment = 0; - buf->base.base.usage = PIPE_BIND_SAMPLER_VIEW; - buf->base.base.size = 0; + buf->base.base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ; + buf->base.base.size = bo->size; buf->base.vtbl = &radeon_drm_buffer_vtbl; buf->mgr = mgr; @@ -202,8 +222,8 @@ radeon_drm_bufmgr_create_buffer(struct pb_manager *_mgr, make_empty_list(buf); domain = - (desc->usage & RADEON_USAGE_DOMAIN_GTT ? RADEON_GEM_DOMAIN_GTT : 0) | - (desc->usage & RADEON_USAGE_DOMAIN_VRAM ? RADEON_GEM_DOMAIN_VRAM : 0); + (desc->usage & RADEON_PB_USAGE_DOMAIN_GTT ? RADEON_GEM_DOMAIN_GTT : 0) | + (desc->usage & RADEON_PB_USAGE_DOMAIN_VRAM ? 
RADEON_GEM_DOMAIN_VRAM : 0); buf->bo = radeon_bo_open(rws->bom, 0, size, desc->alignment, domain, 0); @@ -251,7 +271,8 @@ radeon_drm_bufmgr_create(struct radeon_libdrm_winsys *rws) static struct radeon_drm_buffer *get_drm_buffer(struct pb_buffer *_buf) { - struct radeon_drm_buffer *buf; + struct radeon_drm_buffer *buf = NULL; + if (_buf->vtbl == &radeon_drm_buffer_vtbl) { buf = radeon_drm_buffer(_buf); } else { @@ -259,11 +280,35 @@ static struct radeon_drm_buffer *get_drm_buffer(struct pb_buffer *_buf) pb_size offset; pb_get_base_buffer(_buf, &base_buf, &offset); - buf = radeon_drm_buffer(base_buf); + if (base_buf->vtbl == &radeon_drm_buffer_vtbl) + buf = radeon_drm_buffer(base_buf); } + return buf; } +void *radeon_drm_buffer_map(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + struct r300_winsys_cs *cs, + enum pipe_transfer_usage usage) +{ + struct pb_buffer *_buf = radeon_pb_buffer(buf); + struct radeon_drm_buffer *rbuf = get_drm_buffer(_buf); + + if (rbuf) + rbuf->cs = radeon_libdrm_cs(cs); + + return pb_map(_buf, get_pb_usage_from_transfer_flags(usage)); +} + +void radeon_drm_buffer_unmap(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf) +{ + struct pb_buffer *_buf = radeon_pb_buffer(buf); + + pb_unmap(_buf); +} + boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, struct winsys_handle *whandle) { @@ -288,18 +333,16 @@ boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, return TRUE; } -void radeon_drm_bufmgr_get_tiling(struct pb_buffer *_buf, +void radeon_drm_bufmgr_get_tiling(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *_buf, enum r300_buffer_tiling *microtiled, enum r300_buffer_tiling *macrotiled) { - struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); uint32_t flags = 0, pitch; radeon_bo_get_tiling(buf->bo, &flags, &pitch); - buf->tileflags = flags; - buf->pitch = pitch; - *microtiled = R300_BUFFER_LINEAR; *macrotiled 
= R300_BUFFER_LINEAR; if (flags & RADEON_BO_FLAGS_MICRO_TILE) @@ -309,12 +352,13 @@ void radeon_drm_bufmgr_get_tiling(struct pb_buffer *_buf, *macrotiled = R300_BUFFER_TILED; } -void radeon_drm_bufmgr_set_tiling(struct pb_buffer *_buf, +void radeon_drm_bufmgr_set_tiling(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *_buf, enum r300_buffer_tiling microtiled, enum r300_buffer_tiling macrotiled, uint32_t pitch) { - struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); uint32_t flags = 0; if (microtiled == R300_BUFFER_TILED) flags |= RADEON_BO_FLAGS_MICRO_TILE; @@ -326,67 +370,63 @@ void radeon_drm_bufmgr_set_tiling(struct pb_buffer *_buf, if (macrotiled == R300_BUFFER_TILED) flags |= RADEON_BO_FLAGS_MACRO_TILE; - if (flags != buf->tileflags || pitch != buf->pitch) { - /* Tiling determines how DRM treats the buffer data. - * We must flush CS when changing it if the buffer is referenced. */ - if (radeon_bo_is_referenced_by_cs(buf->bo, buf->mgr->rws->cs)) { - buf->mgr->rws->flush_cb(buf->mgr->rws->flush_data); - } - - radeon_bo_set_tiling(buf->bo, flags, pitch); - } + radeon_bo_set_tiling(buf->bo, flags, pitch); } -static uint32_t gem_domain(enum r300_buffer_domain dom) +static uint32_t get_gem_domain(enum r300_buffer_domain domain) { uint32_t res = 0; - if (dom & R300_DOMAIN_GTT) + if (domain & R300_DOMAIN_GTT) res |= RADEON_GEM_DOMAIN_GTT; - if (dom & R300_DOMAIN_VRAM) + if (domain & R300_DOMAIN_VRAM) res |= RADEON_GEM_DOMAIN_VRAM; return res; } -boolean radeon_drm_bufmgr_add_buffer(struct pb_buffer *_buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd) +void radeon_drm_bufmgr_add_buffer(struct r300_winsys_cs *rcs, + struct r300_winsys_buffer *_buf, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd) { - struct radeon_drm_buffer *buf = get_drm_buffer(_buf); - uint32_t gem_rd = gem_domain(rd); - uint32_t gem_wd = gem_domain(wd); + struct radeon_libdrm_cs *cs = 
radeon_libdrm_cs(rcs); + struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); + uint32_t gem_rd = get_gem_domain(rd); + uint32_t gem_wd = get_gem_domain(wd); - radeon_cs_space_add_persistent_bo(buf->mgr->rws->cs, buf->bo, - gem_rd, gem_wd); - return TRUE; + radeon_cs_space_add_persistent_bo(cs->cs, buf->bo, gem_rd, gem_wd); } -void radeon_drm_bufmgr_write_reloc(struct pb_buffer *_buf, +void radeon_drm_bufmgr_write_reloc(struct r300_winsys_cs *rcs, + struct r300_winsys_buffer *_buf, enum r300_buffer_domain rd, - enum r300_buffer_domain wd, - uint32_t flags) + enum r300_buffer_domain wd) { - struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); + struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); int retval; - uint32_t gem_rd = gem_domain(rd); - uint32_t gem_wd = gem_domain(wd); + uint32_t gem_rd = get_gem_domain(rd); + uint32_t gem_wd = get_gem_domain(wd); - retval = radeon_cs_write_reloc(buf->mgr->rws->cs, - buf->bo, gem_rd, gem_wd, flags); + cs->cs->cdw = cs->base.cdw; + retval = radeon_cs_write_reloc(cs->cs, buf->bo, gem_rd, gem_wd, 0); + cs->base.cdw = cs->cs->cdw; if (retval) { - debug_printf("radeon: Relocation of %p (%d, %d, %d) failed!\n", - buf, gem_rd, gem_wd, flags); + fprintf(stderr, "radeon: Relocation of %p (%d, %d, %d) failed!\n", + buf, gem_rd, gem_wd, 0); } } -boolean radeon_drm_bufmgr_is_buffer_referenced(struct pb_buffer *_buf, +boolean radeon_drm_bufmgr_is_buffer_referenced(struct r300_winsys_cs *rcs, + struct r300_winsys_buffer *_buf, enum r300_reference_domain domain) { - struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); + struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); uint32_t tmp; if (domain & R300_REF_CS) { - if (radeon_bo_is_referenced_by_cs(buf->bo, buf->mgr->rws->cs)) { + if (radeon_bo_is_referenced_by_cs(buf->bo, cs->cs)) { return TRUE; } } @@ -400,7 +440,6 @@ 
boolean radeon_drm_bufmgr_is_buffer_referenced(struct pb_buffer *_buf, return FALSE; } - void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr) { struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); @@ -415,9 +454,10 @@ void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr) make_empty_list(&mgr->buffer_map_list); } -void radeon_drm_bufmgr_wait(struct pb_buffer *_buf) +void radeon_drm_bufmgr_wait(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *_buf) { - struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); radeon_bo_wait(buf->bo); } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_public.h b/src/gallium/winsys/radeon/drm/radeon_drm_public.h new file mode 100644 index 0000000000..0d96ae8c47 --- /dev/null +++ b/src/gallium/winsys/radeon/drm/radeon_drm_public.h @@ -0,0 +1,9 @@ + +#ifndef RADEON_DRM_PUBLIC_H +#define RADEON_DRM_PUBLIC_H + +struct r300_winsys_screen; + +struct r300_winsys_screen *r300_drm_winsys_screen_create(int drmFD); + +#endif diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index d2d317dc20..5544504067 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -25,39 +25,69 @@ #include "radeon_bo_gem.h" #include "radeon_cs_gem.h" -#include "state_tracker/drm_api.h" +#include "state_tracker/drm_driver.h" + +#include "util/u_memory.h" + +static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage, + enum r300_buffer_domain domain) +{ + unsigned res = 0; + + if (bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT)) + res |= PB_USAGE_GPU_WRITE; + + if (bind & PIPE_BIND_SAMPLER_VIEW) + res |= PB_USAGE_GPU_READ | PB_USAGE_GPU_WRITE; + + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) + res |= PB_USAGE_GPU_READ; + + if (bind & PIPE_BIND_TRANSFER_WRITE) + res |= 
PB_USAGE_CPU_WRITE; + + if (bind & PIPE_BIND_TRANSFER_READ) + res |= PB_USAGE_CPU_READ; + + /* Is usage of any use for us? Probably not. */ + + /* Now add driver-specific usage flags. */ + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) + res |= RADEON_PB_USAGE_VERTEX; + + if (domain & R300_DOMAIN_GTT) + res |= RADEON_PB_USAGE_DOMAIN_GTT; + + if (domain & R300_DOMAIN_VRAM) + res |= RADEON_PB_USAGE_DOMAIN_VRAM; + + return res; +} static struct r300_winsys_buffer * radeon_r300_winsys_buffer_create(struct r300_winsys_screen *rws, - unsigned alignment, - unsigned usage, - enum r300_buffer_domain domain, - unsigned size) + unsigned size, + unsigned alignment, + unsigned bind, + unsigned usage, + enum r300_buffer_domain domain) { - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + struct radeon_libdrm_winsys *ws = radeon_libdrm_winsys(rws); struct pb_desc desc; struct pb_manager *provider; struct pb_buffer *buffer; - /* XXX this is hackish, but it's the only way to pass these flags - * to the real create function. */ - usage &= ~(RADEON_USAGE_DOMAIN_GTT | RADEON_USAGE_DOMAIN_VRAM); - if (domain & R300_DOMAIN_GTT) - usage |= RADEON_USAGE_DOMAIN_GTT; - if (domain & R300_DOMAIN_VRAM) - usage |= RADEON_USAGE_DOMAIN_VRAM; - memset(&desc, 0, sizeof(desc)); desc.alignment = alignment; - desc.usage = usage; + desc.usage = get_pb_usage_from_create_flags(bind, usage, domain); - if (usage & PIPE_BIND_CONSTANT_BUFFER) - provider = ws->mman; - else if ((usage & PIPE_BIND_VERTEX_BUFFER) || - (usage & PIPE_BIND_INDEX_BUFFER)) + /* Assign a buffer manager. 
*/ + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) provider = ws->cman; else provider = ws->kman; + buffer = provider->create_buffer(provider, size, &desc); if (!buffer) return NULL; @@ -65,55 +95,6 @@ radeon_r300_winsys_buffer_create(struct r300_winsys_screen *rws, return radeon_libdrm_winsys_buffer(buffer); } -static void radeon_r300_winsys_buffer_destroy(struct r300_winsys_buffer *buf) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - - pb_destroy(_buf); -} -static void radeon_r300_winsys_buffer_set_tiling(struct r300_winsys_screen *rws, - struct r300_winsys_buffer *buf, - uint32_t pitch, - enum r300_buffer_tiling microtiled, - enum r300_buffer_tiling macrotiled) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - radeon_drm_bufmgr_set_tiling(_buf, microtiled, macrotiled, pitch); -} - -static void radeon_r300_winsys_buffer_get_tiling(struct r300_winsys_screen *rws, - struct r300_winsys_buffer *buf, - enum r300_buffer_tiling *microtiled, - enum r300_buffer_tiling *macrotiled) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - radeon_drm_bufmgr_get_tiling(_buf, microtiled, macrotiled); -} - -static void *radeon_r300_winsys_buffer_map(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, - unsigned usage) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - - return pb_map(_buf, usage); -} - -static void radeon_r300_winsys_buffer_unmap(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - - pb_unmap(_buf); -} - -static void radeon_r300_winsys_buffer_wait(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - radeon_drm_bufmgr_wait(_buf); -} - static void radeon_r300_winsys_buffer_reference(struct r300_winsys_screen *rws, struct r300_winsys_buffer **pdst, struct r300_winsys_buffer *src) @@ -126,140 +107,96 @@ static void radeon_r300_winsys_buffer_reference(struct r300_winsys_screen *rws, *pdst = 
radeon_libdrm_winsys_buffer(_dst); } -static boolean radeon_r300_winsys_is_buffer_referenced(struct r300_winsys_screen *rws, - struct r300_winsys_buffer *buf, - enum r300_reference_domain domain) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - - return radeon_drm_bufmgr_is_buffer_referenced(_buf, domain); -} - static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r300_winsys_screen *rws, - unsigned handle) + struct winsys_handle *whandle, + unsigned *stride, + unsigned *size) { - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + struct radeon_libdrm_winsys *ws = radeon_libdrm_winsys(rws); struct pb_buffer *_buf; - _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, handle); + _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, whandle->handle); + + if (stride) + *stride = whandle->stride; + if (size) + *size = _buf->base.size; + return radeon_libdrm_winsys_buffer(_buf); } static boolean radeon_r300_winsys_buffer_get_handle(struct r300_winsys_screen *rws, struct r300_winsys_buffer *buffer, + unsigned stride, struct winsys_handle *whandle) { struct pb_buffer *_buf = radeon_pb_buffer(buffer); + whandle->stride = stride; return radeon_drm_bufmgr_get_handle(_buf, whandle); } -static void radeon_set_flush_cb(struct r300_winsys_screen *rws, - void (*flush_cb)(void *), - void *data) -{ - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); - ws->flush_cb = flush_cb; - ws->flush_data = data; - radeon_cs_space_set_flush(ws->cs, flush_cb, data); -} - -static boolean radeon_add_buffer(struct r300_winsys_screen *rws, - struct r300_winsys_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - - return radeon_drm_bufmgr_add_buffer(_buf, rd, wd); -} - -static boolean radeon_validate(struct r300_winsys_screen *rws) -{ - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); - if (radeon_cs_space_check(ws->cs) < 0) { - return FALSE; - 
} - - /* Things are fine, we can proceed as normal. */ - return TRUE; -} - -static unsigned radeon_get_cs_free_dwords(struct r300_winsys_screen *rws) +static void radeon_r300_winsys_cs_set_flush(struct r300_winsys_cs *rcs, + void (*flush)(void *), + void *user) { - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); - struct radeon_cs *cs = ws->cs; - - return cs->ndw - cs->cdw; -} - -static uint32_t *radeon_get_cs_pointer(struct r300_winsys_screen *rws, - unsigned count) -{ - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); - struct radeon_cs *cs = ws->cs; - uint32_t *ptr = cs->packets + cs->cdw; - - cs->cdw += count; - return ptr; -} - -static void radeon_write_cs_dword(struct r300_winsys_screen *rws, - uint32_t dword) -{ - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); - radeon_cs_write_dword(ws->cs, dword); + struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); + cs->flush_cs = flush; + cs->flush_data = user; + radeon_cs_space_set_flush(cs->cs, flush, user); } -static void radeon_write_cs_table(struct r300_winsys_screen *rws, - const void *table, unsigned count) +static boolean radeon_r300_winsys_cs_validate(struct r300_winsys_cs *rcs) { - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); - radeon_cs_write_table(ws->cs, table, count); -} + struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); -static void radeon_write_cs_reloc(struct r300_winsys_screen *rws, - struct r300_winsys_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd, - uint32_t flags) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - radeon_drm_bufmgr_write_reloc(_buf, rd, wd, flags); + return radeon_cs_space_check(cs->cs) >= 0; } -static void radeon_reset_bos(struct r300_winsys_screen *rws) +static void radeon_r300_winsys_cs_reset_buffers(struct r300_winsys_cs *rcs) { - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); - radeon_cs_space_reset_bos(ws->cs); + struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); + 
radeon_cs_space_reset_bos(cs->cs); } -static void radeon_flush_cs(struct r300_winsys_screen *rws) +static void radeon_r300_winsys_cs_flush(struct r300_winsys_cs *rcs) { - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); int retval; /* Don't flush a zero-sized CS. */ - if (!ws->cs->cdw) { + if (!cs->base.cdw) { return; } - radeon_drm_bufmgr_flush_maps(ws->kman); + cs->cs->cdw = cs->base.cdw; + + radeon_drm_bufmgr_flush_maps(cs->ws->kman); + /* Emit the CS. */ - retval = radeon_cs_emit(ws->cs); + retval = radeon_cs_emit(cs->cs); if (retval) { - debug_printf("radeon: Bad CS, dumping...\n"); - radeon_cs_print(ws->cs, stderr); + if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { + fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); + radeon_cs_print(cs->cs, stderr); + } else { + fprintf(stderr, "radeon: The kernel rejected CS, " + "see dmesg for more information.\n"); + } } /* Reset CS. * Someday, when we care about performance, we should really find a way * to rotate between two or three CS objects so that the GPU can be * spinning through one CS while another one is being filled. */ - radeon_cs_erase(ws->cs); + radeon_cs_erase(cs->cs); + + cs->base.ptr = cs->cs->packets; + cs->base.cdw = cs->cs->cdw; + cs->base.ndw = cs->cs->ndw; } static uint32_t radeon_get_value(struct r300_winsys_screen *rws, - enum r300_value_id id) + enum r300_value_id id) { struct radeon_libdrm_winsys *ws = (struct radeon_libdrm_winsys *)rws; @@ -278,24 +215,52 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return 0; } -static void -radeon_winsys_destroy(struct r300_winsys_screen *rws) +static struct r300_winsys_cs *radeon_r300_winsys_cs_create(struct r300_winsys_screen *rws) +{ + struct radeon_libdrm_winsys *ws = radeon_libdrm_winsys(rws); + struct radeon_libdrm_cs *cs = CALLOC_STRUCT(radeon_libdrm_cs); + + if (!cs) + return NULL; + + /* Size limit on IBs is 64 kibibytes. 
*/ + cs->cs = radeon_cs_create(ws->csm, 1024 * 64 / 4); + if (!cs->cs) { + FREE(cs); + return NULL; + } + + radeon_cs_set_limit(cs->cs, + RADEON_GEM_DOMAIN_GTT, ws->gart_size); + radeon_cs_set_limit(cs->cs, + RADEON_GEM_DOMAIN_VRAM, ws->vram_size); + + cs->ws = ws; + cs->base.ptr = cs->cs->packets; + cs->base.cdw = cs->cs->cdw; + cs->base.ndw = cs->cs->ndw; + return &cs->base; +} + +static void radeon_r300_winsys_cs_destroy(struct r300_winsys_cs *rcs) +{ + struct radeon_libdrm_cs *cs = radeon_libdrm_cs(rcs); + radeon_cs_destroy(cs->cs); +} + +static void radeon_winsys_destroy(struct r300_winsys_screen *rws) { struct radeon_libdrm_winsys *ws = (struct radeon_libdrm_winsys *)rws; - radeon_cs_destroy(ws->cs); ws->cman->destroy(ws->cman); ws->kman->destroy(ws->kman); - ws->mman->destroy(ws->mman); radeon_bo_manager_gem_dtor(ws->bom); radeon_cs_manager_gem_dtor(ws->csm); } -boolean -radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws) +boolean radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws) { - ws->csm = radeon_cs_manager_gem_ctor(fd); if (!ws->csm) goto fail; @@ -310,43 +275,28 @@ radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws) if (!ws->cman) goto fail; - ws->mman = pb_malloc_bufmgr_create(); - if (!ws->mman) - goto fail; - - /* Size limit on IBs is 64 kibibytes. 
*/ - ws->cs = radeon_cs_create(ws->csm, 1024 * 64 / 4); - if (!ws->cs) - goto fail; - radeon_cs_set_limit(ws->cs, - RADEON_GEM_DOMAIN_GTT, ws->gart_size); - radeon_cs_set_limit(ws->cs, - RADEON_GEM_DOMAIN_VRAM, ws->vram_size); - - ws->base.add_buffer = radeon_add_buffer; - ws->base.validate = radeon_validate; ws->base.destroy = radeon_winsys_destroy; - ws->base.get_cs_free_dwords = radeon_get_cs_free_dwords; - ws->base.get_cs_pointer = radeon_get_cs_pointer; - ws->base.write_cs_dword = radeon_write_cs_dword; - ws->base.write_cs_table = radeon_write_cs_table; - ws->base.write_cs_reloc = radeon_write_cs_reloc; - ws->base.flush_cs = radeon_flush_cs; - ws->base.reset_bos = radeon_reset_bos; - ws->base.set_flush_cb = radeon_set_flush_cb; ws->base.get_value = radeon_get_value; ws->base.buffer_create = radeon_r300_winsys_buffer_create; - ws->base.buffer_destroy = radeon_r300_winsys_buffer_destroy; - ws->base.buffer_set_tiling = radeon_r300_winsys_buffer_set_tiling; - ws->base.buffer_get_tiling = radeon_r300_winsys_buffer_get_tiling; - ws->base.buffer_map = radeon_r300_winsys_buffer_map; - ws->base.buffer_unmap = radeon_r300_winsys_buffer_unmap; - ws->base.buffer_wait = radeon_r300_winsys_buffer_wait; + ws->base.buffer_set_tiling = radeon_drm_bufmgr_set_tiling; + ws->base.buffer_get_tiling = radeon_drm_bufmgr_get_tiling; + ws->base.buffer_map = radeon_drm_buffer_map; + ws->base.buffer_unmap = radeon_drm_buffer_unmap; + ws->base.buffer_wait = radeon_drm_bufmgr_wait; ws->base.buffer_reference = radeon_r300_winsys_buffer_reference; ws->base.buffer_from_handle = radeon_r300_winsys_buffer_from_handle; ws->base.buffer_get_handle = radeon_r300_winsys_buffer_get_handle; - ws->base.is_buffer_referenced = radeon_r300_winsys_is_buffer_referenced; + + ws->base.cs_create = radeon_r300_winsys_cs_create; + ws->base.cs_destroy = radeon_r300_winsys_cs_destroy; + ws->base.cs_add_buffer = radeon_drm_bufmgr_add_buffer; + ws->base.cs_validate = radeon_r300_winsys_cs_validate; + 
ws->base.cs_write_reloc = radeon_drm_bufmgr_write_reloc; + ws->base.cs_flush = radeon_r300_winsys_cs_flush; + ws->base.cs_reset_buffers = radeon_r300_winsys_cs_reset_buffers; + ws->base.cs_set_flush = radeon_r300_winsys_cs_set_flush; + ws->base.cs_is_buffer_referenced = radeon_drm_bufmgr_is_buffer_referenced; return TRUE; fail: @@ -360,10 +310,6 @@ fail: ws->cman->destroy(ws->cman); if (ws->kman) ws->kman->destroy(ws->kman); - if (ws->mman) - ws->mman->destroy(ws->mman); - if (ws->cs) - radeon_cs_destroy(ws->cs); return FALSE; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index ca789be8e9..533b7b2e2d 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -40,8 +40,6 @@ struct radeon_libdrm_winsys { struct pb_manager *cman; - struct pb_manager *mman; - /* PCI ID */ uint32_t pci_id; @@ -75,19 +73,32 @@ struct radeon_libdrm_winsys { /* Radeon CS manager. */ struct radeon_cs_manager *csm; +}; + +struct radeon_libdrm_cs { + struct r300_winsys_cs base; + + /* The winsys. */ + struct radeon_libdrm_winsys *ws; - /* Current CS. */ + /* The libdrm command stream. */ struct radeon_cs *cs; - /* Flush CB */ - void (*flush_cb)(void *); + /* Flush CS. 
*/ + void (*flush_cs)(void *); void *flush_data; }; +static INLINE struct radeon_libdrm_cs * +radeon_libdrm_cs(struct r300_winsys_cs *base) +{ + return (struct radeon_libdrm_cs*)base; +} + static INLINE struct radeon_libdrm_winsys * -radeon_winsys_screen(struct r300_winsys_screen *base) +radeon_libdrm_winsys(struct r300_winsys_screen *base) { - return (struct radeon_libdrm_winsys *)base; + return (struct radeon_libdrm_winsys*)base; } #endif diff --git a/src/gallium/winsys/sw/drm/sw_drm_api.h b/src/gallium/winsys/svga/drm/svga_drm_public.h index ce90a04ae0..e98c89da1e 100644 --- a/src/gallium/winsys/sw/drm/sw_drm_api.h +++ b/src/gallium/winsys/svga/drm/svga_drm_public.h @@ -23,12 +23,19 @@ * **********************************************************/ +/** + * @file + * VMware SVGA DRM winsys public interface. Used by targets to create a stack. + * + * @author Jakob Bornecrantz Fonseca <jakob@vmware.com> + */ -#ifndef SW_DRM_API_H -#define SW_DRM_API_H +#ifndef SVGA_DRM_PUBLIC_H_ +#define SVGA_DRM_PUBLIC_H_ -struct drm_api; +struct svga_winsys_screen; -struct drm_api * sw_drm_api_create(struct drm_api *api); +struct svga_winsys_screen * +svga_drm_winsys_screen_create(int fd); -#endif +#endif /* SVGA_PUBLIC_H_ */ diff --git a/src/gallium/winsys/svga/drm/vmw_screen_dri.c b/src/gallium/winsys/svga/drm/vmw_screen_dri.c index fe28522691..1b0d10f60d 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_dri.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_dri.c @@ -30,14 +30,14 @@ #include "util/u_format.h" #include "vmw_screen.h" -#include "trace/tr_drm.h" - #include "vmw_screen.h" #include "vmw_surface.h" #include "vmw_fence.h" #include "vmw_context.h" +#include "svga_drm_public.h" + +#include "state_tracker/drm_driver.h" -#include <state_tracker/drm_api.h> #include "vmwgfx_drm.h" #include <xf86drm.h> @@ -84,15 +84,13 @@ vmw_dri1_check_version(const struct dri1_api_version *cur, return FALSE; } -/* This is actually the entrypoint to the entire driver, called by the - * libGL 
(or EGL, or ...) code via the drm_api_hooks table at the - * bottom of the file. +/* This is actually the entrypoint to the entire driver, + * called by the target bootstrap code. */ -static struct pipe_screen * -vmw_drm_create_screen(struct drm_api *drm_api, int fd) +struct svga_winsys_screen * +svga_drm_winsys_screen_create(int fd) { struct vmw_winsys_screen *vws; - struct pipe_screen *screen; boolean use_old_scanout_flag = FALSE; struct dri1_api_version drm_ver; @@ -123,16 +121,7 @@ vmw_drm_create_screen(struct drm_api *drm_api, int fd) vws->base.surface_from_handle = vmw_drm_surface_from_handle; vws->base.surface_get_handle = vmw_drm_surface_get_handle; - screen = svga_screen_create( &vws->base ); - if (!screen) - goto out_no_screen; - - return screen; - - /* Failure cases: - */ -out_no_screen: - vmw_winsys_destroy( vws ); + return &vws->base; out_no_vws: return NULL; @@ -253,15 +242,3 @@ vmw_drm_surface_get_handle(struct svga_winsys_screen *sws, return TRUE; } - -static struct drm_api vmw_drm_api_hooks = { - .name = "vmwgfx", - .driver_name = "vmwgfx", - .create_screen = vmw_drm_create_screen, - .destroy = NULL, -}; - -struct drm_api* drm_api_create() -{ - return trace_drm_create(&vmw_drm_api_hooks); -} diff --git a/src/gallium/winsys/svga/drm/vmwgfx_drm.h b/src/gallium/winsys/svga/drm/vmwgfx_drm.h index fbb1a8f9a2..2f2807df0b 100644 --- a/src/gallium/winsys/svga/drm/vmwgfx_drm.h +++ b/src/gallium/winsys/svga/drm/vmwgfx_drm.h @@ -72,6 +72,7 @@ #define DRM_VMW_PARAM_FIFO_OFFSET 3 #define DRM_VMW_PARAM_HW_CAPS 4 #define DRM_VMW_PARAM_FIFO_CAPS 5 +#define DRM_VMW_PARAM_MAX_FB_SIZE 6 /** * struct drm_vmw_getparam_arg diff --git a/src/gallium/winsys/sw/drm/Makefile b/src/gallium/winsys/sw/drm/Makefile deleted file mode 100644 index 79664536aa..0000000000 --- a/src/gallium/winsys/sw/drm/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -TOP = ../../../../.. 
-include $(TOP)/configs/current - -LIBNAME = swdrm - -C_SOURCES = sw_drm_api.c - -LIBRARY_INCLUDES = - -LIBRARY_DEFINES = - -include ../../../Makefile.template diff --git a/src/gallium/winsys/sw/drm/SConscript b/src/gallium/winsys/sw/drm/SConscript deleted file mode 100644 index 15a2e05d5a..0000000000 --- a/src/gallium/winsys/sw/drm/SConscript +++ /dev/null @@ -1,21 +0,0 @@ -####################################################################### -# SConscript for xlib winsys - - -Import('*') - -env = env.Clone() - -env.Append(CPPPATH = [ - '#/src/gallium/include', - '#/src/gallium/auxiliary', - '#/src/gallium/drivers', -]) - -ws_drm = env.ConvenienceLibrary( - target = 'ws_drm', - source = [ - 'sw_drm_api.c', - ] -) -Export('ws_drm') diff --git a/src/gallium/winsys/sw/drm/sw_drm_api.c b/src/gallium/winsys/sw/drm/sw_drm_api.c deleted file mode 100644 index 7b86382619..0000000000 --- a/src/gallium/winsys/sw/drm/sw_drm_api.c +++ /dev/null @@ -1,103 +0,0 @@ -/********************************************************** - * Copyright 2010 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - - -#include "util/u_memory.h" -#include "softpipe/sp_public.h" -#include "state_tracker/drm_api.h" -#include "../../sw/wrapper/wrapper_sw_winsys.h" -#include "sw_drm_api.h" - - -/* - * Defines - */ - - -struct sw_drm_api -{ - struct drm_api base; - struct drm_api *api; - struct sw_winsys *sw; -}; - -static INLINE struct sw_drm_api * -sw_drm_api(struct drm_api *api) -{ - return (struct sw_drm_api *)api; -} - - -/* - * Exported functions - */ - - -static struct pipe_screen * -sw_drm_create_screen(struct drm_api *_api, int drmFD) -{ - struct sw_drm_api *swapi = sw_drm_api(_api); - struct drm_api *api = swapi->api; - struct sw_winsys *sww; - struct pipe_screen *screen; - - screen = api->create_screen(api, drmFD); - if (!screen) - return NULL; - - sww = wrapper_sw_winsys_warp_pipe_screen(screen); - if (!sww) - return NULL; - - return softpipe_create_screen(sww); -} - -static void -sw_drm_destroy(struct drm_api *api) -{ - struct sw_drm_api *swapi = sw_drm_api(api); - if (swapi->api->destroy) - swapi->api->destroy(swapi->api); - - FREE(swapi); -} - -struct drm_api * -sw_drm_api_create(struct drm_api *api) -{ - struct sw_drm_api *swapi = CALLOC_STRUCT(sw_drm_api); - - if (!swapi) - return api; - - swapi->base.name = api->name; - swapi->base.driver_name = api->driver_name; - swapi->base.create_screen = sw_drm_create_screen; - swapi->base.destroy = sw_drm_destroy; - - swapi->api = api; - - return &swapi->base; -} |