diff options
Diffstat (limited to 'src/gallium/auxiliary/draw')
33 files changed, 1302 insertions, 1774 deletions
diff --git a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h new file mode 100644 index 0000000000..958ed20dc8 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h @@ -0,0 +1,114 @@ +/************************************************************************** + * + * Copyright 2010, VMware, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + + +static boolean TAG(do_cliptest)( struct pt_post_vs *pvs, + struct draw_vertex_info *info ) +{ + struct vertex_header *out = info->verts; + const float *scale = pvs->draw->viewport.scale; + const float *trans = pvs->draw->viewport.translate; + /* const */ float (*plane)[4] = pvs->draw->plane; + const unsigned pos = draw_current_shader_position_output(pvs->draw); + const unsigned ef = pvs->draw->vs.edgeflag_output; + const unsigned nr = pvs->draw->nr_planes; + const unsigned flags = (FLAGS); + unsigned need_pipeline = 0; + unsigned j; + + for (j = 0; j < info->count; j++) { + float *position = out->data[pos]; + unsigned mask = 0x0; + + initialize_vertex_header(out); + + if (flags & (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_HALF_Z | DO_CLIP_USER)) { + out->clip[0] = position[0]; + out->clip[1] = position[1]; + out->clip[2] = position[2]; + out->clip[3] = position[3]; + + /* Do the hardwired planes first: + */ + if (flags & DO_CLIP_XY) { + if (-position[0] + position[3] < 0) mask |= (1<<0); + if ( position[0] + position[3] < 0) mask |= (1<<1); + if (-position[1] + position[3] < 0) mask |= (1<<2); + if ( position[1] + position[3] < 0) mask |= (1<<3); + } + + /* Clip Z planes according to full cube, half cube or none. 
+ */ + if (flags & DO_CLIP_FULL_Z) { + if ( position[2] + position[3] < 0) mask |= (1<<4); + if (-position[2] + position[3] < 0) mask |= (1<<5); + } + else if (flags & DO_CLIP_HALF_Z) { + if ( position[2] < 0) mask |= (1<<4); + if (-position[2] + position[3] < 0) mask |= (1<<5); + } + + if (flags & DO_CLIP_USER) { + unsigned i; + for (i = 6; i < nr; i++) { + if (dot4(position, plane[i]) < 0) + mask |= (1<<i); + } + } + + out->clipmask = mask; + need_pipeline |= out->clipmask; + } + + if ((flags & DO_VIEWPORT) && mask == 0) + { + /* divide by w */ + float w = 1.0f / position[3]; + + /* Viewport mapping */ + position[0] = position[0] * w * scale[0] + trans[0]; + position[1] = position[1] * w * scale[1] + trans[1]; + position[2] = position[2] * w * scale[2] + trans[2]; + position[3] = w; + } + + if ((flags & DO_EDGEFLAG) && ef) { + const float *edgeflag = out->data[ef]; + out->edgeflag = !(edgeflag[0] != 1.0f); + need_pipeline |= !out->edgeflag; + } + + out = (struct vertex_header *)( (char *)out + info->stride ); + } + + return need_pipeline != 0; +} + + +#undef FLAGS +#undef TAG diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 995b675b9a..937b093479 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -34,6 +34,7 @@ #include "pipe/p_context.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_cpu_detect.h" #include "draw_context.h" #include "draw_vs.h" #include "draw_gs.h" @@ -41,6 +42,25 @@ #if HAVE_LLVM #include "gallivm/lp_bld_init.h" #include "draw_llvm.h" + +static boolean +draw_get_option_use_llvm(void) +{ + static boolean first = TRUE; + static boolean value; + if (first) { + first = FALSE; + value = debug_get_bool_option("DRAW_USE_LLVM", TRUE); + +#ifdef PIPE_ARCH_X86 + util_cpu_detect(); + /* require SSE2 due to LLVM PR6960. 
*/ + if (!util_cpu_caps.has_sse2) + value = FALSE; +#endif + } + return value; +} #endif struct draw_context *draw_create( struct pipe_context *pipe ) @@ -50,10 +70,13 @@ struct draw_context *draw_create( struct pipe_context *pipe ) goto fail; #if HAVE_LLVM - lp_build_init(); - assert(lp_build_engine); - draw->engine = lp_build_engine; - draw->llvm = draw_llvm_create(draw); + if(draw_get_option_use_llvm()) + { + lp_build_init(); + assert(lp_build_engine); + draw->engine = lp_build_engine; + draw->llvm = draw_llvm_create(draw); + } #endif if (!draw_init(draw)) @@ -83,6 +106,8 @@ boolean draw_init(struct draw_context *draw) ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */ ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */ draw->nr_planes = 6; + draw->clip_xy = 1; + draw->clip_z = 1; draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ @@ -135,7 +160,8 @@ void draw_destroy( struct draw_context *draw ) draw_vs_destroy( draw ); draw_gs_destroy( draw ); #ifdef HAVE_LLVM - draw_llvm_destroy( draw->llvm ); + if(draw->llvm) + draw_llvm_destroy( draw->llvm ); #endif FREE( draw ); @@ -162,6 +188,14 @@ void draw_set_mrd(struct draw_context *draw, double mrd) } +static void update_clip_flags( struct draw_context *draw ) +{ + draw->clip_xy = !draw->driver.bypass_clip_xy; + draw->clip_z = (!draw->driver.bypass_clip_z && + !draw->depth_clamp); + draw->clip_user = (draw->nr_planes > 6); +} + /** * Register new primitive rasterization/rendering state. * This causes the drawing pipeline to be rebuilt. @@ -176,18 +210,25 @@ void draw_set_rasterizer_state( struct draw_context *draw, draw->rasterizer = raster; draw->rast_handle = rast_handle; - draw->bypass_clipping = draw->driver.bypass_clipping; - } + } } - +/* With a little more work, llvmpipe will be able to turn this off and + * do its own x/y clipping. 
+ * + * Some hardware can turn off clipping altogether - in particular any + * hardware with a TNL unit can do its own clipping, even if it is + * relying on the draw module for some other reason. + */ void draw_set_driver_clipping( struct draw_context *draw, - boolean bypass_clipping ) + boolean bypass_clip_xy, + boolean bypass_clip_z ) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->driver.bypass_clipping = bypass_clipping; - draw->bypass_clipping = draw->driver.bypass_clipping; + draw->driver.bypass_clip_xy = bypass_clip_xy; + draw->driver.bypass_clip_z = bypass_clip_z; + update_clip_flags(draw); } @@ -217,6 +258,8 @@ void draw_set_clip_state( struct draw_context *draw, memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0])); draw->nr_planes = 6 + clip->nr; draw->depth_clamp = clip->depth_clamp; + + update_clip_flags(draw); } @@ -472,47 +515,28 @@ void draw_set_render( struct draw_context *draw, } - -/** - * Tell the drawing context about the index/element buffer to use - * (ala glDrawElements) - * If no element buffer is to be used (i.e. glDrawArrays) then this - * should be called with eltSize=0 and elements=NULL. - * - * \param draw the drawing context - * \param eltSize size of each element (1, 2 or 4 bytes) - * \param elements the element buffer ptr - */ void -draw_set_mapped_element_buffer_range( struct draw_context *draw, - unsigned eltSize, - int eltBias, - unsigned min_index, - unsigned max_index, - const void *elements ) +draw_set_index_buffer(struct draw_context *draw, + const struct pipe_index_buffer *ib) { - draw->pt.user.elts = elements; - draw->pt.user.eltSize = eltSize; - draw->pt.user.eltBias = eltBias; - draw->pt.user.min_index = min_index; - draw->pt.user.max_index = max_index; + if (ib) + memcpy(&draw->pt.index_buffer, ib, sizeof(draw->pt.index_buffer)); + else + memset(&draw->pt.index_buffer, 0, sizeof(draw->pt.index_buffer)); } +/** + * Tell drawing context where to find mapped index/element buffer. 
+ */ void -draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, - int eltBias, - const void *elements ) +draw_set_mapped_index_buffer(struct draw_context *draw, + const void *elements) { - draw->pt.user.elts = elements; - draw->pt.user.eltSize = eltSize; - draw->pt.user.eltBias = eltBias; - draw->pt.user.min_index = 0; - draw->pt.user.max_index = 0xffffffff; + draw->pt.user.elts = elements; } - + /* Revamp me please: */ void draw_do_flush( struct draw_context *draw, unsigned flags ) @@ -659,7 +683,8 @@ draw_set_mapped_texture(struct draw_context *draw, const void *data[DRAW_MAX_TEXTURE_LEVELS]) { #ifdef HAVE_LLVM - draw_llvm_set_mapped_texture(draw, + if(draw->llvm) + draw_llvm_set_mapped_texture(draw, sampler_idx, width, height, depth, last_level, row_stride, img_stride, data); diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 116716af6f..4c780e4dcb 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -160,18 +160,11 @@ void draw_set_vertex_elements(struct draw_context *draw, unsigned count, const struct pipe_vertex_element *elements); -void -draw_set_mapped_element_buffer_range( struct draw_context *draw, - unsigned eltSize, - int eltBias, - unsigned min_index, - unsigned max_index, - const void *elements ); - -void draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, - int eltBias, - const void *elements ); +void draw_set_index_buffer(struct draw_context *draw, + const struct pipe_index_buffer *ib); + +void draw_set_mapped_index_buffer(struct draw_context *draw, + const void *elements); void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); @@ -196,6 +189,9 @@ draw_set_so_state(struct draw_context *draw, * draw_pt.c */ +void draw_vbo(struct draw_context *draw, + const struct pipe_draw_info *info); + void draw_arrays(struct draw_context *draw, unsigned prim, 
unsigned start, unsigned count); @@ -216,7 +212,8 @@ void draw_set_render( struct draw_context *draw, struct vbuf_render *render ); void draw_set_driver_clipping( struct draw_context *draw, - boolean bypass_clipping ); + boolean bypass_clip_xy, + boolean bypass_clip_z ); void draw_set_force_passthrough( struct draw_context *draw, boolean enable ); diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h index a52d2b5058..a142563af9 100644 --- a/src/gallium/auxiliary/draw/draw_decompose_tmp.h +++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h @@ -54,10 +54,10 @@ FUNC(FUNC_VARS) FUNC_ENTER; - /* prim, count, and last_vertex_last should have been defined */ + /* prim, prim_flags, count, and last_vertex_last should have been defined */ if (0) { - debug_printf("%s: prim 0x%x, count %d, last_vertex_last %d\n", - __FUNCTION__, prim, count, last_vertex_last); + debug_printf("%s: prim 0x%x, prim_flags 0x%x, count %d, last_vertex_last %d\n", + __FUNCTION__, prim, prim_flags, count, last_vertex_last); } switch (prim) { @@ -80,7 +80,7 @@ FUNC(FUNC_VARS) case PIPE_PRIM_LINE_LOOP: case PIPE_PRIM_LINE_STRIP: if (count >= 2) { - flags = DRAW_PIPE_RESET_STIPPLE; + flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE; idx[1] = GET_ELT(0); idx[2] = idx[1]; @@ -90,7 +90,7 @@ FUNC(FUNC_VARS) LINE(flags, idx[0], idx[1]); } /* close the loop */ - if (prim == PIPE_PRIM_LINE_LOOP) + if (prim == PIPE_PRIM_LINE_LOOP && !prim_flags) LINE(flags, idx[1], idx[2]); } break; @@ -255,17 +255,23 @@ FUNC(FUNC_VARS) if (last_vertex_last) { flags = (DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_2 | DRAW_PIPE_EDGE_FLAG_0); + if (!(prim_flags & DRAW_SPLIT_BEFORE)) + flags |= DRAW_PIPE_EDGE_FLAG_2; + edge_next = DRAW_PIPE_EDGE_FLAG_0; - edge_finish = DRAW_PIPE_EDGE_FLAG_1; + edge_finish = + (prim_flags & DRAW_SPLIT_AFTER) ? 
0 : DRAW_PIPE_EDGE_FLAG_1; } else { flags = (DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1); + if (!(prim_flags & DRAW_SPLIT_BEFORE)) + flags |= DRAW_PIPE_EDGE_FLAG_0; + edge_next = DRAW_PIPE_EDGE_FLAG_1; - edge_finish = DRAW_PIPE_EDGE_FLAG_2; + edge_finish = + (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_2; } idx[0] = GET_ELT(0); @@ -300,7 +306,7 @@ FUNC(FUNC_VARS) case PIPE_PRIM_LINE_STRIP_ADJACENCY: if (count >= 4) { - flags = DRAW_PIPE_RESET_STIPPLE; + flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE; idx[1] = GET_ELT(0); idx[2] = GET_ELT(1); idx[3] = GET_ELT(2); diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 4a1013e79a..50a03ac95a 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -380,7 +380,7 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, #define FUNC gs_run_elts #define LOCAL_VARS const ushort *elts = input_prims->elts; -#define GET_ELT(idx) (elts[idx] & ~DRAW_PIPE_FLAG_MASK) +#define GET_ELT(idx) (elts[idx]) #include "draw_gs_tmp.h" @@ -457,6 +457,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, output_prims->start = 0; output_prims->count = shader->emitted_vertices; output_prims->prim = shader->output_primitive; + output_prims->flags = 0x0; output_prims->primitive_lengths = shader->primitive_lengths; output_prims->primitive_count = shader->emitted_primitives; output_verts->count = shader->emitted_vertices; diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h index 4a17af0dea..de7b02655a 100644 --- a/src/gallium/auxiliary/draw/draw_gs_tmp.h +++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h @@ -6,12 +6,10 @@ #define FUNC_ENTER \ /* declare more local vars */ \ - struct draw_context *draw = gs->draw; \ const unsigned prim = input_prims->prim; \ + const unsigned prim_flags = input_prims->flags; \ const unsigned count = 
input_prims->count; \ - const boolean last_vertex_last = \ - !(draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first); \ + const boolean last_vertex_last = TRUE; \ do { \ debug_assert(input_prims->primitive_count == 1); \ switch (prim) { \ diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 8d53601d19..8759c38cab 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -210,13 +210,6 @@ draw_llvm_create(struct draw_context *draw) { struct draw_llvm *llvm; -#ifdef PIPE_ARCH_X86 - util_cpu_detect(); - /* require SSE2 due to LLVM PR6960. */ - if (!util_cpu_caps.has_sse2) - return NULL; -#endif - llvm = CALLOC_STRUCT( draw_llvm ); if (!llvm) return NULL; @@ -292,15 +285,23 @@ draw_llvm_destroy(struct draw_llvm *llvm) } struct draw_llvm_variant * -draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs) +draw_llvm_create_variant(struct draw_llvm *llvm, + unsigned num_inputs, + const struct draw_llvm_variant_key *key) { - struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant)); + struct draw_llvm_variant *variant; struct llvm_vertex_shader *shader = llvm_vertex_shader(llvm->draw->vs.vertex_shader); + variant = MALLOC(sizeof *variant + + shader->variant_key_size - + sizeof variant->key); + if (variant == NULL) + return NULL; + variant->llvm = llvm; - draw_llvm_make_variant_key(llvm, &variant->key); + memcpy(&variant->key, key, shader->variant_key_size); llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); @@ -738,8 +739,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); /* code generated texture sampling */ - sampler = draw_llvm_sampler_soa_create(variant->key.sampler, - context_ptr); + sampler = draw_llvm_sampler_soa_create( + draw_llvm_variant_key_samplers(&variant->key), + context_ptr); #if DEBUG_STORE lp_build_printf(builder, "start 
= %d, end = %d, step = %d\n", @@ -901,8 +903,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); /* code generated texture sampling */ - sampler = draw_llvm_sampler_soa_create(variant->key.sampler, - context_ptr); + sampler = draw_llvm_sampler_soa_create( + draw_llvm_variant_key_samplers(&variant->key), + context_ptr); fetch_max = LLVMBuildSub(builder, fetch_count, LLVMConstInt(LLVMInt32Type(), 1, 0), @@ -1002,35 +1005,42 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian lp_func_delete_body(variant->function_elts); } -void -draw_llvm_make_variant_key(struct draw_llvm *llvm, - struct draw_llvm_variant_key *key) + +struct draw_llvm_variant_key * +draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) { unsigned i; + struct draw_llvm_variant_key *key; + struct lp_sampler_static_state *sampler; - memset(key, 0, sizeof(struct draw_llvm_variant_key)); + key = (struct draw_llvm_variant_key *)store; + /* Presumably all variants of the shader should have the same + * number of vertex elements - ie the number of shader inputs. + */ key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; + /* All variants of this shader will have the same value for + * nr_samplers. Not yet trying to compact away holes in the + * sampler array. 
+ */ + key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + + sampler = draw_llvm_variant_key_samplers(key); + memcpy(key->vertex_element, llvm->draw->pt.vertex_element, sizeof(struct pipe_vertex_element) * key->nr_vertex_elements); + + memset(sampler, 0, key->nr_samplers * sizeof *sampler); - memcpy(&key->vs, - &llvm->draw->vs.vertex_shader->state, - sizeof(struct pipe_shader_state)); - - /* if the driver implemented the sampling hooks then - * setup our sampling state */ - if (llvm->draw->num_sampler_views && llvm->draw->num_samplers) { - for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) { - struct draw_vertex_shader *shader = llvm->draw->vs.vertex_shader; - if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) - lp_sampler_static_state(&key->sampler[i], - llvm->draw->sampler_views[i], - llvm->draw->samplers[i]); - } + for (i = 0 ; i < key->nr_samplers; i++) { + lp_sampler_static_state(&sampler[i], + llvm->draw->sampler_views[i], + llvm->draw->samplers[i]); } + + return key; } void diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 4addb47d2d..6196b2f983 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -151,12 +151,43 @@ typedef void struct draw_llvm_variant_key { - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - unsigned nr_vertex_elements; - struct pipe_shader_state vs; - struct lp_sampler_static_state sampler[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned nr_vertex_elements:16; + unsigned nr_samplers:16; + + /* Variable number of vertex elements: + */ + struct pipe_vertex_element vertex_element[1]; + + /* Followed by variable number of samplers: + */ +/* struct lp_sampler_static_state sampler; */ }; +#define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \ + (sizeof(struct draw_llvm_variant_key) + \ + PIPE_MAX_VERTEX_SAMPLERS * sizeof(struct lp_sampler_static_state) + \ + (PIPE_MAX_ATTRIBS-1) * sizeof(struct pipe_vertex_element)) + + 
+static INLINE size_t +draw_llvm_variant_key_size(unsigned nr_vertex_elements, + unsigned nr_samplers) +{ + return (sizeof(struct draw_llvm_variant_key) + + nr_samplers * sizeof(struct lp_sampler_static_state) + + (nr_vertex_elements - 1) * sizeof(struct pipe_vertex_element)); +} + + +static INLINE struct lp_sampler_static_state * +draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key) +{ + return (struct lp_sampler_static_state *) + &key->vertex_element[key->nr_vertex_elements]; +} + + + struct draw_llvm_variant_list_item { struct draw_llvm_variant *base; @@ -165,7 +196,6 @@ struct draw_llvm_variant_list_item struct draw_llvm_variant { - struct draw_llvm_variant_key key; LLVMValueRef function; LLVMValueRef function_elts; draw_jit_vert_func jit_func; @@ -176,11 +206,16 @@ struct draw_llvm_variant struct draw_llvm *llvm; struct draw_llvm_variant_list_item list_item_global; struct draw_llvm_variant_list_item list_item_local; + + /* key is variable-sized, must be last */ + struct draw_llvm_variant_key key; + /* key is variable-sized, must be last */ }; struct llvm_vertex_shader { struct draw_vertex_shader base; + unsigned variant_key_size; struct draw_llvm_variant_list_item variants; unsigned variants_created; unsigned variants_cached; @@ -220,14 +255,15 @@ void draw_llvm_destroy(struct draw_llvm *llvm); struct draw_llvm_variant * -draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs); +draw_llvm_create_variant(struct draw_llvm *llvm, + unsigned num_vertex_header_attribs, + const struct draw_llvm_variant_key *key); void draw_llvm_destroy_variant(struct draw_llvm_variant *variant); -void -draw_llvm_make_variant_key(struct draw_llvm *llvm, - struct draw_llvm_variant_key *key); +struct draw_llvm_variant_key * +draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store); LLVMValueRef draw_llvm_translate_from(LLVMBuilderRef builder, diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 
58995e0724..6206197dae 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -169,35 +169,27 @@ static void do_triangle( struct draw_context *draw, /* * Set up macros for draw_pt_decompose.h template code. * This code uses vertex indexes / elements. - * - * Flags are needed by the stipple and unfilled stages. When the two stages - * are active, vcache_run_extras is called and the flags are stored in the - * higher bits of i0. Otherwise, flags do not matter. */ #define TRIANGLE(flags,i0,i1,i2) \ do { \ - assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \ - assert(!((i2) & DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ - i0, /* flags */ \ - verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + flags, \ + verts + stride * (i0), \ verts + stride * (i1), \ verts + stride * (i2) ); \ } while (0) #define LINE(flags,i0,i1) \ do { \ - assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \ do_line( draw, \ - i0, /* flags */ \ - verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + flags, \ + verts + stride * (i0), \ verts + stride * (i1) ); \ } while (0) #define POINT(i0) \ do { \ - assert(!((i0) & DRAW_PIPE_FLAG_MASK)); \ do_point( draw, verts + stride * (i0) ); \ } while (0) @@ -207,6 +199,7 @@ static void do_triangle( struct draw_context *draw, #define FUNC_VARS \ struct draw_context *draw, \ unsigned prim, \ + unsigned prim_flags, \ struct vertex_header *vertices, \ unsigned stride, \ const ushort *elts, \ @@ -245,22 +238,27 @@ void draw_pipeline_run( struct draw_context *draw, const unsigned count = prim_info->primitive_lengths[i]; #if DEBUG - /* make sure none of the element indexes go outside the vertex buffer */ + /* Warn if one of the element indexes go outside the vertex buffer */ { unsigned max_index = 0x0, i; /* find the largest element index */ for (i = 0; i < count; i++) { - unsigned int index = (prim_info->elts[start + i] - & ~DRAW_PIPE_FLAG_MASK); + unsigned int index = prim_info->elts[start + i]; if (index > max_index) max_index = index; } - 
assert(max_index <= vert_info->count); + if (max_index >= vert_info->count) { + debug_printf("%s: max_index (%u) outside vertex buffer (%u)\n", + __FUNCTION__, + max_index, + vert_info->count); + } } #endif pipe_run_elts(draw, prim_info->prim, + prim_info->flags, vert_info->verts, vert_info->stride, prim_info->elts + start, @@ -298,6 +296,7 @@ void draw_pipeline_run( struct draw_context *draw, #define FUNC_VARS \ struct draw_context *draw, \ unsigned prim, \ + unsigned prim_flags, \ struct vertex_header *vertices, \ unsigned stride, \ unsigned count @@ -330,6 +329,7 @@ void draw_pipeline_run_linear( struct draw_context *draw, pipe_run_linear(draw, prim_info->prim, + prim_info->flags, (struct vertex_header*)verts, vert_info->stride, count); diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index eafa29276f..8b92543987 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -265,7 +265,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) /* Clip stage */ - if (!draw->bypass_clipping) + if (draw->clip_xy || draw->clip_z || draw->clip_user) { draw->pipeline.clip->next = next; next = draw->pipeline.clip; diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 3c93c9014a..58c5858734 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -353,9 +353,6 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf ) /* Allocate a new vertex buffer */ vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size; - /* even number */ - vbuf->max_vertices = vbuf->max_vertices & ~1; - if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID) vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 397d4bf653..362f563ba6 100644 --- 
a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -140,8 +140,7 @@ struct draw_context } middle; struct { - struct draw_pt_front_end *vcache; - struct draw_pt_front_end *varray; + struct draw_pt_front_end *vsplit; } front; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; @@ -150,6 +149,8 @@ struct draw_context struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned nr_vertex_elements; + struct pipe_index_buffer index_buffer; + /* user-space vertex data, buffers */ struct { /** vertex element/index buffer (ex: glDrawElements) */ @@ -175,13 +176,19 @@ struct draw_context } pt; struct { - boolean bypass_clipping; - boolean bypass_vs; + boolean bypass_clip_xy; + boolean bypass_clip_z; } driver; boolean flushing; /**< debugging/sanity */ boolean suspend_flushing; /**< internally set */ - boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */ + + /* Flags set if API requires clipping in these planes and the + * driver doesn't indicate that it can do it for us. + */ + boolean clip_xy; + boolean clip_z; + boolean clip_user; boolean force_passthrough; /**< never clip or shade */ @@ -296,6 +303,10 @@ struct draw_vertex_info { unsigned count; }; +/* these flags are set if the primitive is a segment of a larger one */ +#define DRAW_SPLIT_BEFORE 0x1 +#define DRAW_SPLIT_AFTER 0x2 + struct draw_prim_info { boolean linear; unsigned start; @@ -304,6 +315,7 @@ struct draw_prim_info { unsigned count; unsigned prim; + unsigned flags; unsigned *primitive_lengths; unsigned primitive_count; }; @@ -369,21 +381,15 @@ void draw_pipeline_destroy( struct draw_context *draw ); -/* We use the top few bits in the elts[] parameter to convey a little - * API information. This limits the number of vertices we can address - * to only 4096 -- if that becomes a problem, we can switch to 32-bit - * draw indices. 
- * - * These flags expected at first vertex of lines & triangles when - * unfilled and/or line stipple modes are operational. +/* + * These flags are used by the pipeline when unfilled and/or line stipple modes + * are operational. */ -#define DRAW_PIPE_MAX_VERTICES (0x1<<12) -#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12) -#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12) -#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12) -#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12) -#define DRAW_PIPE_RESET_STIPPLE (0x8<<12) -#define DRAW_PIPE_FLAG_MASK (0xf<<12) +#define DRAW_PIPE_EDGE_FLAG_0 0x1 +#define DRAW_PIPE_EDGE_FLAG_1 0x2 +#define DRAW_PIPE_EDGE_FLAG_2 0x4 +#define DRAW_PIPE_EDGE_FLAG_ALL 0x7 +#define DRAW_PIPE_RESET_STIPPLE 0x8 void draw_pipeline_run( struct draw_context *draw, const struct draw_vertex_info *vert, diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 248927505d..f44bf2507c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -39,25 +39,14 @@ #include "util/u_math.h" #include "util/u_prim.h" #include "util/u_format.h" +#include "util/u_draw.h" DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE) DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE) -#ifdef HAVE_LLVM -DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE) -#endif - -static unsigned trim( unsigned count, unsigned first, unsigned incr ) -{ - if (count < first) - return 0; - return count - (count - first) % incr; -} - - /* Overall we split things into: - * - frontend -- prepare fetch_elts, draw_elts - eg vcache + * - frontend -- prepare fetch_elts, draw_elts - eg vsplit * - middle -- fetch, shade, cliptest, viewport * - pipeline -- the prim pipeline: clipping, wide lines, etc * - backend -- the vbuf_render provided by the driver. 
@@ -77,7 +66,7 @@ draw_pt_arrays(struct draw_context *draw, { unsigned first, incr; draw_pt_split_prim(prim, &first, &incr); - count = trim(count, first, incr); + count = draw_pt_trim_count(count, first, incr); if (count < first) return TRUE; } @@ -97,7 +86,9 @@ draw_pt_arrays(struct draw_context *draw, opt |= PT_PIPELINE; } - if (!draw->bypass_clipping && !draw->pt.test_fse) { + if ((draw->clip_xy || + draw->clip_z || + draw->clip_user) && !draw->pt.test_fse) { opt |= PT_CLIPTEST; } @@ -115,22 +106,11 @@ draw_pt_arrays(struct draw_context *draw, middle = draw->pt.middle.general; } - - /* Pick the right frontend - */ - if (draw->pt.user.elts || (opt & PT_PIPELINE)) { - frontend = draw->pt.front.vcache; - } else { - frontend = draw->pt.front.varray; - } + frontend = draw->pt.front.vsplit; frontend->prepare( frontend, prim, middle, opt ); - frontend->run(frontend, - draw_pt_elt_func(draw), - draw_pt_elt_ptr(draw, start), - draw->pt.user.eltBias, - count); + frontend->run(frontend, start, count); frontend->finish( frontend ); @@ -143,12 +123,8 @@ boolean draw_pt_init( struct draw_context *draw ) draw->pt.test_fse = debug_get_option_draw_fse(); draw->pt.no_fse = debug_get_option_draw_no_fse(); - draw->pt.front.vcache = draw_pt_vcache( draw ); - if (!draw->pt.front.vcache) - return FALSE; - - draw->pt.front.varray = draw_pt_varray(draw); - if (!draw->pt.front.varray) + draw->pt.front.vsplit = draw_pt_vsplit(draw); + if (!draw->pt.front.vsplit) return FALSE; draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw ); @@ -164,7 +140,7 @@ boolean draw_pt_init( struct draw_context *draw ) return FALSE; #if HAVE_LLVM - if (debug_get_option_draw_use_llvm()) + if (draw->llvm) draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw ); #endif @@ -194,14 +170,9 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.middle.fetch_shade_emit = NULL; } - if (draw->pt.front.vcache) { - draw->pt.front.vcache->destroy( draw->pt.front.vcache ); - draw->pt.front.vcache = 
NULL; - } - - if (draw->pt.front.varray) { - draw->pt.front.varray->destroy( draw->pt.front.varray ); - draw->pt.front.varray = NULL; + if (draw->pt.front.vsplit) { + draw->pt.front.vsplit->destroy( draw->pt.front.vsplit ); + draw->pt.front.vsplit = NULL; } } @@ -221,24 +192,29 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) uint ii = 0; uint j; - if (draw->pt.user.elts) { + if (draw->pt.user.eltSize) { + const char *elts; + /* indexed arrays */ + elts = (const char *) draw->pt.user.elts; + elts += draw->pt.index_buffer.offset; + switch (draw->pt.user.eltSize) { case 1: { - const ubyte *elem = (const ubyte *) draw->pt.user.elts; + const ubyte *elem = (const ubyte *) elts; ii = elem[start + i]; } break; case 2: { - const ushort *elem = (const ushort *) draw->pt.user.elts; + const ushort *elem = (const ushort *) elts; ii = elem[start + i]; } break; case 4: { - const uint *elem = (const uint *) draw->pt.user.elts; + const uint *elem = (const uint *) elts; ii = elem[start + i]; } break; @@ -324,17 +300,8 @@ draw_arrays(struct draw_context *draw, unsigned prim, /** - * Draw vertex arrays. - * This is the main entrypoint into the drawing module. - * If drawing an indexed primitive, the draw_set_mapped_element_buffer_range() - * function should have already been called to specify the element/index buffer - * information. - * - * \param prim one of PIPE_PRIM_x - * \param start index of first vertex to draw - * \param count number of vertices to draw - * \param startInstance number for the first primitive instance (usually 0). - * \param instanceCount number of instances to draw (1=non-instanced) + * Instanced drawing. 
+ * \sa draw_vbo */ void draw_arrays_instanced(struct draw_context *draw, @@ -344,10 +311,50 @@ draw_arrays_instanced(struct draw_context *draw, unsigned startInstance, unsigned instanceCount) { - unsigned reduced_prim = u_reduced_prim(mode); + struct pipe_draw_info info; + + util_draw_init_info(&info); + + info.mode = mode; + info.start = start; + info.count = count; + info.start_instance = startInstance; + info.instance_count = instanceCount; + + info.indexed = (draw->pt.user.elts != NULL); + if (!info.indexed) { + info.min_index = start; + info.max_index = start + count - 1; + } + + draw_vbo(draw, &info); +} + + +/** + * Draw vertex arrays. + * This is the main entrypoint into the drawing module. If drawing an indexed + * primitive, the draw_set_index_buffer() and draw_set_mapped_index_buffer() + * functions should have already been called to specify the element/index + * buffer information. + */ +void +draw_vbo(struct draw_context *draw, + const struct pipe_draw_info *info) +{ + unsigned reduced_prim = u_reduced_prim(info->mode); unsigned instance; - assert(instanceCount > 0); + assert(info->instance_count > 0); + if (info->indexed) + assert(draw->pt.user.elts); + + draw->pt.user.eltSize = + (info->indexed) ? 
draw->pt.index_buffer.index_size : 0; + + draw->pt.user.eltBias = info->index_bias; + draw->pt.user.min_index = info->min_index; + draw->pt.user.max_index = info->max_index; if (reduced_prim != draw->reduced_prim) { draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); @@ -355,8 +362,8 @@ draw_arrays_instanced(struct draw_context *draw, } if (0) - debug_printf("draw_arrays(mode=%u start=%u count=%u):\n", - mode, start, count); + debug_printf("draw_vbo(mode=%u start=%u count=%u):\n", + info->mode, info->start, info->count); if (0) tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); @@ -384,10 +391,10 @@ draw_arrays_instanced(struct draw_context *draw, } if (0) - draw_print_arrays(draw, mode, start, MIN2(count, 20)); + draw_print_arrays(draw, info->mode, info->start, MIN2(info->count, 20)); - for (instance = 0; instance < instanceCount; instance++) { - draw->instance_id = instance + startInstance; - draw_pt_arrays(draw, mode, start, count); + for (instance = 0; instance < info->instance_count; instance++) { + draw->instance_id = instance + info->start_instance; + draw_pt_arrays(draw, info->mode, info->start, info->count); } } diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 44356fba4c..5fbb424291 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -35,8 +35,6 @@ #include "pipe/p_compiler.h" -typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx ); - struct draw_pt_middle_end; struct draw_context; struct draw_prim_info; @@ -52,13 +50,18 @@ struct draw_vertex_info; /* The "front end" - prepare sets of fetch, draw elements for the * middle end. * - * Currenly one version of this: - * - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims - * Later: - * - varray, varray_split - * - velement, velement_split + * The fetch elements are indices to the vertices. The draw elements are + * indices to the fetched vertices. 
When both arrays of elements are both + * linear, middle->run_linear is called; When only the fetch elements are + * linear, middle->run_linear_elts is called; Otherwise, middle->run is + * called. + * + * When the number of the draw elements exceeds max_vertex of the middle end, + * the draw elements (as well as the fetch elements) are splitted and the + * middle end is called multiple times. * - * Currenly only using the vcache version. + * Currenly there is: + * - vsplit - catchall implementation, splits big prims */ struct draw_pt_front_end { void (*prepare)( struct draw_pt_front_end *, @@ -67,9 +70,7 @@ struct draw_pt_front_end { unsigned opt ); void (*run)( struct draw_pt_front_end *, - pt_elt_func elt_func, - const void *elt_ptr, - int elt_bias, + unsigned start, unsigned count ); void (*finish)( struct draw_pt_front_end * ); @@ -80,6 +81,8 @@ struct draw_pt_front_end { /* The "middle end" - prepares actual hardware vertices for the * hardware backend. * + * prim_flags is as defined by pipe_draw_info::flags. + * * Currently two versions of this: * - fetch, vertex shade, cliptest, prim-pipeline * - fetch, emit (ie passthrough) @@ -94,11 +97,13 @@ struct draw_pt_middle_end { const unsigned *fetch_elts, unsigned fetch_count, const ushort *draw_elts, - unsigned draw_count ); + unsigned draw_count, + unsigned prim_flags ); void (*run_linear)(struct draw_pt_middle_end *, unsigned start, - unsigned count); + unsigned count, + unsigned prim_flags ); /* Transform all vertices in a linear range and then draw them with * the supplied element list. May fail and return FALSE. @@ -107,7 +112,8 @@ struct draw_pt_middle_end { unsigned fetch_start, unsigned fetch_count, const ushort *draw_elts, - unsigned draw_count ); + unsigned draw_count, + unsigned prim_flags ); int (*get_max_vertex_count)( struct draw_pt_middle_end * ); @@ -122,19 +128,11 @@ struct vbuf_render; struct vertex_header; -/* Helper functions. 
- */ -pt_elt_func draw_pt_elt_func( struct draw_context *draw ); -const void *draw_pt_elt_ptr( struct draw_context *draw, - unsigned start ); - /* Frontends: * - * Currently only the general-purpose vcache implementation, could add - * a special case for tiny vertex buffers. + * Currently only the general-purpose vsplit implementation. */ -struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); -struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); +struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw); /* Middle-ends: @@ -223,7 +221,9 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, struct draw_vertex_info *info ); void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, - boolean bypass_clipping, + boolean clip_xy, + boolean clip_z, + boolean clip_user, boolean bypass_viewport, boolean opengl, boolean need_edgeflags ); @@ -237,6 +237,7 @@ void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ); * Utils: */ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr); +unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr); #endif diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c deleted file mode 100644 index 88f4d9f495..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_elts.c +++ /dev/null @@ -1,89 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "draw/draw_pt.h" -#include "draw/draw_private.h" - -/* Neat get_elt func that also works for varrays drawing by encoding - * the start value into a pointer. 
- */ - -static unsigned elt_uint( const void *elts, unsigned idx ) -{ - return *(((const uint *)elts) + idx); -} - -static unsigned elt_ushort( const void *elts, unsigned idx ) -{ - return *(((const ushort *)elts) + idx); -} - -static unsigned elt_ubyte( const void *elts, unsigned idx ) -{ - return *(((const ubyte *)elts) + idx); -} - -static unsigned elt_vert( const void *elts, unsigned idx ) -{ - /* unsigned index is packed in the pointer */ - return (unsigned)(uintptr_t)elts + idx; -} - -pt_elt_func draw_pt_elt_func( struct draw_context *draw ) -{ - switch (draw->pt.user.eltSize) { - case 0: return &elt_vert; - case 1: return &elt_ubyte; - case 2: return &elt_ushort; - case 4: return &elt_uint; - default: return NULL; - } -} - -const void *draw_pt_elt_ptr( struct draw_context *draw, - unsigned start ) -{ - const char *elts = draw->pt.user.elts; - - switch (draw->pt.user.eltSize) { - case 0: - return (const void *)(((const ubyte *)NULL) + start); - case 1: - return (const void *)(((const ubyte *)elts) + start); - case 2: - return (const void *)(((const ushort *)elts) + start); - case 4: - return (const void *)(((const uint *)elts) + start); - default: - return NULL; - } -} diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 5568fbb9f8..c8dfc16911 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -120,9 +120,6 @@ void draw_pt_emit_prepare( struct pt_emit *emit, *max_vertices = (draw->render->max_vertex_buffer_bytes / (vinfo->size * 4)); - - /* even number */ - *max_vertices = *max_vertices & ~1; } @@ -147,11 +144,6 @@ void draw_pt_emit( struct pt_emit *emit, if (vertex_count == 0) return; - if (vertex_count >= UNDEFINED_VERTEX_ID) { - assert(0); - return; - } - /* XXX: and work out some way to coordinate the render primitive * between vbuf.c and here... 
*/ @@ -226,9 +218,6 @@ void draw_pt_emit_linear(struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count >= UNDEFINED_VERTEX_ID) - goto fail; - /* XXX: and work out some way to coordinate the render primitive * between vbuf.c and here... */ diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 5c8af17c8e..e706b7796f 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -191,15 +191,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, *max_vertices = (draw->render->max_vertex_buffer_bytes / (vinfo->size * 4)); - - /* Return an even number of verts. - * This prevents "parity" errors when splitting long triangle strips which - * can lead to front/back culling mix-ups. - * Every other triangle in a strip has an alternate front/back orientation - * so splitting at an odd position can cause the orientation of subsequent - * triangles to get reversed. 
- */ - *max_vertices = *max_vertices & ~1; } @@ -210,7 +201,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, const unsigned *fetch_elts, unsigned fetch_count, const ushort *draw_elts, - unsigned draw_count ) + unsigned draw_count, + unsigned prim_flags ) { struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; @@ -220,11 +212,6 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (fetch_count >= UNDEFINED_VERTEX_ID) { - assert(0); - return; - } - draw->render->allocate_vertices( draw->render, (ushort)feme->translate->key.output_stride, (ushort)fetch_count ); @@ -273,7 +260,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, unsigned start, - unsigned count ) + unsigned count, + unsigned prim_flags ) { struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; @@ -283,9 +271,6 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count >= UNDEFINED_VERTEX_ID) - goto fail; - if (!draw->render->allocate_vertices( draw->render, (ushort)feme->translate->key.output_stride, (ushort)count )) @@ -334,7 +319,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, unsigned start, unsigned count, const ushort *draw_elts, - unsigned draw_count ) + unsigned draw_count, + unsigned prim_flags ) { struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; @@ -344,9 +330,6 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count >= UNDEFINED_VERTEX_ID) - return FALSE; - if (!draw->render->allocate_vertices( draw->render, (ushort)feme->translate->key.output_stride, 
(ushort)count )) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index b8270280b6..7c198c6026 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -102,7 +102,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, fse->key.nr_inputs); /* inputs - fetch from api format */ fse->key.viewport = !draw->identity_viewport; - fse->key.clip = !draw->bypass_clipping; + fse->key.clip = draw->clip_xy || draw->clip_z || draw->clip_user; fse->key.const_vbuffers = 0; memset(fse->key.element, 0, @@ -175,15 +175,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle, *max_vertices = (draw->render->max_vertex_buffer_bytes / (vinfo->size * 4)); - /* Return an even number of verts. - * This prevents "parity" errors when splitting long triangle strips which - * can lead to front/back culling mix-ups. - * Every other triangle in a strip has an alternate front/back orientation - * so splitting at an odd position can cause the orientation of subsequent - * triangles to get reversed. 
- */ - *max_vertices = *max_vertices & ~1; - /* Probably need to do this somewhere (or fix exec shader not to * need it): */ @@ -197,7 +188,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle, static void fse_run_linear( struct draw_pt_middle_end *middle, unsigned start, - unsigned count ) + unsigned count, + unsigned prim_flags ) { struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; struct draw_context *draw = fse->draw; @@ -207,9 +199,6 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count >= UNDEFINED_VERTEX_ID) - goto fail; - if (!draw->render->allocate_vertices( draw->render, (ushort)fse->key.output_stride, (ushort)count )) @@ -265,7 +254,8 @@ fse_run(struct draw_pt_middle_end *middle, const unsigned *fetch_elts, unsigned fetch_count, const ushort *draw_elts, - unsigned draw_count ) + unsigned draw_count, + unsigned prim_flags ) { struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; struct draw_context *draw = fse->draw; @@ -275,9 +265,6 @@ fse_run(struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (fetch_count >= UNDEFINED_VERTEX_ID) - goto fail; - if (!draw->render->allocate_vertices( draw->render, (ushort)fse->key.output_stride, (ushort)fetch_count )) @@ -327,7 +314,8 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle, unsigned start, unsigned count, const ushort *draw_elts, - unsigned draw_count ) + unsigned draw_count, + unsigned prim_flags ) { struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; struct draw_context *draw = fse->draw; @@ -337,9 +325,6 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count >= UNDEFINED_VERTEX_ID) - return FALSE; - if (!draw->render->allocate_vertices( draw->render, (ushort)fse->key.output_stride, (ushort)count )) diff --git 
a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 5b16c3788e..b72fd61245 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -100,8 +100,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, * but gl vs dx9 clip spaces. */ draw_pt_post_vs_prepare( fpme->post_vs, - (boolean)draw->bypass_clipping, - (boolean)draw->identity_viewport, + draw->clip_xy, + draw->clip_z, + draw->clip_user, + draw->identity_viewport, (boolean)draw->rasterizer->gl_rasterization_rules, (draw->vs.edgeflag_output ? TRUE : FALSE) ); @@ -112,16 +114,13 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, gs_out_prim, max_vertices ); - *max_vertices = MAX2( *max_vertices, - DRAW_PIPE_MAX_VERTICES ); + *max_vertices = MAX2( *max_vertices, 4096 ); } else { - *max_vertices = DRAW_PIPE_MAX_VERTICES; + /* limit max fetches by limiting max_vertices */ + *max_vertices = 4096; } - /* return even number */ - *max_vertices = *max_vertices & ~1; - /* No need to prepare the shader. 
*/ vs->prepare(vs, draw); @@ -295,7 +294,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, const unsigned *fetch_elts, unsigned fetch_count, const ushort *draw_elts, - unsigned draw_count ) + unsigned draw_count, + unsigned prim_flags ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_fetch_info fetch_info; @@ -311,6 +311,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, prim_info.count = draw_count; prim_info.elts = draw_elts; prim_info.prim = fpme->input_prim; + prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &draw_count; @@ -320,7 +321,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, unsigned start, - unsigned count) + unsigned count, + unsigned prim_flags) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_fetch_info fetch_info; @@ -336,6 +338,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, prim_info.count = count; prim_info.elts = NULL; prim_info.prim = fpme->input_prim; + prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &count; @@ -348,7 +351,8 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle unsigned start, unsigned count, const ushort *draw_elts, - unsigned draw_count ) + unsigned draw_count, + unsigned prim_flags ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_fetch_info fetch_info; @@ -364,6 +368,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle prim_info.count = draw_count; prim_info.elts = draw_elts; prim_info.prim = fpme->input_prim; + prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &draw_count; diff --git 
a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 4b99bee86a..77291e304e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -66,7 +66,8 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct llvm_vertex_shader *shader = llvm_vertex_shader(draw->vs.vertex_shader); - struct draw_llvm_variant_key key; + char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE]; + struct draw_llvm_variant_key *key; struct draw_llvm_variant *variant = NULL; struct draw_llvm_variant_list_item *li; unsigned i; @@ -106,8 +107,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, * but gl vs dx9 clip spaces. */ draw_pt_post_vs_prepare( fpme->post_vs, - (boolean)draw->bypass_clipping, - (boolean)(draw->identity_viewport), + draw->clip_xy, + draw->clip_z, + draw->clip_user, + draw->identity_viewport, (boolean)draw->rasterizer->gl_rasterization_rules, (draw->vs.edgeflag_output ? 
TRUE : FALSE) ); @@ -118,21 +121,21 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, out_prim, max_vertices ); - *max_vertices = MAX2( *max_vertices, - DRAW_PIPE_MAX_VERTICES ); + *max_vertices = MAX2( *max_vertices, 4096 ); } else { - *max_vertices = DRAW_PIPE_MAX_VERTICES; + /* limit max fetches by limiting max_vertices */ + *max_vertices = 4096; } /* return even number */ *max_vertices = *max_vertices & ~1; - - draw_llvm_make_variant_key(fpme->llvm, &key); + + key = draw_llvm_make_variant_key(fpme->llvm, store); li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { - if(memcmp(&li->base->key, &key, sizeof key) == 0) { + if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) { variant = li->base; break; } @@ -155,7 +158,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, } } - variant = draw_llvm_create_variant(fpme->llvm, nr); + variant = draw_llvm_create_variant(fpme->llvm, nr, key); if (variant) { insert_at_head(&shader->variants, &variant->list_item_local); @@ -294,7 +297,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, const unsigned *fetch_elts, unsigned fetch_count, const ushort *draw_elts, - unsigned draw_count ) + unsigned draw_count, + unsigned prim_flags ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_fetch_info fetch_info; @@ -310,6 +314,7 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, prim_info.count = draw_count; prim_info.elts = draw_elts; prim_info.prim = fpme->input_prim; + prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &draw_count; @@ -319,7 +324,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, unsigned start, - unsigned count) + unsigned count, + unsigned prim_flags) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_fetch_info fetch_info; 
@@ -335,6 +341,7 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, prim_info.count = count; prim_info.elts = NULL; prim_info.prim = fpme->input_prim; + prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &count; @@ -348,7 +355,8 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, unsigned start, unsigned count, const ushort *draw_elts, - unsigned draw_count ) + unsigned draw_count, + unsigned prim_flags ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_fetch_info fetch_info; @@ -364,6 +372,7 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, prim_info.count = draw_count; prim_info.elts = draw_elts; prim_info.prim = fpme->input_prim; + prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &draw_count; diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 308f927b77..769409cfd6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -26,14 +26,26 @@ **************************************************************************/ #include "util/u_memory.h" +#include "util/u_math.h" #include "pipe/p_context.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" + +#define DO_CLIP_XY 0x1 +#define DO_CLIP_FULL_Z 0x2 +#define DO_CLIP_HALF_Z 0x4 +#define DO_CLIP_USER 0x8 +#define DO_VIEWPORT 0x10 +#define DO_EDGEFLAG 0x20 + + struct pt_post_vs { struct draw_context *draw; + unsigned flags; + boolean (*run)( struct pt_post_vs *pvs, struct draw_vertex_info *info ); }; @@ -56,186 +68,47 @@ dot4(const float *a, const float *b) a[3]*b[3]); } -static INLINE unsigned -compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr, - boolean clip_depth) -{ - unsigned mask = 0x0; - unsigned i; +#define FLAGS (0) +#define TAG(x) x##_none +#include 
"draw_cliptest_tmp.h" -#if 0 - debug_printf("compute clipmask %f %f %f %f\n", - clip[0], clip[1], clip[2], clip[3]); - assert(clip[3] != 0.0); -#endif +#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT) +#define TAG(x) x##_xy_fullz_viewport +#include "draw_cliptest_tmp.h" - /* Do the hardwired planes first: - */ - if (-clip[0] + clip[3] < 0) mask |= (1<<0); - if ( clip[0] + clip[3] < 0) mask |= (1<<1); - if (-clip[1] + clip[3] < 0) mask |= (1<<2); - if ( clip[1] + clip[3] < 0) mask |= (1<<3); - if (clip_depth) { - if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */ - if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */ - } +#define FLAGS (DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT) +#define TAG(x) x##_xy_halfz_viewport +#include "draw_cliptest_tmp.h" - /* Followed by any remaining ones: - */ - for (i = 6; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1<<i); - } +#define FLAGS (DO_CLIP_FULL_Z | DO_VIEWPORT) +#define TAG(x) x##_fullz_viewport +#include "draw_cliptest_tmp.h" - return mask; -} +#define FLAGS (DO_CLIP_HALF_Z | DO_VIEWPORT) +#define TAG(x) x##_halfz_viewport +#include "draw_cliptest_tmp.h" +#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT) +#define TAG(x) x##_xy_fullz_user_viewport +#include "draw_cliptest_tmp.h" -/* The normal case - cliptest, rhw divide, viewport transform. 
- * - * Also handle identity viewport here at the expense of a few wasted - * instructions - */ -static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, - struct draw_vertex_info *info ) -{ - struct vertex_header *out = info->verts; - const float *scale = pvs->draw->viewport.scale; - const float *trans = pvs->draw->viewport.translate; - const unsigned pos = draw_current_shader_position_output(pvs->draw); - unsigned clipped = 0; - unsigned j; - - if (0) debug_printf("%s count, %d\n", __FUNCTION__, info->count); - - for (j = 0; j < info->count; j++) { - float *position = out->data[pos]; - - initialize_vertex_header(out); -#if 0 - debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n", - j, out, position, position[0], position[1], position[2], position[3]); -#endif - - out->clip[0] = position[0]; - out->clip[1] = position[1]; - out->clip[2] = position[2]; - out->clip[3] = position[3]; - - out->vertex_id = 0xffff; - /* Disable depth clipping if depth clamping is enabled. */ - out->clipmask = compute_clipmask_gl(out->clip, - pvs->draw->plane, - pvs->draw->nr_planes, - !pvs->draw->depth_clamp); - clipped += out->clipmask; - - if (out->clipmask == 0) - { - /* divide by w */ - float w = 1.0f / position[3]; - - /* Viewport mapping */ - position[0] = position[0] * w * scale[0] + trans[0]; - position[1] = position[1] * w * scale[1] + trans[1]; - position[2] = position[2] * w * scale[2] + trans[2]; - position[3] = w; -#if 0 - debug_printf("post viewport: %f %f %f %f\n", - position[0], - position[1], - position[2], - position[3]); -#endif - } - - out = (struct vertex_header *)( (char *)out + info->stride ); - } - - return clipped != 0; -} +#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT | DO_EDGEFLAG) +#define TAG(x) x##_xy_fullz_user_viewport_edgeflag +#include "draw_cliptest_tmp.h" -/* As above plus edgeflags +/* Don't want to create 64 versions of this function, so catch the + * less common ones here. 
This is looking like something which should + * be code-generated, perhaps appended to the end of the vertex + * shader. */ -static boolean -post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs, - struct draw_vertex_info *info) -{ - unsigned j; - boolean needpipe; - - needpipe = post_vs_cliptest_viewport_gl(pvs, info); - - /* If present, copy edgeflag VS output into vertex header. - * Otherwise, leave header as is. - */ - if (pvs->draw->vs.edgeflag_output) { - struct vertex_header *out = info->verts; - int ef = pvs->draw->vs.edgeflag_output; - - for (j = 0; j < info->count; j++) { - const float *edgeflag = out->data[ef]; - out->edgeflag = !(edgeflag[0] != 1.0f); - needpipe |= !out->edgeflag; - out = (struct vertex_header *)( (char *)out + info->stride ); - } - } - return needpipe; -} - +#define FLAGS (pvs->flags) +#define TAG(x) x##_generic +#include "draw_cliptest_tmp.h" -/* If bypass_clipping is set, skip cliptest and rhw divide. - */ -static boolean post_vs_viewport( struct pt_post_vs *pvs, - struct draw_vertex_info *info ) -{ - struct vertex_header *out = info->verts; - const float *scale = pvs->draw->viewport.scale; - const float *trans = pvs->draw->viewport.translate; - const unsigned pos = draw_current_shader_position_output(pvs->draw); - unsigned j; - - if (0) debug_printf("%s\n", __FUNCTION__); - for (j = 0; j < info->count; j++) { - float *position = out->data[pos]; - - initialize_vertex_header(out); - /* Viewport mapping only, no cliptest/rhw divide - */ - position[0] = position[0] * scale[0] + trans[0]; - position[1] = position[1] * scale[1] + trans[1]; - position[2] = position[2] * scale[2] + trans[2]; - - out = (struct vertex_header *)((char *)out + info->stride); - } - - return FALSE; -} - - -/* If bypass_clipping is set and we have an identity viewport, nothing - * to do. 
- */ -static boolean post_vs_none( struct pt_post_vs *pvs, - struct draw_vertex_info *info ) -{ - struct vertex_header *out = info->verts; - unsigned j; - - if (0) debug_printf("%s\n", __FUNCTION__); - /* just initialize the vertex_id in all headers */ - for (j = 0; j < info->count; j++) { - initialize_vertex_header(out); - - out = (struct vertex_header *)((char *)out + info->stride); - } - return FALSE; -} - boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, struct draw_vertex_info *info ) { @@ -244,31 +117,72 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, - boolean bypass_clipping, + boolean clip_xy, + boolean clip_z, + boolean clip_user, boolean bypass_viewport, boolean opengl, boolean need_edgeflags ) { - if (!need_edgeflags) { - if (bypass_clipping) { - if (bypass_viewport) - pvs->run = post_vs_none; - else - pvs->run = post_vs_viewport; - } - else { - /* if (opengl) */ - pvs->run = post_vs_cliptest_viewport_gl; - } + pvs->flags = 0; + + if (clip_xy) + pvs->flags |= DO_CLIP_XY; + + if (clip_z && opengl) { + pvs->flags |= DO_CLIP_FULL_Z; + ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 1 ); + } + + if (clip_z && !opengl) { + pvs->flags |= DO_CLIP_HALF_Z; + ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 0 ); } - else { - /* If we need to copy edgeflags to the vertex header, it should - * mean we're running the primitive pipeline. Hence the bypass - * flags should be false. 
- */ - assert(!bypass_clipping); - assert(!bypass_viewport); - pvs->run = post_vs_cliptest_viewport_gl_edgeflag; + + if (clip_user) + pvs->flags |= DO_CLIP_USER; + + if (!bypass_viewport) + pvs->flags |= DO_VIEWPORT; + + if (need_edgeflags) + pvs->flags |= DO_EDGEFLAG; + + /* Now select the relevant function: + */ + switch (pvs->flags) { + case 0: + pvs->run = do_cliptest_none; + break; + + case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT: + pvs->run = do_cliptest_xy_fullz_viewport; + break; + + case DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT: + pvs->run = do_cliptest_xy_halfz_viewport; + break; + + case DO_CLIP_FULL_Z | DO_VIEWPORT: + pvs->run = do_cliptest_fullz_viewport; + break; + + case DO_CLIP_HALF_Z | DO_VIEWPORT: + pvs->run = do_cliptest_halfz_viewport; + break; + + case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT: + pvs->run = do_cliptest_xy_fullz_user_viewport; + break; + + case (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | + DO_VIEWPORT | DO_EDGEFLAG): + pvs->run = do_cliptest_xy_fullz_user_viewport_edgeflag; + break; + + default: + pvs->run = do_cliptest_generic; + break; } } diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index f7f4f24d35..c86bdd99a3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -225,7 +225,7 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2) #define FUNC so_run_elts #define LOCAL_VARS const ushort *elts = input_prims->elts; -#define GET_ELT(idx) (elts[start + (idx)] & ~DRAW_PIPE_FLAG_MASK) +#define GET_ELT(idx) (elts[start + (idx)]) #include "draw_so_emit_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c index 182a597cca..513bbbed21 100644 --- a/src/gallium/auxiliary/draw/draw_pt_util.c +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -92,3 +92,10 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) break; } 
} + +unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr) +{ + if (count < first) + return 0; + return count - (count - first) % incr; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c deleted file mode 100644 index cd7bb7bf25..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ /dev/null @@ -1,200 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include "util/u_math.h" -#include "util/u_memory.h" - -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_pt.h" - -#define FETCH_MAX 256 -#define DRAW_MAX (FETCH_MAX+8) - -struct varray_frontend { - struct draw_pt_front_end base; - struct draw_context *draw; - - ushort draw_elts[DRAW_MAX]; - unsigned fetch_elts[FETCH_MAX]; - - unsigned driver_fetch_max; - unsigned fetch_max; - - struct draw_pt_middle_end *middle; - - unsigned input_prim; - unsigned output_prim; -}; - - -static void varray_flush_linear(struct varray_frontend *varray, - unsigned start, unsigned count) -{ - if (count) { - assert(varray->middle->run_linear); - varray->middle->run_linear(varray->middle, start, count); - } -} - -static void varray_line_loop_segment(struct varray_frontend *varray, - unsigned start, - unsigned segment_start, - unsigned segment_count, - boolean end ) -{ - assert(segment_count < varray->fetch_max); - if (segment_count >= 1) { - unsigned nr = 0, i; - - for (i = 0; i < segment_count; i++) - varray->fetch_elts[nr++] = start + segment_start + i; - - if (end) - varray->fetch_elts[nr++] = start; - - assert(nr <= FETCH_MAX); - - varray->middle->run(varray->middle, - varray->fetch_elts, - nr, - varray->draw_elts, /* ie. linear */ - nr); - } -} - - - -static void varray_fan_segment(struct varray_frontend *varray, - unsigned start, - unsigned segment_start, - unsigned segment_count ) -{ - assert(segment_count < varray->fetch_max); - if (segment_count >= 2) { - unsigned nr = 0, i; - - if (segment_start != 0) - varray->fetch_elts[nr++] = start; - - for (i = 0 ; i < segment_count; i++) - varray->fetch_elts[nr++] = start + segment_start + i; - - assert(nr <= FETCH_MAX); - - varray->middle->run(varray->middle, - varray->fetch_elts, - nr, - varray->draw_elts, /* ie. 
linear */ - nr); - } -} - - - - -#define FUNC varray_run -#include "draw_pt_varray_tmp_linear.h" - -static unsigned decompose_prim[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY + 1] = { - PIPE_PRIM_POINTS, - PIPE_PRIM_LINES, - PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */ - PIPE_PRIM_LINE_STRIP, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLE_STRIP, - PIPE_PRIM_TRIANGLE_FAN, - PIPE_PRIM_QUADS, - PIPE_PRIM_QUAD_STRIP, - PIPE_PRIM_POLYGON, - PIPE_PRIM_LINES_ADJACENCY, - PIPE_PRIM_LINE_STRIP_ADJACENCY, - PIPE_PRIM_TRIANGLES_ADJACENCY, - PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY -}; - - - -static void varray_prepare(struct draw_pt_front_end *frontend, - unsigned in_prim, - struct draw_pt_middle_end *middle, - unsigned opt) -{ - struct varray_frontend *varray = (struct varray_frontend *)frontend; - - varray->base.run = varray_run; - - varray->input_prim = in_prim; - assert(in_prim < Elements(decompose_prim)); - varray->output_prim = decompose_prim[in_prim]; - - varray->middle = middle; - middle->prepare(middle, - varray->output_prim, - opt, &varray->driver_fetch_max ); - - /* check that the max is even */ - assert((varray->driver_fetch_max & 1) == 0); - - varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max); -} - - - - -static void varray_finish(struct draw_pt_front_end *frontend) -{ - struct varray_frontend *varray = (struct varray_frontend *)frontend; - varray->middle->finish(varray->middle); - varray->middle = NULL; -} - -static void varray_destroy(struct draw_pt_front_end *frontend) -{ - FREE(frontend); -} - - -struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw) -{ - ushort i; - struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend); - if (varray == NULL) - return NULL; - - varray->base.prepare = varray_prepare; - varray->base.run = NULL; - varray->base.finish = varray_finish; - varray->base.destroy = varray_destroy; - varray->draw = draw; - - for (i = 0; i < DRAW_MAX; i++) { - varray->draw_elts[i] = i; - } - - return &varray->base; -} diff --git 
a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h deleted file mode 100644 index 7c722457c3..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ /dev/null @@ -1,238 +0,0 @@ - -static void FUNC(struct draw_pt_front_end *frontend, - pt_elt_func get_elt, - const void *elts, - unsigned count) -{ - struct varray_frontend *varray = (struct varray_frontend *)frontend; - struct draw_context *draw = varray->draw; - unsigned start = (unsigned)elts; - - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i, j; - ushort flags; - unsigned first, incr; - - varray->fetch_start = start; - - draw_pt_split_prim(varray->input_prim, &first, &incr); - -#if 0 - debug_printf("%s (%d) %d/%d\n", __FUNCTION__, - varray->input_prim, - start, count); -#endif - - switch (varray->input_prim) { - case PIPE_PRIM_POINTS: - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i < end; i++) { - POINT(varray, i + 0); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - break; - - case PIPE_PRIM_LINES: - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+1 < end; i += 2) { - LINE(varray, DRAW_PIPE_RESET_STIPPLE, - i + 0, i + 1); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - flags = DRAW_PIPE_RESET_STIPPLE; - - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 1; i < end; i++, flags = 0) { - LINE(varray, flags, i - 1, i); - } - LINE(varray, flags, i - 1, 0); - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - } - break; - - case PIPE_PRIM_LINE_STRIP: - flags = DRAW_PIPE_RESET_STIPPLE; - for (j = 0; j + first <= count; j += i) { - unsigned end = 
MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 1; i < end; i++, flags = 0) { - LINE(varray, flags, i - 1, i); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+2 < end; i += 3) { - TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - i + 0, i + 1, i + 2); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+2 < end; i++) { - TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - i + 0, i + 1 + (i&1), i + 2 - (i&1)); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - if (j + first + i <= count) { - varray->fetch_start -= 2; - i -= 2; - } - } - } - else { - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i + 2 < end; i++) { - TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - i + 0 + (i&1), i + 1 - (i&1), i + 2); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - if (j + first + i <= count) { - varray->fetch_start -= 2; - i -= 2; - } - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+2 < end; i++) { - TRIANGLE(varray, flags, i + 1, i + 2, 0); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - } - else { - flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end 
% incr); - for (i = 0; i+2 < end; i++) { - TRIANGLE(varray, flags, 0, i + 1, i + 2); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - } - } - break; - - case PIPE_PRIM_QUADS: - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+3 < end; i += 4) { - QUAD(varray, i + 0, i + 1, i + 2, i + 3); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - break; - - case PIPE_PRIM_QUAD_STRIP: - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+3 < end; i += 2) { - QUAD(varray, i + 2, i + 0, i + 1, i + 3); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - if (j + first + i <= count) { - varray->fetch_start -= 2; - i -= 2; - } - } - break; - - case PIPE_PRIM_POLYGON: - { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; - const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; - const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; - - flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+2 < end; i++, flags = edge_middle) { - - if (i + 3 == count) - flags |= edge_last; - - TRIANGLE(varray, flags, i + 1, i + 2, 0); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - } - break; - - default: - assert(0); - break; - } - - varray_flush(varray); -} - -#undef TRIANGLE -#undef QUAD -#undef POINT -#undef LINE -#undef FUNC diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h deleted file mode 100644 index 55e43b2a71..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ /dev/null @@ -1,103 +0,0 @@ -static unsigned trim( unsigned 
count, unsigned first, unsigned incr ) -{ - /* - * count either has been trimmed in draw_pt_arrays or is set to - * (driver)_fetch_max which is hopefully always larger than first. - */ - assert(count >= first); - return count - (count - first) % incr; -} - -static void FUNC(struct draw_pt_front_end *frontend, - pt_elt_func get_elt, - const void *elts, - int elt_bias, - unsigned count) -{ - struct varray_frontend *varray = (struct varray_frontend *)frontend; - unsigned start = (unsigned) ((char *) elts - (char *) NULL); - - unsigned j; - unsigned first, incr; - - assert(elt_bias == 0); - - draw_pt_split_prim(varray->input_prim, &first, &incr); - - /* Sanitize primitive length: - */ - count = trim(count, first, incr); - if (count < first) - return; - -#if 0 - debug_printf("%s (%d) %d/%d\n", __FUNCTION__, - varray->input_prim, - start, count); -#endif - - switch (varray->input_prim) { - case PIPE_PRIM_POINTS: - case PIPE_PRIM_LINES: - case PIPE_PRIM_TRIANGLES: - case PIPE_PRIM_LINE_STRIP: - case PIPE_PRIM_TRIANGLE_STRIP: - case PIPE_PRIM_QUADS: - case PIPE_PRIM_QUAD_STRIP: - case PIPE_PRIM_LINES_ADJACENCY: - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - case PIPE_PRIM_TRIANGLES_ADJACENCY: - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - for (j = 0; j < count;) { - unsigned remaining = count - j; - unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr ); - varray_flush_linear(varray, start + j, nr); - j += nr; - if (nr != remaining) - j -= (first - incr); - } - break; - - case PIPE_PRIM_LINE_LOOP: - /* Always have to decompose as we've stated that this will be - * emitted as a line-strip. 
- */ - for (j = 0; j < count;) { - unsigned remaining = count - j; - unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr ); - varray_line_loop_segment(varray, start, j, nr, nr == remaining); - j += nr; - if (nr != remaining) - j -= (first - incr); - } - break; - - - case PIPE_PRIM_POLYGON: - case PIPE_PRIM_TRIANGLE_FAN: - if (count < varray->driver_fetch_max) { - varray_flush_linear(varray, start, count); - } - else { - for ( j = 0; j < count;) { - unsigned remaining = count - j; - unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr ); - varray_fan_segment(varray, start, j, nr); - j += nr; - if (nr != remaining) - j -= (first - incr); - } - } - break; - - default: - assert(0); - break; - } -} - -#undef TRIANGLE -#undef QUAD -#undef POINT -#undef LINE -#undef FUNC diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c deleted file mode 100644 index a848b54f7d..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ /dev/null @@ -1,610 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "util/u_memory.h" -#include "util/u_prim.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_pt.h" - - -#define CACHE_MAX 256 -#define FETCH_MAX 256 -#define DRAW_MAX (16*1024) - - -struct vcache_frontend { - struct draw_pt_front_end base; - struct draw_context *draw; - - unsigned in[CACHE_MAX]; - ushort out[CACHE_MAX]; - - ushort draw_elts[DRAW_MAX]; - unsigned fetch_elts[FETCH_MAX]; - - unsigned draw_count; - unsigned fetch_count; - unsigned fetch_max; - - struct draw_pt_middle_end *middle; - - unsigned input_prim; - unsigned output_prim; - - unsigned middle_prim; - unsigned opt; -}; - - -static INLINE void -vcache_flush( struct vcache_frontend *vcache ) -{ - if (vcache->middle_prim != vcache->output_prim) { - vcache->middle_prim = vcache->output_prim; - vcache->middle->prepare( vcache->middle, - vcache->middle_prim, - vcache->opt, - &vcache->fetch_max ); - } - - if (vcache->draw_count) { - vcache->middle->run( vcache->middle, - vcache->fetch_elts, - vcache->fetch_count, - vcache->draw_elts, - vcache->draw_count ); - } - - memset(vcache->in, ~0, sizeof(vcache->in)); - vcache->fetch_count = 0; - vcache->draw_count = 0; -} - - -static INLINE void -vcache_check_flush( struct vcache_frontend *vcache ) -{ - if (vcache->draw_count + 6 >= DRAW_MAX || - vcache->fetch_count + 6 >= 
FETCH_MAX) { - vcache_flush( vcache ); - } -} - - -static INLINE void -vcache_elt( struct vcache_frontend *vcache, - unsigned felt, - ushort flags ) -{ - unsigned idx = felt % CACHE_MAX; - - if (vcache->in[idx] != felt) { - assert(vcache->fetch_count < FETCH_MAX); - - vcache->in[idx] = felt; - vcache->out[idx] = (ushort)vcache->fetch_count; - vcache->fetch_elts[vcache->fetch_count++] = felt; - } - - vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags; -} - - - -static INLINE void -vcache_triangle( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1, - unsigned i2 ) -{ - vcache_elt(vcache, i0, 0); - vcache_elt(vcache, i1, 0); - vcache_elt(vcache, i2, 0); - vcache_check_flush(vcache); -} - - -static INLINE void -vcache_triangle_flags( struct vcache_frontend *vcache, - ushort flags, - unsigned i0, - unsigned i1, - unsigned i2 ) -{ - vcache_elt(vcache, i0, flags); - vcache_elt(vcache, i1, 0); - vcache_elt(vcache, i2, 0); - vcache_check_flush(vcache); -} - - -static INLINE void -vcache_line( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1 ) -{ - vcache_elt(vcache, i0, 0); - vcache_elt(vcache, i1, 0); - vcache_check_flush(vcache); -} - - -static INLINE void -vcache_line_flags( struct vcache_frontend *vcache, - ushort flags, - unsigned i0, - unsigned i1 ) -{ - vcache_elt(vcache, i0, flags); - vcache_elt(vcache, i1, 0); - vcache_check_flush(vcache); -} - - -static INLINE void -vcache_point( struct vcache_frontend *vcache, - unsigned i0 ) -{ - vcache_elt(vcache, i0, 0); - vcache_check_flush(vcache); -} - - -static INLINE void -vcache_line_adj_flags( struct vcache_frontend *vcache, - unsigned flags, - unsigned a0, unsigned i0, unsigned i1, unsigned a1 ) -{ - vcache_elt(vcache, a0, 0); - vcache_elt(vcache, i0, flags); - vcache_elt(vcache, i1, 0); - vcache_elt(vcache, a1, 0); - vcache_check_flush(vcache); -} - - -static INLINE void -vcache_line_adj( struct vcache_frontend *vcache, - unsigned a0, unsigned i0, unsigned i1, unsigned a1 ) -{ - 
vcache_elt(vcache, a0, 0); - vcache_elt(vcache, i0, 0); - vcache_elt(vcache, i1, 0); - vcache_elt(vcache, a1, 0); - vcache_check_flush(vcache); -} - - -static INLINE void -vcache_triangle_adj_flags( struct vcache_frontend *vcache, - unsigned flags, - unsigned i0, unsigned a0, - unsigned i1, unsigned a1, - unsigned i2, unsigned a2 ) -{ - vcache_elt(vcache, i0, flags); - vcache_elt(vcache, a0, 0); - vcache_elt(vcache, i1, 0); - vcache_elt(vcache, a1, 0); - vcache_elt(vcache, i2, 0); - vcache_elt(vcache, a2, 0); - vcache_check_flush(vcache); -} - - -static INLINE void -vcache_triangle_adj( struct vcache_frontend *vcache, - unsigned i0, unsigned a0, - unsigned i1, unsigned a1, - unsigned i2, unsigned a2 ) -{ - vcache_elt(vcache, i0, 0); - vcache_elt(vcache, a0, 0); - vcache_elt(vcache, i1, 0); - vcache_elt(vcache, a1, 0); - vcache_elt(vcache, i2, 0); - vcache_elt(vcache, a2, 0); - vcache_check_flush(vcache); -} - - -/* At least for now, we're back to using a template include file for - * this. The two paths aren't too different though - it may be - * possible to reunify them. 
- */ -#define TRIANGLE(flags,i0,i1,i2) vcache_triangle_flags(vcache,flags,i0,i1,i2) -#define LINE(flags,i0,i1) vcache_line_flags(vcache,flags,i0,i1) -#define POINT(i0) vcache_point(vcache,i0) -#define LINE_ADJ(flags,a0,i0,i1,a1) \ - vcache_line_adj_flags(vcache,flags,a0,i0,i1,a1) -#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \ - vcache_triangle_adj_flags(vcache,flags,i0,a0,i1,a1,i2,a2) -#define FUNC vcache_run_extras -#include "draw_pt_vcache_tmp.h" - -#define TRIANGLE(flags,i0,i1,i2) vcache_triangle(vcache,i0,i1,i2) -#define LINE(flags,i0,i1) vcache_line(vcache,i0,i1) -#define POINT(i0) vcache_point(vcache,i0) -#define LINE_ADJ(flags,a0,i0,i1,a1) \ - vcache_line_adj(vcache,a0,i0,i1,a1) -#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \ - vcache_triangle_adj(vcache,i0,a0,i1,a1,i2,a2) -#define FUNC vcache_run -#include "draw_pt_vcache_tmp.h" - -static INLINE void -rebase_uint_elts( const unsigned *src, - unsigned count, - int delta, - ushort *dest ) -{ - unsigned i; - for (i = 0; i < count; i++) - dest[i] = (ushort)(src[i] + delta); -} - - -static INLINE void -rebase_ushort_elts( const ushort *src, - unsigned count, - int delta, - ushort *dest ) -{ - unsigned i; - for (i = 0; i < count; i++) - dest[i] = (ushort)(src[i] + delta); -} - - -static INLINE void -rebase_ubyte_elts( const ubyte *src, - unsigned count, - int delta, - ushort *dest ) -{ - unsigned i; - for (i = 0; i < count; i++) - dest[i] = (ushort)(src[i] + delta); -} - - -static INLINE void -translate_uint_elts( const unsigned *src, - unsigned count, - ushort *dest ) -{ - unsigned i; - for (i = 0; i < count; i++) - dest[i] = (ushort)(src[i]); -} - - -static INLINE void -translate_ushort_elts( const ushort *src, - unsigned count, - ushort *dest ) -{ - unsigned i; - for (i = 0; i < count; i++) - dest[i] = (ushort)(src[i]); -} - - -static INLINE void -translate_ubyte_elts( const ubyte *src, - unsigned count, - ushort *dest ) -{ - unsigned i; - for (i = 0; i < count; i++) - dest[i] = (ushort)(src[i]); -} - - - - 
-#if 0 -static INLINE enum pipe_format -format_from_get_elt( pt_elt_func get_elt ) -{ - switch (draw->pt.user.eltSize) { - case 1: return PIPE_FORMAT_R8_UNORM; - case 2: return PIPE_FORMAT_R16_UNORM; - case 4: return PIPE_FORMAT_R32_UNORM; - default: return PIPE_FORMAT_NONE; - } -} -#endif - - -/** - * Check if any vertex attributes use instance divisors. - * Note that instance divisors complicate vertex fetching so we need - * to take the vcache path when they're in use. - */ -static boolean -any_instance_divisors(const struct draw_context *draw) -{ - uint i; - - for (i = 0; i < draw->pt.nr_vertex_elements; i++) { - uint div = draw->pt.vertex_element[i].instance_divisor; - if (div) - return TRUE; - } - return FALSE; -} - - -static INLINE void -vcache_check_run( struct draw_pt_front_end *frontend, - pt_elt_func get_elt, - const void *elts, - int elt_bias, - unsigned draw_count ) -{ - struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - struct draw_context *draw = vcache->draw; - const unsigned min_index = draw->pt.user.min_index; - const unsigned max_index = draw->pt.user.max_index; - const unsigned index_size = draw->pt.user.eltSize; - unsigned fetch_count; - const ushort *transformed_elts; - ushort *storage = NULL; - boolean ok = FALSE; - - /* debug: verify indexes are in range [min_index, max_index] */ - if (0) { - unsigned i; - for (i = 0; i < draw_count; i++) { - if (index_size == 1) { - assert( ((const ubyte *) elts)[i] >= min_index); - assert( ((const ubyte *) elts)[i] <= max_index); - } - else if (index_size == 2) { - assert( ((const ushort *) elts)[i] >= min_index); - assert( ((const ushort *) elts)[i] <= max_index); - } - else { - assert(index_size == 4); - assert( ((const uint *) elts)[i] >= min_index); - assert( ((const uint *) elts)[i] <= max_index); - } - } - } - - /* Note: max_index is frequently 0xffffffff so we have to be sure - * that any arithmetic involving max_index doesn't overflow! 
- */ - if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES) - goto fail; - - if (any_instance_divisors(draw)) - goto fail; - - fetch_count = max_index + 1 - min_index; - - if (0) - debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, - vcache->fetch_max, - draw_count); - - if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES || - fetch_count >= UNDEFINED_VERTEX_ID || - fetch_count > draw_count) { - if (0) debug_printf("fail\n"); - goto fail; - } - - if (vcache->middle_prim != vcache->input_prim) { - vcache->middle_prim = vcache->input_prim; - vcache->middle->prepare( vcache->middle, - vcache->middle_prim, - vcache->opt, - &vcache->fetch_max ); - } - - assert((elt_bias >= 0 && min_index + elt_bias >= min_index) || - (elt_bias < 0 && min_index + elt_bias < min_index)); - - if (min_index == 0 && - index_size == 2) { - transformed_elts = (const ushort *)elts; - } - else { - storage = MALLOC( draw_count * sizeof(ushort) ); - if (!storage) - goto fail; - - if (min_index == 0) { - switch(index_size) { - case 1: - translate_ubyte_elts( (const ubyte *)elts, - draw_count, - storage ); - break; - - case 2: - translate_ushort_elts( (const ushort *)elts, - draw_count, - storage ); - break; - - case 4: - translate_uint_elts( (const uint *)elts, - draw_count, - storage ); - break; - - default: - assert(0); - FREE(storage); - return; - } - } - else { - switch(index_size) { - case 1: - rebase_ubyte_elts( (const ubyte *)elts, - draw_count, - 0 - (int)min_index, - storage ); - break; - - case 2: - rebase_ushort_elts( (const ushort *)elts, - draw_count, - 0 - (int)min_index, - storage ); - break; - - case 4: - rebase_uint_elts( (const uint *)elts, - draw_count, - 0 - (int)min_index, - storage ); - break; - - default: - assert(0); - FREE(storage); - return; - } - } - transformed_elts = storage; - } - - if (fetch_count < UNDEFINED_VERTEX_ID) - ok = vcache->middle->run_linear_elts( vcache->middle, - min_index + elt_bias, /* start */ - fetch_count, - transformed_elts, - 
draw_count ); - - FREE(storage); - - if (ok) - return; - - debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n", - fetch_count, draw_count); - -fail: - vcache_run( frontend, get_elt, elts, elt_bias, draw_count ); -} - - - - -static void -vcache_prepare( struct draw_pt_front_end *frontend, - unsigned in_prim, - struct draw_pt_middle_end *middle, - unsigned opt ) -{ - struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - - if (opt & PT_PIPELINE) { - vcache->base.run = vcache_run_extras; - } - else { - vcache->base.run = vcache_check_run; - } - - /* VCache will always emit the reduced version of its input - * primitive, ie STRIP/FANS become TRIS, etc. - * - * This is not to be confused with what the GS might be up to, - * which is a separate issue. - */ - vcache->input_prim = in_prim; - switch (in_prim) { - case PIPE_PRIM_LINES_ADJACENCY: - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - vcache->output_prim = PIPE_PRIM_LINES_ADJACENCY; - break; - case PIPE_PRIM_TRIANGLES_ADJACENCY: - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - vcache->output_prim = PIPE_PRIM_TRIANGLES_ADJACENCY; - break; - default: - vcache->output_prim = u_reduced_prim(in_prim); - } - - vcache->middle = middle; - vcache->opt = opt; - - /* Have to run prepare here, but try and guess a good prim for - * doing so: - */ - vcache->middle_prim = (opt & PT_PIPELINE) - ? 
vcache->output_prim : vcache->input_prim; - - middle->prepare( middle, - vcache->middle_prim, - opt, &vcache->fetch_max ); -} - - -static void -vcache_finish( struct draw_pt_front_end *frontend ) -{ - struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - vcache->middle->finish( vcache->middle ); - vcache->middle = NULL; -} - - -static void -vcache_destroy( struct draw_pt_front_end *frontend ) -{ - FREE(frontend); -} - - -struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ) -{ - struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend ); - if (vcache == NULL) - return NULL; - - vcache->base.prepare = vcache_prepare; - vcache->base.run = NULL; - vcache->base.finish = vcache_finish; - vcache->base.destroy = vcache_destroy; - vcache->draw = draw; - - memset(vcache->in, ~0, sizeof(vcache->in)); - - return &vcache->base; -} diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h deleted file mode 100644 index 1a3748d5f0..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ /dev/null @@ -1,19 +0,0 @@ -#define FUNC_VARS \ - struct draw_pt_front_end *frontend, \ - pt_elt_func get_elt, \ - const void *elts, \ - int elt_bias, \ - unsigned count - -#define LOCAL_VARS \ - struct vcache_frontend *vcache = (struct vcache_frontend *) frontend; \ - struct draw_context *draw = vcache->draw; \ - const unsigned prim = vcache->input_prim; \ - const boolean last_vertex_last = !(draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first); - -#define GET_ELT(idx) (get_elt(elts, idx) + elt_bias) - -#define FUNC_EXIT do { vcache_flush(vcache); } while (0) - -#include "draw_decompose_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c new file mode 100644 index 0000000000..a687525309 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c @@ -0,0 +1,208 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 
+ * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+#define SEGMENT_SIZE 1024
+#define MAP_SIZE 256
+
+/**
+ * Front end that splits a primitive into segments and hands each segment to
+ * the middle end.
+ *
+ * prim, middle, max_vertices and segment_size are (re)assigned by
+ * vsplit_prepare(); identity_draw_elts is filled once by draw_pt_vsplit().
+ */
+struct vsplit_frontend {
+   struct draw_pt_front_end base;
+   struct draw_context *draw;
+
+   unsigned prim;
+
+   struct draw_pt_middle_end *middle;
+
+   unsigned max_vertices; /* written by middle->prepare() */
+   ushort segment_size;   /* MIN2(SEGMENT_SIZE, max_vertices) */
+
+   /* buffers for splitting */
+   unsigned fetch_elts[SEGMENT_SIZE];
+   ushort draw_elts[SEGMENT_SIZE];
+   ushort identity_draw_elts[SEGMENT_SIZE]; /* identity_draw_elts[i] == i */
+
+   struct {
+      /* map a fetch element to a draw element */
+      unsigned fetches[MAP_SIZE];
+      ushort draws[MAP_SIZE];
+      boolean has_max_fetch; /* see vsplit_add_cache_uint() */
+
+      ushort num_fetch_elts;
+      ushort num_draw_elts;
+   } cache;
+};
+
+
+/**
+ * Reset the cache: every fetches[] slot becomes 0xffffffff, has_max_fetch is
+ * cleared and both element counters restart at zero.
+ */
+static void
+vsplit_clear_cache(struct vsplit_frontend *vsplit)
+{
+   memset(vsplit->cache.fetches, 0xff, sizeof(vsplit->cache.fetches));
+   vsplit->cache.has_max_fetch = FALSE;
+   vsplit->cache.num_fetch_elts = 0;
+   vsplit->cache.num_draw_elts = 0;
+}
+
+/**
+ * Pass the fetch and draw elements accumulated so far to the middle end's
+ * run() hook, together with the caller's flags.  The cache itself is not
+ * reset here.
+ */
+static void
+vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
+{
+   vsplit->middle->run(vsplit->middle,
+         vsplit->fetch_elts, vsplit->cache.num_fetch_elts,
+         vsplit->draw_elts, vsplit->cache.num_draw_elts, flags);
+}
+
+/**
+ * Add a fetch element and add it to the draw elements.
+ *
+ * The slot is chosen by fetch % MAP_SIZE.  On a miss the slot is overwritten
+ * and fetch is appended to fetch_elts; in both cases the slot's draw index is
+ * appended to draw_elts.
+ */
+static INLINE void
+vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+   unsigned hash = fetch % MAP_SIZE;
+
+   if (vsplit->cache.fetches[hash] != fetch) {
+      /* update cache */
+      vsplit->cache.fetches[hash] = fetch;
+      vsplit->cache.draws[hash] = vsplit->cache.num_fetch_elts;
+
+      /* add fetch */
+      assert(vsplit->cache.num_fetch_elts < vsplit->segment_size);
+      vsplit->fetch_elts[vsplit->cache.num_fetch_elts++] = fetch;
+   }
+
+   vsplit->draw_elts[vsplit->cache.num_draw_elts++] = vsplit->cache.draws[hash];
+}
+
+
+/**
+ * Add a fetch element and add it to the draw elements.  The fetch element is
+ * in full range (uint).
+ *
+ * A cleared slot holds 0xffffffff (see vsplit_clear_cache()), so the first
+ * real 0xffffffff would look like a hit; the slot is changed beforehand so
+ * that vsplit_add_cache() takes its miss path.
+ */
+static INLINE void
+vsplit_add_cache_uint(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+   /* special care for 0xffffffff */
+   if (fetch == 0xffffffff && !vsplit->cache.has_max_fetch) {
+      unsigned hash = fetch % MAP_SIZE;
+      vsplit->cache.fetches[hash] = fetch - 1; /* force update */
+      vsplit->cache.has_max_fetch = TRUE;
+   }
+
+   vsplit_add_cache(vsplit, fetch);
+}
+
+
+/*
+ * Each #include below instantiates draw_pt_vsplit_tmp.h once.  FUNC names
+ * the generated run function; ELT_TYPE and ADD_CACHE select the index type
+ * and the cache-insert helper.  The first instantiation defines neither and
+ * produces the non-indexed (linear) variant.
+ */
+#define FUNC vsplit_run_linear
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ubyte
+#define ELT_TYPE ubyte
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ushort
+#define ELT_TYPE ushort
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_uint
+#define ELT_TYPE uint
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache_uint(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+
+/**
+ * Select the run function from the current index size (0 means no index
+ * buffer), record the primitive and middle end, and let the middle end
+ * report max_vertices, from which segment_size is derived.
+ *
+ * NOTE(review): for an unexpected eltSize only assert(0) fires; base.run
+ * keeps whatever value it had before.
+ */
+static void vsplit_prepare(struct draw_pt_front_end *frontend,
+                           unsigned in_prim,
+                           struct draw_pt_middle_end *middle,
+                           unsigned opt)
+{
+   struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+
+   switch (vsplit->draw->pt.user.eltSize) {
+   case 0:
+      vsplit->base.run = vsplit_run_linear;
+      break;
+   case 1:
+      vsplit->base.run = vsplit_run_ubyte;
+      break;
+   case 2:
+      vsplit->base.run = vsplit_run_ushort;
+      break;
+   case 4:
+      vsplit->base.run = vsplit_run_uint;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   /* split only */
+   vsplit->prim = in_prim;
+
+   vsplit->middle = middle;
+   middle->prepare(middle, vsplit->prim, opt, &vsplit->max_vertices);
+
+   vsplit->segment_size = MIN2(SEGMENT_SIZE, vsplit->max_vertices);
+}
+
+
+/**
+ * Finish the middle end chosen in vsplit_prepare() and drop the reference.
+ */
+static void vsplit_finish(struct draw_pt_front_end *frontend)
+{
+   struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+   vsplit->middle->finish(vsplit->middle);
+   vsplit->middle = NULL;
+}
+
+
+/**
+ * Release the front end allocated by draw_pt_vsplit().
+ */
+static void vsplit_destroy(struct draw_pt_front_end *frontend)
+{
+   FREE(frontend);
+}
+
+
+/**
+ * Allocate a vsplit front end for the given draw context.
+ *
+ * Returns NULL when the allocation fails.  base.run is left NULL here and is
+ * assigned by vsplit_prepare().
+ */
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw)
+{
+   struct vsplit_frontend *vsplit = CALLOC_STRUCT(vsplit_frontend);
+   ushort i;
+
+   if (!vsplit)
+      return NULL;
+
+   vsplit->base.prepare = vsplit_prepare;
+   vsplit->base.run = NULL;
+   vsplit->base.finish = vsplit_finish;
+   vsplit->base.destroy = vsplit_destroy;
+   vsplit->draw = draw;
+
+   /* draw elements used when fetch_elts is already in draw order */
+   for (i = 0; i < SEGMENT_SIZE; i++)
+      vsplit->identity_draw_elts[i] = i;
+
+   return &vsplit->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
new file mode 100644
index 0000000000..3f66f962e1
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -0,0 +1,309 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* two-level paste so that ELT_TYPE is expanded before being concatenated */
+#define CONCAT2(name, elt_type) name ## elt_type
+#define CONCAT(name, elt_type) CONCAT2(name, elt_type)
+
+#ifdef ELT_TYPE
+
+/**
+ * Fetch all elements in [min_index, max_index] with bias, and use the
+ * (rebased) index buffer as the draw elements.
+ *
+ * istart is the position of the primitive's first element in the index
+ * buffer and icount the number of elements.
+ *
+ * Returns FALSE when this path declines the primitive (too many elements,
+ * index range wider than the element count, a bias that would make the
+ * first fetch negative, or an instanced vertex element); otherwise returns
+ * whatever the middle end's run_linear_elts() returns.
+ */
+static boolean
+CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                    unsigned istart, unsigned icount)
+{
+   struct draw_context *draw = vsplit->draw;
+   const ELT_TYPE *ib = (const ELT_TYPE *)
+      ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
+   const unsigned min_index = draw->pt.user.min_index;
+   const unsigned max_index = draw->pt.user.max_index;
+   const int elt_bias = draw->pt.user.eltBias;
+   unsigned fetch_start, fetch_count;
+   const ushort *draw_elts = NULL;
+   unsigned i;
+
+   /* use the ib directly */
+   if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
+      if (icount > vsplit->max_vertices)
+         return FALSE;
+
+      for (i = 0; i < icount; i++) {
+         ELT_TYPE idx = ib[istart + i];
+         assert(idx >= min_index && idx <= max_index);
+      }
+      /*
+       * Point at the primitive's first element, not at the start of the
+       * buffer, so that element 0 is ib[istart] exactly as in the copying
+       * path below.
+       */
+      draw_elts = (const ushort *) (ib + istart);
+   }
+   else {
+      /* have to go through vsplit->draw_elts */
+      if (icount > vsplit->segment_size)
+         return FALSE;
+   }
+
+   /* this is faster only when we fetch less elements than the normal path */
+   if (max_index - min_index > icount - 1)
+      return FALSE;
+
+   /* the first fetched vertex, min_index + elt_bias, must not be negative */
+   if (elt_bias < 0 && min_index < -elt_bias)
+      return FALSE;
+
+   /* why this check? */
+   for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+      if (draw->pt.vertex_element[i].instance_divisor)
+         return FALSE;
+   }
+
+   fetch_start = min_index + elt_bias;
+   fetch_count = max_index - min_index + 1;
+
+   if (!draw_elts) {
+      /* copy the indices into draw_elts, rebased so that min_index maps to 0 */
+      if (min_index == 0) {
+         for (i = 0; i < icount; i++) {
+            ELT_TYPE idx = ib[istart + i];
+
+            assert(idx >= min_index && idx <= max_index);
+            vsplit->draw_elts[i] = (ushort) idx;
+         }
+      }
+      else {
+         for (i = 0; i < icount; i++) {
+            ELT_TYPE idx = ib[istart + i];
+
+            assert(idx >= min_index && idx <= max_index);
+            vsplit->draw_elts[i] = (ushort) (idx - min_index);
+         }
+      }
+
+      draw_elts = vsplit->draw_elts;
+   }
+
+   return vsplit->middle->run_linear_elts(vsplit->middle,
+                                          fetch_start, fetch_count,
+                                          draw_elts, icount, 0x0);
+}
+
+/**
+ * Use the cache to prepare the fetch and draw elements, and flush.
+ *
+ * When spoken is TRUE, ispoken replaces istart;  When close is TRUE, iclose is
+ * appended.
+ *
+ * With a negative bias, the function returns without flushing as soon as an
+ * index is smaller than -bias, i.e. the whole segment is dropped.
+ */
+static INLINE void
+CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                        unsigned flags,
+                                        unsigned istart, unsigned icount,
+                                        boolean spoken, unsigned ispoken,
+                                        boolean close, unsigned iclose)
+{
+   struct draw_context *draw = vsplit->draw;
+   const ELT_TYPE *ib = (const ELT_TYPE *)
+      ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
+   const int ibias = draw->pt.user.eltBias;
+   unsigned i;
+
+   assert(icount + !!close <= vsplit->segment_size);
+
+   vsplit_clear_cache(vsplit);
+
+   /* normalize to 0/1: spoken doubles as the first index of the loops below */
+   spoken = !!spoken;
+   if (ibias == 0) {
+      if (spoken)
+         ADD_CACHE(vsplit, ib[ispoken]);
+
+      for (i = spoken; i < icount; i++)
+         ADD_CACHE(vsplit, ib[istart + i]);
+
+      if (close)
+         ADD_CACHE(vsplit, ib[iclose]);
+   }
+   else if (ibias > 0) {
+      if (spoken)
+         ADD_CACHE(vsplit, (uint) ib[ispoken] + ibias);
+
+      for (i = spoken; i < icount; i++)
+         ADD_CACHE(vsplit, (uint) ib[istart + i] + ibias);
+
+      if (close)
+         ADD_CACHE(vsplit, (uint) ib[iclose] + ibias);
+   }
+   else {
+      if (spoken) {
+         if (ib[ispoken] < -ibias)
+            return;
+         ADD_CACHE(vsplit, ib[ispoken] + ibias);
+      }
+
+      for (i = spoken; i < icount; i++) {
+         if (ib[istart + i] < -ibias)
+            return;
+         ADD_CACHE(vsplit, ib[istart + i] + ibias);
+      }
+
+      if (close) {
+         if (ib[iclose] < -ibias)
+            return;
+         ADD_CACHE(vsplit, ib[iclose] + ibias);
+      }
+   }
+
+   vsplit_flush_cache(vsplit, flags);
+}
+
+/** Flush a plain segment: no spoke, no closing element. */
+static void
+CONCAT(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                         unsigned flags,
+                                         unsigned istart,
+                                         unsigned icount)
+{
+   CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+         flags, istart, icount, FALSE, 0, FALSE, 0);
+}
+
+/**
+ * Flush a segment of a loop.  When flags is exactly DRAW_SPLIT_BEFORE, the
+ * element at i0 is appended after the segment.
+ */
+static void
+CONCAT(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                       unsigned flags,
+                                       unsigned istart,
+                                       unsigned icount,
+                                       unsigned i0)
+{
+   const boolean close_loop = ((flags) == DRAW_SPLIT_BEFORE);
+
+   CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+         flags, istart, icount, FALSE, 0, close_loop, i0);
+}
+
+/**
+ * Flush a segment of a fan.  When DRAW_SPLIT_BEFORE is set in flags, the
+ * element at i0 takes the place of the segment's first element.
+ */
+static void
+CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                      unsigned flags,
+                                      unsigned istart,
+                                      unsigned icount,
+                                      unsigned i0)
+{
+   const boolean use_spoken = (((flags) & DRAW_SPLIT_BEFORE) != 0);
+
+   CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+         flags, istart, icount, use_spoken, i0, FALSE, 0);
+}
+
+/* locals expected by draw_split_tmp.h (indexed variants) */
+#define LOCAL_VARS \
+   struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+   const unsigned prim = vsplit->prim; \
+   const unsigned max_count_simple = vsplit->segment_size; \
+   const unsigned max_count_loop = vsplit->segment_size - 1; \
+   const unsigned max_count_fan = vsplit->segment_size;
+
+#define PRIMITIVE(istart, icount) \
+   CONCAT(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount)
+
+#else /* ELT_TYPE */
+
+/** Flush a plain run of consecutive vertices through run_linear(). */
+static void
+vsplit_segment_simple_linear(struct vsplit_frontend *vsplit, unsigned flags,
+                             unsigned istart, unsigned icount)
+{
+   assert(icount <= vsplit->max_vertices);
+   vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+}
+
+/**
+ * Flush a segment of a loop.  When flags is exactly DRAW_SPLIT_BEFORE, the
+ * vertex i0 is appended, which requires going through fetch_elts; otherwise
+ * the segment is a plain linear run.
+ */
+static void
+vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,
+                           unsigned istart, unsigned icount, unsigned i0)
+{
+   boolean close_loop = (flags == DRAW_SPLIT_BEFORE);
+   unsigned nr;
+
+   assert(icount + !!close_loop <= vsplit->segment_size);
+
+   if (close_loop) {
+      for (nr = 0; nr < icount; nr++)
+         vsplit->fetch_elts[nr] = istart + nr;
+      vsplit->fetch_elts[nr++] = i0;
+
+      vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+            vsplit->identity_draw_elts, nr, flags);
+   }
+   else {
+      vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+   }
+}
+
+/**
+ * Flush a segment of a fan.  When DRAW_SPLIT_BEFORE is set in flags, the
+ * vertex i0 takes the place of the segment's first vertex, which requires
+ * going through fetch_elts; otherwise the segment is a plain linear run.
+ */
+static void
+vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags,
+                          unsigned istart, unsigned icount, unsigned i0)
+{
+   boolean use_spoken = ((flags & DRAW_SPLIT_BEFORE) != 0);
+   unsigned nr = 0, i;
+
+   /*
+    * The spoke replaces element 0 (the loop below starts at 1), so exactly
+    * icount entries of fetch_elts are written; a full segment of
+    * segment_size vertices is therefore valid.
+    */
+   assert(icount <= vsplit->segment_size);
+
+   if (use_spoken) {
+      vsplit->fetch_elts[nr++] = i0;
+      for (i = 1 ; i < icount; i++)
+         vsplit->fetch_elts[nr++] = istart + i;
+
+      vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+            vsplit->identity_draw_elts, nr, flags);
+   }
+   else {
+      vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+   }
+}
+
+/* locals expected by draw_split_tmp.h (linear variant) */
+#define LOCAL_VARS \
+   struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+   const unsigned prim = vsplit->prim; \
+   const unsigned max_count_simple = vsplit->max_vertices; \
+   const unsigned max_count_loop = vsplit->segment_size - 1; \
+   const unsigned max_count_fan = vsplit->segment_size;
+
+/* there is no whole-primitive fast path without an index buffer */
+#define PRIMITIVE(istart, icount) FALSE
+
+#define ELT_TYPE linear
+
+#endif /* ELT_TYPE */
+
+#define FUNC_VARS \
+   struct draw_pt_front_end *frontend, \
+   unsigned start, \
+   unsigned count
+
+#define SEGMENT_SIMPLE(flags, istart, icount) \
+   CONCAT(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount)
+
+#define SEGMENT_LOOP(flags, istart, icount, i0) \
+   CONCAT(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#define SEGMENT_FAN(flags, istart, icount, i0) \
+   CONCAT(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#include "draw_split_tmp.h"
+
+#undef CONCAT2
+#undef CONCAT
+
+#undef ELT_TYPE
+#undef ADD_CACHE
diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h index 6d8937a0b4..7fafde9d5e 100644 --- a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h +++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h @@ -7,11 +7,9 @@ #define FUNC_ENTER \ /* declare more local vars */ \ - struct draw_context *draw = so->draw; \ const unsigned prim = input_prims->prim; \ - const boolean last_vertex_last = \ - !(draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first); \ + const unsigned prim_flags = input_prims->flags; \ + const boolean last_vertex_last = TRUE; \ do { \ debug_assert(input_prims->primitive_count == 1); \ switch (prim) { \
diff --git a/src/gallium/auxiliary/draw/draw_split_tmp.h b/src/gallium/auxiliary/draw/draw_split_tmp.h
new file mode 100644
index 0000000000..47defc62b9
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_split_tmp.h
@@ -0,0 +1,176 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Template body: split a primitive of count vertices starting at start into
+ * segments and emit each one through SEGMENT_SIMPLE, SEGMENT_LOOP or
+ * SEGMENT_FAN.
+ *
+ * The includer supplies FUNC, FUNC_VARS, LOCAL_VARS, PRIMITIVE and the three
+ * SEGMENT_* macros; all of them are #undef'ed at the end of this file.
+ *
+ * The first segment of a split primitive is emitted with DRAW_SPLIT_AFTER,
+ * middle segments with DRAW_SPLIT_AFTER | DRAW_SPLIT_BEFORE, and the last
+ * one with DRAW_SPLIT_BEFORE only.
+ */
+static void
+FUNC(FUNC_VARS)
+{
+   unsigned first, incr;
+   LOCAL_VARS
+
+   /*
+    * prim, start, count, and max_count_{simple,loop,fan} should have been
+    * defined
+    */
+   if (0) {
+      debug_printf("%s: prim 0x%x, start %d, count %d, max_count_simple %d, "
+                   "max_count_loop %d, max_count_fan %d\n",
+                   __FUNCTION__, prim, start, count, max_count_simple,
+                   max_count_loop, max_count_fan);
+   }
+
+   draw_pt_split_prim(prim, &first, &incr);
+   /* sanitize primitive length */
+   count = draw_pt_trim_count(count, first, incr);
+   if (count < first)
+      return;
+
+   /* try flushing the entire primitive */
+   if (PRIMITIVE(start, count))
+      return;
+
+   /* must be able to at least flush two complete primitives */
+   assert(max_count_simple >= first + incr &&
+          max_count_loop >= first + incr &&
+          max_count_fan >= first + incr);
+
+   /* no splitting required */
+   if (count <= max_count_simple) {
+      SEGMENT_SIMPLE(0x0, start, count);
+   }
+   else {
+      /* vertices re-sent at the start of the next segment */
+      const unsigned rollback = first - incr;
+      unsigned flags = DRAW_SPLIT_AFTER, seg_start = 0, seg_max;
+
+      /*
+       * Both count and seg_max below are explicitly trimmed.  Because
+       *
+       *   seg_start = N * (seg_max - rollback) = N' * incr,
+       *
+       * we have
+       *
+       *   remaining = count - seg_start = first + N'' * incr.
+       *
+       * That is, remaining is implicitly trimmed.
+       */
+      switch (prim) {
+      case PIPE_PRIM_POINTS:
+      case PIPE_PRIM_LINES:
+      case PIPE_PRIM_LINE_STRIP:
+      case PIPE_PRIM_TRIANGLES:
+      case PIPE_PRIM_TRIANGLE_STRIP:
+      case PIPE_PRIM_QUADS:
+      case PIPE_PRIM_QUAD_STRIP:
+      case PIPE_PRIM_LINES_ADJACENCY:
+      case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      case PIPE_PRIM_TRIANGLES_ADJACENCY:
+      case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+         seg_max =
+            draw_pt_trim_count(MIN2(max_count_simple, count), first, incr);
+         if (prim == PIPE_PRIM_TRIANGLE_STRIP ||
+             prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY) {
+            /* make sure we flush even number of triangles at a time */
+            if (seg_max < count && !(((seg_max - first) / incr) & 1))
+               seg_max -= incr;
+         }
+
+         do {
+            const unsigned remaining = count - seg_start;
+
+            if (remaining > seg_max) {
+               SEGMENT_SIMPLE(flags, start + seg_start, seg_max);
+               seg_start += seg_max - rollback;
+
+               flags |= DRAW_SPLIT_BEFORE;
+            }
+            else {
+               /* last segment */
+               flags &= ~DRAW_SPLIT_AFTER;
+
+               SEGMENT_SIMPLE(flags, start + seg_start, remaining);
+               seg_start += remaining;
+            }
+         } while (seg_start < count);
+         break;
+
+      case PIPE_PRIM_LINE_LOOP:
+         seg_max =
+            draw_pt_trim_count(MIN2(max_count_loop, count), first, incr);
+
+         /* every segment is also given start, the loop's first vertex */
+         do {
+            const unsigned remaining = count - seg_start;
+
+            if (remaining > seg_max) {
+               SEGMENT_LOOP(flags, start + seg_start, seg_max, start);
+               seg_start += seg_max - rollback;
+
+               flags |= DRAW_SPLIT_BEFORE;
+            }
+            else {
+               /* last segment */
+               flags &= ~DRAW_SPLIT_AFTER;
+
+               SEGMENT_LOOP(flags, start + seg_start, remaining, start);
+               seg_start += remaining;
+            }
+         } while (seg_start < count);
+         break;
+
+      case PIPE_PRIM_TRIANGLE_FAN:
+      case PIPE_PRIM_POLYGON:
+         seg_max =
+            draw_pt_trim_count(MIN2(max_count_fan, count), first, incr);
+
+         /* every segment is also given start, the fan's first vertex */
+         do {
+            const unsigned remaining = count - seg_start;
+
+            if (remaining > seg_max) {
+               SEGMENT_FAN(flags, start + seg_start, seg_max, start);
+               seg_start += seg_max - rollback;
+
+               flags |= DRAW_SPLIT_BEFORE;
+            }
+            else {
+               /* last segment */
+               flags &= ~DRAW_SPLIT_AFTER;
+
+               SEGMENT_FAN(flags, start + seg_start, remaining, start);
+               seg_start += remaining;
+            }
+         } while (seg_start < count);
+         break;
+
+      default:
+         assert(0);
+         break;
+      }
+   }
+}
+
+#undef FUNC
+#undef FUNC_VARS
+#undef LOCAL_VARS
+
+#undef PRIMITIVE
+#undef SEGMENT_SIMPLE
+#undef SEGMENT_LOOP
+#undef SEGMENT_FAN
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index d13ad24fff..fa9992db78 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -28,6 +28,7 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "pipe/p_shader_tokens.h" +#include "pipe/p_screen.h" #include "draw_private.h" #include "draw_context.h" @@ -109,6 +110,11 @@ draw_create_vs_llvm(struct draw_context *draw, tgsi_scan_shader(state->tokens, &vs->base.info); + vs->variant_key_size = + draw_llvm_variant_key_size( + vs->base.info.file_max[TGSI_FILE_INPUT]+1, + vs->base.info.file_max[TGSI_FILE_SAMPLER]+1); + vs->base.draw = draw; vs->base.prepare = vs_llvm_prepare; vs->base.run_linear = vs_llvm_run_linear; |