diff options
Diffstat (limited to 'src/gallium/auxiliary/draw')
48 files changed, 2381 insertions, 5735 deletions
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 5ab3cfe5ce..bc6acfe458 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -4,40 +4,36 @@ include $(TOP)/configs/current LIBNAME = draw C_SOURCES = \ - draw_aaline.c \ - draw_aapoint.c \ - draw_clip.c \ - draw_vs_exec.c \ - draw_vs_sse.c \ - draw_vs_llvm.c \ - draw_context.c\ - draw_cull.c \ - draw_debug.c \ - draw_flatshade.c \ - draw_offset.c \ + draw_context.c \ + draw_pipe.c \ + draw_pipe_aaline.c \ + draw_pipe_aapoint.c \ + draw_pipe_clip.c \ + draw_pipe_cull.c \ + draw_pipe_flatshade.c \ + draw_pipe_offset.c \ + draw_pipe_pstipple.c \ + draw_pipe_stipple.c \ + draw_pipe_twoside.c \ + draw_pipe_unfilled.c \ + draw_pipe_util.c \ + draw_pipe_validate.c \ + draw_pipe_vbuf.c \ + draw_pipe_wide_line.c \ + draw_pipe_wide_point.c \ draw_pt.c \ - draw_pt_vcache.c \ + draw_pt_elts.c \ + draw_pt_emit.c \ + draw_pt_fetch.c \ draw_pt_fetch_emit.c \ - draw_pt_fetch_pipeline.c \ draw_pt_fetch_shade_pipeline.c \ - draw_pt_pipeline.c \ - draw_pt_elts.c \ - draw_prim.c \ - draw_pstipple.c \ - draw_stipple.c \ - draw_twoside.c \ - draw_unfilled.c \ - draw_validate.c \ - draw_vbuf.c \ + draw_pt_post_vs.c \ + draw_pt_vcache.c \ draw_vertex.c \ - draw_vertex_cache.c \ - draw_vertex_fetch.c \ - draw_vertex_shader.c \ - draw_vf.c \ - draw_vf_generic.c \ - draw_vf_sse.c \ - draw_wide_line.c \ - draw_wide_point.c + draw_vs.c \ + draw_vs_exec.c \ + draw_vs_llvm.c \ + draw_vs_sse.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index a7fb5dbd61..0b9852f633 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -3,40 +3,36 @@ Import('*') draw = env.ConvenienceLibrary( target = 'draw', source = [ - 'draw_aaline.c', - 'draw_aapoint.c', - 'draw_clip.c', - 'draw_vs_exec.c', - 'draw_vs_sse.c', - 'draw_vs_llvm.c', 'draw_context.c', - 'draw_cull.c', - 'draw_debug.c', - 'draw_flatshade.c', - 'draw_offset.c', + 'draw_pipe.c', + 'draw_pipe_aaline.c', + 'draw_pipe_aapoint.c', + 'draw_pipe_clip.c', + 'draw_pipe_cull.c', + 'draw_pipe_flatshade.c', + 'draw_pipe_offset.c', + 'draw_pipe_pstipple.c', + 'draw_pipe_stipple.c', + 'draw_pipe_twoside.c', + 'draw_pipe_unfilled.c', + 'draw_pipe_util.c', + 'draw_pipe_validate.c', + 'draw_pipe_vbuf.c', + 'draw_pipe_wide_line.c', + 'draw_pipe_wide_point.c', 'draw_pt.c', - 'draw_pt_vcache.c', + 'draw_pt_elts.c', + 'draw_pt_emit.c', + 'draw_pt_fetch.c', 'draw_pt_fetch_emit.c', - 'draw_pt_fetch_pipeline.c', 'draw_pt_fetch_shade_pipeline.c', - 'draw_pt_pipeline.c', - 'draw_pt_elts.c', - 'draw_prim.c', - 'draw_pstipple.c', - 'draw_stipple.c', - 'draw_twoside.c', - 'draw_unfilled.c', - 'draw_validate.c', - 'draw_vbuf.c', + 'draw_pt_post_vs.c', + 'draw_pt_vcache.c', 'draw_vertex.c', - 'draw_vertex_cache.c', - 'draw_vertex_fetch.c', - 'draw_vertex_shader.c', - 'draw_vf.c', - 'draw_vf_generic.c', - 'draw_vf_sse.c', - 'draw_wide_point.c', - 'draw_wide_line.c' + 'draw_vs.c', + 'draw_vs_exec.c', + 'draw_vs_llvm.c', + 'draw_vs_sse.c', ]) auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 0c314f6e1d..f90187816b 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -33,8 +33,10 @@ #include "pipe/p_util.h" #include "draw_context.h" -#include "draw_private.h" #include "draw_vbuf.h" +#include "draw_vs.h" +#include "draw_pt.h" +#include "draw_pipe.h" struct draw_context *draw_create( void ) @@ -43,40 +45,6 @@ struct draw_context *draw_create( void ) if (draw == NULL) goto fail; -#if defined(__i386__) || defined(__386__) - draw->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; -#else - draw->use_sse = FALSE; -#endif - - draw->use_pt_shaders = GETENV( "GALLIUM_PT_SHADERS" ) != NULL; - - /* create pipeline stages */ - draw->pipeline.wide_line = draw_wide_line_stage( draw ); - draw->pipeline.wide_point = draw_wide_point_stage( draw ); - draw->pipeline.stipple = draw_stipple_stage( draw ); - draw->pipeline.unfilled = draw_unfilled_stage( draw ); - draw->pipeline.twoside = draw_twoside_stage( draw ); - draw->pipeline.offset = draw_offset_stage( draw ); - draw->pipeline.clip = draw_clip_stage( draw ); - draw->pipeline.flatshade = draw_flatshade_stage( draw ); - draw->pipeline.cull = draw_cull_stage( draw ); - draw->pipeline.validate = draw_validate_stage( draw ); - draw->pipeline.first = draw->pipeline.validate; - - if (!draw->pipeline.wide_line || - !draw->pipeline.wide_point || - !draw->pipeline.stipple || - !draw->pipeline.unfilled || - !draw->pipeline.twoside || - !draw->pipeline.offset || - !draw->pipeline.clip || - !draw->pipeline.flatshade || - !draw->pipeline.cull || - !draw->pipeline.validate) - goto fail; - - ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); @@ -85,28 +53,18 @@ struct draw_context *draw_create( void ) ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */ draw->nr_planes = 6; - /* Statically allocate maximum sized vertices for the cache - could be cleverer... - */ - { - char *tmp = align_malloc(VS_QUEUE_LENGTH * MAX_VERTEX_ALLOCATION, 16); - if (!tmp) - goto fail; - - draw->vs.vertex_cache = tmp; - } - draw->shader_queue_flush = draw_vertex_shader_queue_flush; + draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ - /* these defaults are oriented toward the needs of softpipe */ - draw->wide_point_threshold = 1000000.0; /* infinity */ - draw->wide_line_threshold = 1.0; - draw->line_stipple = TRUE; - draw->point_sprite = TRUE; + tgsi_exec_machine_init(&draw->machine); - draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ + /* FIXME: give this machine thing a proper constructor: + */ + draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); - draw_vertex_cache_invalidate( draw ); - draw_set_mapped_element_buffer( draw, 0, NULL ); + if (!draw_pipeline_init( draw )) + goto fail; if (!draw_pt_init( draw )) goto fail; @@ -124,39 +82,14 @@ void draw_destroy( struct draw_context *draw ) if (!draw) return; - if (draw->pipeline.wide_line) - draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); - if (draw->pipeline.wide_point) - draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); - if (draw->pipeline.stipple) - draw->pipeline.stipple->destroy( draw->pipeline.stipple ); - if (draw->pipeline.unfilled) - draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); - if (draw->pipeline.twoside) - draw->pipeline.twoside->destroy( draw->pipeline.twoside ); - if (draw->pipeline.offset) - draw->pipeline.offset->destroy( draw->pipeline.offset ); - if (draw->pipeline.clip) - draw->pipeline.clip->destroy( draw->pipeline.clip ); - if (draw->pipeline.flatshade) - draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); - if (draw->pipeline.cull) - draw->pipeline.cull->destroy( draw->pipeline.cull ); - if (draw->pipeline.validate) - draw->pipeline.validate->destroy( draw->pipeline.validate ); - if (draw->pipeline.aaline) - draw->pipeline.aaline->destroy( draw->pipeline.aaline ); - if (draw->pipeline.aapoint) - draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); - if (draw->pipeline.pstipple) - draw->pipeline.pstipple->destroy( draw->pipeline.pstipple ); - if (draw->pipeline.rasterize) - draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); + + if (draw->machine.Inputs) + align_free(draw->machine.Inputs); + + if (draw->machine.Outputs) + align_free(draw->machine.Outputs); tgsi_exec_machine_free_data(&draw->machine); - - if (draw->vs.vertex_cache) - align_free( draw->vs.vertex_cache ); /* Frees all the vertices. */ /* Not so fast -- we're just borrowing this at the moment. * @@ -164,6 +97,7 @@ void draw_destroy( struct draw_context *draw ) draw->render->destroy( draw->render ); */ + draw_pipeline_destroy( draw ); draw_pt_destroy( draw ); FREE( draw ); @@ -188,6 +122,20 @@ void draw_set_rasterizer_state( struct draw_context *draw, draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->rasterizer = raster; + draw->bypass_clipping = + ((draw->rasterizer && draw->rasterizer->bypass_clipping) || + draw->driver.bypass_clipping); +} + + +void draw_set_driver_clipping( struct draw_context *draw, + boolean bypass_clipping ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + draw->driver.bypass_clipping = bypass_clipping; + draw->bypass_clipping = (draw->rasterizer->bypass_clipping || + draw->driver.bypass_clipping); } @@ -246,9 +194,8 @@ draw_set_vertex_buffers(struct draw_context *draw, { assert(count <= PIPE_MAX_ATTRIBS); - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - - memcpy(draw->vertex_buffer, buffers, count * sizeof(buffers[0])); + memcpy(draw->pt.vertex_buffer, buffers, count * sizeof(buffers[0])); + draw->pt.nr_vertex_buffers = count; } @@ -259,9 +206,8 @@ draw_set_vertex_elements(struct draw_context *draw, { assert(count <= PIPE_MAX_ATTRIBS); - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - - memcpy(draw->vertex_element, elements, count * sizeof(elements[0])); + memcpy(draw->pt.vertex_element, elements, count * sizeof(elements[0])); + draw->pt.nr_vertex_elements = count; } @@ -272,8 +218,7 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer) { - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - draw->user.vbuffer[attr] = buffer; + draw->pt.user.vbuffer[attr] = buffer; } @@ -281,8 +226,7 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw, const void *buffer) { - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - draw->user.constants = buffer; + draw->pt.user.constants = buffer; } @@ -294,7 +238,7 @@ void draw_wide_point_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->wide_point_threshold = threshold; + draw->pipeline.wide_point_threshold = threshold; } @@ -306,7 +250,7 @@ void draw_wide_line_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->wide_line_threshold = threshold; + draw->pipeline.wide_line_threshold = threshold; } @@ -317,7 +261,7 @@ void draw_enable_line_stipple(struct draw_context *draw, boolean enable) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->line_stipple = enable; + draw->pipeline.line_stipple = enable; } @@ -328,7 +272,7 @@ void draw_enable_point_sprites(struct draw_context *draw, boolean enable) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->point_sprite = enable; + draw->pipeline.point_sprite = enable; } @@ -383,79 +327,54 @@ draw_num_vs_outputs(struct draw_context *draw) } -/** - * Allocate space for temporary post-transform vertices, such as for clipping. - */ -void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) -{ - assert(!stage->tmp); - - stage->nr_tmps = nr; - - if (nr) { - ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); - unsigned i; - - stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); - - for (i = 0; i < nr; i++) - stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); - } -} - -void draw_free_temp_verts( struct draw_stage *stage ) +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ) { - if (stage->tmp) { - FREE( stage->tmp[0] ); - FREE( stage->tmp ); - stage->tmp = NULL; - } + draw->render = render; } - -boolean draw_use_sse(struct draw_context *draw) +void draw_set_edgeflags( struct draw_context *draw, + const unsigned *edgeflag ) { - return (boolean) draw->use_sse; + draw->pt.user.edgeflag = edgeflag; } -void draw_reset_vertex_ids(struct draw_context *draw) -{ - struct draw_stage *stage = draw->pipeline.first; - - while (stage) { - unsigned i; - - for (i = 0; i < stage->nr_tmps; i++) - stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; - stage = stage->next; - } - draw_vertex_cache_reset_vertex_ids(draw); /* going away soon */ - draw_pt_reset_vertex_ids(draw); -} - - -void draw_set_render( struct draw_context *draw, - struct vbuf_render *render ) +/** + * Tell the drawing context about the index/element buffer to use + * (ala glDrawElements) + * If no element buffer is to be used (i.e. glDrawArrays) then this + * should be called with eltSize=0 and elements=NULL. + * + * \param draw the drawing context + * \param eltSize size of each element (1, 2 or 4 bytes) + * \param elements the element buffer ptr + */ +void +draw_set_mapped_element_buffer( struct draw_context *draw, + unsigned eltSize, void *elements ) { - draw->render = render; + draw->pt.user.elts = elements; + draw->pt.user.eltSize = eltSize; } -void draw_set_edgeflags( struct draw_context *draw, - const unsigned *edgeflag ) + + +/* Revamp me please: + */ +void draw_do_flush( struct draw_context *draw, unsigned flags ) { - draw->user.edgeflag = edgeflag; -} + if (!draw->flushing && !draw->vcache_flushing) + { + draw->flushing = TRUE; + draw_pipeline_flush( draw, flags ); -boolean draw_get_edgeflag( struct draw_context *draw, - unsigned idx ) -{ - if (draw->user.edgeflag) - return (draw->user.edgeflag[idx/32] & (1 << (idx%32))) != 0; - else - return 1; + draw->reduced_prim = ~0; /* is reduced_prim needed any more? */ + + draw->flushing = FALSE; + } } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index c7ac32b452..c5c3d3b09e 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -42,37 +42,11 @@ struct pipe_context; -struct vertex_buffer; -struct vertex_info; struct draw_context; struct draw_stage; struct draw_vertex_shader; -/** - * Clipmask flags - */ -/*@{*/ -#define CLIP_RIGHT_BIT 0x01 -#define CLIP_LEFT_BIT 0x02 -#define CLIP_TOP_BIT 0x04 -#define CLIP_BOTTOM_BIT 0x08 -#define CLIP_NEAR_BIT 0x10 -#define CLIP_FAR_BIT 0x20 -/*@}*/ - -/** - * Bitshift for each clip flag - */ -/*@{*/ -#define CLIP_RIGHT_SHIFT 0 -#define CLIP_LEFT_SHIFT 1 -#define CLIP_TOP_SHIFT 2 -#define CLIP_BOTTOM_SHIFT 3 -#define CLIP_NEAR_SHIFT 4 -#define CLIP_FAR_SHIFT 5 -/*@}*/ - struct draw_context *draw_create( void ); @@ -99,15 +73,13 @@ void draw_enable_line_stipple(struct draw_context *draw, boolean enable); void draw_enable_point_sprites(struct draw_context *draw, boolean enable); -boolean draw_use_sse(struct draw_context *draw); - -void +boolean draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); -void +boolean draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); -void +boolean draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe); @@ -168,17 +140,24 @@ void draw_arrays(struct draw_context *draw, unsigned prim, void draw_flush(struct draw_context *draw); -/*********************************************************************** - * draw_debug.c + +/******************************************************************************* + * Driver backend interface */ -boolean draw_validate_prim( unsigned prim, unsigned length ); -unsigned draw_trim_prim( unsigned mode, unsigned count ); +struct vbuf_render; +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ); +void draw_set_driver_clipping( struct draw_context *draw, + boolean bypass_clipping ); +/******************************************************************************* + * Draw pipeline + */ +boolean draw_need_pipeline(const struct draw_context *draw, + const struct pipe_rasterizer_state *rasterizer, + unsigned prim ); -struct vbuf_render; -void draw_set_render( struct draw_context *draw, - struct vbuf_render *render ); #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_debug.c b/src/gallium/auxiliary/draw/draw_debug.c deleted file mode 100644 index d6220b5f62..0000000000 --- a/src/gallium/auxiliary/draw/draw_debug.c +++ /dev/null @@ -1,113 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "draw_private.h" -#include "draw_context.h" - - - -static void -draw_prim_info(unsigned prim, unsigned *first, unsigned *incr) -{ - assert(prim >= PIPE_PRIM_POINTS); - assert(prim <= PIPE_PRIM_POLYGON); - - switch (prim) { - case PIPE_PRIM_POINTS: - *first = 1; - *incr = 1; - break; - case PIPE_PRIM_LINES: - *first = 2; - *incr = 2; - break; - case PIPE_PRIM_LINE_STRIP: - *first = 2; - *incr = 1; - break; - case PIPE_PRIM_LINE_LOOP: - *first = 2; - *incr = 1; - break; - case PIPE_PRIM_TRIANGLES: - *first = 3; - *incr = 3; - break; - case PIPE_PRIM_TRIANGLE_STRIP: - *first = 3; - *incr = 1; - break; - case PIPE_PRIM_TRIANGLE_FAN: - case PIPE_PRIM_POLYGON: - *first = 3; - *incr = 1; - break; - case PIPE_PRIM_QUADS: - *first = 4; - *incr = 4; - break; - case PIPE_PRIM_QUAD_STRIP: - *first = 4; - *incr = 2; - break; - default: - assert(0); - *first = 1; - *incr = 1; - break; - } -} - - -unsigned -draw_trim_prim( unsigned mode, unsigned count ) -{ - unsigned length, first, incr; - - draw_prim_info( mode, &first, &incr ); - - if (count < first) - length = 0; - else - length = count - (count - first) % incr; - - return length; -} - - -boolean -draw_validate_prim( unsigned mode, unsigned count ) -{ - return (count > 0 && - count == draw_trim_prim( mode, count )); -} - diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pipe.c index e70e63d08f..d0890203a5 100644 --- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -31,16 +31,86 @@ */ #include "pipe/p_util.h" -#include "draw/draw_context.h" #include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pt.h" +#include "draw/draw_pipe.h" + + + +boolean draw_pipeline_init( struct draw_context *draw ) +{ + /* create pipeline stages */ + draw->pipeline.wide_line = draw_wide_line_stage( draw ); + draw->pipeline.wide_point = draw_wide_point_stage( draw ); + draw->pipeline.stipple = draw_stipple_stage( draw ); + draw->pipeline.unfilled = draw_unfilled_stage( draw ); + draw->pipeline.twoside = draw_twoside_stage( draw ); + draw->pipeline.offset = draw_offset_stage( draw ); + draw->pipeline.clip = draw_clip_stage( draw ); + draw->pipeline.flatshade = draw_flatshade_stage( draw ); + draw->pipeline.cull = draw_cull_stage( draw ); + draw->pipeline.validate = draw_validate_stage( draw ); + draw->pipeline.first = draw->pipeline.validate; + + if (!draw->pipeline.wide_line || + !draw->pipeline.wide_point || + !draw->pipeline.stipple || + !draw->pipeline.unfilled || + !draw->pipeline.twoside || + !draw->pipeline.offset || + !draw->pipeline.clip || + !draw->pipeline.flatshade || + !draw->pipeline.cull || + !draw->pipeline.validate) + return FALSE; + + /* these defaults are oriented toward the needs of softpipe */ + draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */ + draw->pipeline.wide_line_threshold = 1.0; + draw->pipeline.line_stipple = TRUE; + draw->pipeline.point_sprite = TRUE; + + return TRUE; +} + + +void draw_pipeline_destroy( struct draw_context *draw ) +{ + if (draw->pipeline.wide_line) + draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); + if (draw->pipeline.wide_point) + draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); + if (draw->pipeline.stipple) + draw->pipeline.stipple->destroy( draw->pipeline.stipple ); + if (draw->pipeline.unfilled) + draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); + if (draw->pipeline.twoside) + draw->pipeline.twoside->destroy( draw->pipeline.twoside ); + if (draw->pipeline.offset) + draw->pipeline.offset->destroy( draw->pipeline.offset ); + if (draw->pipeline.clip) + draw->pipeline.clip->destroy( draw->pipeline.clip ); + if (draw->pipeline.flatshade) + draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); + if (draw->pipeline.cull) + draw->pipeline.cull->destroy( draw->pipeline.cull ); + if (draw->pipeline.validate) + draw->pipeline.validate->destroy( draw->pipeline.validate ); + if (draw->pipeline.aaline) + draw->pipeline.aaline->destroy( draw->pipeline.aaline ); + if (draw->pipeline.aapoint) + draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); + if (draw->pipeline.pstipple) + draw->pipeline.pstipple->destroy( draw->pipeline.pstipple ); + if (draw->pipeline.rasterize) + draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); +} + + + + + -/** - * Add a point to the primitive queue. - * \param i0 index into user's vertex arrays - */ static void do_point( struct draw_context *draw, const char *v0 ) { @@ -55,11 +125,6 @@ static void do_point( struct draw_context *draw, } -/** - * Add a line to the primitive queue. - * \param i0 index into user's vertex arrays - * \param i1 index into user's vertex arrays - */ static void do_line( struct draw_context *draw, const char *v0, const char *v1 ) @@ -75,9 +140,7 @@ static void do_line( struct draw_context *draw, draw->pipeline.first->line( draw->pipeline.first, &prim ); } -/** - * Add a triangle to the primitive queue. - */ + static void do_triangle( struct draw_context *draw, char *v0, char *v1, @@ -94,28 +157,11 @@ static void do_triangle( struct draw_context *draw, (prim.v[2]->edgeflag << 2)); prim.pad = 0; - if (0) debug_printf("tri ef: %d %d %d\n", - prim.v[0]->edgeflag, - prim.v[1]->edgeflag, - prim.v[2]->edgeflag); - draw->pipeline.first->tri( draw->pipeline.first, &prim ); } -void draw_pt_reset_vertex_ids( struct draw_context *draw ) -{ - unsigned i; - char *verts = draw->pt.pipeline.verts; - unsigned stride = draw->pt.pipeline.vertex_stride; - - for (i = 0; i < draw->pt.pipeline.vertex_count; i++) { - ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID; - verts += stride; - } -} - /* Code to run the pipeline on a fairly arbitary collection of vertices. * @@ -127,19 +173,20 @@ void draw_pt_reset_vertex_ids( struct draw_context *draw ) * This code provides a callback to reset the vertex id's which the * draw_vbuf.c code uses when it has to perform a flush. */ -void draw_pt_run_pipeline( struct draw_context *draw, - unsigned prim, - char *verts, - unsigned stride, - unsigned vertex_count, - const ushort *elts, - unsigned count ) +void draw_pipeline_run( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ) { + char *verts = (char *)vertices; unsigned i; - draw->pt.pipeline.verts = verts; - draw->pt.pipeline.vertex_stride = stride; - draw->pt.pipeline.vertex_count = vertex_count; + draw->pipeline.verts = verts; + draw->pipeline.vertex_stride = stride; + draw->pipeline.vertex_count = vertex_count; switch (prim) { case PIPE_PRIM_POINTS: @@ -162,7 +209,15 @@ void draw_pt_run_pipeline( struct draw_context *draw, break; } - draw->pt.pipeline.verts = NULL; - draw->pt.pipeline.vertex_count = 0; + draw->pipeline.verts = NULL; + draw->pipeline.vertex_count = 0; } + + +void draw_pipeline_flush( struct draw_context *draw, + unsigned flags ) +{ + draw->pipeline.first->flush( draw->pipeline.first, flags ); + draw->pipeline.first = draw->pipeline.validate; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h new file mode 100644 index 0000000000..2476abb2b2 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -0,0 +1,114 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef DRAW_PIPE_H +#define DRAW_PIPE_H + +#include "pipe/p_compiler.h" +#include "draw_private.h" /* for sizeof(vertex_header) */ + + + +/** + * Base class for all primitive drawing stages. + */ +struct draw_stage +{ + struct draw_context *draw; /**< parent context */ + + struct draw_stage *next; /**< next stage in pipeline */ + + struct vertex_header **tmp; /**< temp vert storage, such as for clipping */ + unsigned nr_tmps; + + void (*point)( struct draw_stage *, + struct prim_header * ); + + void (*line)( struct draw_stage *, + struct prim_header * ); + + void (*tri)( struct draw_stage *, + struct prim_header * ); + + void (*flush)( struct draw_stage *, + unsigned flags ); + + void (*reset_stipple_counter)( struct draw_stage * ); + + void (*destroy)( struct draw_stage * ); +}; + + +extern struct draw_stage *draw_unfilled_stage( struct draw_context *context ); +extern struct draw_stage *draw_twoside_stage( struct draw_context *context ); +extern struct draw_stage *draw_offset_stage( struct draw_context *context ); +extern struct draw_stage *draw_clip_stage( struct draw_context *context ); +extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); +extern struct draw_stage *draw_cull_stage( struct draw_context *context ); +extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); +extern struct draw_stage *draw_validate_stage( struct draw_context *context ); + + +extern void draw_free_temp_verts( struct draw_stage *stage ); +extern boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); + +extern void draw_reset_vertex_ids( struct draw_context *draw ); + +void draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header); +void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header); +void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header); + + + +/** + * Get a writeable copy of a vertex. + * \param stage drawing stage info + * \param vert the vertex to copy (source) + * \param idx index into stage's tmp[] array to put the copy (dest) + * \return pointer to the copied vertex + */ +static INLINE struct vertex_header * +dup_vert( struct draw_stage *stage, + const struct vertex_header *vert, + unsigned idx ) +{ + struct vertex_header *tmp = stage->tmp[idx]; + const uint vsize = sizeof(struct vertex_header) + + stage->draw->num_vs_outputs * 4 * sizeof(float); + memcpy(tmp, vert, vsize); + tmp->vertex_id = UNDEFINED_VERTEX_ID; + return tmp; +} + +#endif diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index e8d2a45102..7e5f8bd281 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -43,6 +43,7 @@ #include "draw_context.h" #include "draw_private.h" +#include "draw_pipe.h" /** @@ -333,11 +334,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx, * Generate the frag shader we'll use for drawing AA lines. * This will be the user's shader plus some texture/modulate instructions. */ -static void +static boolean generate_aaline_fs(struct aaline_stage *aaline) { const struct pipe_shader_state *orig_fs = &aaline->fs->state; - //struct draw_context *draw = aaline->stage.draw; struct pipe_shader_state aaline_fs; struct aa_transform_context transform; @@ -345,6 +345,8 @@ generate_aaline_fs(struct aaline_stage *aaline) aaline_fs = *orig_fs; /* copy to init */ aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (aaline_fs.tokens == NULL) + return FALSE; memset(&transform, 0, sizeof(transform)); transform.colorOutput = -1; @@ -369,15 +371,18 @@ generate_aaline_fs(struct aaline_stage *aaline) aaline->fs->aaline_fs = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); + if (aaline->fs->aaline_fs == NULL) + return FALSE; aaline->fs->generic_attrib = transform.maxGeneric + 1; + return TRUE; } /** * Create the texture map we'll use for antialiasing the lines. */ -static void +static boolean aaline_create_texture(struct aaline_stage *aaline) { struct pipe_context *pipe = aaline->pipe; @@ -395,6 +400,8 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.cpp = 1; aaline->texture = screen->texture_create(screen, &texTemp); + if (!aaline->texture) + return FALSE; /* Fill in mipmap images. * Basically each level is solid opaque, except for the outermost @@ -410,6 +417,8 @@ aaline_create_texture(struct aaline_stage *aaline) surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0); data = pipe_surface_map(surface); + if (data == NULL) + return FALSE; for (i = 0; i < size; i++) { for (j = 0; j < size; j++) { @@ -435,6 +444,7 @@ aaline_create_texture(struct aaline_stage *aaline) pipe_surface_reference(&surface, NULL); pipe->texture_update(pipe, aaline->texture, 0, (1 << level)); } + return TRUE; } @@ -443,7 +453,7 @@ aaline_create_texture(struct aaline_stage *aaline) * By using a mipmapped texture, we don't have to generate a different * texture image for each line size. */ -static void +static boolean aaline_create_sampler(struct aaline_stage *aaline) { struct pipe_sampler_state sampler; @@ -461,6 +471,10 @@ aaline_create_sampler(struct aaline_stage *aaline) sampler.max_lod = MAX_TEXTURE_LEVEL; aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); + if (aaline->sampler_cso == NULL) + return FALSE; + + return TRUE; } @@ -468,13 +482,15 @@ aaline_create_sampler(struct aaline_stage *aaline) * When we're about to draw our first AA line in a batch, this function is * called to tell the driver to bind our modified fragment shader. */ -static void +static boolean bind_aaline_fragment_shader(struct aaline_stage *aaline) { - if (!aaline->fs->aaline_fs) { - generate_aaline_fs(aaline); - } + if (!aaline->fs->aaline_fs && + !generate_aaline_fs(aaline)) + return FALSE; + aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs); + return TRUE; } @@ -486,20 +502,6 @@ aaline_stage( struct draw_stage *stage ) } -static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} - - /** * Draw a wide line by drawing a quad, using geometry which will * fullfill GL's antialiased line requirements. @@ -637,7 +639,11 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) /* * Bind (generate) our fragprog, sampler and texture */ - bind_aaline_fragment_shader(aaline); + if (!bind_aaline_fragment_shader(aaline)) { + stage->line = draw_pipe_passthrough_line; + stage->line(stage, header); + return; + } /* update vertex attrib info */ aaline->tex_slot = draw->num_vs_outputs; @@ -701,9 +707,11 @@ aaline_destroy(struct draw_stage *stage) { struct aaline_stage *aaline = aaline_stage(stage); - aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); + if (aaline->sampler_cso) + aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); - pipe_texture_release(&aaline->texture); + if (aaline->texture) + pipe_texture_release(&aaline->texture); draw_free_temp_verts( stage ); @@ -715,19 +723,28 @@ static struct aaline_stage * draw_aaline_stage(struct draw_context *draw) { struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage); + if (aaline == NULL) + return NULL; - draw_alloc_temp_verts( &aaline->stage, 8 ); + if (!draw_alloc_temp_verts( &aaline->stage, 8 )) + goto fail; aaline->stage.draw = draw; aaline->stage.next = NULL; - aaline->stage.point = passthrough_point; + aaline->stage.point = draw_pipe_passthrough_point; aaline->stage.line = aaline_first_line; - aaline->stage.tri = passthrough_tri; + aaline->stage.tri = draw_pipe_passthrough_tri; aaline->stage.flush = aaline_flush; aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; aaline->stage.destroy = aaline_destroy; return aaline; + + fail: + if (aaline) + aaline_destroy(&aaline->stage); + + return NULL; } @@ -749,13 +766,13 @@ aaline_create_fs_state(struct pipe_context *pipe, { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader); + if (aafs == NULL) + return NULL; - if (aafs) { - aafs->state = *fs; + aafs->state = *fs; - /* pass-through */ - aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); - } + /* pass-through */ + aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); return aafs; } @@ -821,7 +838,7 @@ aaline_set_sampler_textures(struct pipe_context *pipe, * into the draw module's pipeline. This will not be used if the * hardware has native support for AA lines. */ -void +boolean draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) { struct aaline_stage *aaline; @@ -832,14 +849,17 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) * Create / install AA line drawing / prim stage */ aaline = draw_aaline_stage( draw ); - assert(aaline); - draw->pipeline.aaline = &aaline->stage; + if (!aaline) + goto fail; aaline->pipe = pipe; /* create special texture, sampler state */ - aaline_create_texture(aaline); - aaline_create_sampler(aaline); + if (!aaline_create_texture(aaline)) + goto fail; + + if (!aaline_create_sampler(aaline)) + goto fail; /* save original driver functions */ aaline->driver_create_fs_state = pipe->create_fs_state; @@ -856,4 +876,16 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) pipe->bind_sampler_states = aaline_bind_sampler_states; pipe->set_sampler_textures = aaline_set_sampler_textures; + + /* Install once everything is known to be OK: + */ + draw->pipeline.aaline = &aaline->stage; + + return TRUE; + + fail: + if (aaline) + aaline->stage.destroy( &aaline->stage ); + + return FALSE; } diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index fcebe3e7a0..ac0aa4cd7c 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -48,7 +48,8 @@ #include "tgsi/util/tgsi_dump.h" #include "draw_context.h" -#include "draw_private.h" +#include "draw_vs.h" +#include "draw_pipe.h" /* @@ -483,7 +484,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx, * Generate the frag shader we'll use for drawing AA lines. * This will be the user's shader plus some texture/modulate instructions. */ -static void +static boolean generate_aapoint_fs(struct aapoint_stage *aapoint) { const struct pipe_shader_state *orig_fs = &aapoint->fs->state; @@ -494,6 +495,8 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) aapoint_fs = *orig_fs; /* copy to init */ aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (aapoint_fs.tokens == NULL) + return FALSE; memset(&transform, 0, sizeof(transform)); transform.colorOutput = -1; @@ -518,8 +521,12 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); + if (aapoint->fs->aapoint_fs == NULL) + return FALSE; aapoint->fs->generic_attrib = transform.maxGeneric + 1; + + return TRUE; } @@ -527,13 +534,15 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) * When we're about to draw our first AA line in a batch, this function is * called to tell the driver to bind our modified fragment shader. */ -static void +static boolean bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) { - if (!aapoint->fs->aapoint_fs) { - generate_aapoint_fs(aapoint); - } + if (!aapoint->fs->aapoint_fs && + !generate_aapoint_fs(aapoint)) + return FALSE; + aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs); + return TRUE; } @@ -545,18 +554,6 @@ aapoint_stage( struct draw_stage *stage ) } -static void -passthrough_line(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->line(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} /** @@ -742,19 +739,29 @@ static struct aapoint_stage * draw_aapoint_stage(struct draw_context *draw) { struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); + if (aapoint == NULL) + goto fail; - draw_alloc_temp_verts( &aapoint->stage, 4 ); + if (!draw_alloc_temp_verts( &aapoint->stage, 4 )) + goto fail; aapoint->stage.draw = draw; aapoint->stage.next = NULL; aapoint->stage.point = aapoint_first_point; - aapoint->stage.line = passthrough_line; - aapoint->stage.tri = passthrough_tri; + aapoint->stage.line = draw_pipe_passthrough_line; + aapoint->stage.tri = draw_pipe_passthrough_tri; aapoint->stage.flush = aapoint_flush; aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; aapoint->stage.destroy = aapoint_destroy; return aapoint; + + fail: + if (aapoint) + aapoint_destroy(&aapoint->stage); + + return NULL; + } @@ -776,13 +783,13 @@ aapoint_create_fs_state(struct pipe_context *pipe, { struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader); + if (aafs == NULL) + return NULL; - if (aafs) { - aafs->state = *fs; + aafs->state = *fs; - /* pass-through */ - aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); - } + /* pass-through */ + aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); return aafs; } @@ -817,7 +824,7 @@ aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) * into the draw module's pipeline. This will not be used if the * hardware has native support for AA points. */ -void +boolean draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe) { @@ -829,8 +836,8 @@ draw_install_aapoint_stage(struct draw_context *draw, * Create / install AA point drawing / prim stage */ aapoint = draw_aapoint_stage( draw ); - assert(aapoint); - draw->pipeline.aapoint = &aapoint->stage; + if (aapoint == NULL) + goto fail; aapoint->pipe = pipe; @@ -843,4 +850,14 @@ draw_install_aapoint_stage(struct draw_context *draw, pipe->create_fs_state = aapoint_create_fs_state; pipe->bind_fs_state = aapoint_bind_fs_state; pipe->delete_fs_state = aapoint_delete_fs_state; + + draw->pipeline.aapoint = &aapoint->stage; + + return TRUE; + + fail: + if (aapoint) + aapoint->stage.destroy( &aapoint->stage ); + + return FALSE; } diff --git a/src/gallium/auxiliary/draw/draw_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index e24c5d8032..21216addea 100644 --- a/src/gallium/auxiliary/draw/draw_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -35,8 +35,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "draw_context.h" -#include "draw_private.h" +#include "draw_vs.h" +#include "draw_pipe.h" #ifndef IS_NEGATIVE @@ -204,7 +204,14 @@ static void emit_poly( struct draw_stage *stage, } } - +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} /* Clip a triangle against the viewport and user clip planes. @@ -486,8 +493,11 @@ static void clip_destroy( struct draw_stage *stage ) struct draw_stage *draw_clip_stage( struct draw_context *draw ) { struct clipper *clipper = CALLOC_STRUCT(clipper); + if (clipper == NULL) + goto fail; - draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ); + if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 )) + goto fail; clipper->stage.draw = draw; clipper->stage.point = clip_point; @@ -500,4 +510,10 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw ) clipper->plane = draw->plane; return &clipper->stage; + + fail: + if (clipper) + clipper->stage.destroy( &clipper->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 8177b0ac86..87aaf1f85b 100644 --- a/src/gallium/auxiliary/draw/draw_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -35,7 +35,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" -#include "draw_private.h" +#include "draw_pipe.h" struct cull_stage { @@ -95,20 +95,6 @@ static void cull_first_tri( struct draw_stage *stage, -static void cull_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void cull_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - - static void cull_flush( struct draw_stage *stage, unsigned flags ) { stage->tri = cull_first_tri; @@ -134,17 +120,26 @@ static void cull_destroy( struct draw_stage *stage ) struct draw_stage *draw_cull_stage( struct draw_context *draw ) { struct cull_stage *cull = CALLOC_STRUCT(cull_stage); + if (cull == NULL) + goto fail; - draw_alloc_temp_verts( &cull->stage, 0 ); + if (!draw_alloc_temp_verts( &cull->stage, 0 )) + goto fail; cull->stage.draw = draw; cull->stage.next = NULL; - cull->stage.point = cull_point; - cull->stage.line = cull_line; + cull->stage.point = draw_pipe_passthrough_point; + cull->stage.line = draw_pipe_passthrough_line; cull->stage.tri = cull_first_tri; cull->stage.flush = cull_flush; cull->stage.reset_stipple_counter = cull_reset_stipple_counter; cull->stage.destroy = cull_destroy; return &cull->stage; + + fail: + if (cull) + cull->stage.destroy( &cull->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index af2cb05c98..205000cbea 100644 --- a/src/gallium/auxiliary/draw/draw_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -30,7 +30,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_vs.h" +#include "draw_pipe.h" /** subclass of draw_stage */ @@ -151,13 +152,6 @@ static void flatshade_line_1( struct draw_stage *stage, } -/* Flatshade point -- passthrough. - */ -static void flatshade_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} static void flatshade_init_state( struct draw_stage *stage ) @@ -230,12 +224,15 @@ static void flatshade_destroy( struct draw_stage *stage ) struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) { struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage); + if (flatshade == NULL) + goto fail; - draw_alloc_temp_verts( &flatshade->stage, 2 ); + if (!draw_alloc_temp_verts( &flatshade->stage, 2 )) + goto fail; flatshade->stage.draw = draw; flatshade->stage.next = NULL; - flatshade->stage.point = flatshade_point; + flatshade->stage.point = draw_pipe_passthrough_point; flatshade->stage.line = flatshade_first_line; flatshade->stage.tri = flatshade_first_tri; flatshade->stage.flush = flatshade_flush; @@ -243,6 +240,12 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) flatshade->stage.destroy = flatshade_destroy; return &flatshade->stage; + + fail: + if (flatshade) + flatshade->stage.destroy( &flatshade->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index dbc676deae..ffec85ccdd 100644 --- a/src/gallium/auxiliary/draw/draw_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -33,7 +33,7 @@ */ #include "pipe/p_util.h" -#include "draw_private.h" +#include "draw_pipe.h" @@ -129,18 +129,6 @@ static void offset_first_tri( struct draw_stage *stage, } -static void offset_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void offset_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} static void offset_flush( struct draw_stage *stage, @@ -170,17 +158,25 @@ static void offset_destroy( struct draw_stage *stage ) struct draw_stage *draw_offset_stage( struct draw_context *draw ) { struct offset_stage *offset = CALLOC_STRUCT(offset_stage); + if (offset == NULL) + goto fail; draw_alloc_temp_verts( &offset->stage, 3 ); offset->stage.draw = draw; offset->stage.next = NULL; - offset->stage.point = offset_point; - offset->stage.line = offset_line; + offset->stage.point = draw_pipe_passthrough_point; + offset->stage.line = draw_pipe_passthrough_line; offset->stage.tri = offset_first_tri; offset->stage.flush = offset_flush; offset->stage.reset_stipple_counter = offset_reset_stipple_counter; offset->stage.destroy = offset_destroy; return &offset->stage; + + fail: + if (offset) + offset->stage.destroy( &offset->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 4dddb72906..aec485a6e7 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -44,7 +44,7 @@ #include "tgsi/util/tgsi_dump.h" #include "draw_context.h" -#include "draw_private.h" +#include "draw_pipe.h" @@ -320,7 +320,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, * Generate the frag shader we'll use for doing polygon stipple. * This will be the user's shader prefixed with a TEX and KIL instruction. */ -static void +static boolean generate_pstip_fs(struct pstip_stage *pstip) { const struct pipe_shader_state *orig_fs = &pstip->fs->state; @@ -332,6 +332,8 @@ generate_pstip_fs(struct pstip_stage *pstip) pstip_fs = *orig_fs; /* copy to init */ pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (pstip_fs.tokens == NULL) + return FALSE; memset(&transform, 0, sizeof(transform)); transform.wincoordInput = -1; @@ -355,6 +357,8 @@ generate_pstip_fs(struct pstip_stage *pstip) assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS); pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); + + return TRUE; } @@ -404,7 +408,7 @@ pstip_update_texture(struct pstip_stage *pstip) /** * Create the texture map we'll use for stippling. */ -static void +static boolean pstip_create_texture(struct pstip_stage *pstip) { struct pipe_context *pipe = pstip->pipe; @@ -421,7 +425,10 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.cpp = 1; pstip->texture = screen->texture_create(screen, &texTemp); - assert(pstip->texture->refcount == 1); + if (pstip->texture == NULL) + return FALSE; + + return TRUE; } @@ -430,7 +437,7 @@ pstip_create_texture(struct pstip_stage *pstip) * By using a mipmapped texture, we don't have to generate a different * texture image for each line size. */ -static void +static boolean pstip_create_sampler(struct pstip_stage *pstip) { struct pipe_sampler_state sampler; @@ -448,6 +455,10 @@ pstip_create_sampler(struct pstip_stage *pstip) sampler.max_lod = 0.0f; pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); + if (pstip->sampler_cso == NULL) + return FALSE; + + return TRUE; } @@ -455,13 +466,15 @@ pstip_create_sampler(struct pstip_stage *pstip) * When we're about to draw our first AA line in a batch, this function is * called to tell the driver to bind our modified fragment shader. */ -static void +static boolean bind_pstip_fragment_shader(struct pstip_stage *pstip) { - if (!pstip->fs->pstip_fs) { - generate_pstip_fs(pstip); - } + if (!pstip->fs->pstip_fs && + !generate_pstip_fs(pstip)) + return FALSE; + pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); + return TRUE; } @@ -473,25 +486,6 @@ pstip_stage( struct draw_stage *stage ) } -static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point(stage->next, header); -} - - -static void -passthrough_line(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->line(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} @@ -505,7 +499,12 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) assert(stage->draw->rasterizer->poly_stipple_enable); /* bind our fragprog */ - bind_pstip_fragment_shader(pstip); + if (!bind_pstip_fragment_shader(pstip)) { + stage->tri = draw_pipe_passthrough_tri; + stage->tri(stage, header); + return; + } + /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ @@ -523,7 +522,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); /* now really draw first line */ - stage->tri = passthrough_tri; + stage->tri = draw_pipe_passthrough_tri; stage->tri(stage, header); } @@ -579,8 +578,8 @@ draw_pstip_stage(struct draw_context *draw) pstip->stage.draw = draw; pstip->stage.next = NULL; - pstip->stage.point = passthrough_point; - pstip->stage.line = passthrough_line; + pstip->stage.point = draw_pipe_passthrough_point; + pstip->stage.line = draw_pipe_passthrough_line; pstip->stage.tri = pstip_first_tri; pstip->stage.flush = pstip_flush; pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; @@ -705,7 +704,7 @@ pstip_set_polygon_stipple(struct pipe_context *pipe, * into the draw module's pipeline. This will not be used if the * hardware has native support for AA lines. */ -void +boolean draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe) { @@ -717,14 +716,19 @@ draw_install_pstipple_stage(struct draw_context *draw, * Create / install AA line drawing / prim stage */ pstip = draw_pstip_stage( draw ); - assert(pstip); + if (pstip == NULL) + goto fail; + draw->pipeline.pstipple = &pstip->stage; pstip->pipe = pipe; /* create special texture, sampler state */ - pstip_create_texture(pstip); - pstip_create_sampler(pstip); + if (!pstip_create_texture(pstip)) + goto fail; + + if (!pstip_create_sampler(pstip)) + goto fail; /* save original driver functions */ pstip->driver_create_fs_state = pipe->create_fs_state; @@ -743,4 +747,12 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->bind_sampler_states = pstip_bind_sampler_states; pipe->set_sampler_textures = pstip_set_sampler_textures; pipe->set_polygon_stipple = pstip_set_polygon_stipple; + + return TRUE; + + fail: + if (pstip) + pstip->stage.destroy( &pstip->stage ); + + return FALSE; } diff --git a/src/gallium/auxiliary/draw/draw_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 506f33512c..9cf5840cce 100644 --- a/src/gallium/auxiliary/draw/draw_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -39,7 +39,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_pipe.h" /** Subclass of draw_stage */ @@ -195,18 +195,6 @@ stipple_flush(struct draw_stage *stage, unsigned flags) } -static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point( stage->next, header ); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} static void @@ -228,9 +216,9 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw ) stipple->stage.draw = draw; stipple->stage.next = NULL; - stipple->stage.point = passthrough_point; + stipple->stage.point = draw_pipe_passthrough_point; stipple->stage.line = stipple_first_line; - stipple->stage.tri = passthrough_tri; + stipple->stage.tri = draw_pipe_passthrough_tri; stipple->stage.reset_stipple_counter = reset_stipple_counter; stipple->stage.flush = stipple_flush; stipple->stage.destroy = stipple_destroy; diff --git a/src/gallium/auxiliary/draw/draw_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 3debaac282..5910dccc43 100644 --- a/src/gallium/auxiliary/draw/draw_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -31,8 +31,8 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" - +#include "draw_vs.h" +#include "draw_pipe.h" struct twoside_stage { struct draw_stage stage; @@ -99,21 +99,6 @@ static void twoside_tri( struct draw_stage *stage, } -static void twoside_line( struct draw_stage *stage, - struct prim_header *header ) -{ - /* pass-through */ - stage->next->line( stage->next, header ); -} - - -static void twoside_point( struct draw_stage *stage, - struct prim_header *header ) -{ - /* pass-through */ - stage->next->point( stage->next, header ); -} - static void twoside_first_tri( struct draw_stage *stage, struct prim_header *header ) @@ -187,17 +172,26 @@ static void twoside_destroy( struct draw_stage *stage ) struct draw_stage *draw_twoside_stage( struct draw_context *draw ) { struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage); + if (twoside == NULL) + goto fail; - draw_alloc_temp_verts( &twoside->stage, 3 ); + if (!draw_alloc_temp_verts( &twoside->stage, 3 )) + goto fail; twoside->stage.draw = draw; twoside->stage.next = NULL; - twoside->stage.point = twoside_point; - twoside->stage.line = twoside_line; + twoside->stage.point = draw_pipe_passthrough_point; + twoside->stage.line = draw_pipe_passthrough_line; twoside->stage.tri = twoside_first_tri; twoside->stage.flush = twoside_flush; twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter; twoside->stage.destroy = twoside_destroy; return &twoside->stage; + + fail: + if (twoside) + twoside->stage.destroy( &twoside->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index b07860cd9e..eeb2bc43f9 100644 --- a/src/gallium/auxiliary/draw/draw_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -36,6 +36,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "draw_private.h" +#include "draw_pipe.h" struct unfilled_stage { @@ -147,19 +148,6 @@ static void unfilled_first_tri( struct draw_stage *stage, } -static void unfilled_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void unfilled_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - static void unfilled_flush( struct draw_stage *stage, unsigned flags ) @@ -189,18 +177,27 @@ static void unfilled_destroy( struct draw_stage *stage ) struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) { struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage); + if (unfilled == NULL) + goto fail; - draw_alloc_temp_verts( &unfilled->stage, 0 ); + if (!draw_alloc_temp_verts( &unfilled->stage, 0 )) + goto fail; unfilled->stage.draw = draw; unfilled->stage.next = NULL; unfilled->stage.tmp = NULL; - unfilled->stage.point = unfilled_point; - unfilled->stage.line = unfilled_line; + unfilled->stage.point = draw_pipe_passthrough_point; + unfilled->stage.line = draw_pipe_passthrough_line; unfilled->stage.tri = unfilled_first_tri; unfilled->stage.flush = unfilled_flush; unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter; unfilled->stage.destroy = unfilled_destroy; return &unfilled->stage; + + fail: + if (unfilled) + unfilled->stage.destroy( &unfilled->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c new file mode 100644 index 0000000000..04438f4dd0 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_util.c @@ -0,0 +1,137 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_util.h" +#include "draw/draw_private.h" +#include "draw/draw_pipe.h" + + + +void +draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + +void +draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + +void +draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + + + + +/* This is only used for temporary verts. + */ +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + +/** + * Allocate space for temporary post-transform vertices, such as for clipping. + */ +boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) +{ + assert(!stage->tmp); + + stage->tmp = NULL; + stage->nr_tmps = nr; + + if (nr != 0) + { + unsigned i; + ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); + + if (store == NULL) + return FALSE; + + stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); + if (stage->tmp == NULL) { + FREE(store); + return FALSE; + } + + for (i = 0; i < nr; i++) + stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); + } + + return TRUE; +} + + +void draw_free_temp_verts( struct draw_stage *stage ) +{ + if (stage->tmp) { + FREE( stage->tmp[0] ); + FREE( stage->tmp ); + stage->tmp = NULL; + } +} + + +/* Reset vertex ids. This is basically a type of flush. + * + * Called only from draw_pipe_vbuf.c + */ +void draw_reset_vertex_ids(struct draw_context *draw) +{ + struct draw_stage *stage = draw->pipeline.first; + + while (stage) { + unsigned i; + + for (i = 0; i < stage->nr_tmps; i++) + stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; + + stage = stage->next; + } + + if (draw->pipeline.verts) + { + unsigned i; + char *verts = draw->pipeline.verts; + unsigned stride = draw->pipeline.vertex_stride; + + for (i = 0; i < draw->pipeline.vertex_count; i++) { + ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID; + verts += stride; + } + } +} + diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index e163e078f0..6be1d369c3 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -31,6 +31,8 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "draw_private.h" +#include "draw_pipe.h" +#include "draw_context.h" static boolean points( unsigned prim ) { @@ -56,7 +58,8 @@ static boolean triangles( unsigned prim ) * pipeline stages. */ boolean -draw_need_pipeline(const struct draw_context *draw, +draw_need_pipeline(const struct draw_context *draw, + const struct pipe_rasterizer_state *rasterizer, unsigned int prim ) { /* Don't have to worry about triangles turning into lines/points @@ -66,30 +69,30 @@ draw_need_pipeline(const struct draw_context *draw, if (lines(prim)) { /* line stipple */ - if (draw->rasterizer->line_stipple_enable && draw->line_stipple) + if (rasterizer->line_stipple_enable && draw->pipeline.line_stipple) return TRUE; /* wide lines */ - if (draw->rasterizer->line_width > draw->wide_line_threshold) + if (rasterizer->line_width > draw->pipeline.wide_line_threshold) return TRUE; /* AA lines */ - if (draw->rasterizer->line_smooth && draw->pipeline.aaline) + if (rasterizer->line_smooth && draw->pipeline.aaline) return TRUE; } if (points(prim)) { /* large points */ - if (draw->rasterizer->point_size > draw->wide_point_threshold) + if (rasterizer->point_size > draw->pipeline.wide_point_threshold) return TRUE; /* AA points */ - if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + if (rasterizer->point_smooth && draw->pipeline.aapoint) return TRUE; /* point sprites */ - if (draw->rasterizer->point_sprite && draw->point_sprite) + if (rasterizer->point_sprite && draw->pipeline.point_sprite) return TRUE; } @@ -97,20 +100,20 @@ draw_need_pipeline(const struct draw_context *draw, if (triangles(prim)) { /* polygon stipple */ - if (draw->rasterizer->poly_stipple_enable && draw->pipeline.pstipple) + if (rasterizer->poly_stipple_enable && draw->pipeline.pstipple) return TRUE; /* unfilled polygons */ - if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) + if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) return TRUE; /* polygon offset */ - if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) + if (rasterizer->offset_cw || rasterizer->offset_ccw) return TRUE; /* two-side lighting */ - if (draw->rasterizer->light_twoside) + if (rasterizer->light_twoside) return TRUE; } @@ -119,7 +122,7 @@ draw_need_pipeline(const struct draw_context *draw, * * Generally this isn't a reason to require the pipeline, though. * - if (draw->rasterizer->cull_mode) + if (rasterizer->cull_mode) return TRUE; */ @@ -145,15 +148,15 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) stage->next = next; /* drawing wide lines? */ - wide_lines = (draw->rasterizer->line_width > draw->wide_line_threshold + wide_lines = (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold && !draw->rasterizer->line_smooth); /* drawing large points? */ - if (draw->rasterizer->point_sprite && draw->point_sprite) + if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite) wide_points = TRUE; else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) wide_points = FALSE; - else if (draw->rasterizer->point_size > draw->wide_point_threshold) + else if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) wide_points = TRUE; else wide_points = FALSE; @@ -186,7 +189,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.wide_point; } - if (draw->rasterizer->line_stipple_enable && draw->line_stipple) { + if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) { draw->pipeline.stipple->next = next; next = draw->pipeline.stipple; precalc_flat = 1; /* only needed for lines really */ @@ -238,7 +241,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) /* Clip stage */ - if (!draw->rasterizer->bypass_clipping) + if (!draw->bypass_clipping) { draw->pipeline.clip->next = next; next = draw->pipeline.clip; @@ -298,6 +301,8 @@ static void validate_destroy( struct draw_stage *stage ) struct draw_stage *draw_validate_stage( struct draw_context *draw ) { struct draw_stage *stage = CALLOC_STRUCT(draw_stage); + if (stage == NULL) + return NULL; stage->draw = draw; stage->next = NULL; diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index e3216ff711..afd5f5544d 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -40,7 +40,8 @@ #include "draw_vbuf.h" #include "draw_private.h" #include "draw_vertex.h" -#include "draw_vf.h" +#include "draw_pipe.h" +#include "translate/translate.h" /** @@ -56,7 +57,7 @@ struct vbuf_stage { /** Vertex size in bytes */ unsigned vertex_size; - struct draw_vertex_fetch *vf; + struct translate *translate; /* FIXME: we have no guarantee that 'unsigned' is 32bit */ @@ -71,8 +72,9 @@ struct vbuf_stage { unsigned max_indices; unsigned nr_indices; - /** Pipe primitive */ - unsigned prim; + /* Cache point size somewhere it's address won't change: + */ + float point_size; }; @@ -113,61 +115,6 @@ check_space( struct vbuf_stage *vbuf, unsigned nr ) } -#if 0 -static INLINE void -dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) -{ - assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); - unsigned i, j, k; - - for (i = 0; i < vinfo->num_attribs; i++) { - j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { - case EMIT_OMIT: - debug_printf("EMIT_OMIT:"); - break; - case EMIT_1F: - debug_printf("EMIT_1F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_1F_PSIZE: - debug_printf("EMIT_1F_PSIZE:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_2F: - debug_printf("EMIT_2F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_3F: - debug_printf("EMIT_3F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - data += sizeof(float); - break; - case EMIT_4F: - debug_printf("EMIT_4F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_4UB: - debug_printf("EMIT_4UB:\t"); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - break; - default: - assert(0); - } - debug_printf("\n"); - } - debug_printf("\n"); -} -#endif /** @@ -177,96 +124,25 @@ dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) * have a couple of slots at the beginning (1-dword header, 4-dword * clip pos) that we ignore here. We only use the vertex->data[] fields. */ -static INLINE void +static INLINE ushort emit_vertex( struct vbuf_stage *vbuf, struct vertex_header *vertex ) { -#if 0 - debug_printf("emit vertex %d to %p\n", - vbuf->nr_vertices, vbuf->vertex_ptr); -#endif - - if(vertex->vertex_id != UNDEFINED_VERTEX_ID) { - if(vertex->vertex_id < vbuf->nr_vertices) - return; - else - debug_printf("Bad vertex id 0x%04x (>= 0x%04x)\n", - vertex->vertex_id, vbuf->nr_vertices); - return; - } - - vertex->vertex_id = vbuf->nr_vertices++; - - if(!vbuf->vf) { - const struct vertex_info *vinfo = vbuf->vinfo; - uint i; - uint count = 0; /* for debug/sanity */ + if(vertex->vertex_id == UNDEFINED_VERTEX_ID) { + /* Hmm - vertices are emitted one at a time - better make sure + * set_buffer is efficient. Consider a special one-shot mode for + * translate. + */ + vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); + vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); + + if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); - assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); - - for (i = 0; i < vinfo->num_attribs; i++) { - uint j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { - case EMIT_OMIT: - /* no-op */ - break; - case EMIT_1F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - count++; - break; - case EMIT_1F_PSIZE: - *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size); - count++; - break; - case EMIT_2F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - count += 2; - break; - case EMIT_3F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); - count += 3; - break; - case EMIT_4F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][3]); - count += 4; - break; - case EMIT_4UB: - *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ), - float_to_ubyte( vertex->data[j][1] ), - float_to_ubyte( vertex->data[j][0] ), - float_to_ubyte( vertex->data[j][3] )); - count += 1; - break; - default: - assert(0); - } - } - assert(count == vinfo->size); -#if 0 - { - static float data[256]; - draw_vf_emit_vertex(vbuf->vf, vertex, data); - if(memcmp((uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size, data, vbuf->vertex_size)) { - debug_printf("With VF:\n"); - dump_emitted_vertex(vbuf->vinfo, (uint8_t *)data); - debug_printf("Without VF:\n"); - dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size); - assert(0); - } - } -#endif - } - else { - draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr); - vbuf->vertex_ptr += vbuf->vertex_size/4; + vertex->vertex_id = vbuf->nr_vertices++; } + + return vertex->vertex_id; } @@ -280,9 +156,7 @@ vbuf_tri( struct draw_stage *stage, check_space( vbuf, 3 ); for (i = 0; i < 3; i++) { - emit_vertex( vbuf, prim->v[i] ); - - vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id; + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); } } @@ -297,9 +171,7 @@ vbuf_line( struct draw_stage *stage, check_space( vbuf, 2 ); for (i = 0; i < 2; i++) { - emit_vertex( vbuf, prim->v[i] ); - - vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id; + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); } } @@ -312,43 +184,112 @@ vbuf_point( struct draw_stage *stage, check_space( vbuf, 1 ); - emit_vertex( vbuf, prim->v[0] ); - - vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[0]->vertex_id; + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] ); } + + /** * Set the prim type for subsequent vertices. * This may result in a new vertex size. The existing vbuffer (if any) * will be flushed if needed and a new one allocated. */ static void -vbuf_set_prim( struct vbuf_stage *vbuf, uint newprim ) +vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) { - const struct vertex_info *vinfo; - unsigned vertex_size; - - assert(newprim == PIPE_PRIM_POINTS || - newprim == PIPE_PRIM_LINES || - newprim == PIPE_PRIM_TRIANGLES); + struct translate_key hw_key; + unsigned dst_offset; + unsigned i; - vbuf->prim = newprim; - vbuf->render->set_primitive(vbuf->render, newprim); + vbuf->render->set_primitive(vbuf->render, prim); - vinfo = vbuf->render->get_vertex_info(vbuf->render); - vertex_size = vinfo->size * sizeof(float); + /* Must do this after set_primitive() above: + * + * XXX: need some state managment to track when this needs to be + * recalculated. The driver should tell us whether there was a + * state change. + */ + vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render); - if (vertex_size != vbuf->vertex_size) + if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) { vbuf_flush_vertices(vbuf); + vbuf->vertex_size = vbuf->vinfo->size * sizeof(float); + } - vbuf->vinfo = vinfo; - vbuf->vertex_size = vertex_size; - if(vbuf->vf) - draw_vf_set_vertex_info(vbuf->vf, - vbuf->vinfo, - vbuf->stage.draw->rasterizer->point_size); - + /* Translate from pipeline vertices to hw vertices. + */ + dst_offset = 0; + memset(&hw_key, 0, sizeof(hw_key)); + + for (i = 0; i < vbuf->vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned src_buffer = 0; + unsigned output_format; + unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) ); + + switch (vbuf->vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + src_buffer = 1; + src_offset = 0; + break; + case EMIT_4UB: + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); + default: + assert(0); + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + break; + } + + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = src_buffer; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; + } + + hw_key.nr_elements = vbuf->vinfo->num_attribs; + hw_key.output_stride = vbuf->vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!vbuf->translate || + memcmp(&vbuf->translate->key, &hw_key, sizeof(hw_key)) != 0) + { + if (vbuf->translate) + vbuf->translate->release(vbuf->translate); + + vbuf->translate = translate_create( &hw_key ); + + vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0); + } + + vbuf->point_size = vbuf->stage.draw->rasterizer->point_size; + + /* Allocate new buffer? + */ if (!vbuf->vertices) vbuf_alloc_vertices(vbuf); } @@ -402,29 +343,9 @@ vbuf_flush_indices( struct vbuf_stage *vbuf ) assert((uint) (vbuf->vertex_ptr - vbuf->vertices) == vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned)); - switch(vbuf->prim) { - case PIPE_PRIM_POINTS: - break; - case PIPE_PRIM_LINES: - assert(vbuf->nr_indices % 2 == 0); - break; - case PIPE_PRIM_TRIANGLES: - assert(vbuf->nr_indices % 3 == 0); - break; - default: - assert(0); - } - vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices); vbuf->nr_indices = 0; - - /* don't need to reset point/line/tri functions */ -#if 0 - stage->point = vbuf_first_point; - stage->line = vbuf_first_line; - stage->tri = vbuf_first_tri; -#endif } @@ -466,8 +387,8 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf ) /* Allocate a new vertex buffer */ vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size; vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, - (ushort) vbuf->vertex_size, - (ushort) vbuf->max_vertices); + (ushort) vbuf->vertex_size, + (ushort) vbuf->max_vertices); vbuf->vertex_ptr = vbuf->vertices; } @@ -505,8 +426,8 @@ static void vbuf_destroy( struct draw_stage *stage ) if(vbuf->indices) align_free( vbuf->indices ); - if(vbuf->vf) - draw_vf_destroy( vbuf->vf ); + if(vbuf->translate) + vbuf->translate->release( vbuf->translate ); if (vbuf->render) vbuf->render->destroy( vbuf->render ); @@ -522,9 +443,8 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, struct vbuf_render *render ) { struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage); - - if(!vbuf) - return NULL; + if (vbuf == NULL) + goto fail; vbuf->stage.draw = draw; vbuf->stage.point = vbuf_first_point; @@ -535,21 +455,22 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, vbuf->stage.destroy = vbuf_destroy; vbuf->render = render; + vbuf->max_indices = MAX2(render->max_indices, UNDEFINED_VERTEX_ID-1); - assert(render->max_indices < UNDEFINED_VERTEX_ID); - vbuf->max_indices = render->max_indices; - vbuf->indices = (ushort *) - align_malloc( vbuf->max_indices * sizeof(vbuf->indices[0]), 16 ); - if(!vbuf->indices) - vbuf_destroy(&vbuf->stage); + vbuf->indices = (ushort *) align_malloc( vbuf->max_indices * + sizeof(vbuf->indices[0]), + 16 ); + if (!vbuf->indices) + goto fail; vbuf->vertices = NULL; vbuf->vertex_ptr = vbuf->vertices; - - vbuf->prim = ~0; - - if(!GETENV("GALLIUM_NOVF")) - vbuf->vf = draw_vf_create(); return &vbuf->stage; + + fail: + if (vbuf) + vbuf_destroy(&vbuf->stage); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 9a168ce8bd..452732e662 100644 --- a/src/gallium/auxiliary/draw/draw_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" +#include "draw_pipe.h" struct wideline_stage { @@ -48,19 +49,6 @@ static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage ) } -static void wideline_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - - -static void wideline_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} - /** * Draw a wide line by drawing a quad (two triangles). @@ -179,9 +167,9 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) wide->stage.draw = draw; wide->stage.next = NULL; - wide->stage.point = wideline_point; + wide->stage.point = draw_pipe_passthrough_point; wide->stage.line = wideline_line; - wide->stage.tri = wideline_tri; + wide->stage.tri = draw_pipe_passthrough_point; wide->stage.flush = wideline_flush; wide->stage.reset_stipple_counter = wideline_reset_stipple_counter; wide->stage.destroy = wideline_destroy; diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 6fc7c9fcd7..ed08573382 100644 --- a/src/gallium/auxiliary/draw/draw_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -31,7 +31,8 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_vs.h" +#include "draw_pipe.h" struct widepoint_stage { @@ -60,23 +61,6 @@ widepoint_stage( struct draw_stage *stage ) } -static void passthrough_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - -static void widepoint_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line(stage->next, header); -} - -static void widepoint_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} /** @@ -199,16 +183,16 @@ static void widepoint_first_point( struct draw_stage *stage, wide->ybias = 0.0; if (draw->rasterizer->gl_rasterization_rules) { - wide->ybias = -0.125; + wide->xbias = 0.125; } /* XXX we won't know the real size if it's computed by the vertex shader! */ - if ((draw->rasterizer->point_size > draw->wide_point_threshold) || - (draw->rasterizer->point_sprite && draw->point_sprite)) { + if ((draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) || + (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)) { stage->point = widepoint_point; } else { - stage->point = passthrough_point; + stage->point = draw_pipe_passthrough_point; } if (draw->rasterizer->point_sprite) { @@ -265,17 +249,26 @@ static void widepoint_destroy( struct draw_stage *stage ) struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) { struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage); + if (wide == NULL) + goto fail; - draw_alloc_temp_verts( &wide->stage, 4 ); + if (!draw_alloc_temp_verts( &wide->stage, 4 )) + goto fail; wide->stage.draw = draw; wide->stage.next = NULL; wide->stage.point = widepoint_first_point; - wide->stage.line = widepoint_line; - wide->stage.tri = widepoint_tri; + wide->stage.line = draw_pipe_passthrough_line; + wide->stage.tri = draw_pipe_passthrough_tri; wide->stage.flush = widepoint_flush; wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter; wide->stage.destroy = widepoint_destroy; return &wide->stage; + + fail: + if (wide) + wide->stage.destroy( &wide->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c deleted file mode 100644 index 51b6950334..0000000000 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ /dev/null @@ -1,523 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" - -#include "draw_private.h" -#include "draw_context.h" - - - -#define RP_NONE 0 -#define RP_POINT 1 -#define RP_LINE 2 -#define RP_TRI 3 - - -static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { - RP_POINT, - RP_LINE, - RP_LINE, - RP_LINE, - RP_TRI, - RP_TRI, - RP_TRI, - RP_TRI, - RP_TRI, - RP_TRI -}; - - -static void draw_prim_queue_flush( struct draw_context *draw ) -{ - unsigned i; - - if (0) - debug_printf("Flushing with %d prims, %d verts\n", - draw->pq.queue_nr, draw->vs.queue_nr); - - assert (draw->pq.queue_nr != 0); - - /* NOTE: we cannot save draw->pipeline->first in a local var because - * draw->pipeline->first is often changed by the first call to tri(), - * line(), etc. - */ - if (draw->rasterizer->line_stipple_enable) { - switch (draw->reduced_prim) { - case RP_TRI: - for (i = 0; i < draw->pq.queue_nr; i++) { - if (draw->pq.queue[i].reset_line_stipple) - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - - draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] ); - } - break; - case RP_LINE: - for (i = 0; i < draw->pq.queue_nr; i++) { - if (draw->pq.queue[i].reset_line_stipple) - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - - draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] ); - } - break; - case RP_POINT: - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] ); - break; - } - } - else { - switch (draw->reduced_prim) { - case RP_TRI: - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] ); - break; - case RP_LINE: - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] ); - break; - case RP_POINT: - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] ); - break; - } - } - - draw->pq.queue_nr = 0; - draw->vs.post_nr = 0; - draw_vertex_cache_unreference( draw ); -} - -void draw_do_flush( struct draw_context *draw, unsigned flags ) -{ - if (0) - debug_printf("Flushing with %d verts, %d prims\n", - draw->vs.queue_nr, - draw->pq.queue_nr ); - - if (draw->flushing) - return; - - draw->flushing = TRUE; - - if (flags >= DRAW_FLUSH_SHADER_QUEUE) { - if (draw->vs.queue_nr) { - (*draw->shader_queue_flush)(draw); - } - - if (flags >= DRAW_FLUSH_PRIM_QUEUE) { - if (draw->pq.queue_nr) - draw_prim_queue_flush(draw); - - if (flags >= DRAW_FLUSH_VERTEX_CACHE) { - draw_vertex_cache_invalidate(draw); - - if (flags >= DRAW_FLUSH_STATE_CHANGE) { - draw->pipeline.first->flush( draw->pipeline.first, flags ); - draw->pipeline.first = draw->pipeline.validate; - draw->reduced_prim = ~0; - } - } - } - } - - draw->flushing = FALSE; -} - - - -/* Return a pointer to a freshly queued primitive header. Ensure that - * there is room in the vertex cache for a maximum of "nr_verts" new - * vertices. Flush primitive and/or vertex queues if necessary to - * make space. - */ -static struct prim_header *get_queued_prim( struct draw_context *draw, - unsigned nr_verts ) -{ - if (!draw_vertex_cache_check_space( draw, nr_verts )) { -// debug_printf("v"); - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE ); - } - else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) { -// debug_printf("p"); - draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE ); - } - - assert(draw->pq.queue_nr < PRIM_QUEUE_LENGTH); - - return &draw->pq.queue[draw->pq.queue_nr++]; -} - - - -/** - * Add a point to the primitive queue. - * \param i0 index into user's vertex arrays - */ -static void do_point( struct draw_context *draw, - unsigned i0 ) -{ - struct prim_header *prim = get_queued_prim( draw, 1 ); - - prim->reset_line_stipple = 0; - prim->edgeflags = 1; - prim->pad = 0; - prim->v[0] = draw->vcache.get_vertex( draw, i0 ); -} - - -/** - * Add a line to the primitive queue. - * \param i0 index into user's vertex arrays - * \param i1 index into user's vertex arrays - */ -static void do_line( struct draw_context *draw, - boolean reset_stipple, - unsigned i0, - unsigned i1 ) -{ - struct prim_header *prim = get_queued_prim( draw, 2 ); - - prim->reset_line_stipple = reset_stipple; - prim->edgeflags = 1; - prim->pad = 0; - prim->v[0] = draw->vcache.get_vertex( draw, i0 ); - prim->v[1] = draw->vcache.get_vertex( draw, i1 ); -} - -/** - * Add a triangle to the primitive queue. - */ -static void do_triangle( struct draw_context *draw, - unsigned i0, - unsigned i1, - unsigned i2 ) -{ - struct prim_header *prim = get_queued_prim( draw, 3 ); - -// _mesa_printf("tri %d %d %d\n", i0, i1, i2); - prim->reset_line_stipple = 1; - prim->edgeflags = ~0; - prim->pad = 0; - prim->v[0] = draw->vcache.get_vertex( draw, i0 ); - prim->v[1] = draw->vcache.get_vertex( draw, i1 ); - prim->v[2] = draw->vcache.get_vertex( draw, i2 ); -} - -static void do_ef_triangle( struct draw_context *draw, - boolean reset_stipple, - unsigned ef_mask, - unsigned i0, - unsigned i1, - unsigned i2 ) -{ - struct prim_header *prim = get_queued_prim( draw, 3 ); - struct vertex_header *v0 = draw->vcache.get_vertex( draw, i0 ); - struct vertex_header *v1 = draw->vcache.get_vertex( draw, i1 ); - struct vertex_header *v2 = draw->vcache.get_vertex( draw, i2 ); - - prim->reset_line_stipple = reset_stipple; - - prim->edgeflags = ef_mask & ((v0->edgeflag << 0) | - (v1->edgeflag << 1) | - (v2->edgeflag << 2)); - prim->pad = 0; - prim->v[0] = v0; - prim->v[1] = v1; - prim->v[2] = v2; -} - - -static void do_ef_quad( struct draw_context *draw, - unsigned v0, - unsigned v1, - unsigned v2, - unsigned v3 ) -{ - const unsigned omitEdge2 = ~(1 << 1); - const unsigned omitEdge3 = ~(1 << 2); - do_ef_triangle( draw, 1, omitEdge2, v0, v1, v3 ); - do_ef_triangle( draw, 0, omitEdge3, v1, v2, v3 ); -} - -static void do_quad( struct draw_context *draw, - unsigned v0, - unsigned v1, - unsigned v2, - unsigned v3 ) -{ - do_triangle( draw, v0, v1, v3 ); - do_triangle( draw, v1, v2, v3 ); -} - - -/** - * Main entrypoint to draw some number of points/lines/triangles - */ -static void -draw_prim( struct draw_context *draw, - unsigned prim, unsigned start, unsigned count ) -{ - unsigned i; - boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL); - boolean flatfirst = - (draw->rasterizer->flatshade & draw->rasterizer->flatshade_first) ? TRUE : FALSE; - -// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count ); - - switch (prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - do_point( draw, - start + i ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - do_line( draw, - TRUE, - start + i + 0, - start + i + 1); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - for (i = 1; i < count; i++) { - do_line( draw, - i == 1, /* XXX: only if vb not split */ - start + i - 1, - start + i ); - } - - do_line( draw, - 0, - start + count - 1, - start + 0 ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - do_line( draw, - i == 1, - start + i - 1, - start + i ); - } - break; - - case PIPE_PRIM_TRIANGLES: - if (unfilled) { - for (i = 0; i+2 < count; i += 3) { - do_ef_triangle( draw, - 1, - ~0, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - else { - for (i = 0; i+2 < count; i += 3) { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - if (i & 1) { - do_triangle( draw, - start + i + 0, - start + i + 2, - start + i + 1 ); - } - else { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - } - else { - for (i = 0; i+2 < count; i++) { - if (i & 1) { - do_triangle( draw, - start + i + 1, - start + i + 0, - start + i + 2 ); - } - else { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - do_triangle( draw, - start + i + 1, - start + i + 2, - start + 0 ); - } - } - else { - for (i = 0; i+2 < count; i++) { - do_triangle( draw, - start + 0, - start + i + 1, - start + i + 2 ); - } - } - } - break; - - - case PIPE_PRIM_QUADS: - if (unfilled) { - for (i = 0; i+3 < count; i += 4) { - do_ef_quad( draw, - start + i + 0, - start + i + 1, - start + i + 2, - start + i + 3); - } - } - else { - for (i = 0; i+3 < count; i += 4) { - do_quad( draw, - start + i + 0, - start + i + 1, - start + i + 2, - start + i + 3); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - if (unfilled) { - for (i = 0; i+3 < count; i += 2) { - do_ef_quad( draw, - start + i + 2, - start + i + 0, - start + i + 1, - start + i + 3); - } - } - else { - for (i = 0; i+3 < count; i += 2) { - do_quad( draw, - start + i + 2, - start + i + 0, - start + i + 1, - start + i + 3); - } - } - break; - - case PIPE_PRIM_POLYGON: - if (unfilled) { - unsigned ef_mask = (1<<2) | (1<<0); - - for (i = 0; i+2 < count; i++) { - - if (i + 3 >= count) - ef_mask |= (1<<1); - - do_ef_triangle( draw, - i == 0, - ef_mask, - start + i + 1, - start + i + 2, - start + 0); - - ef_mask &= ~(1<<2); - } - } - else { - for (i = 0; i+2 < count; i++) { - do_triangle( draw, - start + i + 1, - start + i + 2, - start + 0); - } - } - break; - - default: - assert(0); - break; - } -} - - - - -/** - * Draw vertex arrays - * This is the main entrypoint into the drawing module. - * \param prim one of PIPE_PRIM_x - * \param start index of first vertex to draw - * \param count number of vertices to draw - */ -void -draw_arrays(struct draw_context *draw, unsigned prim, - unsigned start, unsigned count) -{ - if (reduced_prim[prim] != draw->reduced_prim) { - draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->reduced_prim = reduced_prim[prim]; - } - - /* drawing done here: */ - if (!draw_pt_arrays(draw, prim, start, count)) { - /* we have to run the whole pipeline */ - draw_prim(draw, prim, start, count); - } -} - - diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index c8cb96c8ba..39aa81b43c 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -44,7 +44,6 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_exec.h" #include "tgsi/util/tgsi_scan.h" @@ -52,11 +51,11 @@ struct pipe_context; struct gallivm_prog; struct gallivm_cpu_engine; +struct draw_vertex_shader; +struct draw_context; +struct draw_stage; +struct vbuf_render; -struct draw_pt_middle_end; -struct draw_pt_front_end; - -#define MAX_SHADER_VERTICES 128 /** * Basic vertex info. @@ -70,17 +69,14 @@ struct vertex_header { float clip[4]; - float data[][4]; /* Note variable size */ + /* This will probably become float (*data)[4] soon: + */ + float data[][4]; }; /* NOTE: It should match vertex_id size above */ #define UNDEFINED_VERTEX_ID 0xffff -/* XXX This is too large */ -#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) -#define MAX_VERTEX_ALLOCATION ((MAX_VERTEX_SIZE + 0x0f) & ~0x0f) - - /** * Basic info for a point/line/triangle primitive. @@ -95,92 +91,11 @@ struct prim_header { -struct draw_context; - -/** - * Base class for all primitive drawing stages. - */ -struct draw_stage -{ - struct draw_context *draw; /**< parent context */ - - struct draw_stage *next; /**< next stage in pipeline */ - - struct vertex_header **tmp; /**< temp vert storage, such as for clipping */ - unsigned nr_tmps; - - void (*point)( struct draw_stage *, - struct prim_header * ); - - void (*line)( struct draw_stage *, - struct prim_header * ); - - void (*tri)( struct draw_stage *, - struct prim_header * ); - - void (*flush)( struct draw_stage *, - unsigned flags ); - - void (*reset_stipple_counter)( struct draw_stage * ); - void (*destroy)( struct draw_stage * ); -}; - - -#define PRIM_QUEUE_LENGTH 32 -#define VCACHE_SIZE 32 -#define VCACHE_OVERFLOW 4 -#define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */ - -/** - * Private version of the compiled vertex_shader - */ -struct draw_vertex_shader { - - /* This member will disappear shortly: - */ - struct pipe_shader_state state; - - struct tgsi_shader_info info; - - void (*prepare)( struct draw_vertex_shader *shader, - struct draw_context *draw ); - - /* Run the shader - this interface will get cleaned up in the - * future: - */ - boolean (*run)( struct draw_vertex_shader *shader, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - void *out, - unsigned vertex_size); - - - void (*delete)( struct draw_vertex_shader * ); -}; - - -/* Internal function for vertex fetch. - */ -typedef void (*fetch_func)(const void *ptr, float *attrib); - -fetch_func draw_get_fetch_func( enum pipe_format format ); - - - -typedef void (*full_fetch_func)( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ); - -typedef void (*pt_fetch_func)( struct draw_context *draw, - float *out, - unsigned start, - unsigned count ); - - -struct vbuf_render; +#define PT_SHADE 0x1 +#define PT_CLIPTEST 0x2 +#define PT_PIPELINE 0x4 +#define PT_MAX_MIDDLE 0x8 /** * Private context for the drawing module. @@ -207,6 +122,17 @@ struct draw_context struct draw_stage *wide_line; struct draw_stage *wide_point; struct draw_stage *rasterize; + + float wide_point_threshold; /**< convert pnts to tris if larger than this */ + float wide_line_threshold; /**< convert lines to tris if wider than this */ + boolean line_stipple; /**< do line stipple? */ + boolean point_sprite; /**< convert points to quads for sprites? */ + + /* Temporary storage while the pipeline is being run: + */ + char *verts; + unsigned vertex_stride; + unsigned vertex_count; } pipeline; @@ -215,71 +141,63 @@ struct draw_context /* Support prototype passthrough path: */ struct { - unsigned prim; /* XXX: to be removed */ - unsigned hw_vertex_size; /* XXX: to be removed */ - struct { struct draw_pt_middle_end *fetch_emit; - struct draw_pt_middle_end *fetch_pipeline; - struct draw_pt_middle_end *fetch_shade_emit; - struct draw_pt_middle_end *fetch_shade_cliptest_pipeline_or_emit; + struct draw_pt_middle_end *general; } middle; struct { - struct draw_pt_front_end *noop; - struct draw_pt_front_end *split_arrays; struct draw_pt_front_end *vcache; } front; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_buffers; + + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + + /* user-space vertex data, buffers */ struct { - char *verts; - unsigned vertex_stride; - unsigned vertex_count; - } pipeline; + const unsigned *edgeflag; + + /** vertex element/index buffer (ex: glDrawElements) */ + const void *elts; + /** bytes per index (0, 1, 2 or 4) */ + unsigned eltSize; + + /** vertex arrays */ + const void *vbuffer[PIPE_MAX_ATTRIBS]; + + /** constant buffer (for vertex shader) */ + const void *constants; + } user; } pt; + struct { + boolean bypass_clipping; + } driver; + boolean flushing; + boolean vcache_flushing; + boolean bypass_clipping; /* set if either api or driver bypass_clipping true */ /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + struct draw_vertex_shader *vertex_shader; boolean identity_viewport; uint num_vs_outputs; /**< convenience, from vertex_shader */ - /* user-space vertex data, buffers */ - struct { - const unsigned *edgeflag; - - /** vertex element/index buffer (ex: glDrawElements) */ - const void *elts; - /** bytes per index (0, 1, 2 or 4) */ - unsigned eltSize; - - /** vertex arrays */ - const void *vbuffer[PIPE_MAX_ATTRIBS]; - - /** constant buffer (for vertex shader) */ - const void *constants; - } user; /* Clip derived state: */ float plane[12][4]; unsigned nr_planes; - float wide_point_threshold; /**< convert pnts to tris if larger than this */ - float wide_line_threshold; /**< convert lines to tris if wider than this */ - boolean line_stipple; /**< do line stipple? */ - boolean point_sprite; /**< convert points to quads for sprites? */ - boolean use_sse; - boolean use_pt_shaders; /* temporary flag to switch on pt shader paths */ - /* If a prim stage introduces new vertex attributes, they'll be stored here */ struct { @@ -293,59 +211,6 @@ struct draw_context /** TGSI program interpreter runtime state */ struct tgsi_exec_machine machine; - /* Vertex fetch internal state - */ - struct { - const ubyte *src_ptr[PIPE_MAX_ATTRIBS]; - unsigned pitch[PIPE_MAX_ATTRIBS]; - fetch_func fetch[PIPE_MAX_ATTRIBS]; - unsigned nr_attrs; - full_fetch_func fetch_func; - pt_fetch_func pt_fetch; - } vertex_fetch; - - /* Post-tnl vertex cache: - */ - struct { - unsigned referenced; /**< bitfield */ - - struct { - unsigned in; /* client array element */ - unsigned out; /* index in vs queue/array */ - } idx[VCACHE_SIZE + VCACHE_OVERFLOW]; - - unsigned overflow; - - /** To find space in the vertex cache: */ - struct vertex_header *(*get_vertex)( struct draw_context *draw, - unsigned i ); - } vcache; - - /* Vertex shader queue: - */ - struct { - unsigned elts[VS_QUEUE_LENGTH]; /**< index into the user's vertex arrays */ - char *vertex_cache; - unsigned queue_nr; - unsigned post_nr; - } vs; - - /** - * Run the vertex shader on all vertices in the vertex queue. - */ - void (*shader_queue_flush)(struct draw_context *draw); - - /* Prim pipeline queue: - */ - struct { - /* Need to queue up primitives until their vertices have been - * transformed by a vs queue flush. - */ - struct prim_header queue[PRIM_QUEUE_LENGTH]; - unsigned queue_nr; - } pq; - - /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. */ struct gallivm_cpu_engine *engine; @@ -354,107 +219,49 @@ struct draw_context -extern struct draw_stage *draw_unfilled_stage( struct draw_context *context ); -extern struct draw_stage *draw_twoside_stage( struct draw_context *context ); -extern struct draw_stage *draw_offset_stage( struct draw_context *context ); -extern struct draw_stage *draw_clip_stage( struct draw_context *context ); -extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); -extern struct draw_stage *draw_cull_stage( struct draw_context *context ); -extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); -extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); -extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); -extern struct draw_stage *draw_validate_stage( struct draw_context *context ); -extern void draw_free_temp_verts( struct draw_stage *stage ); -extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); -extern void draw_reset_vertex_ids( struct draw_context *draw ); +/******************************************************************************* + * Vertex processing (was passthrough) code: + */ +boolean draw_pt_init( struct draw_context *draw ); +void draw_pt_destroy( struct draw_context *draw ); +void draw_pt_reset_vertex_ids( struct draw_context *draw ); -extern int draw_vertex_cache_check_space( struct draw_context *draw, - unsigned nr_verts ); +/******************************************************************************* + * Primitive processing (pipeline) code: + */ -extern void draw_vertex_cache_invalidate( struct draw_context *draw ); -extern void draw_vertex_cache_unreference( struct draw_context *draw ); -extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ); +boolean draw_pipeline_init( struct draw_context *draw ); +void draw_pipeline_destroy( struct draw_context *draw ); -extern void draw_vertex_shader_queue_flush( struct draw_context *draw ); +void draw_pipeline_run( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ); -extern void draw_update_vertex_fetch( struct draw_context *draw ); +void draw_pipeline_flush( struct draw_context *draw, + unsigned flags ); -extern boolean draw_need_pipeline(const struct draw_context *draw, - unsigned prim ); -/* Passthrough mode (second attempt): +/******************************************************************************* + * Flushing */ -boolean draw_pt_init( struct draw_context *draw ); -void draw_pt_destroy( struct draw_context *draw ); -boolean draw_pt_arrays( struct draw_context *draw, - unsigned prim, - unsigned start, - unsigned count ); -void draw_pt_reset_vertex_ids( struct draw_context *draw ); -void draw_pt_run_pipeline( struct draw_context *draw, - unsigned prim, - char *verts, - unsigned vertex_stride, - unsigned vertex_count, - const ushort *elts, - unsigned count ); - - -#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */ -#define DRAW_FLUSH_PRIM_QUEUE 0x2 -#define DRAW_FLUSH_VERTEX_CACHE 0x4 #define DRAW_FLUSH_STATE_CHANGE 0x8 #define DRAW_FLUSH_BACKEND 0x10 void draw_do_flush( struct draw_context *draw, unsigned flags ); -boolean draw_get_edgeflag( struct draw_context *draw, - unsigned idx ); -/** - * Get a writeable copy of a vertex. - * \param stage drawing stage info - * \param vert the vertex to copy (source) - * \param idx index into stage's tmp[] array to put the copy (dest) - * \return pointer to the copied vertex - */ -static INLINE struct vertex_header * -dup_vert( struct draw_stage *stage, - const struct vertex_header *vert, - unsigned idx ) -{ - struct vertex_header *tmp = stage->tmp[idx]; - const uint vsize = sizeof(struct vertex_header) - + stage->draw->num_vs_outputs * 4 * sizeof(float); - memcpy(tmp, vert, vsize); - tmp->vertex_id = UNDEFINED_VERTEX_ID; - return tmp; -} - -static INLINE float -dot4(const float *a, const float *b) -{ - float result = (a[0]*b[0] + - a[1]*b[1] + - a[2]*b[2] + - a[3]*b[3]); - - return result; -} - -static INLINE struct vertex_header * -draw_header_from_block(char *block, int size, int num) -{ - return (struct vertex_header*)(block + num * size); -} #endif /* DRAW_PRIVATE_H */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 3d2e7bf7b8..f5a3bf390e 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -36,146 +36,53 @@ #include "draw/draw_pt.h" -#if 0 -static boolean too_many_elts( struct draw_context *draw, - unsigned elts ) -{ - return elts > (8 * 1024); -} -#endif - -static INLINE unsigned reduced_prim(unsigned prim) -{ - /*FIXME*/ - return prim; -} -static INLINE boolean good_prim(unsigned prim) -{ - /*FIXME*/ - return FALSE; -} -boolean +/* Overall we split things into: + * - frontend -- prepare fetch_elts, draw_elts - eg vcache + * - middle -- fetch, shade, cliptest, viewport + * - pipeline -- the prim pipeline: clipping, wide lines, etc + * - backend -- the vbuf_render provided by the driver. + */ +static boolean draw_pt_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count) { - const boolean pipeline = draw_need_pipeline(draw, prim); - const boolean cliptest = !draw->rasterizer->bypass_clipping; - const boolean shading = !draw->rasterizer->bypass_vs; struct draw_pt_front_end *frontend = NULL; struct draw_pt_middle_end *middle = NULL; + unsigned opt = 0; - if (!draw->render) - return FALSE; - /*debug_printf("XXXXXXXXXX needs_pipeline = %d\n", pipeline);*/ + if (!draw->render) { + opt |= PT_PIPELINE; + } - /* Overall we do: - * - frontend -- prepare fetch_elts, draw_elts - eg vcache - * - middle -- fetch, shade, cliptest, viewport - * - pipeline -- the prim pipeline: clipping, wide lines, etc - * - backend -- the vbuf_render provided by the driver. - */ + if (draw_need_pipeline(draw, + draw->rasterizer, + prim)) { + opt |= PT_PIPELINE; + } - if (shading && !draw->use_pt_shaders) - return FALSE; + if (!draw->bypass_clipping) { + opt |= PT_CLIPTEST; + } + if (!draw->rasterizer->bypass_vs) { + opt |= PT_SHADE; + } - if (!cliptest && !pipeline && !shading) { - /* This is the 'passthrough' path: - */ - /* Fetch user verts, emit hw verts: - */ + if (opt) + middle = draw->pt.middle.general; + else middle = draw->pt.middle.fetch_emit; - } - else if (!cliptest && !shading) { - /* This is the 'passthrough' path targetting the pipeline backend. - */ - /* Fetch user verts, emit pipeline verts, run pipeline: - */ - middle = draw->pt.middle.fetch_pipeline; - } - else if (!cliptest && !pipeline) { - /* Fetch user verts, run vertex shader, emit hw verts: - */ - middle = draw->pt.middle.fetch_shade_emit; - } - else if (!pipeline) { - /* Even though !pipeline, we have to run it to get clipping. We - * do know that the pipeline is just the clipping operation, but - * that probably doesn't help much. - * - * This is going to be the most important path for a lot of - * swtnl cards. - */ - /* Fetch user verts, - * run vertex shader, - * cliptest and viewport trasform - * if no clipped vertices, - * emit hw verts - * else - * run pipline - */ - middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit; - } - else { - /* This is what we're currently always doing: - */ - /* Fetch user verts, run vertex shader, cliptest, run pipeline - * or - * Fetch user verts, run vertex shader, run pipeline - */ - middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit; - } - /* If !pipeline, need to make sure we respect the driver's limited - * capabilites to receive blocks of vertex data and elements. + /* May create a short-circuited version of this for small primitives: */ -#if 0 - if (!pipeline) { - unsigned vertex_mode = passthrough; - unsigned nr_verts = count_vertices( draw, start, count ); - unsigned hw_prim = prim; - - if (is_elts(draw)) { - frontend = draw->pt.front.vcache; - hw_prim = reduced_prim(prim); - } -#if 0 - if (too_many_verts(nr_verts)) { - /* if (is_verts(draw) && can_split(prim)) { - draw = draw_arrays_split; - } - else */ { - frontend = draw->pt.front.vcache; - hw_prim = reduced_prim(prim); - } - } -#endif - - if (too_many_elts(count)) { - - /* if (is_elts(draw) && can_split(prim)) { - draw = draw_elts_split; - } - else */ { - frontend = draw->pt.front.vcache; - hw_prim = reduced_prim(prim); - } - } - - if (!good_prim(hw_prim)) { - frontend = draw->pt.front.vcache; - } - } -#else frontend = draw->pt.front.vcache; -#endif - frontend->prepare( frontend, prim, middle ); + frontend->prepare( frontend, prim, middle, opt ); frontend->run( frontend, draw_pt_elt_func( draw ), @@ -190,21 +97,16 @@ draw_pt_arrays(struct draw_context *draw, boolean draw_pt_init( struct draw_context *draw ) { - draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw ); - if (!draw->pt.middle.fetch_emit) - return FALSE; - - draw->pt.middle.fetch_pipeline = draw_pt_fetch_pipeline( draw ); - if (!draw->pt.middle.fetch_pipeline) + draw->pt.front.vcache = draw_pt_vcache( draw ); + if (!draw->pt.front.vcache) return FALSE; - draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit = - draw_pt_fetch_pipeline_or_emit( draw ); - if (!draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit) + draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw ); + if (!draw->pt.middle.fetch_emit) return FALSE; - draw->pt.front.vcache = draw_pt_vcache( draw ); - if (!draw->pt.front.vcache) + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); + if (!draw->pt.middle.general) return FALSE; return TRUE; @@ -213,24 +115,63 @@ boolean draw_pt_init( struct draw_context *draw ) void draw_pt_destroy( struct draw_context *draw ) { + if (draw->pt.middle.general) { + draw->pt.middle.general->destroy( draw->pt.middle.general ); + draw->pt.middle.general = NULL; + } + if (draw->pt.middle.fetch_emit) { draw->pt.middle.fetch_emit->destroy( draw->pt.middle.fetch_emit ); draw->pt.middle.fetch_emit = NULL; } - if (draw->pt.middle.fetch_pipeline) { - draw->pt.middle.fetch_pipeline->destroy( draw->pt.middle.fetch_pipeline ); - draw->pt.middle.fetch_pipeline = NULL; - } - - if (draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit) { - draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit->destroy( - draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit ); - draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit = NULL; - } - if (draw->pt.front.vcache) { draw->pt.front.vcache->destroy( draw->pt.front.vcache ); draw->pt.front.vcache = NULL; } } + + + +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + +/** + * Draw vertex arrays + * This is the main entrypoint into the drawing module. + * \param prim one of PIPE_PRIM_x + * \param start index of first vertex to draw + * \param count number of vertices to draw + */ +void +draw_arrays(struct draw_context *draw, unsigned prim, + unsigned start, unsigned count) +{ + if (reduced_prim[prim] != draw->reduced_prim) { + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->reduced_prim = reduced_prim[prim]; + } + + /* drawing done here: */ + draw_pt_arrays(draw, prim, start, count); +} + +boolean draw_pt_get_edgeflag( struct draw_context *draw, + unsigned idx ) +{ + if (draw->pt.user.edgeflag) + return (draw->pt.user.edgeflag[idx/32] & (1 << (idx%32))) != 0; + else + return 1; +} diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 48413b648a..fd0d158fcf 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -50,6 +50,12 @@ struct draw_context; #define DRAW_PT_FLAG_MASK (3<<30) +#define PT_SHADE 0x1 +#define PT_CLIPTEST 0x2 +#define PT_PIPELINE 0x4 +#define PT_MAX_MIDDLE 0x8 + + /* The "front end" - prepare sets of fetch, draw elements for the * middle end. * @@ -64,7 +70,8 @@ struct draw_context; struct draw_pt_front_end { void (*prepare)( struct draw_pt_front_end *, unsigned prim, - struct draw_pt_middle_end * ); + struct draw_pt_middle_end *, + unsigned opt ); void (*run)( struct draw_pt_front_end *, pt_elt_func elt_func, @@ -82,15 +89,11 @@ struct draw_pt_front_end { * Currently two versions of this: * - fetch, vertex shade, cliptest, prim-pipeline * - fetch, emit (ie passthrough) - * Later: - * - fetch, vertex shade, cliptest, maybe-pipeline, maybe-emit - * - fetch, vertex shade, emit - * - * Currenly only using the passthrough version. */ struct draw_pt_middle_end { void (*prepare)( struct draw_pt_middle_end *, - unsigned prim ); + unsigned prim, + unsigned opt ); void (*run)( struct draw_pt_middle_end *, const unsigned *fetch_elts, @@ -104,12 +107,9 @@ struct draw_pt_middle_end { /* The "back end" - supplied by the driver, defined in draw_vbuf.h. - * - * Not sure whether to wrap the prim pipeline up as an alternate - * backend. Would be a win for everything except pure passthrough - * mode... */ struct vbuf_render; +struct vertex_header; /* Helper functions. @@ -118,12 +118,88 @@ pt_elt_func draw_pt_elt_func( struct draw_context *draw ); const void *draw_pt_elt_ptr( struct draw_context *draw, unsigned start ); -/* Implementations: +/* Frontends: + * + * Currently only the general-purpose vcache implementation, could add + * a special case for tiny vertex buffers. */ struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); + +/* Middle-ends: + * + * Currently one general-purpose case which can do all possibilities, + * at the slight expense of creating a vertex_header in some cases + * unecessarily. + * + * The special case fetch_emit code avoids pipeline vertices + * altogether and builds hardware vertices directly from API + * vertex_elements. + */ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); -struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); +/* More helpers: + */ +boolean draw_pt_get_edgeflag( struct draw_context *draw, + unsigned idx ); + + +/******************************************************************************* + * HW vertex emit: + */ +struct pt_emit; + +void draw_pt_emit_prepare( struct pt_emit *emit, + unsigned prim ); + +void draw_pt_emit( struct pt_emit *emit, + const float (*vertex_data)[4], + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ); + +void draw_pt_emit_destroy( struct pt_emit *emit ); + +struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); + + +/******************************************************************************* + * API vertex fetch: + */ + +struct pt_fetch; +void draw_pt_fetch_prepare( struct pt_fetch *fetch, + unsigned vertex_size ); + +void draw_pt_fetch_run( struct pt_fetch *fetch, + const unsigned *elts, + unsigned count, + char *verts ); + +void draw_pt_fetch_destroy( struct pt_fetch *fetch ); + +struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ); + +/******************************************************************************* + * Post-VS: cliptest, rhw, viewport + */ +struct pt_post_vs; + +boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, + struct vertex_header *pipeline_verts, + unsigned stride, + unsigned count ); + +void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, + boolean bypass_clipping, + boolean identity_viewport, + boolean opengl ); + +struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ); + +void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ); + + #endif diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c index d49770e7b2..2094c081ed 100644 --- a/src/gallium/auxiliary/draw/draw_pt_elts.c +++ b/src/gallium/auxiliary/draw/draw_pt_elts.c @@ -59,7 +59,7 @@ static unsigned elt_vert( const void *elts, unsigned idx ) pt_elt_func draw_pt_elt_func( struct draw_context *draw ) { - switch (draw->user.eltSize) { + switch (draw->pt.user.eltSize) { case 0: return elt_vert; case 1: return elt_ubyte; case 2: return elt_ushort; @@ -71,9 +71,9 @@ pt_elt_func draw_pt_elt_func( struct draw_context *draw ) const void *draw_pt_elt_ptr( struct draw_context *draw, unsigned start ) { - const char *elts = draw->user.elts; + const char *elts = draw->pt.user.elts; - switch (draw->user.eltSize) { + switch (draw->pt.user.eltSize) { case 0: return (const void *)(((const ubyte *)NULL) + start); case 1: diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c new file mode 100644 index 0000000000..d35329aba0 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -0,0 +1,252 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "translate/translate.h" + +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" + +struct pt_emit { + struct draw_context *draw; + + struct translate *translate; + + struct cso_hash *hash; +}; + +static INLINE unsigned translate_hash_key_size(struct translate_key *key) +{ + unsigned size = sizeof(struct translate_key) - + sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements); + return size; +} + +static INLINE unsigned create_key(struct translate_key *key) +{ + unsigned hash_key; + unsigned size = translate_hash_key_size(key); + /*debug_printf("key size = %d, (els = %d)\n", + size, key->nr_elements);*/ + hash_key = cso_construct_key(key, size); + return hash_key; +} + +static struct translate *cached_translate(struct pt_emit *emit, + struct translate_key *key) +{ + unsigned hash_key = create_key(key); + struct cso_hash_iter iter = cso_hash_find(emit->hash, hash_key); + struct translate *translate = 0; + + if (cso_hash_iter_is_null(iter)) { + translate = translate_create(key); + cso_hash_insert(emit->hash, hash_key, translate); + /*debug_printf("\tCREATED with %d\n", hash_key);*/ + } else { + translate = cso_hash_iter_data(iter); + /*debug_printf("\tOK with %d\n", hash_key);*/ + } + + return translate; +} + + +static INLINE void delete_translates(struct pt_emit *emit) +{ + struct cso_hash *hash = emit->hash; + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + struct translate *state = (struct translate*)cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); + if (state) { + state->release(state); + } + } +} + +void draw_pt_emit_prepare( struct pt_emit *emit, + unsigned prim ) +{ + struct draw_context *draw = emit->draw; + const struct vertex_info *vinfo; + unsigned dst_offset; + struct translate_key hw_key; + unsigned i; + boolean ok; + + ok = draw->render->set_primitive(draw->render, prim); + if (!ok) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); + + + /* Translate from pipeline vertices to hw vertices. + */ + dst_offset = 0; + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned src_buffer = 0; + unsigned output_format; + unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) ); + + + + switch (vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + src_buffer = 1; + src_offset = 0; + break; + case EMIT_4UB: + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); + default: + assert(0); + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + break; + } + + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = src_buffer; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; + } + + hw_key.nr_elements = vinfo->num_attribs; + hw_key.output_stride = vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!emit->translate || + memcmp(&emit->translate->key, &hw_key, sizeof(hw_key)) != 0) + { + emit->translate = cached_translate(emit, &hw_key); + } +} + + +void draw_pt_emit( struct pt_emit *emit, + const float (*vertex_data)[4], + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ) +{ + struct draw_context *draw = emit->draw; + struct translate *translate = emit->translate; + struct vbuf_render *render = draw->render; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = render->allocate_vertices(render, + (ushort)translate->key.output_stride, + (ushort)count); + if (!hw_verts) { + assert(0); + return; + } + + translate->set_buffer(translate, + 0, + vertex_data, + stride ); + + translate->set_buffer(translate, + 1, + &draw->rasterizer->point_size, + 0); + + translate->run( translate, + 0, + vertex_count, + hw_verts ); + + render->draw(render, + elts, + count); + + render->release_vertices(render, + hw_verts, + translate->key.output_stride, + vertex_count); +} + + +struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) +{ + struct pt_emit *emit = CALLOC_STRUCT(pt_emit); + if (!emit) + return NULL; + + emit->draw = draw; + emit->hash = cso_hash_create(); + + return emit; +} + +void draw_pt_emit_destroy( struct pt_emit *emit ) +{ + delete_translates(emit); + cso_hash_delete(emit->hash); + + FREE(emit); +} diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c new file mode 100644 index 0000000000..93da811ed8 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -0,0 +1,223 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "translate/translate.h" + +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" + +struct pt_fetch { + struct draw_context *draw; + + struct translate *translate; + + unsigned vertex_size; + + struct cso_hash *hash; +}; + +static INLINE unsigned translate_hash_key_size(struct translate_key *key) +{ + unsigned size = sizeof(struct translate_key) - + sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements); + return size; +} + +static INLINE unsigned create_key(struct translate_key *key) +{ + unsigned hash_key; + unsigned size = translate_hash_key_size(key); + /*debug_printf("key size = %d, (els = %d)\n", + size, key->nr_elements);*/ + hash_key = cso_construct_key(key, size); + return hash_key; +} + +static struct translate *cached_translate(struct pt_fetch *fetch, + struct translate_key *key) +{ + unsigned hash_key = create_key(key); + struct cso_hash_iter iter = cso_hash_find(fetch->hash, hash_key); + struct translate *translate = 0; + + if (cso_hash_iter_is_null(iter)) { + translate = translate_create(key); + cso_hash_insert(fetch->hash, hash_key, translate); + /*debug_printf("\tCREATED with %d\n", hash_key);*/ + } else { + translate = cso_hash_iter_data(iter); + /*debug_printf("\tOK with %d\n", hash_key);*/ + } + + return translate; +} + +static INLINE void delete_translates(struct pt_fetch *fetch) +{ + struct cso_hash *hash = fetch->hash; + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + struct translate *state = (struct translate*)cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); + if (state) { + state->release(state); + } + } +} + +/* Perform the fetch from API vertex elements & vertex buffers, to a + * contiguous set of float[4] attributes as required for the + * vertex_shader->run_linear() method. + * + * This is used in all cases except pure passthrough + * (draw_pt_fetch_emit.c) which has its own version to translate + * directly to hw vertices. + * + */ +void draw_pt_fetch_prepare( struct pt_fetch *fetch, + unsigned vertex_size ) +{ + struct draw_context *draw = fetch->draw; + unsigned i, nr = 0; + unsigned dst_offset = 0; + struct translate_key key; + + fetch->vertex_size = vertex_size; + + memset(&key, 0, sizeof(key)); + + /* Always emit/leave space for a vertex header. + * + * It's worth considering whether the vertex headers should contain + * a pointer to the 'data', rather than having it inline. + * Something to look at after we've fully switched over to the pt + * paths. + */ + { + /* Need to set header->vertex_id = 0xffff somehow. + */ + key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; + key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; + key.element[nr].input_offset = 0; + key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; + key.element[nr].output_offset = dst_offset; + dst_offset += 1 * sizeof(float); + nr++; + + + /* Just leave the clip[] array untouched. + */ + dst_offset += 4 * sizeof(float); + } + + + for (i = 0; i < draw->pt.nr_vertex_elements; i++) { + key.element[nr].input_format = draw->pt.vertex_element[i].src_format; + key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; + key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; + key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.element[nr].output_offset = dst_offset; + + dst_offset += 4 * sizeof(float); + nr++; + } + + assert(dst_offset <= vertex_size); + + key.nr_elements = nr; + key.output_stride = vertex_size; + + + /* Don't bother with caching at this stage: + */ + if (!fetch->translate || + memcmp(&fetch->translate->key, &key, sizeof(key)) != 0) + { + fetch->translate = cached_translate(fetch, &key); + + { + static struct vertex_header vh = { 0, 0, 0, 0xffff }; + fetch->translate->set_buffer(fetch->translate, + draw->pt.nr_vertex_buffers, + &vh, + 0); + } + } +} + + + + +void draw_pt_fetch_run( struct pt_fetch *fetch, + const unsigned *elts, + unsigned count, + char *verts ) +{ + struct draw_context *draw = fetch->draw; + struct translate *translate = fetch->translate; + unsigned i; + + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + translate->set_buffer(translate, + i, + ((char *)draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); + } + + translate->run_elts( translate, + elts, + count, + verts ); +} + + +struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) +{ + struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch); + if (!fetch) + return NULL; + + fetch->draw = draw; + fetch->hash = cso_hash_create(); + return fetch; +} + +void draw_pt_fetch_destroy( struct pt_fetch *fetch ) +{ + delete_translates(fetch); + cso_hash_delete(fetch->hash); + + FREE(fetch); +} + diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 3a26a5d712..68b2c5b1e3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -36,6 +36,7 @@ #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "translate/translate.h" /* The simplest 'middle end' in the new vertex code. * @@ -72,105 +73,29 @@ struct fetch_emit_middle_end { struct draw_pt_middle_end base; struct draw_context *draw; - - struct { - const ubyte *ptr; - unsigned pitch; - void (*fetch)( const void *from, float *attrib); - void (*emit)( const float *attrib, float **out ); - } fetch[PIPE_MAX_ATTRIBS]; - unsigned nr_fetch; - unsigned hw_vertex_size; -}; - - - -static void fetch_R32_FLOAT( const void *from, - float *attrib ) -{ - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = 0.0; - attrib[2] = 0.0; - attrib[3] = 1.0; -} - - -static void emit_R32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out) += 1; -} + struct translate *translate; + const struct vertex_info *vinfo; -static void emit_R32G32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out) += 2; -} + /* Cache point size somewhere it's address won't change: + */ + float point_size; -static void emit_R32G32B32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out) += 3; -} +}; -static void emit_R32G32B32A32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out)[3] = attrib[3]; - (*out) += 4; -} - - -/** - * General-purpose fetch from user's vertex arrays, emit to driver's - * vertex buffer. - * - * XXX this is totally temporary. - */ -static void -fetch_store_general( struct fetch_emit_middle_end *feme, - void *out_ptr, - const unsigned *fetch_elts, - unsigned count ) -{ - float *out = (float *)out_ptr; - uint i, j; - - for (i = 0; i < count; i++) { - unsigned elt = fetch_elts[i] & ~DRAW_PT_FLAG_MASK; - - for (j = 0; j < feme->nr_fetch; j++) { - float attrib[4]; - const ubyte *from = (feme->fetch[j].ptr + - feme->fetch[j].pitch * elt); - - feme->fetch[j].fetch( from, attrib ); - feme->fetch[j].emit( attrib, &out ); - } - } -} static void fetch_emit_prepare( struct draw_pt_middle_end *middle, - unsigned prim ) + unsigned prim, + unsigned opt ) { struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; const struct vertex_info *vinfo; - unsigned i; + unsigned i, dst_offset; boolean ok; + struct translate_key key; ok = draw->render->set_primitive( draw->render, @@ -182,49 +107,93 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, /* Must do this after set_primitive() above: */ - vinfo = draw->render->get_vertex_info(draw->render); + vinfo = feme->vinfo = draw->render->get_vertex_info(draw->render); + + - for (i = 0; i < vinfo->num_attribs; i++) { - unsigned src_element = vinfo->src_index[i]; - unsigned src_buffer = draw->vertex_element[src_element].vertex_buffer_index; - - feme->fetch[i].ptr = ((const ubyte *)draw->user.vbuffer[src_buffer] + - draw->vertex_buffer[src_buffer].buffer_offset + - draw->vertex_element[src_element].src_offset); + /* Transform from API vertices to HW vertices, skipping the + * pipeline_vertex intermediate step. + */ + dst_offset = 0; + memset(&key, 0, sizeof(key)); - feme->fetch[i].pitch = draw->vertex_buffer[src_buffer].pitch; - - feme->fetch[i].fetch = draw_get_fetch_func(draw->vertex_element[src_element].src_format); + for (i = 0; i < vinfo->num_attribs; i++) { + const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->src_index[i]]; + unsigned emit_sz = 0; + unsigned input_format = src->src_format; + unsigned input_buffer = src->vertex_buffer_index; + unsigned input_offset = src->src_offset; + unsigned output_format; switch (vinfo->emit[i]) { case EMIT_4F: - feme->fetch[i].emit = emit_R32G32B32A32_FLOAT; + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); break; case EMIT_3F: - feme->fetch[i].emit = emit_R32G32B32_FLOAT; + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); break; case EMIT_2F: - feme->fetch[i].emit = emit_R32G32_FLOAT; + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); break; case EMIT_1F: - feme->fetch[i].emit = emit_R32_FLOAT; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); break; case EMIT_1F_PSIZE: - feme->fetch[i].ptr = (const ubyte *)&feme->draw->rasterizer->point_size; - feme->fetch[i].pitch = 0; - feme->fetch[i].fetch = fetch_R32_FLOAT; - feme->fetch[i].emit = emit_R32_FLOAT; + input_format = PIPE_FORMAT_R32_FLOAT; + input_buffer = draw->pt.nr_vertex_buffers; + input_offset = 0; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); break; default: assert(0); - feme->fetch[i].emit = NULL; - break; + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + continue; } + + key.element[i].input_format = input_format; + key.element[i].input_buffer = input_buffer; + key.element[i].input_offset = input_offset; + key.element[i].output_format = output_format; + key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; } - feme->nr_fetch = vinfo->num_attribs; - feme->hw_vertex_size = vinfo->size * 4; + key.nr_elements = vinfo->num_attribs; + key.output_stride = vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!feme->translate || + memcmp(&feme->translate->key, &key, sizeof(key)) != 0) + { + if (feme->translate) + feme->translate->release(feme->translate); + + feme->translate = translate_create( &key ); + + feme->translate->set_buffer(feme->translate, + draw->pt.nr_vertex_buffers, + &feme->point_size, + 0); + } + + feme->point_size = draw->rasterizer->point_size; + + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + feme->translate->set_buffer(feme->translate, + i, + ((char *)draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); + } } @@ -246,7 +215,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, draw_do_flush( draw, DRAW_FLUSH_BACKEND ); hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)feme->hw_vertex_size, + (ushort)feme->translate->key.output_stride, (ushort)fetch_count ); if (!hw_verts) { assert(0); @@ -256,10 +225,19 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, /* Single routine to fetch vertices and emit HW verts. */ - fetch_store_general( feme, - hw_verts, - fetch_elts, - fetch_count ); + feme->translate->run_elts( feme->translate, + fetch_elts, + fetch_count, + hw_verts ); + + if (0) { + unsigned i; + for (i = 0; i < fetch_count; i++) { + debug_printf("\n\nvertex %d:\n", i); + draw_dump_emitted_vertex( feme->vinfo, + (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i ); + } + } /* XXX: Draw arrays path to avoid re-emitting index list again and * again. @@ -272,7 +250,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, */ draw->render->release_vertices( draw->render, hw_verts, - feme->hw_vertex_size, + feme->translate->key.output_stride, fetch_count ); } @@ -286,6 +264,11 @@ static void fetch_emit_finish( struct draw_pt_middle_end *middle ) static void fetch_emit_destroy( struct draw_pt_middle_end *middle ) { + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + + if (feme->translate) + feme->translate->release( feme->translate ); + FREE(middle); } @@ -293,6 +276,8 @@ static void fetch_emit_destroy( struct draw_pt_middle_end *middle ) struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ) { struct fetch_emit_middle_end *fetch_emit = CALLOC_STRUCT( fetch_emit_middle_end ); + if (fetch_emit == NULL) + return NULL; fetch_emit->base.prepare = fetch_emit_prepare; fetch_emit->base.run = fetch_emit_run; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c deleted file mode 100644 index a70d129c93..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ /dev/null @@ -1,326 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pt.h" - -/* The simplest 'middle end' in the new vertex code. - * - * The responsibilities of a middle end are to: - * - perform vertex fetch using - * - draw vertex element/buffer state - * - a list of fetch indices we received as an input - * - run the vertex shader - * - cliptest, - * - clip coord calculation - * - viewport transformation - * - if necessary, run the primitive pipeline, passing it: - * - a linear array of vertex_header vertices constructed here - * - a set of draw indices we received as an input - * - otherwise, drive the hw backend, - * - allocate space for hardware format vertices - * - translate the vertex-shader output vertices to hw format - * - calling the backend draw functions. - * - * For convenience, we provide a helper function to drive the hardware - * backend given similar inputs to those required to run the pipeline. - * - * In the case of passthrough mode, many of these actions are disabled - * or noops, so we end up doing: - * - * - perform vertex fetch - * - drive the hw backend - * - * IE, basically just vertex fetch to post-vs-format vertices, - * followed by a call to the backend helper function. - */ - - -struct fetch_pipeline_middle_end { - struct draw_pt_middle_end base; - struct draw_context *draw; - - void (*header)( unsigned idx, float **out); - - struct { - const ubyte *ptr; - unsigned pitch; - void (*fetch)( const void *from, float *attrib); - void (*emit)( const float *attrib, float **out ); - } fetch[PIPE_MAX_ATTRIBS]; - - unsigned nr_fetch; - unsigned pipeline_vertex_size; - unsigned prim; -}; - - -#if 0 -static void emit_R32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out) += 1; -} - -static void emit_R32G32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out) += 2; -} - -static void emit_R32G32B32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out) += 3; -} -#endif -static void emit_R32G32B32A32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out)[3] = attrib[3]; - (*out) += 4; -} - -static void header( unsigned idx, - float **out ) -{ - struct vertex_header *header = (struct vertex_header *) (*out); - - header->clipmask = 0; - header->edgeflag = 1; - header->pad = 0; - header->vertex_id = UNDEFINED_VERTEX_ID; - - (*out)[1] = 0; - (*out)[2] = 0; - (*out)[3] = 0; - (*out)[3] = 1; - (*out) += 5; -} - - -static void header_ef( unsigned idx, - float **out ) -{ - struct vertex_header *header = (struct vertex_header *) (*out); - - /* XXX: need a reset_stipple flag in the vertex header too? - */ - header->clipmask = 0; - header->edgeflag = (idx & DRAW_PT_EDGEFLAG) != 0; - header->pad = 0; - header->vertex_id = UNDEFINED_VERTEX_ID; - - (*out)[1] = 0; - (*out)[2] = 0; - (*out)[3] = 0; - (*out)[3] = 1; - (*out) += 5; -} - - -/** - * General-purpose fetch from user's vertex arrays, emit to driver's - * vertex buffer. - * - * XXX this is totally temporary. - */ -static void -fetch_store_general( struct fetch_pipeline_middle_end *fpme, - void *out_ptr, - const unsigned *fetch_elts, - unsigned count ) -{ - float *out = (float *)out_ptr; - uint i, j; - - for (i = 0; i < count; i++) { - unsigned elt = fetch_elts[i]; - - fpme->header( elt, &out ); - elt &= ~DRAW_PT_FLAG_MASK; - - for (j = 0; j < fpme->nr_fetch; j++) { - float attrib[4]; - const ubyte *from = (fpme->fetch[j].ptr + - fpme->fetch[j].pitch * elt); - - fpme->fetch[j].fetch( from, attrib ); - fpme->fetch[j].emit( attrib, &out ); - } - } -} - - -/* We aren't running a vertex shader, but are running the pipeline. - * That means the vertices we need to build look like: - * - * dw0: vertex header (zero?) - * dw1: clip coord 0 - * dw2: clip coord 1 - * dw3: clip coord 2 - * dw4: clip coord 4 - * dw5: screen coord 0 - * dw6: screen coord 0 - * dw7: screen coord 0 - * dw8: screen coord 0 - * dw9: other attribs... - * - */ -static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - unsigned prim ) -{ - struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; - unsigned i, nr = 0; - - fpme->prim = prim; - - /* Emit the vertex header and empty clipspace coord field: - */ - if (draw->user.edgeflag) { - fpme->header = header_ef; - } - else { - fpme->header = header; - } - - - /* Need to look at vertex shader inputs (we know it is a - * passthrough shader, so these define the outputs too). If we - * were running a shader, we'd still be looking at the inputs at - * this point. - */ - for (i = 0; i < draw->vertex_shader->info.num_inputs; i++) { - unsigned buf = draw->vertex_element[i].vertex_buffer_index; - enum pipe_format format = draw->vertex_element[i].src_format; - - fpme->fetch[nr].ptr = ((const ubyte *) draw->user.vbuffer[buf] + - draw->vertex_buffer[buf].buffer_offset + - draw->vertex_element[i].src_offset); - - fpme->fetch[nr].pitch = draw->vertex_buffer[buf].pitch; - fpme->fetch[nr].fetch = draw_get_fetch_func( format ); - - /* Always do this -- somewhat redundant... - */ - fpme->fetch[nr].emit = emit_R32G32B32A32_FLOAT; - nr++; - } - - fpme->nr_fetch = nr; - fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); -} - - - - -static void fetch_pipeline_run( struct draw_pt_middle_end *middle, - const unsigned *fetch_elts, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) -{ - struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - char *pipeline_verts; - - pipeline_verts = MALLOC( fpme->pipeline_vertex_size * - fetch_count ); - if (!pipeline_verts) { - assert(0); - return; - } - - - /* Single routine to fetch vertices and emit pipeline verts. - */ - fetch_store_general( fpme, - pipeline_verts, - fetch_elts, - fetch_count ); - - - /* Run the pipeline - */ - draw_pt_run_pipeline( fpme->draw, - fpme->prim, - pipeline_verts, - fpme->pipeline_vertex_size, - fetch_count, - draw_elts, - draw_count ); - - - /* Done -- that was easy, wasn't it: - */ - FREE( pipeline_verts ); -} - - - -static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) -{ - /* nothing to do */ -} - -static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) -{ - FREE(middle); -} - - -struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw ) -{ - struct fetch_pipeline_middle_end *fetch_pipeline = CALLOC_STRUCT( fetch_pipeline_middle_end ); - - fetch_pipeline->base.prepare = fetch_pipeline_prepare; - fetch_pipeline->base.run = fetch_pipeline_run; - fetch_pipeline->base.finish = fetch_pipeline_finish; - fetch_pipeline->base.destroy = fetch_pipeline_destroy; - - fetch_pipeline->draw = draw; - - return &fetch_pipeline->base; -} - diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 04b3d2c4cf..f0763dad8d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -27,109 +27,73 @@ #include "pipe/p_util.h" #include "draw/draw_context.h" -#include "draw/draw_private.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "draw/draw_vs.h" +#include "translate/translate.h" + struct fetch_pipeline_middle_end { struct draw_pt_middle_end base; struct draw_context *draw; - struct { - const ubyte *ptr; - unsigned pitch; - void (*fetch)( const void *from, float *attrib); - void (*emit)( const float *attrib, float **out ); - } fetch[PIPE_MAX_ATTRIBS]; + struct pt_emit *emit; + struct pt_fetch *fetch; + struct pt_post_vs *post_vs; - unsigned nr_fetch; - unsigned pipeline_vertex_size; - unsigned hw_vertex_size; + unsigned vertex_data_offset; + unsigned vertex_size; unsigned prim; + unsigned opt; }; -#if 0 -static void emit_R32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out) += 1; -} - -static void emit_R32G32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out) += 2; -} - -static void emit_R32G32B32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out) += 3; -} -#endif -static void emit_R32G32B32A32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out)[3] = attrib[3]; - (*out) += 4; -} static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - unsigned prim ) + unsigned prim, + unsigned opt ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - unsigned i, nr = 0; - boolean ok; - const struct vertex_info *vinfo; + struct draw_vertex_shader *vs = draw->vertex_shader; + + /* Add one to num_outputs because the pipeline occasionally tags on + * an additional texcoord, eg for AA lines. + */ + unsigned nr = MAX2( vs->info.num_inputs, + vs->info.num_outputs + 1 ); fpme->prim = prim; + fpme->opt = opt; - ok = draw->render->set_primitive(draw->render, prim); - if (!ok) { - assert(0); - return; - } - /* Must do this after set_primitive() above: + /* Always leave room for the vertex header whether we need it or + * not. It's hard to get rid of it in particular because of the + * viewport code in draw_pt_post_vs.c. */ - vinfo = draw->render->get_vertex_info(draw->render); + fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); - /* Need to look at vertex shader inputs (we know it is a - * passthrough shader, so these define the outputs too). If we - * were running a shader, we'd still be looking at the inputs at - * this point. - */ - for (i = 0; i < draw->vertex_shader->info.num_inputs; i++) { - unsigned buf = draw->vertex_element[i].vertex_buffer_index; - enum pipe_format format = draw->vertex_element[i].src_format; + - fpme->fetch[nr].ptr = ((const ubyte *) draw->user.vbuffer[buf] + - draw->vertex_buffer[buf].buffer_offset + - draw->vertex_element[i].src_offset); + draw_pt_fetch_prepare( fpme->fetch, + fpme->vertex_size ); - fpme->fetch[nr].pitch = draw->vertex_buffer[buf].pitch; - fpme->fetch[nr].fetch = draw_get_fetch_func( format ); + /* XXX: it's not really gl rasterization rules we care about here, + * but gl vs dx9 clip spaces. + */ + draw_pt_post_vs_prepare( fpme->post_vs, + draw->bypass_clipping, + draw->identity_viewport, + draw->rasterizer->gl_rasterization_rules ); + - /* Always do this -- somewhat redundant... - */ - fpme->fetch[nr].emit = emit_R32G32B32A32_FLOAT; - nr++; - } + if (!(opt & PT_PIPELINE)) + draw_pt_emit_prepare( fpme->emit, + prim ); + + /* No need to prepare the shader. + */ + vs->prepare(vs, draw); - fpme->nr_fetch = nr; - //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); - fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION; - fpme->hw_vertex_size = vinfo->size * 4; } @@ -144,71 +108,67 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vertex_shader; - char *pipeline_verts; + unsigned opt = fpme->opt; - pipeline_verts = MALLOC(fpme->pipeline_vertex_size * - fetch_count); + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * fetch_count); if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ assert(0); return; } + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run( fpme->fetch, + fetch_elts, + fetch_count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, ie a bypass shader, + * then the inputs == outputs, and are already in the correct + * place. + */ + if (opt & PT_SHADE) + { + shader->run_linear(shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + fetch_count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } - /* Shade + /* Do we need to run the pipeline? */ - shader->prepare(shader, draw); - if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts, - fpme->pipeline_vertex_size)) { - /* Run the pipeline */ - draw_pt_run_pipeline( fpme->draw, - fpme->prim, - pipeline_verts, - fpme->pipeline_vertex_size, - fetch_count, - draw_elts, - draw_count ); - } else { - unsigned i, j; - void *hw_verts; - float *out; - - /* XXX: need to flush to get prim_vbuf.c to release its allocation?? - */ - draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - - hw_verts = draw->render->allocate_vertices(draw->render, - (ushort)fpme->hw_vertex_size, - (ushort)fetch_count); - if (!hw_verts) { - assert(0); - return; - } - - out = (float *)hw_verts; - for (i = 0; i < fetch_count; i++) { - struct vertex_header *header = - (struct vertex_header*)(pipeline_verts + (fpme->pipeline_vertex_size * i)); - - for (j = 0; j < fpme->nr_fetch; j++) { - float *attrib = header->data[j]; - /*debug_printf("emiting [%f, %f, %f, %f]\n", - attrib[0], attrib[1], - attrib[2], attrib[3]);*/ - fpme->fetch[j].emit(attrib, &out); - } - } - /* XXX: Draw arrays path to avoid re-emitting index list again and - * again. - */ - draw->render->draw(draw->render, + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + fetch_count, + fpme->vertex_size, draw_elts, - draw_count); - - draw->render->release_vertices(draw->render, - hw_verts, - fpme->hw_vertex_size, - fetch_count); + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); } @@ -224,20 +184,51 @@ static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) { + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + + if (fpme->fetch) + draw_pt_fetch_destroy( fpme->fetch ); + + if (fpme->emit) + draw_pt_emit_destroy( fpme->emit ); + + if (fpme->post_vs) + draw_pt_post_vs_destroy( fpme->post_vs ); + FREE(middle); } struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *draw ) { - struct fetch_pipeline_middle_end *fetch_pipeline = CALLOC_STRUCT( fetch_pipeline_middle_end ); + struct fetch_pipeline_middle_end *fpme = CALLOC_STRUCT( fetch_pipeline_middle_end ); + if (!fpme) + goto fail; + + fpme->base.prepare = fetch_pipeline_prepare; + fpme->base.run = fetch_pipeline_run; + fpme->base.finish = fetch_pipeline_finish; + fpme->base.destroy = fetch_pipeline_destroy; + + fpme->draw = draw; + + fpme->fetch = draw_pt_fetch_create( draw ); + if (!fpme->fetch) + goto fail; + + fpme->post_vs = draw_pt_post_vs_create( draw ); + if (!fpme->post_vs) + goto fail; + + fpme->emit = draw_pt_emit_create( draw ); + if (!fpme->emit) + goto fail; - fetch_pipeline->base.prepare = fetch_pipeline_prepare; - fetch_pipeline->base.run = fetch_pipeline_run; - fetch_pipeline->base.finish = fetch_pipeline_finish; - fetch_pipeline->base.destroy = fetch_pipeline_destroy; + return &fpme->base; - fetch_pipeline->draw = draw; + fail: + if (fpme) + fetch_pipeline_destroy( &fpme->base ); - return &fetch_pipeline->base; + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c new file mode 100644 index 0000000000..f98e130ed6 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -0,0 +1,215 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_context.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" + +struct pt_post_vs { + struct draw_context *draw; + + boolean (*run)( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ); +}; + + + +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} + + + +static INLINE unsigned +compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0x0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= (1<<0); + if ( clip[0] + clip[3] < 0) mask |= (1<<1); + if (-clip[1] + clip[3] < 0) mask |= (1<<2); + if ( clip[1] + clip[3] < 0) mask |= (1<<3); + if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */ + if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */ + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<<i); + } + + return mask; +} + + +/* The normal case - cliptest, rhw divide, viewport transform. + * + * Also handle identity viewport here at the expense of a few wasted + * instructions + */ +static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + struct vertex_header *out = vertices; + const float *scale = pvs->draw->viewport.scale; + const float *trans = pvs->draw->viewport.translate; + unsigned clipped = 0; + unsigned j; + + if (0) debug_printf("%s\n"); + + for (j = 0; j < count; j++) { + out->clip[0] = out->data[0][0]; + out->clip[1] = out->data[0][1]; + out->clip[2] = out->data[0][2]; + out->clip[3] = out->data[0][3]; + + out->vertex_id = 0xffff; + out->edgeflag = 1; + out->clipmask = compute_clipmask_gl(out->clip, + pvs->draw->plane, + pvs->draw->nr_planes); + clipped += out->clipmask; + + if (out->clipmask == 0) + { + /* divide by w */ + float w = 1.0f / out->data[0][3]; + + /* Viewport mapping */ + out->data[0][0] = out->data[0][0] * w * scale[0] + trans[0]; + out->data[0][1] = out->data[0][1] * w * scale[1] + trans[1]; + out->data[0][2] = out->data[0][2] * w * scale[2] + trans[2]; + out->data[0][3] = w; + } + + out = (struct vertex_header *)( (char *)out + stride ); + } + + return clipped != 0; +} + + + +/* If bypass_clipping is set, skip cliptest and rhw divide. + */ +static boolean post_vs_viewport( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + struct vertex_header *out = vertices; + const float *scale = pvs->draw->viewport.scale; + const float *trans = pvs->draw->viewport.translate; + unsigned j; + + if (0) debug_printf("%s\n", __FUNCTION__); + for (j = 0; j < count; j++) { + /* Viewport mapping only, no cliptest/rhw divide + */ + out->data[0][0] = out->data[0][0] * scale[0] + trans[0]; + out->data[0][1] = out->data[0][1] * scale[1] + trans[1]; + out->data[0][2] = out->data[0][2] * scale[2] + trans[2]; + + out = (struct vertex_header *)((char *)out + stride); + } + + return FALSE; +} + + +/* If bypass_clipping is set and we have an identity viewport, nothing + * to do. + */ +static boolean post_vs_none( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + if (0) debug_printf("%s\n", __FUNCTION__); + return FALSE; +} + +boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, + struct vertex_header *pipeline_verts, + unsigned count, + unsigned stride ) +{ + return pvs->run( pvs, pipeline_verts, count, stride ); +} + + +void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, + boolean bypass_clipping, + boolean identity_viewport, + boolean opengl ) +{ + if (bypass_clipping) { + if (identity_viewport) + pvs->run = post_vs_none; + else + pvs->run = post_vs_viewport; + } + else { + //if (opengl) + pvs->run = post_vs_cliptest_viewport_gl; + } +} + + +struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ) +{ + struct pt_post_vs *pvs = CALLOC_STRUCT( pt_post_vs ); + if (!pvs) + return NULL; + + pvs->draw = draw; + + return pvs; +} + +void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ) +{ + FREE(pvs); +} diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 107dcfc269..afcff41043 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -63,6 +63,7 @@ struct vcache_frontend { static void vcache_flush( struct vcache_frontend *vcache ) { + vcache->draw->vcache_flushing = TRUE; if (vcache->draw_count) { vcache->middle->run( vcache->middle, vcache->fetch_elts, @@ -74,6 +75,7 @@ static void vcache_flush( struct vcache_frontend *vcache ) memset(vcache->in, ~0, sizeof(vcache->in)); vcache->fetch_count = 0; vcache->draw_count = 0; + vcache->draw->vcache_flushing = FALSE; } static void vcache_check_flush( struct vcache_frontend *vcache ) @@ -106,7 +108,7 @@ static unsigned add_edgeflag( struct vcache_frontend *vcache, unsigned idx, unsigned mask ) { - if (mask && draw_get_edgeflag(vcache->draw, idx)) + if (0 && mask && draw_pt_get_edgeflag(vcache->draw, idx)) return idx | DRAW_PT_EDGEFLAG; else return idx; @@ -116,7 +118,7 @@ static unsigned add_edgeflag( struct vcache_frontend *vcache, static unsigned add_reset_stipple( unsigned idx, unsigned reset ) { - if (reset) + if (0 && reset) return idx | DRAW_PT_RESET_STIPPLE; else return idx; @@ -128,9 +130,9 @@ static void vcache_triangle( struct vcache_frontend *vcache, unsigned i1, unsigned i2 ) { - vcache_elt(vcache, i0 | DRAW_PT_EDGEFLAG | DRAW_PT_RESET_STIPPLE); - vcache_elt(vcache, i1 | DRAW_PT_EDGEFLAG); - vcache_elt(vcache, i2 | DRAW_PT_EDGEFLAG); + vcache_elt(vcache, i0 /* | DRAW_PT_EDGEFLAG | DRAW_PT_RESET_STIPPLE */ ); + vcache_elt(vcache, i1 /* | DRAW_PT_EDGEFLAG */); + vcache_elt(vcache, i2 /* | DRAW_PT_EDGEFLAG */); vcache_check_flush(vcache); } @@ -142,11 +144,12 @@ static void vcache_ef_triangle( struct vcache_frontend *vcache, unsigned i1, unsigned i2 ) { +/* i0 = add_edgeflag( vcache, i0, (ef_mask >> 0) & 1 ); i1 = add_edgeflag( vcache, i1, (ef_mask >> 1) & 1 ); i2 = add_edgeflag( vcache, i2, (ef_mask >> 2) & 1 ); - i0 = add_reset_stipple( i0, reset_stipple ); +*/ vcache_elt(vcache, i0); vcache_elt(vcache, i1); @@ -448,7 +451,8 @@ static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { static void vcache_prepare( struct draw_pt_front_end *frontend, unsigned prim, - struct draw_pt_middle_end *middle ) + struct draw_pt_middle_end *middle, + unsigned opt ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; @@ -464,7 +468,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, vcache->output_prim = reduced_prim[prim]; vcache->middle = middle; - middle->prepare( middle, vcache->output_prim ); + middle->prepare( middle, vcache->output_prim, opt ); } @@ -486,6 +490,8 @@ static void vcache_destroy( struct draw_pt_front_end *frontend ) struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ) { struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend ); + if (vcache == NULL) + return NULL; vcache->base.prepare = vcache_prepare; vcache->base.run = NULL; diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 168036eee8..1446f785c5 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -72,6 +72,58 @@ draw_compute_vertex_size(struct vertex_info *vinfo) assert(0); } } +} + - assert(vinfo->size * 4 <= MAX_VERTEX_SIZE); +void +draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) +{ + unsigned i, j; + + for (i = 0; i < vinfo->num_attribs; i++) { + j = vinfo->src_index[i]; + switch (vinfo->emit[i]) { + case EMIT_OMIT: + debug_printf("EMIT_OMIT:"); + break; + case EMIT_1F: + debug_printf("EMIT_1F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_1F_PSIZE: + debug_printf("EMIT_1F_PSIZE:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_2F: + debug_printf("EMIT_2F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_3F: + debug_printf("EMIT_3F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + data += sizeof(float); + break; + case EMIT_4F: + debug_printf("EMIT_4F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_4UB: + debug_printf("EMIT_4UB:\t"); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + break; + default: + assert(0); + } + debug_printf("\n"); + } + debug_printf("\n"); } diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 65818463ca..6d8bac5138 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -106,5 +106,7 @@ draw_emit_vertex_attr(struct vertex_info *vinfo, extern void draw_compute_vertex_size(struct vertex_info *vinfo); +void draw_dump_emitted_vertex(const struct vertex_info *vinfo, + const uint8_t *data); #endif /* DRAW_VERTEX_H */ diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c deleted file mode 100644 index 730c18bcb3..0000000000 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ /dev/null @@ -1,219 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_util.h" -#include "draw_private.h" -#include "draw_context.h" - - -void draw_vertex_cache_invalidate( struct draw_context *draw ) -{ - assert(draw->pq.queue_nr == 0); - assert(draw->vs.queue_nr == 0); - assert(draw->vcache.referenced == 0); - - /* There's an error somewhere in the vcache code that requires this - * memset. The bug is exposed in q3demo demo001, but probably - * elsewhere as well. Will track it down later. - */ - memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); -} - - -/** - * Check if vertex is in cache, otherwise add it. It won't go through - * VS yet, not until there is a flush operation or the VS queue fills up. - * - * Note that cache entries are basically just two pointers: the first - * an index into the user's vertex arrays, the second a location in - * the vertex shader cache for the post-transformed vertex. - * - * \return pointer to location of (post-transformed) vertex header in the cache - */ -static struct vertex_header *get_vertex( struct draw_context *draw, - unsigned i ) -{ - unsigned slot = (i + (i>>5)) % VCACHE_SIZE; - - assert(slot < 32); /* so we don't exceed the bitfield size below */ - - if (draw->vcache.referenced & (1<<slot)) - { - /* Cache hit? - */ - if (draw->vcache.idx[slot].in == i) { - /*debug_printf("HIT %d %d\n", slot, i);*/ - assert(draw->vcache.idx[slot].out < draw->vs.queue_nr); - return draw_header_from_block(draw->vs.vertex_cache, - MAX_VERTEX_ALLOCATION, - draw->vcache.idx[slot].out); - } - - /* Otherwise a collision - */ - slot = VCACHE_SIZE + draw->vcache.overflow++; - /*debug_printf("XXX %d --> %d\n", i, slot);*/ - } - - /* Deal with the cache miss: - */ - { - unsigned out; - struct vertex_header *header; - - assert(slot < Elements(draw->vcache.idx)); - - /*debug_printf("NEW %d %d\n", slot, i);*/ - draw->vcache.idx[slot].in = i; - draw->vcache.idx[slot].out = out = draw->vs.queue_nr++; - draw->vcache.referenced |= (1 << slot); - - - /* Add to vertex shader queue: - */ - assert(draw->vs.queue_nr < VS_QUEUE_LENGTH); - - header = draw_header_from_block(draw->vs.vertex_cache, MAX_VERTEX_ALLOCATION, - out); - draw->vs.elts[out] = i; - header->clipmask = 0; - header->edgeflag = draw_get_edgeflag(draw, i); - header->pad = 0; - header->vertex_id = UNDEFINED_VERTEX_ID; - - /* Need to set the vertex's edge flag here. If we're being called - * by do_ef_triangle(), that function needs edge flag info! - */ - - return draw_header_from_block(draw->vs.vertex_cache, - MAX_VERTEX_ALLOCATION, - draw->vcache.idx[slot].out); - } -} - - -static struct vertex_header *get_uint_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const unsigned *elts = (const unsigned *) draw->user.elts; - return get_vertex( draw, elts[i] ); -} - - -static struct vertex_header *get_ushort_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const ushort *elts = (const ushort *) draw->user.elts; - return get_vertex( draw, elts[i] ); -} - - -static struct vertex_header *get_ubyte_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const ubyte *elts = (const ubyte *) draw->user.elts; - return get_vertex( draw, elts[i] ); -} - - -void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ) -{ - unsigned i; - - for (i = 0; i < draw->vs.post_nr; i++) { - struct vertex_header * header = - draw_header_from_block(draw->vs.vertex_cache, - MAX_VERTEX_ALLOCATION, i); - header->vertex_id = UNDEFINED_VERTEX_ID; - } -} - - -void draw_vertex_cache_unreference( struct draw_context *draw ) -{ - draw->vcache.referenced = 0; - draw->vcache.overflow = 0; -} - - -int draw_vertex_cache_check_space( struct draw_context *draw, - unsigned nr_verts ) -{ - if (draw->vcache.overflow + nr_verts < VCACHE_OVERFLOW) { - /* The vs queue is sized so that this can never happen: - */ - assert(draw->vs.queue_nr + nr_verts < VS_QUEUE_LENGTH); - return TRUE; - } - else - return FALSE; -} - - - -/** - * Tell the drawing context about the index/element buffer to use - * (ala glDrawElements) - * If no element buffer is to be used (i.e. glDrawArrays) then this - * should be called with eltSize=0 and elements=NULL. - * - * \param draw the drawing context - * \param eltSize size of each element (1, 2 or 4 bytes) - * \param elements the element buffer ptr - */ -void -draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, void *elements ) -{ -// draw_statechange( draw ); - - /* choose the get_vertex() function to use */ - switch (eltSize) { - case 0: - draw->vcache.get_vertex = get_vertex; - break; - case 1: - draw->vcache.get_vertex = get_ubyte_elt_vertex; - break; - case 2: - draw->vcache.get_vertex = get_ushort_elt_vertex; - break; - case 4: - draw->vcache.get_vertex = get_uint_elt_vertex; - break; - default: - assert(0); - } - draw->user.elts = elements; - draw->user.eltSize = eltSize; -} - diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c deleted file mode 100644 index 9041041006..0000000000 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ /dev/null @@ -1,528 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "draw_private.h" -#include "draw_context.h" - - -#define DRAW_DBG 0 - - -/** - * Fetch a float[4] vertex attribute from memory, doing format/type - * conversion as needed. - * - * This is probably needed/dupliocated elsewhere, eg format - * conversion, texture sampling etc. - */ -#define FETCH_ATTRIB( NAME, SZ, CVT ) \ -static void \ -fetch_##NAME(const void *ptr, float *attrib) \ -{ \ - static const float defaults[4] = { 0,0,0,1 }; \ - int i; \ - \ - for (i = 0; i < SZ; i++) { \ - attrib[i] = CVT(i); \ - } \ - \ - for (; i < 4; i++) { \ - attrib[i] = defaults[i]; \ - } \ -} - -#define CVT_64_FLOAT(i) (float) ((double *) ptr)[i] -#define CVT_32_FLOAT(i) ((float *) ptr)[i] - -#define CVT_8_USCALED(i) (float) ((unsigned char *) ptr)[i] -#define CVT_16_USCALED(i) (float) ((unsigned short *) ptr)[i] -#define CVT_32_USCALED(i) (float) ((unsigned int *) ptr)[i] - -#define CVT_8_SSCALED(i) (float) ((char *) ptr)[i] -#define CVT_16_SSCALED(i) (float) ((short *) ptr)[i] -#define CVT_32_SSCALED(i) (float) ((int *) ptr)[i] - -#define CVT_8_UNORM(i) (float) ((unsigned char *) ptr)[i] / 255.0f -#define CVT_16_UNORM(i) (float) ((unsigned short *) ptr)[i] / 65535.0f -#define CVT_32_UNORM(i) (float) ((unsigned int *) ptr)[i] / 4294967295.0f - -#define CVT_8_SNORM(i) (float) ((char *) ptr)[i] / 127.0f -#define CVT_16_SNORM(i) (float) ((short *) ptr)[i] / 32767.0f -#define CVT_32_SNORM(i) (float) ((int *) ptr)[i] / 2147483647.0f - -FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT ) -FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT ) -FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT ) -FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT ) - -FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT ) -FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT ) -FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT ) -FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT ) - -FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED ) -FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED ) -FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED ) -FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED ) - -FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED ) -FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED ) -FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED ) -FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED ) - -FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM ) -FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM ) -FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM ) -FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM ) - -FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM ) -FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM ) -FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM ) -FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM ) - -FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED ) -FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED ) -FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED ) -FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED ) - -FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED ) -FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED ) -FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED ) -FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED ) - -FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM ) -FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM ) -FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM ) -FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM ) - -FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM ) -FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM ) -FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM ) -FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM ) - -FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED ) -FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED ) -FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED ) -FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED ) - -FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED ) -FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED ) -FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED ) -FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED ) - -FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) -FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM ) -FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM ) -FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM ) - -FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM ) -FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM ) -FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM ) -FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM ) - -FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM ) -//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) - - - -static void -fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) -{ - attrib[2] = CVT_8_UNORM(0); - attrib[1] = CVT_8_UNORM(1); - attrib[0] = CVT_8_UNORM(2); - attrib[3] = CVT_8_UNORM(3); -} - - -fetch_func draw_get_fetch_func( enum pipe_format format ) -{ -#if 0 - { - char tmp[80]; - pf_sprint_name(tmp, format); - debug_printf("%s: %s\n", __FUNCTION__, tmp); - } -#endif - - switch (format) { - case PIPE_FORMAT_R64_FLOAT: - return fetch_R64_FLOAT; - case PIPE_FORMAT_R64G64_FLOAT: - return fetch_R64G64_FLOAT; - case PIPE_FORMAT_R64G64B64_FLOAT: - return fetch_R64G64B64_FLOAT; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return fetch_R64G64B64A64_FLOAT; - - case PIPE_FORMAT_R32_FLOAT: - return fetch_R32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return fetch_R32G32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return fetch_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return fetch_R32G32B32A32_FLOAT; - - case PIPE_FORMAT_R32_UNORM: - return fetch_R32_UNORM; - case PIPE_FORMAT_R32G32_UNORM: - return fetch_R32G32_UNORM; - case PIPE_FORMAT_R32G32B32_UNORM: - return fetch_R32G32B32_UNORM; - case PIPE_FORMAT_R32G32B32A32_UNORM: - return fetch_R32G32B32A32_UNORM; - - case PIPE_FORMAT_R32_USCALED: - return fetch_R32_USCALED; - case PIPE_FORMAT_R32G32_USCALED: - return fetch_R32G32_USCALED; - case PIPE_FORMAT_R32G32B32_USCALED: - return fetch_R32G32B32_USCALED; - case PIPE_FORMAT_R32G32B32A32_USCALED: - return fetch_R32G32B32A32_USCALED; - - case PIPE_FORMAT_R32_SNORM: - return fetch_R32_SNORM; - case PIPE_FORMAT_R32G32_SNORM: - return fetch_R32G32_SNORM; - case PIPE_FORMAT_R32G32B32_SNORM: - return fetch_R32G32B32_SNORM; - case PIPE_FORMAT_R32G32B32A32_SNORM: - return fetch_R32G32B32A32_SNORM; - - case PIPE_FORMAT_R32_SSCALED: - return fetch_R32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return fetch_R32G32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return fetch_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return fetch_R32G32B32A32_SSCALED; - - case PIPE_FORMAT_R16_UNORM: - return fetch_R16_UNORM; - case PIPE_FORMAT_R16G16_UNORM: - return fetch_R16G16_UNORM; - case PIPE_FORMAT_R16G16B16_UNORM: - return fetch_R16G16B16_UNORM; - case PIPE_FORMAT_R16G16B16A16_UNORM: - return fetch_R16G16B16A16_UNORM; - - case PIPE_FORMAT_R16_USCALED: - return fetch_R16_USCALED; - case PIPE_FORMAT_R16G16_USCALED: - return fetch_R16G16_USCALED; - case PIPE_FORMAT_R16G16B16_USCALED: - return fetch_R16G16B16_USCALED; - case PIPE_FORMAT_R16G16B16A16_USCALED: - return fetch_R16G16B16A16_USCALED; - - case PIPE_FORMAT_R16_SNORM: - return fetch_R16_SNORM; - case PIPE_FORMAT_R16G16_SNORM: - return fetch_R16G16_SNORM; - case PIPE_FORMAT_R16G16B16_SNORM: - return fetch_R16G16B16_SNORM; - case PIPE_FORMAT_R16G16B16A16_SNORM: - return fetch_R16G16B16A16_SNORM; - - case PIPE_FORMAT_R16_SSCALED: - return fetch_R16_SSCALED; - case PIPE_FORMAT_R16G16_SSCALED: - return fetch_R16G16_SSCALED; - case PIPE_FORMAT_R16G16B16_SSCALED: - return fetch_R16G16B16_SSCALED; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - return fetch_R16G16B16A16_SSCALED; - - case PIPE_FORMAT_R8_UNORM: - return fetch_R8_UNORM; - case PIPE_FORMAT_R8G8_UNORM: - return fetch_R8G8_UNORM; - case PIPE_FORMAT_R8G8B8_UNORM: - return fetch_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return fetch_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R8_USCALED: - return fetch_R8_USCALED; - case PIPE_FORMAT_R8G8_USCALED: - return fetch_R8G8_USCALED; - case PIPE_FORMAT_R8G8B8_USCALED: - return fetch_R8G8B8_USCALED; - case PIPE_FORMAT_R8G8B8A8_USCALED: - return fetch_R8G8B8A8_USCALED; - - case PIPE_FORMAT_R8_SNORM: - return fetch_R8_SNORM; - case PIPE_FORMAT_R8G8_SNORM: - return fetch_R8G8_SNORM; - case PIPE_FORMAT_R8G8B8_SNORM: - return fetch_R8G8B8_SNORM; - case PIPE_FORMAT_R8G8B8A8_SNORM: - return fetch_R8G8B8A8_SNORM; - - case PIPE_FORMAT_R8_SSCALED: - return fetch_R8_SSCALED; - case PIPE_FORMAT_R8G8_SSCALED: - return fetch_R8G8_SSCALED; - case PIPE_FORMAT_R8G8B8_SSCALED: - return fetch_R8G8B8_SSCALED; - case PIPE_FORMAT_R8G8B8A8_SSCALED: - return fetch_R8G8B8A8_SSCALED; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - return fetch_A8R8G8B8_UNORM; - - - case PIPE_FORMAT_B8G8R8A8_UNORM: - return fetch_B8G8R8A8_UNORM; - - case 0: - return NULL; /* not sure why this is needed */ - - default: - /* This can get hit because draw-state-validation is too eager, - and can jump in here validating stuff before the state tracker has set - up everything. - */ - /* assert(0); */ - return NULL; - } -} - - -static void -transpose_4x4( float *out, const float *in ) -{ - /* This can be achieved in 12 sse instructions, plus the final - * stores I guess. This is probably a bit more than that - maybe - * 32 or so? - */ - out[0] = in[0]; out[1] = in[4]; out[2] = in[8]; out[3] = in[12]; - out[4] = in[1]; out[5] = in[5]; out[6] = in[9]; out[7] = in[13]; - out[8] = in[2]; out[9] = in[6]; out[10] = in[10]; out[11] = in[14]; - out[12] = in[3]; out[13] = in[7]; out[14] = in[11]; out[15] = in[15]; -} - - - -static void fetch_xyz_rgb( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ) -{ - const unsigned *pitch = draw->vertex_fetch.pitch; - const ubyte **src = draw->vertex_fetch.src_ptr; - int i; - - assert(count <= 4); - -// debug_printf("%s\n", __FUNCTION__); - - /* loop over vertex attributes (vertex shader inputs) - */ - - for (i = 0; i < 4; i++) { - { - const float *in = (const float *)(src[0] + elts[i] * pitch[0]); - float *out = &machine->Inputs[0].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - - { - const float *in = (const float *)(src[1] + elts[i] * pitch[1]); - float *out = &machine->Inputs[1].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - } -} - - - - -static void fetch_xyz_rgb_st( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ) -{ - const unsigned *pitch = draw->vertex_fetch.pitch; - const ubyte **src = draw->vertex_fetch.src_ptr; - int i; - - assert(count <= 4); - - /* loop over vertex attributes (vertex shader inputs) - */ - - for (i = 0; i < 4; i++) { - { - const float *in = (const float *)(src[0] + elts[i] * pitch[0]); - float *out = &machine->Inputs[0].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - - { - const float *in = (const float *)(src[1] + elts[i] * pitch[1]); - float *out = &machine->Inputs[1].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - - { - const float *in = (const float *)(src[2] + elts[i] * pitch[2]); - float *out = &machine->Inputs[2].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = 0.0f; - out[12] = 1.0f; - } - } -} - - - - -/** - * Fetch vertex attributes for 'count' vertices. - */ -static void generic_vertex_fetch( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ) -{ - unsigned nr_attrs = draw->vertex_fetch.nr_attrs; - unsigned attr; - - assert(count <= 4); - -// debug_printf("%s %d\n", __FUNCTION__, count); - - /* loop over vertex attributes (vertex shader inputs) - */ - for (attr = 0; attr < nr_attrs; attr++) { - - const unsigned pitch = draw->vertex_fetch.pitch[attr]; - const ubyte *src = draw->vertex_fetch.src_ptr[attr]; - const fetch_func fetch = draw->vertex_fetch.fetch[attr]; - unsigned i; - float p[4][4]; - - - /* Fetch four attributes for four vertices. - * - * Could fetch directly into AOS format, but this is meant to be - * a prototype for an sse implementation, which would have - * difficulties doing that. - */ - for (i = 0; i < count; i++) - fetch( src + elts[i] * pitch, p[i] ); - - /* Be nice and zero out any missing vertices: - */ - for ( ; i < 4; i++) - p[i][0] = p[i][1] = p[i][2] = p[i][3] = 0; - - /* Transpose/swizzle into sse-friendly format. Currently - * assuming that all vertex shader inputs are float[4], but this - * isn't true -- if the vertex shader only wants tex0.xy, we - * could optimize for that. - * - * To do so fully without codegen would probably require an - * excessive number of fetch functions, but we could at least - * minimize the transpose step: - */ - transpose_4x4( (float *)&machine->Inputs[attr].xyzw[0].f[0], (float *)p ); - } -} - - - -void draw_update_vertex_fetch( struct draw_context *draw ) -{ - unsigned nr_attrs, i; - -// debug_printf("%s\n", __FUNCTION__); - - /* this may happend during context init */ - if (!draw->vertex_shader) - return; - - nr_attrs = draw->vertex_shader->info.num_inputs; - - for (i = 0; i < nr_attrs; i++) { - unsigned buf = draw->vertex_element[i].vertex_buffer_index; - enum pipe_format format = draw->vertex_element[i].src_format; - - draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] + - draw->vertex_buffer[buf].buffer_offset + - draw->vertex_element[i].src_offset; - - draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch; - draw->vertex_fetch.fetch[i] = draw_get_fetch_func( format ); - } - - draw->vertex_fetch.nr_attrs = nr_attrs; - - draw->vertex_fetch.fetch_func = generic_vertex_fetch; - - switch (nr_attrs) { - case 2: - if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT && - draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT) - draw->vertex_fetch.fetch_func = fetch_xyz_rgb; - break; - case 3: - if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT && - draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT && - draw->vertex_element[2].src_format == PIPE_FORMAT_R32G32_FLOAT) - draw->vertex_fetch.fetch_func = fetch_xyz_rgb_st; - break; - default: - break; - } - -} diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c deleted file mode 100644 index 9d0154c50d..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ /dev/null @@ -1,378 +0,0 @@ -/* - * Copyright 2003 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Keith Whitwell <keithw@tungstengraphics.com> - */ - - -#include <stddef.h> - -#include "pipe/p_compiler.h" -#include "pipe/p_util.h" -#include "rtasm/rtasm_execmem.h" - -#include "draw_vf.h" - - -#define DRAW_VF_DBG 0 - - -static boolean match_fastpath( struct draw_vertex_fetch *vf, - const struct draw_vf_fastpath *fp) -{ - unsigned j; - - if (vf->attr_count != fp->attr_count) - return FALSE; - - for (j = 0; j < vf->attr_count; j++) - if (vf->attr[j].format != fp->attr[j].format || - vf->attr[j].inputsize != fp->attr[j].size || - vf->attr[j].vertoffset != fp->attr[j].offset) - return FALSE; - - if (fp->match_strides) { - if (vf->vertex_stride != fp->vertex_stride) - return FALSE; - - for (j = 0; j < vf->attr_count; j++) - if (vf->attr[j].inputstride != fp->attr[j].stride) - return FALSE; - } - - return TRUE; -} - -static boolean search_fastpath_emit( struct draw_vertex_fetch *vf ) -{ - struct draw_vf_fastpath *fp = vf->fastpath; - - for ( ; fp ; fp = fp->next) { - if (match_fastpath(vf, fp)) { - vf->emit = fp->func; - return TRUE; - } - } - - return FALSE; -} - -void draw_vf_register_fastpath( struct draw_vertex_fetch *vf, - boolean match_strides ) -{ - struct draw_vf_fastpath *fastpath = CALLOC_STRUCT(draw_vf_fastpath); - unsigned i; - - fastpath->vertex_stride = vf->vertex_stride; - fastpath->attr_count = vf->attr_count; - fastpath->match_strides = match_strides; - fastpath->func = vf->emit; - fastpath->attr = (struct draw_vf_attr_type *) - MALLOC(vf->attr_count * sizeof(fastpath->attr[0])); - - for (i = 0; i < vf->attr_count; i++) { - fastpath->attr[i].format = vf->attr[i].format; - fastpath->attr[i].stride = vf->attr[i].inputstride; - fastpath->attr[i].size = vf->attr[i].inputsize; - fastpath->attr[i].offset = vf->attr[i].vertoffset; - } - - fastpath->next = vf->fastpath; - vf->fastpath = fastpath; -} - - - - -/*********************************************************************** - * Build codegen functions or return generic ones: - */ -static void choose_emit_func( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *dest) -{ - vf->emit = NULL; - - /* Does this match an existing (hardwired, codegen or known-bad) - * fastpath? - */ - if (search_fastpath_emit(vf)) { - /* Use this result. If it is null, then it is already known - * that the current state will fail for codegen and there is no - * point trying again. - */ - } - else if (vf->codegen_emit) { - vf->codegen_emit( vf ); - } - - if (!vf->emit) { - draw_vf_generate_hardwired_emit(vf); - } - - /* Otherwise use the generic version: - */ - if (!vf->emit) - vf->emit = draw_vf_generic_emit; - - vf->emit( vf, count, dest ); -} - - - - - -/*********************************************************************** - * Public entrypoints, mostly dispatch to the above: - */ - - - -static unsigned -draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, - const struct draw_vf_attr_map *map, - unsigned nr, - unsigned vertex_stride ) -{ - unsigned offset = 0; - unsigned i, j; - - assert(nr < PIPE_MAX_ATTRIBS); - - for (j = 0, i = 0; i < nr; i++) { - const unsigned format = map[i].format; - if (format == DRAW_EMIT_PAD) { -#if (DRAW_VF_DBG) - debug_printf("%d: pad %d, offset %d\n", i, - map[i].offset, offset); -#endif - - offset += map[i].offset; - - } - else { - vf->attr[j].attrib = map[i].attrib; - vf->attr[j].format = format; - vf->attr[j].insert = draw_vf_format_info[format].insert; - vf->attr[j].vertattrsize = draw_vf_format_info[format].attrsize; - vf->attr[j].vertoffset = offset; - vf->attr[j].isconst = draw_vf_format_info[format].isconst; - if(vf->attr[j].isconst) - memcpy(vf->attr[j].data, &map[i].data, vf->attr[j].vertattrsize); - -#if (DRAW_VF_DBG) - debug_printf("%d: %s, offset %d\n", i, - draw_vf_format_info[format].name, - vf->attr[j].vertoffset); -#endif - - offset += draw_vf_format_info[format].attrsize; - j++; - } - } - - vf->attr_count = j; - vf->vertex_stride = vertex_stride ? vertex_stride : offset; - vf->emit = choose_emit_func; - - assert(vf->vertex_stride >= offset); - return vf->vertex_stride; -} - - -void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, - const struct vertex_info *vinfo, - float point_size ) -{ - unsigned i, j; - struct draw_vf_attr *a = vf->attr; - struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS]; - unsigned count = 0; /* for debug/sanity */ - unsigned nr_attrs = 0; - - for (i = 0; i < vinfo->num_attribs; i++) { - j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { - case EMIT_OMIT: - /* no-op */ - break; - case EMIT_1F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_1F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count++; - break; - case EMIT_1F_PSIZE: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_1F_CONST; - attrs[nr_attrs].offset = 0; - attrs[nr_attrs].data.f[0] = point_size; - nr_attrs++; - count++; - break; - case EMIT_2F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_2F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 2; - break; - case EMIT_3F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_3F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 3; - break; - case EMIT_4F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_4F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 4; - break; - case EMIT_4UB: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_4UB_4F_BGRA; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 1; - break; - default: - assert(0); - } - } - - assert(count == vinfo->size); - - draw_vf_set_vertex_attributes(vf, - attrs, - nr_attrs, - vinfo->size * sizeof(float) ); - - for (j = 0; j < vf->attr_count; j++) { - a[j].inputsize = 4; - a[j].do_insert = a[j].insert[4 - 1]; - if(a[j].isconst) { - a[j].inputptr = a[j].data; - a[j].inputstride = 0; - } - } -} - - -#if 0 -/* Set attribute pointers, adjusted for start position: - */ -void draw_vf_set_sources( struct draw_vertex_fetch *vf, - GLvector4f * const sources[], - unsigned start ) -{ - struct draw_vf_attr *a = vf->attr; - unsigned j; - - for (j = 0; j < vf->attr_count; j++) { - const GLvector4f *vptr = sources[a[j].attrib]; - - if ((a[j].inputstride != vptr->stride) || - (a[j].inputsize != vptr->size)) - vf->emit = choose_emit_func; - - a[j].inputstride = vptr->stride; - a[j].inputsize = vptr->size; - a[j].do_insert = a[j].insert[vptr->size - 1]; - a[j].inputptr = ((uint8_t *)vptr->data) + start * vptr->stride; - } -} -#endif - - -/** - * Emit a vertex to dest. - */ -void draw_vf_emit_vertex( struct draw_vertex_fetch *vf, - struct vertex_header *vertex, - void *dest ) -{ - struct draw_vf_attr *a = vf->attr; - unsigned j; - - for (j = 0; j < vf->attr_count; j++) { - if (!a[j].isconst) { - a[j].inputptr = (uint8_t *)&vertex->data[a[j].attrib][0]; - a[j].inputstride = 0; /* XXX: one-vertex-max ATM */ - } - } - - vf->emit( vf, 1, (uint8_t*) dest ); -} - - - -struct draw_vertex_fetch *draw_vf_create( void ) -{ - struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch); - unsigned i; - - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) - vf->attr[i].vf = vf; - - vf->identity[0] = 0.0; - vf->identity[1] = 0.0; - vf->identity[2] = 0.0; - vf->identity[3] = 1.0; - - vf->codegen_emit = NULL; - -#ifdef USE_SSE_ASM - if (!GETENV("GALLIUM_NO_CODEGEN")) - vf->codegen_emit = draw_vf_generate_sse_emit; -#endif - - return vf; -} - - -void draw_vf_destroy( struct draw_vertex_fetch *vf ) -{ - struct draw_vf_fastpath *fp, *tmp; - - for (fp = vf->fastpath ; fp ; fp = tmp) { - tmp = fp->next; - FREE(fp->attr); - - /* KW: At the moment, fp->func is constrained to be allocated by - * rtasm_exec_alloc(), as the hardwired fastpaths in - * t_vertex_generic.c are handled specially. It would be nice - * to unify them, but this probably won't change until this - * module gets another overhaul. - */ - //rtasm_exec_free((void *) fp->func); - FREE(fp); - } - - vf->fastpath = NULL; - FREE(vf); -} diff --git a/src/gallium/auxiliary/draw/draw_vf.h b/src/gallium/auxiliary/draw/draw_vf.h deleted file mode 100644 index 0ef98d6257..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright 2008 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -/** - * Vertex fetch/store/convert code. This functionality is used in two places: - * 1. Vertex fetch/convert - to grab vertex data from incoming vertex - * arrays and convert to format needed by vertex shaders. - * 2. Vertex store/emit - to convert simple float[][4] vertex attributes - * (which is the organization used throughout the draw/prim pipeline) to - * hardware-specific formats and emit into hardware vertex buffers. - * - * - * Authors: - * Keith Whitwell <keithw@tungstengraphics.com> - */ - -#ifndef DRAW_VF_H -#define DRAW_VF_H - - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" - -#include "draw_vertex.h" -#include "draw_private.h" /* for vertex_header */ - - -enum draw_vf_attr_format { - DRAW_EMIT_1F, - DRAW_EMIT_2F, - DRAW_EMIT_3F, - DRAW_EMIT_4F, - DRAW_EMIT_3F_XYW, /**< for projective texture */ - DRAW_EMIT_1UB_1F, /**< for fog coordinate */ - DRAW_EMIT_3UB_3F_RGB, /**< for specular color */ - DRAW_EMIT_3UB_3F_BGR, /**< for specular color */ - DRAW_EMIT_4UB_4F_RGBA, /**< for color */ - DRAW_EMIT_4UB_4F_BGRA, /**< for color */ - DRAW_EMIT_4UB_4F_ARGB, /**< for color */ - DRAW_EMIT_4UB_4F_ABGR, /**< for color */ - DRAW_EMIT_1F_CONST, - DRAW_EMIT_2F_CONST, - DRAW_EMIT_3F_CONST, - DRAW_EMIT_4F_CONST, - DRAW_EMIT_PAD, /**< leave a hole of 'offset' bytes */ - DRAW_EMIT_MAX -}; - -struct draw_vf_attr_map -{ - /** Input attribute number */ - unsigned attrib; - - enum draw_vf_attr_format format; - - unsigned offset; - - /** - * Constant data for DRAW_EMIT_*_CONST - */ - union { - uint8_t ub[4]; - float f[4]; - } data; -}; - -struct draw_vertex_fetch; - - - -#if 0 -unsigned -draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, - const struct draw_vf_attr_map *map, - unsigned nr, - unsigned vertex_stride ); -#endif - -void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, - const struct vertex_info *vinfo, - float point_size ); - -#if 0 -void -draw_vf_set_sources( struct draw_vertex_fetch *vf, - GLvector4f * const attrib[], - unsigned start ); -#endif - -void -draw_vf_emit_vertex( struct draw_vertex_fetch *vf, - struct vertex_header *vertex, - void *dest ); - -struct draw_vertex_fetch * -draw_vf_create( void ); - -void -draw_vf_destroy( struct draw_vertex_fetch *vf ); - - - -/*********************************************************************** - * Internal functions and structs: - */ - -struct draw_vf_attr; - - -typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a, - uint8_t *v, - const float *in ); - -typedef void (*draw_vf_emit_func)( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *dest ); - - - -/** - * Describes how to convert/move a vertex attribute from a vertex - * array to a vertex structure. - */ -struct draw_vf_attr -{ - struct draw_vertex_fetch *vf; - - unsigned format; - unsigned inputsize; - unsigned inputstride; - unsigned vertoffset; /**< position of the attrib in the vertex struct */ - - boolean isconst; /**< read from const data below */ - uint8_t data[16]; - - unsigned attrib; /**< which vertex attrib (0=position, etc) */ - unsigned vertattrsize; /**< size of the attribute in bytes */ - - uint8_t *inputptr; - const draw_vf_insert_func *insert; - draw_vf_insert_func do_insert; -}; - -struct draw_vertex_fetch -{ - struct draw_vf_attr attr[PIPE_MAX_ATTRIBS]; - unsigned attr_count; - unsigned vertex_stride; - - draw_vf_emit_func emit; - - /* Parameters and constants for codegen: - */ - float identity[4]; - - struct draw_vf_fastpath *fastpath; - - void (*codegen_emit)( struct draw_vertex_fetch *vf ); -}; - - -struct draw_vf_attr_type { - unsigned format; - unsigned size; - unsigned stride; - unsigned offset; -}; - -/** XXX this could be moved into draw_vf.c */ -struct draw_vf_fastpath { - unsigned vertex_stride; - unsigned attr_count; - boolean match_strides; - - struct draw_vf_attr_type *attr; - - draw_vf_emit_func func; - struct draw_vf_fastpath *next; -}; - - -void -draw_vf_register_fastpath( struct draw_vertex_fetch *vtx, - boolean match_strides ); - -void -draw_vf_generic_emit( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *v ); - -void -draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf ); - -void -draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ); - - -/** XXX this type and function could probably be moved into draw_vf.c */ -struct draw_vf_format_info { - const char *name; - draw_vf_insert_func insert[4]; - const unsigned attrsize; - const boolean isconst; -}; - -extern const struct draw_vf_format_info -draw_vf_format_info[DRAW_EMIT_MAX]; - - -#endif diff --git a/src/gallium/auxiliary/draw/draw_vf_generic.c b/src/gallium/auxiliary/draw/draw_vf_generic.c deleted file mode 100644 index 7a60a9db9c..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf_generic.c +++ /dev/null @@ -1,585 +0,0 @@ - -/* - * Copyright 2003 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Keith Whitwell <keithw@tungstengraphics.com> - */ - - -#include "pipe/p_compiler.h" -#include "pipe/p_debug.h" -#include "pipe/p_util.h" - -#include "draw_vf.h" - - - -static INLINE void insert_4f_4( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; -} - -static INLINE void insert_4f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = 1; -} - -static INLINE void insert_4f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = 0; - out[3] = 1; -} - -static INLINE void insert_4f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = 0; - out[2] = 0; - out[3] = 1; -} - -static INLINE void insert_3f_xyw_4( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[3]; -} - -static INLINE void insert_3f_xyw_err( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - (void) a; (void) v; (void) in; - assert(0); -} - -static INLINE void insert_3f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; -} - -static INLINE void insert_3f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = 0; -} - -static INLINE void insert_3f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = 0; - out[2] = 0; -} - - -static INLINE void insert_2f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; -} - -static INLINE void insert_2f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = 0; -} - -static INLINE void insert_1f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; -} - -static INLINE void insert_null( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - (void) a; (void) v; (void) in; -} - -static INLINE void insert_4ub_4f_rgba_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]); -} - -static INLINE void insert_4ub_4f_rgba_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_rgba_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[2] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_rgba_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - v[1] = 0; - v[2] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_bgra_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]); -} - -static INLINE void insert_4ub_4f_bgra_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_bgra_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[0] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_bgra_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - v[1] = 0; - v[0] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_argb_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]); -} - -static INLINE void insert_4ub_4f_argb_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]); - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_argb_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - v[3] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_argb_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - v[2] = 0x00; - v[3] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_abgr_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]); -} - -static INLINE void insert_4ub_4f_abgr_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]); - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_abgr_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - v[1] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_abgr_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - v[2] = 0x00; - v[1] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_3ub_3f_rgb_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); -} - -static INLINE void insert_3ub_3f_rgb_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[2] = 0; -} - -static INLINE void insert_3ub_3f_rgb_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - v[1] = 0; - v[2] = 0; -} - -static INLINE void insert_3ub_3f_bgr_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); -} - -static INLINE void insert_3ub_3f_bgr_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[0] = 0; -} - -static INLINE void insert_3ub_3f_bgr_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - v[1] = 0; - v[0] = 0; -} - - -static INLINE void insert_1ub_1f_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); -} - - -const struct draw_vf_format_info draw_vf_format_info[DRAW_EMIT_MAX] = -{ - { "1f", - { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 }, - sizeof(float), FALSE }, - - { "2f", - { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 }, - 2 * sizeof(float), FALSE }, - - { "3f", - { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 }, - 3 * sizeof(float), FALSE }, - - { "4f", - { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 }, - 4 * sizeof(float), FALSE }, - - { "3f_xyw", - { insert_3f_xyw_err, insert_3f_xyw_err, insert_3f_xyw_err, - insert_3f_xyw_4 }, - 3 * sizeof(float), FALSE }, - - { "1ub_1f", - { insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1 }, - sizeof(uint8_t), FALSE }, - - { "3ub_3f_rgb", - { insert_3ub_3f_rgb_1, insert_3ub_3f_rgb_2, insert_3ub_3f_rgb_3, - insert_3ub_3f_rgb_3 }, - 3 * sizeof(uint8_t), FALSE }, - - { "3ub_3f_bgr", - { insert_3ub_3f_bgr_1, insert_3ub_3f_bgr_2, insert_3ub_3f_bgr_3, - insert_3ub_3f_bgr_3 }, - 3 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_rgba", - { insert_4ub_4f_rgba_1, insert_4ub_4f_rgba_2, insert_4ub_4f_rgba_3, - insert_4ub_4f_rgba_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_bgra", - { insert_4ub_4f_bgra_1, insert_4ub_4f_bgra_2, insert_4ub_4f_bgra_3, - insert_4ub_4f_bgra_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_argb", - { insert_4ub_4f_argb_1, insert_4ub_4f_argb_2, insert_4ub_4f_argb_3, - insert_4ub_4f_argb_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_abgr", - { insert_4ub_4f_abgr_1, insert_4ub_4f_abgr_2, insert_4ub_4f_abgr_3, - insert_4ub_4f_abgr_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "1f_const", - { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 }, - sizeof(float), TRUE }, - - { "2f_const", - { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 }, - 2 * sizeof(float), TRUE }, - - { "3f_const", - { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 }, - 3 * sizeof(float), TRUE }, - - { "4f_const", - { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 }, - 4 * sizeof(float), TRUE }, - - { "pad", - { NULL, NULL, NULL, NULL }, - 0, FALSE }, - -}; - - - - -/*********************************************************************** - * Hardwired fastpaths for emitting whole vertices or groups of - * vertices - */ -#define EMIT5(NR, F0, F1, F2, F3, F4, NAME) \ -static void NAME( struct draw_vertex_fetch *vf, \ - unsigned count, \ - uint8_t *v ) \ -{ \ - struct draw_vf_attr *a = vf->attr; \ - unsigned i; \ - \ - for (i = 0 ; i < count ; i++, v += vf->vertex_stride) { \ - if (NR > 0) { \ - F0( &a[0], v + a[0].vertoffset, (float *)a[0].inputptr ); \ - a[0].inputptr += a[0].inputstride; \ - } \ - \ - if (NR > 1) { \ - F1( &a[1], v + a[1].vertoffset, (float *)a[1].inputptr ); \ - a[1].inputptr += a[1].inputstride; \ - } \ - \ - if (NR > 2) { \ - F2( &a[2], v + a[2].vertoffset, (float *)a[2].inputptr ); \ - a[2].inputptr += a[2].inputstride; \ - } \ - \ - if (NR > 3) { \ - F3( &a[3], v + a[3].vertoffset, (float *)a[3].inputptr ); \ - a[3].inputptr += a[3].inputstride; \ - } \ - \ - if (NR > 4) { \ - F4( &a[4], v + a[4].vertoffset, (float *)a[4].inputptr ); \ - a[4].inputptr += a[4].inputstride; \ - } \ - } \ -} - - -#define EMIT2(F0, F1, NAME) EMIT5(2, F0, F1, insert_null, \ - insert_null, insert_null, NAME) - -#define EMIT3(F0, F1, F2, NAME) EMIT5(3, F0, F1, F2, insert_null, \ - insert_null, NAME) - -#define EMIT4(F0, F1, F2, F3, NAME) EMIT5(4, F0, F1, F2, F3, \ - insert_null, NAME) - - -EMIT2(insert_3f_3, insert_4ub_4f_rgba_4, emit_xyz3_rgba4) - -EMIT3(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_xyzw4_rgba4_st2) - -EMIT4(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_xyzw4_rgba4_st2_st2) - - -/* Use the codegen paths to select one of a number of hardwired - * fastpaths. - */ -void draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf ) -{ - draw_vf_emit_func func = NULL; - - /* Does it fit a hardwired fastpath? Help! this is growing out of - * control! - */ - switch (vf->attr_count) { - case 2: - if (vf->attr[0].do_insert == insert_3f_3 && - vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { - func = emit_xyz3_rgba4; - } - break; - case 3: - if (vf->attr[2].do_insert == insert_2f_2) { - if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { - if (vf->attr[0].do_insert == insert_4f_4) - func = emit_xyzw4_rgba4_st2; - } - } - break; - case 4: - if (vf->attr[2].do_insert == insert_2f_2 && - vf->attr[3].do_insert == insert_2f_2) { - if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { - if (vf->attr[0].do_insert == insert_4f_4) - func = emit_xyzw4_rgba4_st2_st2; - } - } - break; - } - - vf->emit = func; -} - -/*********************************************************************** - * Generic (non-codegen) functions for whole vertices or groups of - * vertices - */ - -void draw_vf_generic_emit( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *v ) -{ - struct draw_vf_attr *a = vf->attr; - const unsigned attr_count = vf->attr_count; - const unsigned stride = vf->vertex_stride; - unsigned i, j; - - for (i = 0 ; i < count ; i++, v += stride) { - for (j = 0; j < attr_count; j++) { - float *in = (float *)a[j].inputptr; - a[j].inputptr += a[j].inputstride; - a[j].do_insert( &a[j], v + a[j].vertoffset, in ); - } - } -} - - diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c deleted file mode 100644 index aff4ffd985..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf_sse.c +++ /dev/null @@ -1,613 +0,0 @@ -/* - * Copyright 2003 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Keith Whitwell <keithw@tungstengraphics.com> - */ - - -#include "pipe/p_compiler.h" -#include "util/u_simple_list.h" - -#include "draw_vf.h" - - -#if defined(USE_SSE_ASM) - -#include "rtasm/rtasm_cpu.h" -#include "rtasm/rtasm_x86sse.h" - - -#define X 0 -#define Y 1 -#define Z 2 -#define W 3 - - -struct x86_program { - struct x86_function func; - - struct draw_vertex_fetch *vf; - boolean inputs_safe; - boolean outputs_safe; - boolean have_sse2; - - struct x86_reg identity; - struct x86_reg chan0; -}; - - -static struct x86_reg get_identity( struct x86_program *p ) -{ - return p->identity; -} - -static void emit_load4f_4( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movups(&p->func, dest, arg0); -} - -static void emit_load4f_3( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Have to jump through some hoops: - * - * c 0 0 0 - * c 0 0 1 - * 0 0 c 1 - * a b c 1 - */ - sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); - sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); - sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) ); - sse_movlps(&p->func, dest, arg0); -} - -static void emit_load4f_2( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Initialize from identity, then pull in low two words: - */ - sse_movups(&p->func, dest, get_identity(p)); - sse_movlps(&p->func, dest, arg0); -} - -static void emit_load4f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Pull in low word, then swizzle in identity */ - sse_movss(&p->func, dest, arg0); - sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); -} - - - -static void emit_load3f_3( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Over-reads by 1 dword - potential SEGV if input is a vertex - * array. - */ - if (p->inputs_safe) { - sse_movups(&p->func, dest, arg0); - } - else { - /* c 0 0 0 - * c c c c - * a b c c - */ - sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); - sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X)); - sse_movlps(&p->func, dest, arg0); - } -} - -static void emit_load3f_2( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - emit_load4f_2(p, dest, arg0); -} - -static void emit_load3f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - emit_load4f_1(p, dest, arg0); -} - -static void emit_load2f_2( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movlps(&p->func, dest, arg0); -} - -static void emit_load2f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - emit_load4f_1(p, dest, arg0); -} - -static void emit_load1f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movss(&p->func, dest, arg0); -} - -static void (*load[4][4])( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) = { - { emit_load1f_1, - emit_load1f_1, - emit_load1f_1, - emit_load1f_1 }, - - { emit_load2f_1, - emit_load2f_2, - emit_load2f_2, - emit_load2f_2 }, - - { emit_load3f_1, - emit_load3f_2, - emit_load3f_3, - emit_load3f_3 }, - - { emit_load4f_1, - emit_load4f_2, - emit_load4f_3, - emit_load4f_4 } -}; - -static void emit_load( struct x86_program *p, - struct x86_reg dest, - unsigned sz, - struct x86_reg src, - unsigned src_sz) -{ - load[sz-1][src_sz-1](p, dest, src); -} - -static void emit_store4f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movups(&p->func, dest, arg0); -} - -static void emit_store3f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - if (p->outputs_safe) { - /* Emit the extra dword anyway. This may hurt writecombining, - * may cause other problems. - */ - sse_movups(&p->func, dest, arg0); - } - else { - /* Alternate strategy - emit two, shuffle, emit one. - */ - sse_movlps(&p->func, dest, arg0); - sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ - sse_movss(&p->func, x86_make_disp(dest,8), arg0); - } -} - -static void emit_store2f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movlps(&p->func, dest, arg0); -} - -static void emit_store1f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movss(&p->func, dest, arg0); -} - - -static void (*store[4])( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) = -{ - emit_store1f, - emit_store2f, - emit_store3f, - emit_store4f -}; - -static void emit_store( struct x86_program *p, - struct x86_reg dest, - unsigned sz, - struct x86_reg temp ) - -{ - store[sz-1](p, dest, temp); -} - -static void emit_pack_store_4ub( struct x86_program *p, - struct x86_reg dest, - struct x86_reg temp ) -{ - /* Scale by 255.0 - */ - sse_mulps(&p->func, temp, p->chan0); - - if (p->have_sse2) { - sse2_cvtps2dq(&p->func, temp, temp); - sse2_packssdw(&p->func, temp, temp); - sse2_packuswb(&p->func, temp, temp); - sse_movss(&p->func, dest, temp); - } - else { - struct x86_reg mmx0 = x86_make_reg(file_MMX, 0); - struct x86_reg mmx1 = x86_make_reg(file_MMX, 1); - sse_cvtps2pi(&p->func, mmx0, temp); - sse_movhlps(&p->func, temp, temp); - sse_cvtps2pi(&p->func, mmx1, temp); - mmx_packssdw(&p->func, mmx0, mmx1); - mmx_packuswb(&p->func, mmx0, mmx0); - mmx_movd(&p->func, dest, mmx0); - } -} - -static int get_offset( const void *a, const void *b ) -{ - return (const char *)b - (const char *)a; -} - -/* Not much happens here. Eventually use this function to try and - * avoid saving/reloading the source pointers each vertex (if some of - * them can fit in registers). - */ -static void get_src_ptr( struct x86_program *p, - struct x86_reg srcREG, - struct x86_reg vfREG, - struct draw_vf_attr *a ) -{ - struct draw_vertex_fetch *vf = p->vf; - struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); - - /* Load current a[j].inputptr - */ - x86_mov(&p->func, srcREG, ptr_to_src); -} - -static void update_src_ptr( struct x86_program *p, - struct x86_reg srcREG, - struct x86_reg vfREG, - struct draw_vf_attr *a ) -{ - if (a->inputstride) { - struct draw_vertex_fetch *vf = p->vf; - struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); - - /* add a[j].inputstride (hardcoded value - could just as easily - * pull the stride value from memory each time). - */ - x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride)); - - /* save new value of a[j].inputptr - */ - x86_mov(&p->func, ptr_to_src, srcREG); - } -} - - -/* Lots of hardcoding - * - * EAX -- pointer to current output vertex - * ECX -- pointer to current attribute - * - */ -static boolean build_vertex_emit( struct x86_program *p ) -{ - struct draw_vertex_fetch *vf = p->vf; - unsigned j = 0; - - struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX); - struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX); - struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); - struct x86_reg vfESI = x86_make_reg(file_REG32, reg_SI); - struct x86_reg temp = x86_make_reg(file_XMM, 0); - uint8_t *fixup, *label; - - /* Push a few regs? - */ - x86_push(&p->func, countEBP); - x86_push(&p->func, vfESI); - - - /* Get vertex count, compare to zero - */ - x86_xor(&p->func, srcECX, srcECX); - x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2)); - x86_cmp(&p->func, countEBP, srcECX); - fixup = x86_jcc_forward(&p->func, cc_E); - - /* Initialize destination register. - */ - x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3)); - - /* Move argument 1 (vf) into a reg: - */ - x86_mov(&p->func, vfESI, x86_fn_arg(&p->func, 1)); - - - /* always load, needed or not: - */ - sse_movups(&p->func, p->identity, x86_make_disp(vfESI, get_offset(vf, &vf->identity[0]))); - - /* Note address for loop jump */ - label = x86_get_label(&p->func); - - /* Emit code for each of the attributes. Currently routes - * everything through SSE registers, even when it might be more - * efficient to stick with regular old x86. No optimization or - * other tricks - enough new ground to cover here just getting - * things working. - */ - while (j < vf->attr_count) { - struct draw_vf_attr *a = &vf->attr[j]; - struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset); - - /* Now, load an XMM reg from src, perhaps transform, then save. - * Could be shortcircuited in specific cases: - */ - switch (a->format) { - case DRAW_EMIT_1F: - case DRAW_EMIT_1F_CONST: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 1, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_2F: - case DRAW_EMIT_2F_CONST: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 2, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_3F: - case DRAW_EMIT_3F_CONST: - /* Potentially the worst case - hardcode 2+1 copying: - */ - if (0) { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 3, temp); - update_src_ptr(p, srcECX, vfESI, a); - } - else { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 2, temp); - if (a->inputsize > 2) { - emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1); - emit_store(p, x86_make_disp(dest,8), 1, temp); - } - else { - sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p)); - } - update_src_ptr(p, srcECX, vfESI, a); - } - break; - case DRAW_EMIT_4F: - case DRAW_EMIT_4F_CONST: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 4, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_3F_XYW: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z)); - emit_store(p, dest, 3, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - - case DRAW_EMIT_1UB_1F: - /* Test for PAD3 + 1UB: - */ - if (j > 0 && - a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3) - { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X)); - emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */ - update_src_ptr(p, srcECX, vfESI, a); - } - else { - debug_printf("Can't emit 1ub %x %x %d\n", - a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize ); - return FALSE; - } - break; - case DRAW_EMIT_3UB_3F_RGB: - case DRAW_EMIT_3UB_3F_BGR: - /* Test for 3UB + PAD1: - */ - if (j == vf->attr_count - 1 || - a[1].vertoffset >= a->vertoffset + 4) { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); - if (a->format == DRAW_EMIT_3UB_3F_BGR) - sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - } - /* Test for 3UB + 1UB: - */ - else if (j < vf->attr_count - 1 && - a[1].format == DRAW_EMIT_1UB_1F && - a[1].vertoffset == a->vertoffset + 3) { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); - update_src_ptr(p, srcECX, vfESI, a); - - /* Make room for incoming value: - */ - sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); - - get_src_ptr(p, srcECX, vfESI, &a[1]); - emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize); - update_src_ptr(p, srcECX, vfESI, &a[1]); - - /* Rearrange and possibly do BGR conversion: - */ - if (a->format == DRAW_EMIT_3UB_3F_BGR) - sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); - else - sse_shufps(&p->func, temp, temp, SHUF(Y,Z,W,X)); - - emit_pack_store_4ub(p, dest, temp); - j++; /* NOTE: two attrs consumed */ - } - else { - debug_printf("Can't emit 3ub\n"); - } - return FALSE; /* add this later */ - break; - - case DRAW_EMIT_4UB_4F_RGBA: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_4UB_4F_BGRA: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_4UB_4F_ARGB: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_4UB_4F_ABGR: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - default: - debug_printf("unknown a[%d].format %d\n", j, a->format); - return FALSE; /* catch any new opcodes */ - } - - /* Increment j by at least 1 - may have been incremented above also: - */ - j++; - } - - /* Next vertex: - */ - x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vf->vertex_stride)); - - /* decr count, loop if not zero - */ - x86_dec(&p->func, countEBP); - x86_test(&p->func, countEBP, countEBP); - x86_jcc(&p->func, cc_NZ, label); - - /* Exit mmx state? - */ - if (p->func.need_emms) - mmx_emms(&p->func); - - /* Land forward jump here: - */ - x86_fixup_fwd_jump(&p->func, fixup); - - /* Pop regs and return - */ - x86_pop(&p->func, x86_get_base_reg(vfESI)); - x86_pop(&p->func, countEBP); - x86_ret(&p->func); - - vf->emit = (draw_vf_emit_func)x86_get_func(&p->func); - return TRUE; -} - - - -void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) -{ - struct x86_program p; - - if (!rtasm_cpu_has_sse()) { - vf->codegen_emit = NULL; - return; - } - - memset(&p, 0, sizeof(p)); - - p.vf = vf; - p.inputs_safe = 0; /* for now */ - p.outputs_safe = 1; /* for now */ - p.have_sse2 = rtasm_cpu_has_sse2(); - p.identity = x86_make_reg(file_XMM, 6); - p.chan0 = x86_make_reg(file_XMM, 7); - - x86_init_func(&p.func); - - if (build_vertex_emit(&p)) { - draw_vf_register_fastpath( vf, TRUE ); - } - else { - /* Note the failure so that we don't keep trying to codegen an - * impossible state: - */ - draw_vf_register_fastpath( vf, FALSE ); - x86_release_func(&p.func); - } -} - -#else - -void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) -{ - /* Dummy version for when USE_SSE_ASM not defined */ -} - -#endif diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vs.c index 8572a6d40c..03fe00a951 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -37,49 +37,6 @@ #include "draw_context.h" #include "draw_vs.h" -/** - * Run the vertex shader on all vertices in the vertex queue. - * Called by the draw module when the vertx cache needs to be flushed. - */ -void -draw_vertex_shader_queue_flush(struct draw_context *draw) -{ - struct draw_vertex_shader *shader = draw->vertex_shader; - unsigned i; - - assert(draw->vs.queue_nr != 0); - - /* XXX: do this on statechange: - */ - shader->prepare( shader, draw ); - -// fprintf(stderr, "%s %d\n", __FUNCTION__, draw->vs.queue_nr ); - - /* run vertex shader on vertex cache entries, four per invokation */ - for (i = 0; i < draw->vs.queue_nr; i += MAX_SHADER_VERTICES) { - unsigned elts[MAX_SHADER_VERTICES]; - int j, n = MIN2(MAX_SHADER_VERTICES, draw->vs.queue_nr - i); - struct vertex_header *dests = - draw_header_from_block(draw->vs.vertex_cache, - MAX_VERTEX_ALLOCATION, i); - - for (j = 0; j < n; j++) { - elts[j] = draw->vs.elts[i + j]; - } - - for ( ; j < MAX_SHADER_VERTICES; j++) { - elts[j] = elts[0]; - } - - assert(n > 0); - assert(n <= MAX_SHADER_VERTICES); - - shader->run(shader, draw, elts, n, dests, MAX_VERTEX_ALLOCATION); - } - - draw->vs.post_nr = draw->vs.queue_nr; - draw->vs.queue_nr = 0; -} struct draw_vertex_shader * @@ -95,10 +52,8 @@ draw_create_vertex_shader(struct draw_context *draw, vs = draw_create_vs_exec( draw, shader ); } } - assert(vs); - - tgsi_scan_shader(shader->tokens, &vs->info); + assert(vs); return vs; } @@ -113,9 +68,6 @@ draw_bind_vertex_shader(struct draw_context *draw, { draw->vertex_shader = dvs; draw->num_vs_outputs = dvs->info.num_outputs; - - tgsi_exec_machine_init(&draw->machine); - dvs->prepare( dvs, draw ); } else { diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 33ce1e335e..f9772b83b8 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -35,10 +35,39 @@ #include "draw_private.h" -struct draw_vertex_shader; struct draw_context; struct pipe_shader_state; +/** + * Private version of the compiled vertex_shader + */ +struct draw_vertex_shader { + + /* This member will disappear shortly: + */ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + + void (*prepare)( struct draw_vertex_shader *shader, + struct draw_context *draw ); + + /* Run the shader - this interface will get cleaned up in the + * future: + */ + void (*run_linear)( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ); + + + void (*delete)( struct draw_vertex_shader * ); +}; + + struct draw_vertex_shader * draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *templ); @@ -52,32 +81,7 @@ draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); -/* Should be part of the generated shader: - */ -static INLINE unsigned -compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) -{ - unsigned mask = 0x0; - unsigned i; - - /* Do the hardwired planes first: - */ - if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; - if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; - if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; - if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; - if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; - if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; - - /* Followed by any remaining ones: - */ - for (i = 6; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1<<i); - } - - return mask; -} - +#define MAX_TGSI_VERTICES 4 + #endif diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 5c88c2e24e..54a2b2ab04 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -40,145 +40,118 @@ #include "tgsi/util/tgsi_parse.h" -#define MAX_TGSI_VERTICES 4 +struct exec_vertex_shader { + struct draw_vertex_shader base; + struct tgsi_exec_machine *machine; +}; + +static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs ) +{ + return (struct exec_vertex_shader *)vs; +} + + +/* Not required for run_linear. + */ static void vs_exec_prepare( struct draw_vertex_shader *shader, struct draw_context *draw ) { + struct exec_vertex_shader *evs = exec_vertex_shader(shader); + /* specify the vertex program to interpret/execute */ - tgsi_exec_machine_bind_shader(&draw->machine, + tgsi_exec_machine_bind_shader(evs->machine, shader->state.tokens, PIPE_MAX_SAMPLERS, NULL /*samplers*/ ); - draw_update_vertex_fetch( draw ); } -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. - * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices + + +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. */ -static boolean -vs_exec_run( struct draw_vertex_shader *shader, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - void *vOut, - unsigned vertex_size) +static void +vs_exec_run_linear( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) { - struct tgsi_exec_machine *machine = &draw->machine; + struct exec_vertex_shader *evs = exec_vertex_shader(shader); + struct tgsi_exec_machine *machine = evs->machine; unsigned int i, j; - unsigned int clipped = 0; + unsigned slot; - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - assert(draw->vertex_shader->info.output_semantic_name[0] - == TGSI_SEMANTIC_POSITION); - - machine->Consts = (float (*)[4]) draw->user.constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - if (draw->rasterizer->bypass_vs) { - /* outputs are just the inputs */ - machine->Outputs = machine->Inputs; - } - else { - machine->Outputs = ALIGN16_ASSIGN(outputs); - } + machine->Consts = constants; for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); - draw->vertex_fetch.fetch_func( draw, machine, &elts[i], max_vertices ); - - if (!draw->rasterizer->bypass_vs) { - /* run interpreter */ - tgsi_exec_machine_run( machine ); - } - /* store machine results */ + /* Swizzle inputs. + */ for (j = 0; j < max_vertices; j++) { - unsigned slot; - float x, y, z, w; - struct vertex_header *out = - draw_header_from_block(vOut, vertex_size, i + j); - - /* Handle attr[0] (position) specially: - * - * XXX: Computing the clipmask should be done in the vertex - * program as a set of DP4 instructions appended to the - * user-provided code. - */ - x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - out->clipmask = compute_clipmask(out->clip, draw->plane, - draw->nr_planes); - clipped += out->clipmask; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; +#if 0 + debug_printf("%d) Input vert:\n", i + j); + for (slot = 0; slot < shader->info.num_inputs; slot++) { + debug_printf("\t%d: %f %f %f %f\n", slot, + input[slot][0], + input[slot][1], + input[slot][2], + input[slot][3]); } - else { - out->clipmask = 0; - } - out->edgeflag = 1; - out->vertex_id = UNDEFINED_VERTEX_ID; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - out->data[0][0] = x * scale[0] + trans[0]; - out->data[0][1] = y * scale[1] + trans[1]; - out->data[0][2] = z * scale[2] + trans[2]; - out->data[0][3] = w; - } - else { - out->data[0][0] = x; - out->data[0][1] = y; - out->data[0][2] = z; - out->data[0][3] = w; +#endif + + for (slot = 0; slot < shader->info.num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; } - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + input = (const float (*)[4])((const char *)input + input_stride); + } + + /* run interpreter */ + tgsi_exec_machine_run( machine ); + + /* Unswizzle all output results. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < shader->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } -#if 0 /*DEBUG*/ - printf("%d) Post xform vert:\n", i + j); - for (slot = 0; slot < draw->num_vs_outputs; slot++) { - printf("\t%d: %f %f %f %f\n", slot, - out->data[slot][0], - out->data[slot][1], - out->data[slot][2], - out->data[slot][3]); +#if 0 + debug_printf("%d) Post xform vert:\n", i + j); + for (slot = 0; slot < shader->info.num_outputs; slot++) { + debug_printf("\t%d: %f %f %f %f\n", slot, + output[slot][0], + output[slot][1], + output[slot][2], + output[slot][3]); } #endif - } /* loop over vertices */ + + output = (float (*)[4])((char *)output + output_stride); + } + } - return clipped != 0; } + static void vs_exec_delete( struct draw_vertex_shader *dvs ) { @@ -191,17 +164,22 @@ struct draw_vertex_shader * draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *state) { - struct draw_vertex_shader *vs = CALLOC_STRUCT( draw_vertex_shader ); + struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader ); uint nt = tgsi_num_tokens(state->tokens); if (vs == NULL) return NULL; /* we make a private copy of the tokens */ - vs->state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0])); - vs->prepare = vs_exec_prepare; - vs->run = vs_exec_run; - vs->delete = vs_exec_delete; + vs->base.state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0])); + tgsi_scan_shader(state->tokens, &vs->base.info); + + + vs->base.prepare = vs_exec_prepare; + vs->base.run_linear = vs_exec_run_linear; + vs->base.delete = vs_exec_delete; + vs->machine = &draw->machine; + - return vs; + return &vs->base; } diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 73076d2467..dcada66514 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -47,6 +47,7 @@ struct draw_llvm_vertex_shader { struct draw_vertex_shader base; struct gallivm_prog *llvm_prog; + struct tgsi_exec_machine *machine; }; @@ -54,121 +55,68 @@ static void vs_llvm_prepare( struct draw_vertex_shader *base, struct draw_context *draw ) { - draw_update_vertex_fetch( draw ); } -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. - * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices - */ -static boolean -vs_llvm_run( struct draw_vertex_shader *base, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - void *vOut ) + +static void +vs_llvm_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) { struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; - struct tgsi_exec_machine *machine = &draw->machine; - unsigned int j; - unsigned int clipped = 0; - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - - assert(count <= 4); - assert(draw->vertex_shader->state->output_semantic_name[0] - == TGSI_SEMANTIC_POSITION); + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i, j; + unsigned slot; - /* Consts does not require 16 byte alignment. */ - machine->Consts = (float (*)[4]) draw->user.constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - if (draw->rasterizer->bypass_vs) { - /* outputs are just the inputs */ - machine->Outputs = machine->Inputs; - } - else { - machine->Outputs = ALIGN16_ASSIGN(outputs); - } + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + /* Swizzle inputs. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < base->info.num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } - draw->vertex_fetch.fetch_func( draw, machine, elts, count ); + input = (const float (*)[4])((const char *)input + input_stride); + } - if (!draw->rasterizer->bypass_vs) { /* run shader */ gallivm_cpu_vs_exec(shader->llvm_prog, machine->Inputs, machine->Outputs, - machine->Consts, + (float (*)[4]) constants, machine->Temps); - } - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, - draw->nr_planes); - clipped += vOut[j]->clipmask; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - vOut[j]->clipmask = 0; - } - vOut[j]->edgeflag = 1; - vOut[j]->vertex_id = UNDEFINED_VERTEX_ID; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; - } - else { - vOut[j]->data[0][0] = x; - vOut[j]->data[0][1] = y; - vOut[j]->data[0][2] = z; - vOut[j]->data[0][3] = w; - } - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. + /* Unswizzle all output results */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < base->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + output = (float (*)[4])((char *)output + output_stride); } - } /* loop over vertices */ - return clipped != 0; + } } + + static void vs_llvm_delete( struct draw_vertex_shader *base ) { @@ -198,15 +146,19 @@ draw_create_vs_llvm(struct draw_context *draw, /* we make a private copy of the tokens */ vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); + + tgsi_scan_shader(vs->base.state.tokens, &vs->base.info); + vs->base.prepare = vs_llvm_prepare; - vs->base.run = vs_llvm_run; + vs->base.run_linear = vs_llvm_run_linear; vs->base.delete = vs_llvm_delete; + vs->machine = &draw->machine; { struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); gallivm_ir_set_layout(ir, GALLIVM_SOA); gallivm_ir_set_components(ir, 4); - gallivm_ir_fill_from_tgsi(ir, vs->base.state->tokens); + gallivm_ir_fill_from_tgsi(ir, vs->base.state.tokens); vs->llvm_prog = gallivm_ir_compile(ir); gallivm_ir_delete(ir); } diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index ee0a3105b9..b1e9f67114 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -41,6 +41,7 @@ #include "draw_private.h" #include "draw_context.h" +#include "rtasm/rtasm_cpu.h" #include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_sse2.h" #include "tgsi/util/tgsi_parse.h" @@ -58,7 +59,11 @@ typedef void (XSTDCALL *codegen_function) ( struct draw_sse_vertex_shader { struct draw_vertex_shader base; struct x86_function sse2_program; + codegen_function func; + + struct tgsi_exec_machine *machine; + float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; }; @@ -67,140 +72,71 @@ static void vs_sse_prepare( struct draw_vertex_shader *base, struct draw_context *draw ) { - draw_update_vertex_fetch( draw ); } -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. - * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices + + +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. */ -static boolean -vs_sse_run( struct draw_vertex_shader *base, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - void *vOut, - unsigned vertex_size ) +static void +vs_sse_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; - struct tgsi_exec_machine *machine = &draw->machine; + struct tgsi_exec_machine *machine = shader->machine; unsigned int i, j; - unsigned int clipped = 0; - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - assert(draw->vertex_shader->info.output_semantic_name[0] - == TGSI_SEMANTIC_POSITION); - - /* Consts does not require 16 byte alignment. */ - machine->Consts = (float (*)[4]) draw->user.constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - if (draw->rasterizer->bypass_vs) { - /* outputs are just the inputs */ - machine->Outputs = machine->Inputs; - } - else { - machine->Outputs = ALIGN16_ASSIGN(outputs); - } + unsigned slot; - for (i = 0; i < count; i += SSE_MAX_VERTICES) { - unsigned int max_vertices = MIN2(SSE_MAX_VERTICES, count - i); - /* Fetch vertices. This may at some point be integrated into the - * compiled shader -- that would require a reorganization where - * multiple versions of the compiled shader might exist, - * specialized for each fetch state. - */ - draw->vertex_fetch.fetch_func(draw, machine, &elts[i], max_vertices); - - if (!draw->rasterizer->bypass_vs) { - /* run compiled shader - */ - shader->func(machine->Inputs, - machine->Outputs, - machine->Consts, - machine->Temps, - shader->immediates); - } - - /* XXX: Computing the clipmask and emitting results should be done - * in the vertex program as a set of instructions appended to - * the user-provided code. + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + + /* Swizzle inputs. */ for (j = 0; j < max_vertices; j++) { - unsigned slot; - float x, y, z, w; - struct vertex_header *out = - draw_header_from_block(vOut, vertex_size, i + j); - - x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - out->clipmask = compute_clipmask(out->clip, draw->plane, - draw->nr_planes); - clipped += out->clipmask; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - out->clipmask = 0; - } - out->edgeflag = 1; - out->vertex_id = UNDEFINED_VERTEX_ID; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - out->data[0][0] = x * scale[0] + trans[0]; - out->data[0][1] = y * scale[1] + trans[1]; - out->data[0][2] = z * scale[2] + trans[2]; - out->data[0][3] = w; - } - else { - out->data[0][0] = x; - out->data[0][1] = y; - out->data[0][2] = z; - out->data[0][3] = w; + for (slot = 0; slot < base->info.num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; } - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } -#if 0 /*DEBUG*/ - printf("%d) Post xform vert:\n", i + j); - for (slot = 0; slot < draw->num_vs_outputs; slot++) { - printf("\t%d: %f %f %f %f\n", slot, - out->data[slot][0], - out->data[slot][1], - out->data[slot][2], - out->data[slot][3]); + input = (const float (*)[4])((const char *)input + input_stride); + } + + /* run compiled shader + */ + shader->func(machine->Inputs, + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + shader->immediates); + + + /* Unswizzle all output results. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < base->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; } -#endif - } + + output = (float (*)[4])((char *)output + output_stride); + } } - return clipped != 0; } + static void vs_sse_delete( struct draw_vertex_shader *base ) { @@ -220,7 +156,7 @@ draw_create_vs_sse(struct draw_context *draw, struct draw_sse_vertex_shader *vs; uint nt = tgsi_num_tokens(templ->tokens); - if (!draw->use_sse) + if (!rtasm_cpu_has_sse2()) return NULL; vs = CALLOC_STRUCT( draw_sse_vertex_shader ); @@ -229,9 +165,13 @@ draw_create_vs_sse(struct draw_context *draw, /* we make a private copy of the tokens */ vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); + + tgsi_scan_shader(templ->tokens, &vs->base.info); + vs->base.prepare = vs_sse_prepare; - vs->base.run = vs_sse_run; + vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; + vs->machine = &draw->machine; x86_init_func( &vs->sse2_program ); diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c deleted file mode 100644 index d6bff110b4..0000000000 --- a/src/gallium/auxiliary/draw/draw_wide_prims.c +++ /dev/null @@ -1,366 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "draw_private.h" - - -struct wide_stage { - struct draw_stage stage; - - float half_line_width; - float half_point_size; - - uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; - uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; - uint num_texcoords; - - int psize_slot; -}; - - - -static INLINE struct wide_stage *wide_stage( struct draw_stage *stage ) -{ - return (struct wide_stage *)stage; -} - - -static void passthrough_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - -static void passthrough_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line(stage->next, header); -} - -static void passthrough_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} - - -/** - * Draw a wide line by drawing a quad (two triangles). - * XXX need to disable polygon stipple. - */ -static void wide_line( struct draw_stage *stage, - struct prim_header *header ) -{ - const struct wide_stage *wide = wide_stage(stage); - const float half_width = wide->half_line_width; - - struct prim_header tri; - - struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); - struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); - struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); - struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); - - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; - - const float dx = FABSF(pos0[0] - pos2[0]); - const float dy = FABSF(pos0[1] - pos2[1]); - - /* - * Draw wide line as a quad (two tris) by "stretching" the line along - * X or Y. - * We need to tweak coords in several ways to be conformant here. - */ - - if (dx > dy) { - /* x-major line */ - pos0[1] = pos0[1] - half_width - 0.25f; - pos1[1] = pos1[1] + half_width - 0.25f; - pos2[1] = pos2[1] - half_width - 0.25f; - pos3[1] = pos3[1] + half_width - 0.25f; - if (pos0[0] < pos2[0]) { - /* left to right line */ - pos0[0] -= 0.5f; - pos1[0] -= 0.5f; - pos2[0] -= 0.5f; - pos3[0] -= 0.5f; - } - else { - /* right to left line */ - pos0[0] += 0.5f; - pos1[0] += 0.5f; - pos2[0] += 0.5f; - pos3[0] += 0.5f; - } - } - else { - /* y-major line */ - pos0[0] = pos0[0] - half_width + 0.25f; - pos1[0] = pos1[0] + half_width + 0.25f; - pos2[0] = pos2[0] - half_width + 0.25f; - pos3[0] = pos3[0] + half_width + 0.25f; - if (pos0[1] < pos2[1]) { - /* top to bottom line */ - pos0[1] -= 0.5f; - pos1[1] -= 0.5f; - pos2[1] -= 0.5f; - pos3[1] -= 0.5f; - } - else { - /* bottom to top line */ - pos0[1] += 0.5f; - pos1[1] += 0.5f; - pos2[1] += 0.5f; - pos3[1] += 0.5f; - } - } - - tri.det = header->det; /* only the sign matters */ - tri.v[0] = v0; - tri.v[1] = v2; - tri.v[2] = v3; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v0; - tri.v[1] = v3; - tri.v[2] = v1; - stage->next->tri( stage->next, &tri ); -} - - -/** - * Set the vertex texcoords for sprite mode. - * Coords may be left untouched or set to a right-side-up or upside-down - * orientation. - */ -static void set_texcoords(const struct wide_stage *wide, - struct vertex_header *v, const float tc[4]) -{ - uint i; - for (i = 0; i < wide->num_texcoords; i++) { - if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) { - uint j = wide->texcoord_slot[i]; - v->data[j][0] = tc[0]; - if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT) - v->data[j][1] = 1.0f - tc[1]; - else - v->data[j][1] = tc[1]; - v->data[j][2] = tc[2]; - v->data[j][3] = tc[3]; - } - } -} - - -/* If there are lots of sprite points (and why wouldn't there be?) it - * would probably be more sensible to change hardware setup to - * optimize this rather than doing the whole thing in software like - * this. - */ -static void wide_point( struct draw_stage *stage, - struct prim_header *header ) -{ - const struct wide_stage *wide = wide_stage(stage); - const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; - float half_size; - float left_adj, right_adj; - - struct prim_header tri; - - /* four dups of original vertex */ - struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); - struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); - struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); - struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); - - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; - - /* point size is either per-vertex or fixed size */ - if (wide->psize_slot >= 0) { - half_size = 0.5f * header->v[0]->data[wide->psize_slot][0]; - } - else { - half_size = wide->half_point_size; - } - - left_adj = -half_size; /* + 0.25f;*/ - right_adj = half_size; /* + 0.25f;*/ - - pos0[0] += left_adj; - pos0[1] -= half_size; - - pos1[0] += left_adj; - pos1[1] += half_size; - - pos2[0] += right_adj; - pos2[1] -= half_size; - - pos3[0] += right_adj; - pos3[1] += half_size; - - if (sprite) { - static const float tex00[4] = { 0, 0, 0, 1 }; - static const float tex01[4] = { 0, 1, 0, 1 }; - static const float tex11[4] = { 1, 1, 0, 1 }; - static const float tex10[4] = { 1, 0, 0, 1 }; - set_texcoords( wide, v0, tex00 ); - set_texcoords( wide, v1, tex01 ); - set_texcoords( wide, v2, tex10 ); - set_texcoords( wide, v3, tex11 ); - } - - tri.det = header->det; /* only the sign matters */ - tri.v[0] = v0; - tri.v[1] = v2; - tri.v[2] = v3; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v0; - tri.v[1] = v3; - tri.v[2] = v1; - stage->next->tri( stage->next, &tri ); -} - - -static void wide_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - struct wide_stage *wide = wide_stage(stage); - struct draw_context *draw = stage->draw; - - wide->half_point_size = 0.5f * draw->rasterizer->point_size; - - /* XXX we won't know the real size if it's computed by the vertex shader! */ - if (draw->rasterizer->point_size > draw->wide_point_threshold) { - stage->point = wide_point; - } - else { - stage->point = passthrough_point; - } - - if (draw->rasterizer->point_sprite) { - /* find vertex shader texcoord outputs */ - const struct draw_vertex_shader *vs = draw->vertex_shader; - uint i, j = 0; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { - wide->texcoord_slot[j] = i; - wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; - j++; - } - } - wide->num_texcoords = j; - } - - wide->psize_slot = -1; - - if (draw->rasterizer->point_size_per_vertex) { - /* find PSIZ vertex output */ - const struct draw_vertex_shader *vs = draw->vertex_shader; - uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { - wide->psize_slot = i; - break; - } - } - } - - stage->point( stage, header ); -} - - - -static void wide_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - struct wide_stage *wide = wide_stage(stage); - struct draw_context *draw = stage->draw; - - wide->half_line_width = 0.5f * draw->rasterizer->line_width; - - if (draw->rasterizer->line_width != 1.0) { - wide->stage.line = wide_line; - } - else { - wide->stage.line = passthrough_line; - } - - stage->line( stage, header ); -} - - -static void wide_flush( struct draw_stage *stage, unsigned flags ) -{ - stage->line = wide_first_line; - stage->point = wide_first_point; - stage->next->flush( stage->next, flags ); -} - - -static void wide_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void wide_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -struct draw_stage *draw_wide_stage( struct draw_context *draw ) -{ - struct wide_stage *wide = CALLOC_STRUCT(wide_stage); - - draw_alloc_temp_verts( &wide->stage, 4 ); - - wide->stage.draw = draw; - wide->stage.next = NULL; - wide->stage.point = wide_first_point; - wide->stage.line = wide_first_line; - wide->stage.tri = passthrough_tri; - wide->stage.flush = wide_flush; - wide->stage.reset_stipple_counter = wide_reset_stipple_counter; - wide->stage.destroy = wide_destroy; - - return &wide->stage; -} |