diff options
18 files changed, 1276 insertions, 2212 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 21aff1967a..8f05e5a6fd 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -32,7 +32,6 @@ C_SOURCES = \ lp_draw_arrays.c \ lp_flush.c \ lp_jit.c \ - lp_prim_setup.c \ lp_prim_vbuf.c \ lp_setup.c \ lp_query.c \ @@ -51,7 +50,6 @@ C_SOURCES = \ lp_tex_sample_c.c \ lp_tex_sample_llvm.c \ lp_texture.c \ - lp_tile_cache.c \ lp_tile_soa.c include ../../Makefile.template diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 13cd465838..344b246337 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -45,7 +45,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_draw_arrays.c', 'lp_flush.c', 'lp_jit.c', - 'lp_prim_setup.c', 'lp_prim_vbuf.c', 'lp_setup.c', 'lp_query.c', @@ -64,7 +63,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_tex_sample_c.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', - 'lp_tile_cache.c', 'lp_tile_soa.c', ]) diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 202cb8ef43..57e71f3e98 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -31,13 +31,13 @@ */ #include "draw/draw_context.h" +#include "draw/draw_vbuf.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" -#include "lp_prim_setup.h" #include "lp_prim_vbuf.h" #include "lp_state.h" #include "lp_surface.h" @@ -264,21 +264,21 @@ llvmpipe_create( struct pipe_screen *screen ) (struct tgsi_sampler **) llvmpipe->tgsi.vert_samplers_list); - llvmpipe->setup = lp_draw_render_stage(llvmpipe); - if (!llvmpipe->setup) - goto fail; - if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - if (debug_get_bool_option( "LP_NO_VBUF", FALSE )) { - /* Deprecated path -- vbuf is the intended interface to the draw module: - */ - draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->setup); - } - else { - lp_init_vbuf(llvmpipe); - } + llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe); + if (!llvmpipe->vbuf_backend) + goto fail; + + llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend); + if (!llvmpipe->vbuf) + goto fail; + + draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf); + draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend); + + /* plug in AA line/point stages */ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 8d5a0d4f1f..0b77ae58d5 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -124,9 +124,10 @@ struct llvmpipe_context { /** The primitive drawing context */ struct draw_context *draw; - struct draw_stage *setup; + + /** Draw module backend */ + struct vbuf_render *vbuf_backend; struct draw_stage *vbuf; - struct llvmpipe_vbuf_render *vbuf_render; boolean dirty_render_cache; diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.c b/src/gallium/drivers/llvmpipe/lp_prim_setup.c deleted file mode 100644 index b14f8fb99d..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief A draw stage that drives our triangle setup routines from - * within the draw pipeline. One of two ways to drive setup, the - * other being in lp_prim_vbuf.c. - * - * \author Keith Whitwell <keith@tungstengraphics.com> - * \author Brian Paul - */ - - -#include "lp_context.h" -#include "lp_setup.h" -#include "lp_state.h" -#include "lp_prim_setup.h" -#include "draw/draw_pipe.h" -#include "draw/draw_vertex.h" -#include "util/u_memory.h" - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct setup_context *setup; -}; - - - -/** - * Basically a cast wrapper. - */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} - - -typedef const float (*cptrf4)[4]; - -static void -do_tri(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_tri( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data, - (cptrf4)prim->v[2]->data ); -} - -static void -do_line(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_line( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data ); -} - -static void -do_point(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_point( setup->setup, - (cptrf4)prim->v[0]->data ); -} - - - - -static void setup_begin( struct draw_stage *stage ) -{ - struct setup_stage *setup = setup_stage(stage); - - llvmpipe_setup_prepare( setup->setup ); - - stage->point = do_point; - stage->line = do_line; - stage->tri = do_tri; -} - - -static void setup_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->point( stage, header ); -} - -static void setup_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->line( stage, header ); -} - - -static void setup_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->tri( stage, header ); -} - - - -static void setup_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->point = setup_first_point; - stage->line = setup_first_line; - stage->tri = setup_first_tri; -} - - -static void reset_stipple_counter( struct draw_stage *stage ) -{ -} - - -static void render_destroy( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - llvmpipe_setup_destroy_context(ssetup->setup); - FREE( stage ); -} - - -/** - * Create a new primitive setup/render stage. - */ -struct draw_stage *lp_draw_render_stage( struct llvmpipe_context *llvmpipe ) -{ - struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); - - sstage->setup = llvmpipe_setup_create_context(llvmpipe); - sstage->stage.draw = llvmpipe->draw; - sstage->stage.point = setup_first_point; - sstage->stage.line = setup_first_line; - sstage->stage.tri = setup_first_tri; - sstage->stage.flush = setup_flush; - sstage->stage.reset_stipple_counter = reset_stipple_counter; - sstage->stage.destroy = render_destroy; - - return (struct draw_stage *)sstage; -} - -struct setup_context * -lp_draw_setup_context( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - return ssetup->setup; -} - -void -lp_draw_flush( struct draw_stage *stage ) -{ - stage->flush( stage, 0 ); -} diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.h b/src/gallium/drivers/llvmpipe/lp_prim_setup.h deleted file mode 100644 index da6cae6375..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.h +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef LP_PRIM_SETUP_H -#define LP_PRIM_SETUP_H - - -/** - * vbuf is a special stage to gather the stream of triangles, lines, points - * together and reconstruct vertex buffers for hardware upload. - * - * First attempt, work in progress. - * - * TODO: - * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. - * - tell vbuf stage how to build hw vertices directly - * - pass vbuf stage a buffer pointer for direct emit to agp/vram. - * - * - * - * Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. - */ - - -struct draw_stage; -struct llvmpipe_context; - - -typedef void (*vbuf_draw_func)( struct pipe_context *pipe, - unsigned prim, - const ushort *elements, - unsigned nr_elements, - const void *vertex_buffer, - unsigned nr_vertices ); - - -extern struct draw_stage * -lp_draw_render_stage( struct llvmpipe_context *llvmpipe ); - -extern struct setup_context * -lp_draw_setup_context( struct draw_stage * ); - -extern void -lp_draw_flush( struct draw_stage * ); - - -extern struct draw_stage * -lp_draw_vbuf_stage( struct draw_context *draw_context, - struct pipe_context *pipe, - vbuf_draw_func draw ); - - -#endif /* LP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index c394dcb61d..e244ac9087 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -39,7 +39,6 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_prim_vbuf.h" -#include "lp_prim_setup.h" #include "lp_setup.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" @@ -59,6 +58,8 @@ struct llvmpipe_vbuf_render { struct vbuf_render base; struct llvmpipe_context *llvmpipe; + struct setup_context *setup; + uint prim; uint vertex_size; uint nr_vertices; @@ -75,6 +76,11 @@ llvmpipe_vbuf_render(struct vbuf_render *vbr) } + + + + + static const struct vertex_info * lp_vbuf_get_vertex_info(struct vbuf_render *vbr) { @@ -105,36 +111,6 @@ lp_vbuf_allocate_vertices(struct vbuf_render *vbr, static void lp_vbuf_release_vertices(struct vbuf_render *vbr) { -#if 0 - { - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - const struct vertex_info *info = - llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe); - const float *vtx = (const float *) cvbr->vertex_buffer; - uint i, j; - debug_printf("%s (vtx_size = %u, vtx_used = %u)\n", - __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices); - for (i = 0; i < cvbr->nr_vertices; i++) { - for (j = 0; j < info->num_attribs; j++) { - uint k; - switch (info->attrib[j].emit) { - case EMIT_4F: k = 4; break; - case EMIT_3F: k = 3; break; - case EMIT_2F: k = 2; break; - case EMIT_1F: k = 1; break; - default: assert(0); - } - debug_printf("Vert %u attr %u: ", i, j); - while (k-- > 0) { - debug_printf("%g ", vtx[0]); - vtx++; - } - debug_printf("\n"); - } - } - } -#endif - /* keep the old allocation for next time */ } @@ -160,12 +136,8 @@ static boolean lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); + struct setup_context *setup_ctx = cvbr->setup; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct setup_context *setup_ctx = lp_draw_setup_context(cvbr->llvmpipe->setup); - llvmpipe_setup_prepare( setup_ctx ); cvbr->llvmpipe->reduced_prim = u_reduced_prim(prim); @@ -193,14 +165,9 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = cvbr->vertex_buffer; + struct setup_context *setup_ctx = cvbr->setup; unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -367,11 +334,6 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) default: assert(0); } - - /* XXX: why are we calling this??? If we had to call something, it - * would be a function in lp_setup.c: - */ - lp_draw_flush( setup ); } @@ -384,17 +346,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; + struct setup_context *setup_ctx = cvbr->setup; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = (void *) get_vert(cvbr->vertex_buffer, start, stride); unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -568,40 +525,38 @@ static void lp_vbuf_destroy(struct vbuf_render *vbr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - cvbr->llvmpipe->vbuf_render = NULL; + llvmpipe_setup_destroy_context(cvbr->setup); FREE(cvbr); } /** - * Initialize the post-transform vertex buffer information for the given - * context. + * Create the post-transform vertex handler for the given context. */ -void -lp_init_vbuf(struct llvmpipe_context *lp) +struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *lp) { - assert(lp->draw); + struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render); - lp->vbuf_render = CALLOC_STRUCT(llvmpipe_vbuf_render); + assert(lp->draw); - lp->vbuf_render->base.max_indices = LP_MAX_VBUF_INDEXES; - lp->vbuf_render->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->base.get_vertex_info = lp_vbuf_get_vertex_info; - lp->vbuf_render->base.allocate_vertices = lp_vbuf_allocate_vertices; - lp->vbuf_render->base.map_vertices = lp_vbuf_map_vertices; - lp->vbuf_render->base.unmap_vertices = lp_vbuf_unmap_vertices; - lp->vbuf_render->base.set_primitive = lp_vbuf_set_primitive; - lp->vbuf_render->base.draw = lp_vbuf_draw; - lp->vbuf_render->base.draw_arrays = lp_vbuf_draw_arrays; - lp->vbuf_render->base.release_vertices = lp_vbuf_release_vertices; - lp->vbuf_render->base.destroy = lp_vbuf_destroy; + cvbr->base.max_indices = LP_MAX_VBUF_INDEXES; + cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->llvmpipe = lp; + cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info; + cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices; + cvbr->base.map_vertices = lp_vbuf_map_vertices; + cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices; + cvbr->base.set_primitive = lp_vbuf_set_primitive; + cvbr->base.draw = lp_vbuf_draw; + cvbr->base.draw_arrays = lp_vbuf_draw_arrays; + cvbr->base.release_vertices = lp_vbuf_release_vertices; + cvbr->base.destroy = lp_vbuf_destroy; - lp->vbuf = draw_vbuf_stage(lp->draw, &lp->vbuf_render->base); + cvbr->llvmpipe = lp; - draw_set_rasterize_stage(lp->draw, lp->vbuf); + cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe); - draw_set_render(lp->draw, &lp->vbuf_render->base); + return &cvbr->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h index 6c4e6063e6..0676e2f42a 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h @@ -31,8 +31,8 @@ struct llvmpipe_context; -extern void -lp_init_vbuf(struct llvmpipe_context *llvmpipe); +extern struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe); #endif /* LP_VBUF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_rasterizer.c new file mode 100644 index 0000000000..089ea59729 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rasterizer.c @@ -0,0 +1,157 @@ + +struct lp_rasterizer { + + /* We can choose whatever layout for the internal tile storage we + * prefer: + */ + struct { + unsigned color[TILESIZE][TILESIZE]; + unsigned depth[TILESIZE][TILESIZE]; + char stencil[TILESIZE][TILESIZE]; + } tile; + + + unsigned x; + unsigned y; + + + struct { + struct pipe_surface *color; + struct pipe_surface *zstencil; + unsigned clear_color; + unsigned clear_depth; + char clear_stencil; + } state; +}; + +struct lp_rasterizer *lp_rast_create( void ) +{ + return CALLOC_STRUCT(lp_rasterizer); +} + +void lp_rast_bind_surfaces( struct lp_rasterizer *, + struct pipe_surface *color, + struct pipe_surface *zstencil, + const float *clear_color, + double clear_depth, + unsigned clear_stencil) +{ + pipe_surface_reference(&rast->state.color, color); + pipe_surface_reference(&rast->state.depth, depth); + rast->state.clear_color = util_pack_8888(clear_color); + rast->state.clear_depth = clear_depth * 0xffffffff; + rast->state.clear_stencil = clear_stencil; +} + +/* Begining of each tile: + */ +void lp_rast_start_tile( struct lp_rasterizer *, + unsigned x, + unsigned y ) +{ + rast->x = x; + rast->y = y; +} + +void lp_rast_clear_color( struct lp_rasterizer *rast ) +{ + const unsigned clear_color = rast->state.clear_color; + unsigned i, j; + + for (i = 0; i < TILESIZE; i++) + for (j = 0; j < TILESIZE; j++) + rast->tile[i][j] = clear_color; +} + +void lp_rast_clear_depth( struct lp_rasterizer *rast ) +{ + const unsigned clear_depth = rast->state.clear_depth; + unsigned i, j; + + for (i = 0; i < TILESIZE; i++) + for (j = 0; j < TILESIZE; j++) + rast->tile[i][j] = clear_depth; +} + +void lp_rast_clear_stencil( struct lp_rasterizer *rast ) +{ + const unsigned clear_stencil = rast->state.clear_stencil; + + memset(rast->tile.stencil, clear_stencil, sizeof rast->tile.stencil ); +} + +void lp_rast_load_color( struct lp_rasterizer *rast ) +{ + /* call u_tile func to load colors from surface */ +} + +void lp_rast_load_zstencil( struct lp_rasterizer *rast ) +{ + /* call u_tile func to load depth (and stencil?) from surface */ +} + +/* Within a tile: + */ +void lp_rast_set_state( struct lp_rasterizer *rast, + const struct lp_rast_state *state ) +{ + rast->shader_state = state; +} + +void lp_rast_triangle( struct lp_rasterizer *rast, + const struct lp_rast_triangle *inputs ) +{ + /* Set up the silly quad coef pointers + */ + for (i = 0; i < 4; i++) { + rast->quads[i].posCoef = inputs->posCoef; + rast->quads[i].coef = inputs->coef; + } + + /* Scan the tile in 4x4 chunks (?) and figure out which bits to + * rasterize: + */ + +} + +void lp_rast_shade_tile( struct lp_rasterizer *rast, + const struct lp_rast_shader_inputs *inputs ) +{ + /* Set up the silly quad coef pointers + */ + for (i = 0; i < 4; i++) { + rast->quads[i].posCoef = inputs->posCoef; + rast->quads[i].coef = inputs->coef; + } + + /* Use the existing preference for 8x2 (four quads) shading: + */ + for (i = 0; i < TILESIZE; i += 8) { + for (j = 0; j < TILESIZE; j += 2) { + rast->shader_state.shade( inputs->jc, + rast->x + i, + rast->y + j, + rast->quads, 4 ); + } + } +} + +/* End of tile: + */ +void lp_rast_store_color( struct lp_rasterizer *rast ) +{ + /* call u_tile func to store colors to surface */ +} + +void lp_rast_store_zstencil( struct lp_rasterizer *rast ) +{ + /* call u_tile func to store depth/stencil to surface */ +} + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer *rast ) +{ + FREE(rast); +} + diff --git a/src/gallium/drivers/llvmpipe/lp_rasterizer.h b/src/gallium/drivers/llvmpipe/lp_rasterizer.h new file mode 100644 index 0000000000..b3ae06a116 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rasterizer.h @@ -0,0 +1,112 @@ + +/* Initially create and program a single rasterizer directly. Later + * will want multiple of these, one or two per core. At that stage + * will probably pass command buffers into the rasterizers rather than + * individual function calls like this. + */ +struct lp_rasterizer; + +struct lp_rast_state { + /* State: + */ + struct lp_jit_context jc; + + /* Shader itself: + */ +}; + +/* Coefficients necessary to run the shader at a given location: + */ +struct lp_rast_shader_inputs { + + /* Current rasterizer state: + */ + const struct lp_rast_state *state; + + /* Attribute interpolation: + */ + float oneoverarea; + float x1; + float y1; + + struct tgsi_interp_coef position_coef; + struct tgsi_interp_coef *coef; +}; + + +/* Rasterization information for a triangle known to be in this bin, + * plus inputs to run the shader: + */ +struct lp_rast_triangle { + /* one-pixel sized trivial accept offsets for each plane */ + float ei1; + float ei2; + float ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + float eo1; + float eo2; + float eo3; + + /* y deltas for vertex pairs */ + float dy12; + float dy23; + float dy31; + + /* x deltas for vertex pairs */ + float dx12; + float dx23; + float dx31; + + /* State to run the shader: */ + struct lp_rast_shader_inputs inputs; +}; + + + +struct lp_rasterizer *lp_rast_create( void ); + +void lp_rast_bind_surfaces( struct lp_rasterizer *, + struct pipe_surface *color, + struct pipe_surface *zstencil, + const float *clear_color, + double clear_depth, + unsigned clear_stencil); + +/* Begining of each tile: + */ +void lp_rast_start_tile( struct lp_rasterizer *, + unsigned x, + unsigned y ); + +void lp_rast_clear_color( struct lp_rasterizer * ); + +void lp_rast_clear_zstencil( struct lp_rasterizer * ); + +void lp_rast_load_color( struct lp_rasterizer * ); + +void lp_rast_load_zstencil( struct lp_rasterizer * ); + + +/* Within a tile: + */ +void lp_rast_set_state( struct lp_rasterizer *, + const struct lp_rast_state * ); + +void lp_rast_triangle( struct lp_rasterizer *, + const struct lp_rast_triangle * ); + +void lp_rast_shade_tile( struct lp_rasterizer *, + const struct lp_rast_shader_inputs * ); + +/* End of tile: + */ +void lp_rast_store_color( struct lp_rasterizer * ); + +void lp_rast_store_zstencil( struct lp_rasterizer * ); + + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer * ); + diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 60107214df..8c67524506 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -26,15 +26,15 @@ **************************************************************************/ /** - * \brief Primitive rasterization/rendering (points, lines, triangles) + * \brief Primitive rasterization/rendering (points, lines) * * \author Keith Whitwell <keith@tungstengraphics.com> * \author Brian Paul */ #include "lp_context.h" -#include "lp_prim_setup.h" #include "lp_quad.h" +#include "lp_quad_pipe.h" #include "lp_setup.h" #include "lp_state.h" #include "draw/draw_context.h" @@ -44,1397 +44,49 @@ #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "lp_bld_debug.h" -#include "lp_tile_cache.h" -#include "lp_tile_soa.h" #define DEBUG_VERTS 0 -#define DEBUG_FRAGS 0 -/** - * Triangle edge info - */ -struct edge { - float dx; /**< X(v1) - X(v0), used only during setup */ - float dy; /**< Y(v1) - Y(v0), used only during setup */ - float dxdy; /**< dx/dy */ - float sx, sy; /**< first sample point coord */ - int lines; /**< number of lines on this edge */ -}; - - -#define MAX_QUADS 16 - - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_context { - struct llvmpipe_context *llvmpipe; - - /* Vertices are just an array of floats making up each attribute in - * turn. Currently fixed at 4 floats, but should change in time. - * Codegen will help cope with this. - */ - const float (*vmax)[4]; - const float (*vmid)[4]; - const float (*vmin)[4]; - const float (*vprovoke)[4]; - - struct edge ebot; - struct edge etop; - struct edge emaj; - - float oneoverarea; - int facing; - - struct quad_header quad[MAX_QUADS]; - struct quad_header *quad_ptrs[MAX_QUADS]; - unsigned count; - - struct quad_interp_coef coef; - - struct { - int left[2]; /**< [0] = row0, [1] = row1 */ - int right[2]; - int y; - } span; - -#if DEBUG_FRAGS - uint numFragsEmitted; /**< per primitive */ - uint numFragsWritten; /**< per primitive */ -#endif - - unsigned winding; /* which winding to cull */ -}; - - - -/** - * Execute fragment shader for the four fragments in the quad. - */ -static void -shade_quads(struct llvmpipe_context *llvmpipe, - struct quad_header *quads[], - unsigned nr) -{ - struct lp_fragment_shader *fs = llvmpipe->fs; - struct quad_header *quad = quads[0]; - const unsigned x = quad->input.x0; - const unsigned y = quad->input.y0; - uint8_t *tile; - uint8_t *color; - void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; - unsigned chan_index; - unsigned q; - - assert(fs->current); - if(!fs->current) - return; - - /* Sanity checks */ - assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH); - assert(x % TILE_VECTOR_WIDTH == 0); - assert(y % TILE_VECTOR_HEIGHT == 0); - for (q = 0; q < nr; ++q) { - assert(quads[q]->input.x0 == x + q*2); - assert(quads[q]->input.y0 == y); - } - - /* mask */ - for (q = 0; q < 4; ++q) - for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) - mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0; - - /* color buffer */ - if(llvmpipe->framebuffer.nr_cbufs >= 1 && - llvmpipe->framebuffer.cbufs[0]) { - tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y); - color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0); - } - else - color = NULL; - - /* depth buffer */ - if(llvmpipe->zsbuf_map) { - assert((x % 2) == 0); - assert((y % 2) == 0); - depth = llvmpipe->zsbuf_map + - y*llvmpipe->zsbuf_transfer->stride + - 2*x*llvmpipe->zsbuf_transfer->block.size; - } - else - depth = NULL; - - /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ - assert(lp_check_alignment(mask, 16)); - - assert(lp_check_alignment(depth, 16)); - assert(lp_check_alignment(color, 16)); - assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16)); - - /* run shader */ - fs->current->jit_function( &llvmpipe->jit_context, - x, y, - quad->coef->a0, - quad->coef->dadx, - quad->coef->dady, - &mask[0][0], - color, - depth); -} - - - - -/** - * Do triangle cull test using tri determinant (sign indicates orientation) - * \return true if triangle is to be culled. - */ -static INLINE boolean -cull_tri(const struct setup_context *setup, float det) -{ - if (det != 0) { - /* if (det < 0 then Z points toward camera and triangle is - * counter-clockwise winding. - */ - unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; - - if ((winding & setup->winding) == 0) - return FALSE; - } - - /* Culled: - */ - return TRUE; -} - - - -/** - * Clip setup->quad against the scissor/surface bounds. - */ -static INLINE void -quad_clip( struct setup_context *setup, struct quad_header *quad ) -{ - const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - - if (quad->input.x0 >= maxx || - quad->input.y0 >= maxy || - quad->input.x0 + 1 < minx || - quad->input.y0 + 1 < miny) { - /* totally clipped */ - quad->inout.mask = 0x0; - return; - } - if (quad->input.x0 < minx) - quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (quad->input.y0 < miny) - quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (quad->input.x0 == maxx - 1) - quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (quad->input.y0 == maxy - 1) - quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); -} - - - -/** - * Given an X or Y coordinate, return the block/quad coordinate that it - * belongs to. - */ -static INLINE int block( int x ) -{ - return x & ~(2-1); -} - -static INLINE int block_x( int x ) -{ - return x & ~(TILE_VECTOR_WIDTH - 1); -} - - -/** - * Emit a quad (pass to next stage) with clipping. - */ -static INLINE void -clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) -{ - quad_clip( setup, quad ); - - if (quad->inout.mask) { - struct llvmpipe_context *lp = setup->llvmpipe; - -#if 1 - /* XXX: The blender expects 4 quads. This is far from efficient, but - * until we codegenerate single-quad variants of the fragment pipeline - * we need this hack. */ - const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; - struct quad_header quads[nr_quads]; - struct quad_header *quad_ptrs[nr_quads]; - int x0 = block_x(quad->input.x0); - unsigned i; - - for(i = 0; i < nr_quads; ++i) { - int x = x0 + 2*i; - if(x == quad->input.x0) - memcpy(&quads[i], quad, sizeof quads[i]); - else { - memset(&quads[i], 0, sizeof quads[i]); - quads[i].input.x0 = x; - quads[i].input.y0 = quad->input.y0; - quads[i].coef = quad->coef; - } - quad_ptrs[i] = &quads[i]; - } - - shade_quads( lp, quad_ptrs, nr_quads ); -#else - shade_quads( lp, &quad, 1 ); -#endif - } -} - - -/** - * Render a horizontal span of quads - */ -static void flush_spans( struct setup_context *setup ) -{ - const int step = TILE_VECTOR_WIDTH; - const int xleft0 = setup->span.left[0]; - const int xleft1 = setup->span.left[1]; - const int xright0 = setup->span.right[0]; - const int xright1 = setup->span.right[1]; - - - int minleft = block_x(MIN2(xleft0, xleft1)); - int maxright = MAX2(xright0, xright1); - int x; - - for (x = minleft; x < maxright; x += step) { - unsigned skip_left0 = CLAMP(xleft0 - x, 0, step); - unsigned skip_left1 = CLAMP(xleft1 - x, 0, step); - unsigned skip_right0 = CLAMP(x + step - xright0, 0, step); - unsigned skip_right1 = CLAMP(x + step - xright1, 0, step); - unsigned lx = x; - const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; - unsigned q = 0; - - unsigned skipmask_left0 = (1U << skip_left0) - 1U; - unsigned skipmask_left1 = (1U << skip_left1) - 1U; - - /* These calculations fail when step == 32 and skip_right == 0. - */ - unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0); - unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1); - - unsigned mask0 = ~skipmask_left0 & ~skipmask_right0; - unsigned mask1 = ~skipmask_left1 & ~skipmask_right1; - - if (mask0 | mask1) { - for(q = 0; q < nr_quads; ++q) { - unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2); - setup->quad[q].input.x0 = lx; - setup->quad[q].input.y0 = setup->span.y; - setup->quad[q].inout.mask = quadmask; - setup->quad_ptrs[q] = &setup->quad[q]; - mask0 >>= 2; - mask1 >>= 2; - lx += 2; - } - assert(!(mask0 | mask1)); - - shade_quads(setup->llvmpipe, setup->quad_ptrs, nr_quads ); - } - } - - - setup->span.y = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - setup->span.left[0] = 1000000; /* greater than right[0] */ - setup->span.left[1] = 1000000; /* greater than right[1] */ -} - - -#if DEBUG_VERTS -static void print_vertex(const struct setup_context *setup, - const float (*v)[4]) -{ - int i; - debug_printf(" Vertex: (%p)\n", v); - for (i = 0; i < setup->quad[0].nr_attrs; i++) { - debug_printf(" %d: %f %f %f %f\n", i, - v[i][0], v[i][1], v[i][2], v[i][3]); - if (util_is_inf_or_nan(v[i][0])) { - debug_printf(" NaN!\n"); - } - } -} -#endif - -/** - * Sort the vertices from top to bottom order, setting up the triangle - * edge fields (ebot, emaj, etop). - * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise - */ -static boolean setup_sort_vertices( struct setup_context *setup, - float det, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - setup->vprovoke = v2; - - /* determine bottom to top order of vertices */ - { - float y0 = v0[0][1]; - float y1 = v1[0][1]; - float y2 = v2[0][1]; - if (y0 <= y1) { - if (y1 <= y2) { - /* y0<=y1<=y2 */ - setup->vmin = v0; - setup->vmid = v1; - setup->vmax = v2; - } - else if (y2 <= y0) { - /* y2<=y0<=y1 */ - setup->vmin = v2; - setup->vmid = v0; - setup->vmax = v1; - } - else { - /* y0<=y2<=y1 */ - setup->vmin = v0; - setup->vmid = v2; - setup->vmax = v1; - } - } - else { - if (y0 <= y2) { - /* y1<=y0<=y2 */ - setup->vmin = v1; - setup->vmid = v0; - setup->vmax = v2; - } - else if (y2 <= y1) { - /* y2<=y1<=y0 */ - setup->vmin = v2; - setup->vmid = v1; - setup->vmax = v0; - } - else { - /* y1<=y2<=y0 */ - setup->vmin = v1; - setup->vmid = v2; - setup->vmax = v0; - } - } - } - - setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; - setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; - setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; - setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; - setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; - setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; - - /* - * Compute triangle's area. Use 1/area to compute partial - * derivatives of attributes later. - * - * The area will be the same as prim->det, but the sign may be - * different depending on how the vertices get sorted above. - * - * To determine whether the primitive is front or back facing we - * use the prim->det value because its sign is correct. - */ - { - const float area = (setup->emaj.dx * setup->ebot.dy - - setup->ebot.dx * setup->emaj.dy); - - setup->oneoverarea = 1.0f / area; - - /* - debug_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup->oneoverarea, area, det ); - */ - if (util_is_inf_or_nan(setup->oneoverarea)) - return FALSE; - } - - /* We need to know if this is a front or back-facing triangle for: - * - the GLSL gl_FrontFacing fragment attribute (bool) - * - two-sided stencil test - */ - setup->facing = - ((det > 0.0) ^ - (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW)); - - return TRUE; -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static void tri_pos_coeff( struct setup_context *setup, - uint vertSlot, unsigned i) -{ - float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; - float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - assert(i <= 3); - - setup->coef.dadx[0][i] = dadx; - setup->coef.dady[0][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - - /* - debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); - */ -} - - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. - * \param slot which attribute slot - * \param i which component of the slot (0..3) - */ -static void const_pos_coeff( struct setup_context *setup, - uint vertSlot, unsigned i) -{ - setup->coef.dadx[0][i] = 0; - setup->coef.dady[0][i] = 0; - - /* need provoking vertex info! - */ - setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i]; -} - - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. - * \param slot which attribute slot - * \param i which component of the slot (0..3) - */ -static void const_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - setup->coef.dadx[1 + attrib][i] = 0; - setup->coef.dady[1 + attrib][i] = 0; - - /* need provoking vertex info! - */ - setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i]; - } -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static void tri_linear_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; - float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - assert(i <= 3); - - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - - /* - debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); - */ - } -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void tri_persp_coeff( struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - /* premultiply by 1/w (v[0][3] is always W): - */ - float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; - float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; - float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; - float botda = mida - mina; - float majda = maxa - mina; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - /* - debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, - setup->vmin[vertSlot][i], - setup->vmid[vertSlot][i], - setup->vmax[vertSlot][i] - ); - */ - assert(i <= 3); - - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (mina - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - } -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial, though Y has to be inverted for OpenGL. - * Z and W are copied from posCoef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. - */ -static void -setup_fragcoord_coeff(struct setup_context *setup, uint slot) -{ - /*X*/ - setup->coef.a0[1 + slot][0] = 0; - setup->coef.dadx[1 + slot][0] = 1.0; - setup->coef.dady[1 + slot][0] = 0.0; - /*Y*/ - setup->coef.a0[1 + slot][1] = 0.0; - setup->coef.dadx[1 + slot][1] = 0.0; - setup->coef.dady[1 + slot][1] = 1.0; - /*Z*/ - setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2]; - setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2]; - setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2]; - /*W*/ - setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3]; - setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3]; - setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3]; -} - - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. - */ -static void setup_tri_coefficients( struct setup_context *setup ) -{ - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; - - /* z and w are done by linear interpolation: - */ - tri_pos_coeff(setup, 0, 2); - tri_pos_coeff(setup, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; - - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_LINEAR: - tri_linear_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - tri_persp_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + fragSlot][0] = 0.0; - } - } -} - - - -static void setup_tri_edges( struct setup_context *setup ) -{ - float vmin_x = setup->vmin[0][0] + 0.5f; - float vmid_x = setup->vmid[0][0] + 0.5f; - - float vmin_y = setup->vmin[0][1] - 0.5f; - float vmid_y = setup->vmid[0][1] - 0.5f; - float vmax_y = setup->vmax[0][1] - 0.5f; - - setup->emaj.sy = ceilf(vmin_y); - setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); - setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; - setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; - - setup->etop.sy = ceilf(vmid_y); - setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy); - setup->etop.dxdy = setup->etop.dx / setup->etop.dy; - setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; - - setup->ebot.sy = ceilf(vmin_y); - setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy); - setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; - setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; -} - - -/** - * Render the upper or lower half of a triangle. - * Scissoring/cliprect is applied here too. - */ -static void subtriangle( struct setup_context *setup, - struct edge *eleft, - struct edge *eright, - unsigned lines ) -{ - const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - int y, start_y, finish_y; - int sy = (int)eleft->sy; - - assert((int)eleft->sy == (int) eright->sy); - - /* clip top/bottom */ - start_y = sy; - if (start_y < miny) - start_y = miny; - - finish_y = sy + lines; - if (finish_y > maxy) - finish_y = maxy; - - start_y -= sy; - finish_y -= sy; - - /* - debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); - */ - - for (y = start_y; y < finish_y; y++) { - - /* avoid accumulating adds as floats don't have the precision to - * accurately iterate large triangle edges that way. luckily we - * can just multiply these days. - * - * this is all drowned out by the attribute interpolation anyway. - */ - int left = (int)(eleft->sx + y * eleft->dxdy); - int right = (int)(eright->sx + y * eright->dxdy); - - /* clip left/right */ - if (left < minx) - left = minx; - if (right > maxx) - right = maxx; - - if (left < right) { - int _y = sy + y; - if (block(_y) != setup->span.y) { - flush_spans(setup); - setup->span.y = block(_y); - } - - setup->span.left[_y&1] = left; - setup->span.right[_y&1] = right; - } - } - - - /* save the values so that emaj can be restarted: - */ - eleft->sx += lines * eleft->dxdy; - eright->sx += lines * eright->dxdy; - eleft->sy += lines; - eright->sy += lines; -} - - -/** - * Recalculate prim's determinant. This is needed as we don't have - * get this information through the vbuf_render interface & we must - * calculate it here. - */ -static float -calc_det( const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - return ex * fy - ey * fx; -} - - -/** - * Do setup for triangle rasterization, then render the triangle. - */ -void llvmpipe_setup_tri( struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - float det; - -#if DEBUG_VERTS - debug_printf("Setup triangle:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); - print_vertex(setup, v2); -#endif - - if (setup->llvmpipe->no_rast) - return; - - det = calc_det(v0, v1, v2); - /* - debug_printf("%s\n", __FUNCTION__ ); - */ - -#if DEBUG_FRAGS - setup->numFragsEmitted = 0; - setup->numFragsWritten = 0; -#endif - - if (cull_tri( setup, det )) - return; - - if (!setup_sort_vertices( setup, det, v0, v1, v2 )) - return; - setup_tri_coefficients( setup ); - setup_tri_edges( setup ); - - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_TRIANGLES); - - setup->span.y = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ - - /* init_constant_attribs( setup ); */ - - if (setup->oneoverarea < 0.0) { - /* emaj on left: - */ - subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); - subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); - } - else { - /* emaj on right: - */ - subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); - subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); - } - - flush_spans( setup ); - -#if DEBUG_FRAGS - printf("Tri: %u frags emitted, %u written\n", - setup->numFragsEmitted, - setup->numFragsWritten); -#endif -} - - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a line. - */ -static void -linear_pos_coeff(struct setup_context *setup, - uint vertSlot, uint i) -{ - const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[0][i] = dadx; - setup->coef.dady[0][i] = dady; - setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a line. - */ -static void -line_linear_coeff(struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - } -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a line. - */ -static void -line_persp_coeff(struct setup_context *setup, - unsigned attrib, - uint vertSlot) -{ - unsigned i; - for (i = 0; i < NUM_CHANNELS; ++i) { - /* XXX double-check/verify this arithmetic */ - const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; - const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; - const float da = a1 - a0; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef.dadx[1 + attrib][i] = dadx; - setup->coef.dady[1 + attrib][i] = dady; - setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); - } -} - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmax are initialized. - */ -static INLINE boolean -setup_line_coefficients(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) -{ - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; - float area; - - /* use setup->vmin, vmax to point to vertices */ - if (llvmpipe->rasterizer->flatshade_first) - setup->vprovoke = v0; - else - setup->vprovoke = v1; - setup->vmin = v0; - setup->vmax = v1; - - setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; - setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; - - /* NOTE: this is not really area but something proportional to it */ - area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy; - if (area == 0.0f || util_is_inf_or_nan(area)) - return FALSE; - setup->oneoverarea = 1.0f / area; - - /* z and w are done by linear interpolation: - */ - linear_pos_coeff(setup, 0, 2); - linear_pos_coeff(setup, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; - - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_LINEAR: - line_linear_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - line_persp_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + fragSlot][0] = 0.0; - } - } - return TRUE; -} - - -/** - * Plot a pixel in a line segment. +/* Stubs for lines & points for now: */ -static INLINE void -plot(struct setup_context *setup, int x, int y) +void +llvmpipe_setup_point(struct setup_context *setup, + const float (*v0)[4]) { - const int iy = y & 1; - const int ix = x & 1; - const int quadX = x - ix; - const int quadY = y - iy; - const int mask = (1 << ix) << (2 * iy); - - if (quadX != setup->quad[0].input.x0 || - quadY != setup->quad[0].input.y0) - { - /* flush prev quad, start new quad */ - - if (setup->quad[0].input.x0 != -1) - clip_emit_quad( setup, &setup->quad[0] ); - - setup->quad[0].input.x0 = quadX; - setup->quad[0].input.y0 = quadY; - setup->quad[0].inout.mask = 0x0; - } - - setup->quad[0].inout.mask |= mask; } - -/** - * Do setup for line rasterization, then render the line. - * Single-pixel width, no stipple, etc. We rely on the 'draw' module - * to handle stippling and wide lines. - */ void llvmpipe_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) -{ - int x0 = (int) v0[0][0]; - int x1 = (int) v1[0][0]; - int y0 = (int) v0[0][1]; - int y1 = (int) v1[0][1]; - int dx = x1 - x0; - int dy = y1 - y0; - int xstep, ystep; - -#if DEBUG_VERTS - debug_printf("Setup line:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); -#endif - - if (setup->llvmpipe->no_rast) - return; - - if (dx == 0 && dy == 0) - return; - - if (!setup_line_coefficients(setup, v0, v1)) - return; - - assert(v0[0][0] < 1.0e9); - assert(v0[0][1] < 1.0e9); - assert(v1[0][0] < 1.0e9); - assert(v1[0][1] < 1.0e9); - - if (dx < 0) { - dx = -dx; /* make positive */ - xstep = -1; - } - else { - xstep = 1; - } - - if (dy < 0) { - dy = -dy; /* make positive */ - ystep = -1; - } - else { - ystep = 1; - } - - assert(dx >= 0); - assert(dy >= 0); - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES); - - setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1; - setup->quad[0].inout.mask = 0x0; - - /* XXX temporary: set coverage to 1.0 so the line appears - * if AA mode happens to be enabled. - */ - setup->quad[0].input.coverage[0] = - setup->quad[0].input.coverage[1] = - setup->quad[0].input.coverage[2] = - setup->quad[0].input.coverage[3] = 1.0; - - if (dx > dy) { - /*** X-major line ***/ - int i; - const int errorInc = dy + dy; - int error = errorInc - dx; - const int errorDec = error - dx; - - for (i = 0; i < dx; i++) { - plot(setup, x0, y0); - - x0 += xstep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - y0 += ystep; - } - } - } - else { - /*** Y-major line ***/ - int i; - const int errorInc = dx + dx; - int error = errorInc - dy; - const int errorDec = error - dy; - - for (i = 0; i < dy; i++) { - plot(setup, x0, y0); - - y0 += ystep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - x0 += xstep; - } - } - } - - /* draw final quad */ - if (setup->quad[0].inout.mask) { - clip_emit_quad( setup, &setup->quad[0] ); - } -} - - -static void -point_persp_coeff(struct setup_context *setup, - const float (*vert)[4], - unsigned attrib, - uint vertSlot) + const float (*v0)[4], + const float (*v1)[4]) { - unsigned i; - for(i = 0; i < NUM_CHANNELS; ++i) { - setup->coef.dadx[1 + attrib][i] = 0.0F; - setup->coef.dady[1 + attrib][i] = 0.0F; - setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3]; - } } -/** - * Do setup for point rasterization, then render the point. - * Round or square points... - * XXX could optimize a lot for 1-pixel points. +/* Called after statechange, before emitting primitives. If binning + * is active, this function should store relevant state in the binning + * context. + * + * That includes: + * - current fragment shader function + * - bound constant buffer contents + * - bound textures + * - blend color + * - etc. + * + * Basically everything needed at some point in the future to + * rasterize triangles for the current state. + * + * Additionally this will set up the state needed for the rasterizer + * to process and bin incoming triangles. That would include such + * things as: + * - cull mode + * - ??? + * - etc. + * */ -void -llvmpipe_setup_point( struct setup_context *setup, - const float (*v0)[4] ) -{ - struct llvmpipe_context *llvmpipe = setup->llvmpipe; - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const int sizeAttr = setup->llvmpipe->psize_slot; - const float size - = sizeAttr > 0 ? v0[sizeAttr][0] - : setup->llvmpipe->rasterizer->point_size; - const float halfSize = 0.5F * size; - const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth; - const float x = v0[0][0]; /* Note: data[0] is always position */ - const float y = v0[0][1]; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); - uint fragSlot; - -#if DEBUG_VERTS - debug_printf("Setup point:\n"); - print_vertex(setup, v0); -#endif - - if (llvmpipe->no_rast) - return; - - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_POINTS); - - /* For points, all interpolants are constant-valued. - * However, for point sprites, we'll need to setup texcoords appropriately. - * XXX: which coefficients are the texcoords??? - * We may do point sprites as textured quads... - * - * KW: We don't know which coefficients are texcoords - ultimately - * the choice of what interpolation mode to use for each attribute - * should be determined by the fragment program, using - * per-attribute declaration statements that include interpolation - * mode as a parameter. So either the fragment program will have - * to be adjusted for pointsprite vs normal point behaviour, or - * otherwise a special interpolation mode will have to be defined - * which matches the required behaviour for point sprites. But - - * the latter is not a feature of normal hardware, and as such - * probably should be ruled out on that basis. - */ - setup->vprovoke = v0; - - /* setup Z, W */ - const_pos_coeff(setup, 0, 2); - const_pos_coeff(setup, 0, 3); - - for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->attrib[fragSlot].src_index; - - switch (vinfo->attrib[fragSlot].interp_mode) { - case INTERP_CONSTANT: - /* fall-through */ - case INTERP_LINEAR: - const_coeff(setup, fragSlot, vertSlot); - break; - case INTERP_PERSPECTIVE: - point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; - setup->coef.dadx[1 + fragSlot][0] = 0.0; - setup->coef.dady[1 + fragSlot][0] = 0.0; - } - } - - - if (halfSize <= 0.5 && !round) { - /* special case for 1-pixel points */ - const int ix = ((int) x) & 1; - const int iy = ((int) y) & 1; - setup->quad[0].input.x0 = (int) x - ix; - setup->quad[0].input.y0 = (int) y - iy; - setup->quad[0].inout.mask = (1 << ix) << (2 * iy); - clip_emit_quad( setup, &setup->quad[0] ); - } - else { - if (round) { - /* rounded points */ - const int ixmin = block((int) (x - halfSize)); - const int ixmax = block((int) (x + halfSize)); - const int iymin = block((int) (y - halfSize)); - const int iymax = block((int) (y + halfSize)); - const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ - const float rmax = halfSize + 0.7071F; - const float rmin2 = MAX2(0.0F, rmin * rmin); - const float rmax2 = rmax * rmax; - const float cscale = 1.0F / (rmax2 - rmin2); - int ix, iy; - - for (iy = iymin; iy <= iymax; iy += 2) { - for (ix = ixmin; ix <= ixmax; ix += 2) { - float dx, dy, dist2, cover; - - setup->quad[0].inout.mask = 0x0; - - dx = (ix + 0.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_TOP_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_TOP_RIGHT; - } - - dx = (ix + 0.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); - setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT; - } - - if (setup->quad[0].inout.mask) { - setup->quad[0].input.x0 = ix; - setup->quad[0].input.y0 = iy; - clip_emit_quad( setup, &setup->quad[0] ); - } - } - } - } - else { - /* square points */ - const int xmin = (int) (x + 0.75 - halfSize); - const int ymin = (int) (y + 0.25 - halfSize); - const int xmax = xmin + (int) size; - const int ymax = ymin + (int) size; - /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ - const int ixmin = block(xmin); - const int ixmax = block(xmax - 1); - const int iymin = block(ymin); - const int iymax = block(ymax - 1); - int ix, iy; - - /* - debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); - */ - for (iy = iymin; iy <= iymax; iy += 2) { - uint rowMask = 0xf; - if (iy < ymin) { - /* above the top edge */ - rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - } - if (iy + 1 >= ymax) { - /* below the bottom edge */ - rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); - } - - for (ix = ixmin; ix <= ixmax; ix += 2) { - uint mask = rowMask; - - if (ix < xmin) { - /* fragment is past left edge of point, turn off left bits */ - mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - } - if (ix + 1 >= xmax) { - /* past the right edge */ - mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - } - - setup->quad[0].inout.mask = mask; - setup->quad[0].input.x0 = ix; - setup->quad[0].input.y0 = iy; - clip_emit_quad( setup, &setup->quad[0] ); - } - } - } - } -} - -void llvmpipe_setup_prepare( struct setup_context *setup ) +void setup_prepare( struct setup_context *setup ) { struct llvmpipe_context *lp = setup->llvmpipe; @@ -1442,6 +94,8 @@ void llvmpipe_setup_prepare( struct setup_context *setup ) llvmpipe_update_derived(lp); } + lp->quad.first->begin( lp->quad.first ); + if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { @@ -1452,38 +106,28 @@ void llvmpipe_setup_prepare( struct setup_context *setup ) /* 'draw' will do culling */ setup->winding = PIPE_WINDING_NONE; } + + setup_prepare_tri( setup->llvmpipe ); } -void llvmpipe_setup_destroy_context( struct setup_context *setup ) +void setup_destroy_context( struct setup_context *setup ) { - align_free( setup ); + FREE( setup ); } /** * Create a new primitive setup/render stage. */ -struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe ) +struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ) { - struct setup_context *setup; + struct setup_context *setup = CALLOC_STRUCT(setup_context); unsigned i; - setup = align_malloc(sizeof(struct setup_context), 16); - if (!setup) - return NULL; - - memset(setup, 0, sizeof *setup); setup->llvmpipe = llvmpipe; - for (i = 0; i < MAX_QUADS; i++) { - setup->quad[i].coef = &setup->coef; - } - - setup->span.left[0] = 1000000; /* greater than right[0] */ - setup->span.left[1] = 1000000; /* greater than right[1] */ - return setup; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 89c43da046..05aaaf83b8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -30,11 +30,8 @@ struct setup_context; struct llvmpipe_context; -void -llvmpipe_setup_tri( struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ); +/* Note, not using setup_context currently + */ void llvmpipe_setup_line(struct setup_context *setup, @@ -46,8 +43,12 @@ llvmpipe_setup_point( struct setup_context *setup, const float (*v0)[4] ); -struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe ); -void llvmpipe_setup_prepare( struct setup_context *setup ); -void llvmpipe_setup_destroy_context( struct setup_context *setup ); +struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ); + +void setup_prepare( struct setup_context *setup ); + +void setup_destroy_context( struct setup_context *setup ); + +void setup_prepare_tri( struct llvmpipe_context *llvmpipe ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h new file mode 100644 index 0000000000..848705e099 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -0,0 +1,140 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#ifndef LP_SETUP_CONTEXT_H +#define LP_SETUP_CONTEXT_H + +struct clear_tile { + boolean do_color; + boolean do_depth_stencil; + unsigned rgba; + unsigned depth_stencil; +}; + +struct load_tile { + boolean do_color; + boolean do_depth_stencil; +}; + +/* Shade tile points directly at this: + */ +struct shader_inputs { + /* Some way of updating rasterizer state: + */ + /* ??? */ + + /* Attribute interpolation: + */ + float oneoverarea; + float x1; + float y1; + + struct tgsi_interp_coef position_coef; + struct tgsi_interp_coef *coef; +}; + +/* Shade triangle points at this: + */ +struct shade_triangle { + /* one-pixel sized trivial accept offsets for each plane */ + float ei1; + float ei2; + float ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + float eo1; + float eo2; + float eo3; + + /* y deltas for vertex pairs */ + float dy12; + float dy23; + float dy31; + + /* x deltas for vertex pairs */ + float dx12; + float dx23; + float dx31; + + struct shader_inputs inputs; +}; + +struct bin_cmd { + enum { + CMD_END = 0, + CMD_CLEAR, + CMD_LOAD_TILE, + CMD_SHADE_TILE, + CMD_SHADE_TRIANGLE, + } cmd; + + union { + struct triangle *tri; + struct clear *clear; + } ptr; +}; + +struct cmd_block { + struct bin_cmd cmds[128]; + unsigned count; + struct cmd_block *next; +}; + +/* Triangles + */ +struct data_block { + ubyte data[4096 - sizeof(unsigned) - sizeof(struct cmd_block *)]; + unsigned count; + struct data_block *next; +}; + +/* Need to store the state at the time the triangle was drawn, at + * least as it is needed during rasterization. That would include at + * minimum the constant values referred to by the fragment shader, + * blend state, etc. Much of this is code-generated into the shader + * in llvmpipe -- may be easier to do this work there. + */ +struct state_block { +}; + + +/** + * Basically all the data from a binner scene: + */ +struct binned_scene { + struct llvmpipe_context *llvmpipe; + + struct cmd_block *bin[MAX_HEIGHT / BIN_SIZE][MAX_WIDTH / BIN_SIZE]; + struct data_block *data; +}; + +static INLINE struct triangle *get_triangle( struct setup_context *setup ) +{ + if (setup->triangles->count == TRIANGLE_BLOCK_COUNT) + return setup_triangle_from_new_block( setup ); + + return &setup->triangles[setup->triangles->count++]; +} diff --git a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c new file mode 100644 index 0000000000..5b4faf489b --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c @@ -0,0 +1,7 @@ + +void +rasterize( struct llvmpipe_context *llvmpipe, + struct binned_scene *scene ) +{ + +} diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c new file mode 100644 index 0000000000..a09e0fa643 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -0,0 +1,755 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Recursive rasterization for triangles + */ + +#include "lp_context.h" +#include "lp_quad.h" +#include "lp_quad_pipe.h" +#include "lp_setup.h" +#include "lp_state.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#define BLOCKSIZE 4 + +struct triangle { + /* one-pixel sized trivial accept offsets for each plane */ + float ei1; + float ei2; + float ei3; + + /* one-pixel sized trivial reject offsets for each plane */ + float eo1; + float eo2; + float eo3; + + /* y deltas for vertex pairs */ + float dy12; + float dy23; + float dy31; + + /* x deltas for vertex pairs */ + float dx12; + float dx23; + float dx31; + + /* Attribute interpolation: + */ + float oneoverarea; + float x1; + float y1; + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef position_coef; + + /* A run of pre-initialized quads: + */ + struct llvmpipe_context *llvmpipe; + struct quad_header quad[4]; +}; + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + */ +static void constant_coef( struct tgsi_interp_coef *coef, + const float (*v3)[4], + unsigned vert_attr, + unsigned i ) +{ + coef->a0[i] = v3[vert_attr][i]; + coef->dadx[i] = 0; + coef->dady[i] = 0; +} + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void linear_coef( struct triangle *tri, + struct tgsi_interp_coef *coef, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) +{ + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + float a3 = v3[vert_attr][i]; + + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + coef->a0[i] = (v1[vert_attr][i] - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct triangle *tri, + struct tgsi_interp_coef *coef, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + float a3 = v3[vert_attr][i] * v3[0][3]; + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from position_coef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coef(struct triangle *tri, unsigned slot) +{ + /*X*/ + tri->coef[slot].a0[0] = 0.0; + tri->coef[slot].dadx[0] = 1.0; + tri->coef[slot].dady[0] = 0.0; + /*Y*/ + tri->coef[slot].a0[1] = 0.0; + tri->coef[slot].dadx[1] = 0.0; + tri->coef[slot].dady[1] = 1.0; + /*Z*/ + tri->coef[slot].a0[2] = tri->position_coef.a0[2]; + tri->coef[slot].dadx[2] = tri->position_coef.dadx[2]; + tri->coef[slot].dady[2] = tri->position_coef.dady[2]; + /*W*/ + tri->coef[slot].a0[3] = tri->position_coef.a0[3]; + tri->coef[slot].dadx[3] = tri->position_coef.dadx[3]; + tri->coef[slot].dady[3] = tri->position_coef.dady[3]; +} + + + +/** + * Compute the tri->coef[] array dadx, dady, a0 values. + */ +static void setup_tri_coefficients( struct llvmpipe_context *llvmpipe, + struct triangle *tri, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + boolean frontface ) +{ + const struct lp_fragment_shader *fs = llvmpipe->fs; + const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); + unsigned input; + + /* z and w are done by linear interpolation: + */ + linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 2); + linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (input = 0; input < fs->info.num_inputs; input++) { + unsigned vert_attr = vinfo->attrib[input].src_index; + unsigned i; + + switch (vinfo->attrib[input].interp_mode) { + case INTERP_CONSTANT: + for (i = 0; i < NUM_CHANNELS; i++) + constant_coef(&tri->coef[input], v3, vert_attr, i); + break; + + case INTERP_LINEAR: + for (i = 0; i < NUM_CHANNELS; i++) + linear_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + break; + + case INTERP_PERSPECTIVE: + for (i = 0; i < NUM_CHANNELS; i++) + perspective_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + break; + + case INTERP_POS: + setup_fragcoord_coef(tri, input); + break; + + default: + assert(0); + } + + if (fs->info.input_semantic_name[input] == TGSI_SEMANTIC_FACE) { + tri->coef[input].a0[0] = 1.0f - frontface; + tri->coef[input].dadx[0] = 0.0; + tri->coef[input].dady[0] = 0.0; + } + } +} + + + +/* XXX: do this by add/subtracting a large floating point number: + */ +static inline float subpixel_snap( float a ) +{ + int i = a * 16; + return (float)i * (1.0/16); +} + + +/* Convert 8x8 block into four runs of quads and render each in turn. + */ +#if (BLOCKSIZE == 8) +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int i; + + tri->quad[0].input.x0 = x + 0; + tri->quad[1].input.x0 = x + 2; + tri->quad[2].input.x0 = x + 4; + tri->quad[3].input.x0 = x + 6; + + for (i = 0; i < 4; i++, y += 2) { + tri->quad[0].inout.mask = 0xf; + tri->quad[1].inout.mask = 0xf; + tri->quad[2].inout.mask = 0xf; + tri->quad[3].inout.mask = 0xf; + + tri->quad[0].input.y0 = y; + tri->quad[1].input.y0 = y; + tri->quad[2].input.y0 = y; + tri->quad[3].input.y0 = y; + + /* XXX: don't bother with this ptrs business */ + ptrs[0] = &tri->quad[0]; + ptrs[1] = &tri->quad[1]; + ptrs[2] = &tri->quad[2]; + ptrs[3] = &tri->quad[3]; + + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 ); + } +} +#elif (BLOCKSIZE == 4) +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int iy; + + tri->quad[0].input.x0 = x + 0; + tri->quad[1].input.x0 = x + 2; + + for (iy = 0; iy < 4; iy += 2) { + tri->quad[0].inout.mask = 0xf; + tri->quad[1].inout.mask = 0xf; + + tri->quad[0].input.y0 = y + iy; + tri->quad[1].input.y0 = y + iy; + + /* XXX: don't bother with this ptrs business */ + ptrs[0] = &tri->quad[0]; + ptrs[1] = &tri->quad[1]; + + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 ); + } +} +#else +static void block_full( struct triangle *tri, int x, int y ) +{ + struct quad_header *ptrs[4]; + int iy; + + tri->quad[0].input.x0 = x; + tri->quad[0].input.y0 = y; + tri->quad[0].inout.mask = 0xf; + + ptrs[0] = &tri->quad[0]; + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 1 ); +} +#endif + + +static void +do_quad( struct triangle *tri, + int x, int y, + float c1, float c2, float c3 ) +{ + struct quad_header *quad = &tri->quad[0]; + + float xstep1 = -tri->dy12; + float xstep2 = -tri->dy23; + float xstep3 = -tri->dy31; + + float ystep1 = tri->dx12; + float ystep2 = tri->dx23; + float ystep3 = tri->dx31; + + quad->input.x0 = x; + quad->input.y0 = y; + quad->inout.mask = 0; + + if (c1 > 0 && + c2 > 0 && + c3 > 0) + quad->inout.mask |= 1; + + if (c1 + xstep1 > 0 && + c2 + xstep2 > 0 && + c3 + xstep3 > 0) + quad->inout.mask |= 2; + + if (c1 + ystep1 > 0 && + c2 + ystep2 > 0 && + c3 + ystep3 > 0) + quad->inout.mask |= 4; + + if (c1 + ystep1 + xstep1 > 0 && + c2 + ystep2 + xstep2 > 0 && + c3 + ystep3 + xstep3 > 0) + quad->inout.mask |= 8; + + if (quad->inout.mask) + tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, &quad, 1 ); +} + +/* Evaluate each pixel in a block, generate a mask and possibly render + * the quad: + */ +static void +do_block( struct triangle *tri, + int x, int y, + float c1, + float c2, + float c3 ) +{ + const int step = 2; + + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; + + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; + + int ix, iy; + + for (iy = 0; iy < BLOCKSIZE; iy += 2) { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (ix = 0; ix < BLOCKSIZE; ix += 2) { + + do_quad(tri, x+ix, y+iy, cx1, cx2, cx3); + + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } +} + + + + +/* to avoid having to allocate power-of-four, square render targets, + * end up having a specialized version of the above that runs only at + * the topmost level. + * + * at the topmost level there may be an arbitary number of steps on + * either dimension, so this loop needs to be either separately + * code-generated and unrolled for each render target size, or kept as + * generic looping code: + */ + +#define MIN3(a,b,c) MIN2(MIN2(a,b),c) +#define MAX3(a,b,c) MAX2(MAX2(a,b),c) + +static void +do_triangle_ccw(struct llvmpipe_context *llvmpipe, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + boolean frontfacing ) +{ + const int rt_width = llvmpipe->framebuffer.cbufs[0]->width; + const int rt_height = llvmpipe->framebuffer.cbufs[0]->height; + + const float y1 = subpixel_snap(v1[0][1]); + const float y2 = subpixel_snap(v2[0][1]); + const float y3 = subpixel_snap(v3[0][1]); + + const float x1 = subpixel_snap(v1[0][0]); + const float x2 = subpixel_snap(v2[0][0]); + const float x3 = subpixel_snap(v3[0][0]); + + struct triangle tri; + float area; + float c1, c2, c3; + int i; + int minx, maxx, miny, maxy; + + tri.llvmpipe = llvmpipe; + + + tri.dx12 = x1 - x2; + tri.dx23 = x2 - x3; + tri.dx31 = x3 - x1; + + tri.dy12 = y1 - y2; + tri.dy23 = y2 - y3; + tri.dy31 = y3 - y1; + + area = (tri.dx12 * tri.dy31 - + tri.dx31 * tri.dy12); + + /* Cull non-ccw and zero-sized triangles. + */ + if (area <= 0 || util_is_inf_or_nan(area)) + return; + + // Bounding rectangle + minx = util_iround(MIN3(x1, x2, x3) - .5); + maxx = util_iround(MAX3(x1, x2, x3) + .5); + miny = util_iround(MIN3(y1, y2, y3) - .5); + maxy = util_iround(MAX3(y1, y2, y3) + .5); + + /* Clamp to framebuffer (or tile) dimensions: + */ + miny = MAX2(0, miny); + minx = MAX2(0, minx); + maxy = MIN2(rt_height, maxy); + maxx = MIN2(rt_width, maxx); + + if (miny == maxy || minx == maxx) + return; + + /* The only divide in this code. Is it really needed? + */ + tri.oneoverarea = 1.0f / area; + + /* Setup parameter interpolants: + */ + setup_tri_coefficients( llvmpipe, &tri, v1, v2, v3, frontfacing ); + + for (i = 0; i < Elements(tri.quad); i++) { + tri.quad[i].coef = tri.coef; + tri.quad[i].posCoef = &tri.position_coef; + } + + /* half-edge constants, will be interated over the whole + * rendertarget. + */ + c1 = tri.dy12 * x1 - tri.dx12 * y1; + c2 = tri.dy23 * x2 - tri.dx23 * y2; + c3 = tri.dy31 * x3 - tri.dx31 * y3; + + /* correct for top-left fill convention: + */ + if (tri.dy12 < 0 || (tri.dy12 == 0 && tri.dx12 > 0)) c1++; + if (tri.dy23 < 0 || (tri.dy23 == 0 && tri.dx23 > 0)) c2++; + if (tri.dy31 < 0 || (tri.dy31 == 0 && tri.dx31 > 0)) c3++; + + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + tri.eo1 = 0; + if (tri.dy12 < 0) tri.eo1 -= tri.dy12; + if (tri.dx12 > 0) tri.eo1 += tri.dx12; + + tri.eo2 = 0; + if (tri.dy23 < 0) tri.eo2 -= tri.dy23; + if (tri.dx23 > 0) tri.eo2 += tri.dx23; + + tri.eo3 = 0; + if (tri.dy31 < 0) tri.eo3 -= tri.dy31; + if (tri.dx31 > 0) tri.eo3 += tri.dx31; + + /* Calculate trivial accept offsets from the above. + */ + tri.ei1 = tri.dx12 - tri.dy12 - tri.eo1; + tri.ei2 = tri.dx23 - tri.dy23 - tri.eo2; + tri.ei3 = tri.dx31 - tri.dy31 - tri.eo3; + + minx &= ~(BLOCKSIZE-1); /* aligned blocks */ + miny &= ~(BLOCKSIZE-1); /* aligned blocks */ + + c1 += tri.dx12 * miny - tri.dy12 * minx; + c2 += tri.dx23 * miny - tri.dy23 * minx; + c3 += tri.dx31 * miny - tri.dy31 * minx; + + if ((miny & ~15) == (maxy & ~15) && + (minx & ~15) == (maxx & ~15)) + { + const int step = 2; + + float xstep1 = -step * tri.dy12; + float xstep2 = -step * tri.dy23; + float xstep3 = -step * tri.dy31; + + float ystep1 = step * tri.dx12; + float ystep2 = step * tri.dx23; + float ystep3 = step * tri.dx31; + + float eo1 = tri.eo1 * step; + float eo2 = tri.eo2 * step; + float eo3 = tri.eo3 * step; + + int x, y; + + /* Subdivide space into NxM blocks, where each block is square and + * power-of-four in dimension. + * + * Trivially accept or reject blocks, else jump to per-pixel + * examination above. + */ + for (y = miny; y < maxy; y += step) + { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + + for (x = minx; x < maxx; x += step) + { + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + } + else + { + do_quad(&tri, x, y, cx1, cx2, cx3); + } + + /* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } + } + else + { + const int step = BLOCKSIZE; + + float ei1 = tri.ei1 * step; + float ei2 = tri.ei2 * step; + float ei3 = tri.ei3 * step; + + float eo1 = tri.eo1 * step; + float eo2 = tri.eo2 * step; + float eo3 = tri.eo3 * step; + + float xstep1 = -step * tri.dy12; + float xstep2 = -step * tri.dy23; + float xstep3 = -step * tri.dy31; + + float ystep1 = step * tri.dx12; + float ystep2 = step * tri.dx23; + float ystep3 = step * tri.dx31; + int x, y; + + + /* Subdivide space into NxM blocks, where each block is square and + * power-of-four in dimension. + * + * Trivially accept or reject blocks, else jump to per-pixel + * examination above. + */ + for (y = miny; y < maxy; y += step) + { + float cx1 = c1; + float cx2 = c2; + float cx3 = c3; + boolean in = false; + + for (x = minx; x < maxx; x += step) + { + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + /* do nothing */ + if (in) + break; + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + in = TRUE; + block_full(&tri, x, y); /* trivial accept */ + } + else + { + in = TRUE; + // block_full(&tri, x, y); /* trivial accept */ + do_block(&tri, x, y, cx1, cx2, cx3); + } + + /* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } + } +} + +static void triangle_cw( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + do_triangle_ccw( llvmpipe, v1, v0, v2, !llvmpipe->ccw_is_frontface ); +} + +static void triangle_ccw( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + do_triangle_ccw( llvmpipe, v0, v1, v2, llvmpipe->ccw_is_frontface ); +} + +static void triangle_both( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + if (ex * fy - ey * fx < 0) + triangle_ccw( llvmpipe, v0, v1, v2 ); + else + triangle_cw( llvmpipe, v0, v1, v2 ); +} + +static void triangle_nop( struct llvmpipe_context *llvmpipe, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ +} + +/** + * Do setup for triangle rasterization, then render the triangle. + */ +void setup_prepare_tri( struct llvmpipe_context *llvmpipe ) +{ + llvmpipe->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == + PIPE_WINDING_CW); + + switch (llvmpipe->rasterizer->cull_mode) { + case PIPE_WINDING_NONE: + llvmpipe->triangle = triangle_both; + break; + case PIPE_WINDING_CCW: + llvmpipe->triangle = triangle_cw; + break; + case PIPE_WINDING_CW: + llvmpipe->triangle = triangle_ccw; + break; + default: + llvmpipe->triangle = triangle_nop; + break; + } +} + + diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 30fb41ea65..31eaadda21 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -67,24 +67,19 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) const struct lp_fragment_shader *lpfs = llvmpipe->fs; const enum interp_mode colorInterp = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(llvmpipe->draw); uint i; - if (llvmpipe->vbuf) { - /* if using the post-transform vertex buffer, tell draw_vbuf to - * simply emit the whole post-xform vertex as-is: - */ - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); - uint i; - - /* No longer any need to try and emit draw vertex_header info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); + /* Tell draw_vbuf to simply emit the whole post-xform vertex + * as-is. No longer any need to try and emit draw vertex_header + * info. + */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); } + draw_compute_vertex_size(vinfo_vbuf); /* * Loop over fragment shader inputs, searching for the matching output diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c deleted file mode 100644 index ec3e002d62..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ /dev/null @@ -1,353 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture tile caching. - * - * Author: - * Brian Paul - */ - -#include "pipe/p_inlines.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_tile.h" -#include "util/u_rect.h" -#include "lp_context.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tile_soa.h" -#include "lp_tile_cache.h" - - -#define MAX_WIDTH 4096 -#define MAX_HEIGHT 4096 - - -enum llvmpipe_tile_status -{ - LP_TILE_STATUS_UNDEFINED = 0, - LP_TILE_STATUS_CLEAR = 1, - LP_TILE_STATUS_DEFINED = 2 -}; - - -struct llvmpipe_cached_tile -{ - enum llvmpipe_tile_status status; - - /** color in SOA format */ - uint8_t *color; -}; - - -struct llvmpipe_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - - struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE]; - - uint8_t clear_color[4]; /**< for color bufs */ - uint clear_val; /**< for z+stencil, or packed color clear value */ - - struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */ -}; - - -struct llvmpipe_tile_cache * -lp_create_tile_cache( struct pipe_screen *screen ) -{ - struct llvmpipe_tile_cache *tc; - int maxLevels, maxTexSize; - - /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */ - maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); - maxTexSize = 1 << (maxLevels - 1); - assert(MAX_WIDTH >= maxTexSize); - - tc = CALLOC_STRUCT( llvmpipe_tile_cache ); - if(!tc) - return NULL; - - tc->screen = screen; - - return tc; -} - - -void -lp_destroy_tile_cache(struct llvmpipe_tile_cache *tc) -{ - struct pipe_screen *screen; - unsigned x, y; - - for (y = 0; y < MAX_HEIGHT; y += TILE_SIZE) { - for (x = 0; x < MAX_WIDTH; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - if(tile->color) - align_free(tile->color); - } - } - - if (tc->transfer) { - screen = tc->transfer->texture->screen; - screen->tex_transfer_destroy(tc->transfer); - } - - FREE( tc ); -} - - -/** - * Specify the surface to cache. - */ -void -lp_tile_cache_set_surface(struct llvmpipe_tile_cache *tc, - struct pipe_surface *ps) -{ - if (tc->transfer) { - struct pipe_screen *screen = tc->transfer->texture->screen; - - if (ps == tc->surface) - return; - - if (tc->transfer_map) { - screen->transfer_unmap(screen, tc->transfer); - tc->transfer_map = NULL; - } - - screen->tex_transfer_destroy(tc->transfer); - tc->transfer = NULL; - } - - tc->surface = ps; - - if (ps) { - struct pipe_screen *screen = ps->texture->screen; - unsigned x, y; - - tc->transfer = screen->get_tex_transfer(screen, ps->texture, ps->face, - ps->level, ps->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, ps->width, ps->height); - - for (y = 0; y < ps->height; y += TILE_SIZE) { - for (x = 0; x < ps->width; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - tile->status = LP_TILE_STATUS_UNDEFINED; - - if(!tile->color) - tile->color = align_malloc( TILE_SIZE*TILE_SIZE*NUM_CHANNELS, 16 ); - } - } - } -} - - -/** - * Return the transfer being cached. - */ -struct pipe_surface * -lp_tile_cache_get_surface(struct llvmpipe_tile_cache *tc) -{ - return tc->surface; -} - - -void -lp_tile_cache_map_transfers(struct llvmpipe_tile_cache *tc) -{ - if (tc->transfer && !tc->transfer_map) - tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); -} - - -void -lp_tile_cache_unmap_transfers(struct llvmpipe_tile_cache *tc) -{ - if (tc->transfer_map) { - tc->screen->transfer_unmap(tc->screen, tc->transfer); - tc->transfer_map = NULL; - } -} - - -/** - * Set a tile to a solid color. - */ -static void -clear_tile(struct llvmpipe_cached_tile *tile, - uint8_t clear_color[4]) -{ - if (clear_color[0] == clear_color[1] && - clear_color[1] == clear_color[2] && - clear_color[2] == clear_color[3]) { - memset(tile->color, clear_color[0], TILE_SIZE * TILE_SIZE * 4); - } - else { - uint x, y, chan; - for (y = 0; y < TILE_SIZE; y++) - for (x = 0; x < TILE_SIZE; x++) - for (chan = 0; chan < 4; ++chan) - TILE_PIXEL(tile->color, x, y, chan) = clear_color[chan]; - } -} - - -/** - * Flush the tile cache: write all dirty tiles back to the transfer. - * any tiles "flagged" as cleared will be "really" cleared. - */ -void -lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) -{ - struct pipe_transfer *pt = tc->transfer; - unsigned x, y; - - if(!pt) - return; - - assert(tc->transfer_map); - - /* push the tile to all positions marked as clear */ - for (y = 0; y < pt->height; y += TILE_SIZE) { - for (x = 0; x < pt->width; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - - if(tile->status != LP_TILE_STATUS_UNDEFINED) { - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - - if (!pipe_clip_tile(x, y, &w, &h, pt)) { - switch(tile->status) { - case LP_TILE_STATUS_CLEAR: - /* Actually clear the tiles which were flagged as being in a - * clear state. */ - util_fill_rect(tc->transfer_map, &pt->block, pt->stride, - x, y, w, h, - tc->clear_val); - break; - - case LP_TILE_STATUS_DEFINED: - lp_tile_write_4ub(pt->format, - tile->color, - tc->transfer_map, pt->stride, - x, y, w, h); - break; - - default: - assert(0); - break; - } - } - - tile->status = LP_TILE_STATUS_UNDEFINED; - } - } - } -} - - -/** - * Get a tile from the cache. - * \param x, y position of tile, in pixels - */ -void * -lp_get_cached_tile(struct llvmpipe_tile_cache *tc, - unsigned x, unsigned y ) -{ - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - struct pipe_transfer *pt = tc->transfer; - - assert(tc->surface); - assert(tc->transfer); - - switch(tile->status) { - case LP_TILE_STATUS_CLEAR: - /* don't get tile from framebuffer, just clear it */ - clear_tile(tile, tc->clear_color); - tile->status = LP_TILE_STATUS_DEFINED; - break; - - case LP_TILE_STATUS_UNDEFINED: { - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - - x &= ~(TILE_SIZE - 1); - y &= ~(TILE_SIZE - 1); - - if (!pipe_clip_tile(x, y, &w, &h, tc->transfer)) - lp_tile_read_4ub(pt->format, - tile->color, - tc->transfer_map, tc->transfer->stride, - x, y, w, h); - - tile->status = LP_TILE_STATUS_DEFINED; - break; - } - - case LP_TILE_STATUS_DEFINED: - /* nothing to do */ - break; - } - - return tile->color; -} - - -/** - * When a whole surface is being cleared to a value we can avoid - * fetching tiles above. - * Save the color and set a 'clearflag' for each tile of the screen. - */ -void -lp_tile_cache_clear(struct llvmpipe_tile_cache *tc, const float *rgba, - uint clearValue) -{ - struct pipe_transfer *pt = tc->transfer; - const unsigned w = pt->width; - const unsigned h = pt->height; - unsigned x, y, chan; - - for(chan = 0; chan < 4; ++chan) - tc->clear_color[chan] = float_to_ubyte(rgba[chan]); - - tc->clear_val = clearValue; - - /* push the tile to all positions marked as clear */ - for (y = 0; y < h; y += TILE_SIZE) { - for (x = 0; x < w; x += TILE_SIZE) { - struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - tile->status = LP_TILE_STATUS_CLEAR; - } - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h deleted file mode 100644 index 161bab3799..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h +++ /dev/null @@ -1,71 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef LP_TILE_CACHE_H -#define LP_TILE_CACHE_H - - -#include "pipe/p_compiler.h" -#include "lp_tile_soa.h" - - -struct llvmpipe_tile_cache; /* opaque */ - - -extern struct llvmpipe_tile_cache * -lp_create_tile_cache( struct pipe_screen *screen ); - -extern void -lp_destroy_tile_cache(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_set_surface(struct llvmpipe_tile_cache *tc, - struct pipe_surface *lps); - -extern struct pipe_surface * -lp_tile_cache_get_surface(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_map_transfers(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_unmap_transfers(struct llvmpipe_tile_cache *tc); - -extern void -lp_flush_tile_cache(struct llvmpipe_tile_cache *tc); - -extern void -lp_tile_cache_clear(struct llvmpipe_tile_cache *tc, const float *rgba, - uint clearValue); - -extern void * -lp_get_cached_tile(struct llvmpipe_tile_cache *tc, - unsigned x, unsigned y ); - - -#endif /* LP_TILE_CACHE_H */ - |