From c9b7bece0569d9e193591ebff329acd1d9bd9e3c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 22:58:26 +0100 Subject: llvmpipe: special case triangles which fall in a single 16x16 block Check for these and route them to a dedicated handler with one fewer levels of recursive rasterization. --- src/gallium/drivers/llvmpipe/lp_rast.h | 4 ++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 82 +++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 20 +++++++ 3 files changed, 106 insertions(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 44319a0ad6..102e902d02 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -256,5 +256,9 @@ void lp_rast_begin_query(struct lp_rasterizer_task *, void lp_rast_end_query(struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void +lp_rast_triangle_3_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg); + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 980c18c024..673f67386b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -157,3 +157,85 @@ build_mask_linear(int c, int dcdx, int dcdy) #define NR_PLANES 7 #include "lp_rast_tri_tmp.h" + +/* Special case for 3 plane triangle which is contained entirely + * within a 16x16 block. + */ +void +lp_rast_triangle_3_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = tri->plane; + unsigned mask = arg.triangle.plane_mask; + const int x = task->x + (mask & 0xf) * 16; + const int y = task->y + (mask >> 4) * 16; + unsigned outmask, inmask, partmask, partial_mask; + unsigned j; + int c[3]; + + outmask = 0; /* outside one or more trivial reject planes */ + partmask = 0; /* outside one or more trivial accept planes */ + + for (j = 0; j < 3; j++) { + c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; + + { + const int dcdx = -plane[j].dcdx * 4; + const int dcdy = plane[j].dcdy * 4; + const int cox = c[j] + plane[j].eo * 4; + const int cio = c[j] + plane[j].ei * 4 - 1; + + outmask |= build_mask_linear(cox, dcdx, dcdy); + partmask |= build_mask_linear(cio, dcdx, dcdy); + } + } + + if (outmask == 0xffff) + return; + + /* Mask of sub-blocks which are inside all trivial accept planes: + */ + inmask = ~partmask & 0xffff; + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = partmask & ~outmask; + + assert((partial_mask & inmask) == 0); + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; + int cx[3]; + + partial_mask &= ~(1 << i); + + for (j = 0; j < 3; j++) + cx[j] = (c[j] + - plane[j].dcdx * ix + + plane[j].dcdy * iy); + + do_block_4_3(task, tri, plane, px, py, cx); + } + + /* Iterate over fulls: + */ + while (inmask) { + int i = ffs(inmask) - 1; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; + + inmask &= ~(1 << i); + + block_full_4(task, tri, px, py); + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 393533ebee..614a6372b4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -643,6 +643,26 @@ do_triangle_ccw(struct lp_setup_context *setup, /* Convert to tile coordinates, and inclusive ranges: */ + if (nr_planes == 3) { + int ix0 = minx / 16; + int iy0 = miny / 16; + int ix1 = (maxx-1) / 16; + int iy1 = (maxy-1) / 16; + + if (iy0 == iy1 && ix0 == ix1) + { + + /* Triangle is contained in a single 16x16 block: + */ + int mask = (ix0 & 3) | ((iy0 & 3) << 4); + + lp_scene_bin_command( scene, ix0/4, iy0/4, + lp_rast_triangle_3_16, + lp_rast_arg_triangle(tri, mask) ); + return; + } + } + ix0 = minx / TILE_SIZE; iy0 = miny / TILE_SIZE; ix1 = (maxx-1) / TILE_SIZE; -- cgit v1.2.3 From ae0ef6f69f351cacdc7eaa9b21097a7c1b414e44 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 18 Aug 2010 17:28:08 +0200 Subject: gallium: make all checks for PIPE_TEXTURE_2D check for PIPE_TEXTURE_RECT too Searched for them with: git grep -E '[!=]=.*PIPE_TEXTURE_2D|PIPE_TEXTURE_2D.*[!=]=|case.*PIPE_TEXTURE_2D' Behavior hasn't been changed. --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 7 +++++-- src/gallium/auxiliary/util/u_blit.c | 3 ++- src/gallium/auxiliary/util/u_blitter.c | 3 +++ src/gallium/auxiliary/util/u_gen_mipmap.c | 1 + src/gallium/auxiliary/util/u_surfaces.h | 4 ++-- src/gallium/drivers/i915/i915_resource_texture.c | 5 ++++- src/gallium/drivers/i965/brw_resource_texture.c | 4 +++- src/gallium/drivers/llvmpipe/lp_screen.c | 1 + src/gallium/drivers/llvmpipe/lp_texture.c | 1 + src/gallium/drivers/nv50/nv50_miptree.c | 3 ++- src/gallium/drivers/nv50/nv50_tex.c | 1 + src/gallium/drivers/nvfx/nv30_fragtex.c | 1 + src/gallium/drivers/nvfx/nv40_fragtex.c | 1 + src/gallium/drivers/nvfx/nvfx_miptree.c | 3 ++- src/gallium/drivers/r300/r300_hyperz.c | 3 ++- src/gallium/drivers/r300/r300_texture.c | 6 ++++-- src/gallium/drivers/r300/r300_texture_desc.c | 6 ++++-- src/gallium/drivers/r600/r600_state.c | 1 + src/gallium/drivers/r600/r600_texture.c | 3 ++- src/gallium/drivers/softpipe/sp_screen.c | 1 + src/gallium/drivers/softpipe/sp_tex_sample.c | 2 ++ src/gallium/drivers/svga/svga_resource_texture.c | 3 ++- src/gallium/drivers/svga/svga_tgsi_emit.h | 1 + src/gallium/tests/python/tests/texture_blit.py | 2 +- src/mesa/state_tracker/st_cb_bitmap.c | 2 +- 25 files changed, 50 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 806c7d56a8..f6b6162f63 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -176,6 +176,7 @@ texture_dims(enum pipe_texture_target tex) case PIPE_TEXTURE_1D: return 1; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_CUBE: return 2; case PIPE_TEXTURE_3D: @@ -1749,7 +1750,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef unswizzled[4]; LLVMValueRef stride; - assert(bld->static_state->target == PIPE_TEXTURE_2D); + assert(bld->static_state->target == PIPE_TEXTURE_2D + || bld->static_state->target == PIPE_TEXTURE_RECT); assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR); assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE); @@ -2077,7 +2079,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, } else if (util_format_fits_8unorm(bld.format_desc) && bld.format_desc->nr_channels > 1 && - static_state->target == PIPE_TEXTURE_2D && + (static_state->target == PIPE_TEXTURE_2D || + static_state->target == PIPE_TEXTURE_RECT) && static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 97fa99ec65..30c7a96462 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -347,7 +347,8 @@ util_blit_pixels_writemask(struct blit_state *ctx, dst->face == srcsub.face && dst->level == srcsub.level && dst->zslice == srcZ0) || - src_tex->target != PIPE_TEXTURE_2D) + (src_tex->target != PIPE_TEXTURE_2D && + src_tex->target != PIPE_TEXTURE_RECT)) { struct pipe_resource texTemp; struct pipe_resource *tex; diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 49ee7bb31d..4b69a7fb6a 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -569,6 +569,8 @@ pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target) return TGSI_TEXTURE_1D; case PIPE_TEXTURE_2D: return TGSI_TEXTURE_2D; + case PIPE_TEXTURE_RECT: + return TGSI_TEXTURE_2D; case PIPE_TEXTURE_3D: return TGSI_TEXTURE_3D; case PIPE_TEXTURE_CUBE: @@ -807,6 +809,7 @@ void util_blitter_copy_region(struct blitter_context *blitter, /* Draw the quad with the draw_rectangle callback. */ case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: { /* Set texture coordinates. */ float coord[4]; diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index b7fe2d3003..6a931a9581 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1255,6 +1255,7 @@ fallback_gen_mipmap(struct gen_mipmap_state *ctx, make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel); break; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_CUBE: make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel); break; diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h index af978c7057..46f3ec5d7d 100644 --- a/src/gallium/auxiliary/util/u_surfaces.h +++ b/src/gallium/auxiliary/util/u_surfaces.h @@ -22,7 +22,7 @@ struct pipe_surface *util_surfaces_do_get(struct util_surfaces *us, unsigned sur static INLINE struct pipe_surface * util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { - if(likely(pt->target == PIPE_TEXTURE_2D && us->u.array)) + if(likely((pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT) && us->u.array)) { struct pipe_surface *ps = us->u.array[level]; if(ps) @@ -52,7 +52,7 @@ void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps); static INLINE void util_surfaces_detach(struct util_surfaces *us, struct pipe_surface *ps) { - if(likely(ps->texture->target == PIPE_TEXTURE_2D)) + if(likely(ps->texture->target == PIPE_TEXTURE_2D || ps->texture->target == PIPE_TEXTURE_RECT)) { us->u.array[ps->level] = 0; return; diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index 752ddaae7b..c5c6179b16 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -360,6 +360,7 @@ i915_texture_layout(struct i915_texture * tex) switch (pt->target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: if (!i9x5_special_layout(tex)) i915_texture_layout_2d(tex); break; @@ -605,6 +606,7 @@ i945_texture_layout(struct i915_texture * tex) switch (pt->target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: if (!i9x5_special_layout(tex)) i945_texture_layout_2d(tex); break; @@ -829,7 +831,8 @@ i915_texture_from_handle(struct pipe_screen * screen, buffer = iws->buffer_from_handle(iws, whandle, &stride); /* Only supports one type */ - if (template->target != PIPE_TEXTURE_2D || + if ((template->target != PIPE_TEXTURE_2D && + template->target != PIPE_TEXTURE_RECT) || template->last_level != 0 || template->depth0 != 1) { return NULL; diff --git a/src/gallium/drivers/i965/brw_resource_texture.c b/src/gallium/drivers/i965/brw_resource_texture.c index ffd0f38672..3860d18a7a 100644 --- a/src/gallium/drivers/i965/brw_resource_texture.c +++ b/src/gallium/drivers/i965/brw_resource_texture.c @@ -66,6 +66,7 @@ static GLuint translate_tex_target( unsigned target ) return BRW_SURFACE_1D; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: return BRW_SURFACE_2D; case PIPE_TEXTURE_3D: @@ -498,7 +499,8 @@ brw_texture_from_handle(struct pipe_screen *screen, unsigned pitch; GLuint format; - if (template->target != PIPE_TEXTURE_2D || + if ((template->target != PIPE_TEXTURE_2D + && template->target != PIPE_TEXTURE_RECT) || template->last_level != 0 || template->depth0 != 1) return NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 167cb2ee2e..6968cda629 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -230,6 +230,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_RECT || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 25112c10a6..ff4773fd7c 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -67,6 +67,7 @@ resource_is_texture(const struct pipe_resource *resource) return FALSE; case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: return TRUE; diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index b7cd92158f..c0f5cc10dd 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -235,7 +235,8 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen, unsigned stride; /* Only supports 2D, non-mipmapped textures for the moment */ - if (template->target != PIPE_TEXTURE_2D || + if ((template->target != PIPE_TEXTURE_2D && + template->target != PIPE_TEXTURE_RECT) || template->last_level != 0 || template->depth0 != 1) return NULL; diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 5ea0c1d726..4db53f7ec2 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -131,6 +131,7 @@ nv50_tex_construct(struct nv50_sampler_view *view) tic[2] |= NV50TIC_0_2_TARGET_1D; break; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: tic[2] |= NV50TIC_0_2_TARGET_2D; break; case PIPE_TEXTURE_3D: diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c index dec073ac90..0cd70ca104 100644 --- a/src/gallium/drivers/nvfx/nv30_fragtex.c +++ b/src/gallium/drivers/nvfx/nv30_fragtex.c @@ -116,6 +116,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit) txf |= NV34TCL_TX_FORMAT_CUBIC; /* fall-through */ case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: txf |= NV34TCL_TX_FORMAT_DIMS_2D; break; case PIPE_TEXTURE_3D: diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c index 0068b1ba54..0d3e90dcb0 100644 --- a/src/gallium/drivers/nvfx/nv40_fragtex.c +++ b/src/gallium/drivers/nvfx/nv40_fragtex.c @@ -135,6 +135,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit) txf |= NV34TCL_TX_FORMAT_CUBIC; /* fall-through */ case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: txf |= NV34TCL_TX_FORMAT_DIMS_2D; break; case PIPE_TEXTURE_3D: diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index b5639bb464..1fec1ffa42 100644 --- a/src/gallium/drivers/nvfx/nvfx_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -205,7 +205,8 @@ nvfx_miptree_from_handle(struct pipe_screen *pscreen, unsigned stride; /* Only supports 2D, non-mipmapped textures for the moment */ - if (template->target != PIPE_TEXTURE_2D || + if ((template->target != PIPE_TEXTURE_2D && + template->target != PIPE_TEXTURE_RECT) || template->last_level != 0 || template->depth0 != 1) return NULL; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index b2526d6e41..eb5b0c36f8 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -354,7 +354,8 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf /* We currently don't handle decompression for 3D textures and cubemaps * correctly. */ if (tex->desc.b.b.target != PIPE_TEXTURE_1D && - tex->desc.b.b.target != PIPE_TEXTURE_2D) + tex->desc.b.b.target != PIPE_TEXTURE_2D && + tex->desc.b.b.target != PIPE_TEXTURE_RECT) return; /* Cannot flush zmask of 16-bit zbuffers. */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index da8eadd3b5..852acdd462 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -754,7 +754,8 @@ struct pipe_resource *r300_texture_create(struct pipe_screen *screen, /* Refuse to create a texture with size 0. */ if (!base->width0 || (!base->height0 && (base->target == PIPE_TEXTURE_2D || - base->target == PIPE_TEXTURE_CUBE)) || + base->target == PIPE_TEXTURE_CUBE || + base->target == PIPE_TEXTURE_RECT)) || (!base->depth0 && base->target == PIPE_TEXTURE_3D)) { fprintf(stderr, "r300: texture_create: " "Got invalid texture dimensions: %ix%ix%i\n", @@ -787,7 +788,8 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, unsigned stride, size; /* Support only 2D textures without mipmaps */ - if (base->target != PIPE_TEXTURE_2D || + if ((base->target != PIPE_TEXTURE_2D && + base->target != PIPE_TEXTURE_RECT) || base->depth0 != 1 || base->last_level != 0) { return NULL; diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 5d690e8c33..2fe5d72188 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -184,7 +184,8 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, /* This is needed for the kernel checker, unfortunately. */ if ((desc->b.b.target != PIPE_TEXTURE_1D && - desc->b.b.target != PIPE_TEXTURE_2D) || + desc->b.b.target != PIPE_TEXTURE_2D && + desc->b.b.target != PIPE_TEXTURE_RECT) || desc->b.b.last_level != 0) { height = util_next_power_of_two(height); } @@ -202,7 +203,8 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, * Do so for 3 or more macrotiles in the Y direction. */ if (level == 0 && desc->b.b.last_level == 0 && (desc->b.b.target == PIPE_TEXTURE_1D || - desc->b.b.target == PIPE_TEXTURE_2D) && + desc->b.b.target == PIPE_TEXTURE_2D || + desc->b.b.target == PIPE_TEXTURE_RECT) && height >= tile_height * 3) { height = align(height, tile_height * 2); } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index f78d1671ba..7d2b61f9b0 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1199,6 +1199,7 @@ static inline unsigned r600_tex_dim(unsigned dim) case PIPE_TEXTURE_1D: return V_038000_SQ_TEX_DIM_1D; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: return V_038000_SQ_TEX_DIM_2D; case PIPE_TEXTURE_3D: return V_038000_SQ_TEX_DIM_3D; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index eabd7f7705..8a6b5f8764 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -170,7 +170,8 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, } /* Support only 2D textures without mipmaps */ - if (templ->target != PIPE_TEXTURE_2D || templ->depth0 != 1 || templ->last_level != 0) + if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || + templ->depth0 != 1 || templ->last_level != 0) return NULL; rtex = CALLOC_STRUCT(r600_resource_texture); diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 93af6ee5b0..73ae2dea56 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -199,6 +199,7 @@ softpipe_is_format_supported( struct pipe_screen *screen, assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_RECT || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index cf7ab81405..e654bb77c2 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1785,6 +1785,7 @@ get_lambda_func(const union sp_sampler_key key) case PIPE_TEXTURE_1D: return compute_lambda_1d; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_CUBE: return compute_lambda_2d; case PIPE_TEXTURE_3D: @@ -1809,6 +1810,7 @@ get_img_filter(const union sp_sampler_key key, return img_filter_1d_linear; break; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: /* Try for fast path: */ if (key.bits.is_pot && diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index ff83c750aa..26eb03a895 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -583,7 +583,8 @@ svga_texture_from_handle(struct pipe_screen *screen, assert(screen); /* Only supports one type */ - if (template->target != PIPE_TEXTURE_2D || + if ((template->target != PIPE_TEXTURE_2D && + template->target != PIPE_TEXTURE_RECT) || template->last_level != 0 || template->depth0 != 1) { return NULL; diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index 48eced2ece..b4e90a957d 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -353,6 +353,7 @@ static INLINE ubyte svga_tgsi_sampler_type( struct svga_shader_emitter *emit, case PIPE_TEXTURE_1D: return SVGA3DSAMP_2D; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: return SVGA3DSAMP_2D; case PIPE_TEXTURE_3D: return SVGA3DSAMP_VOLUME; diff --git a/src/gallium/tests/python/tests/texture_blit.py b/src/gallium/tests/python/tests/texture_blit.py index 58706dab93..089d05c623 100755 --- a/src/gallium/tests/python/tests/texture_blit.py +++ b/src/gallium/tests/python/tests/texture_blit.py @@ -55,7 +55,7 @@ def tex_coords(texture, face, level, zslice): [0.0, 1.0], ] - if texture.target == PIPE_TEXTURE_2D: + if texture.target == PIPE_TEXTURE_2D or texture.target == PIPE_TEXTURE_RECT: return [[s, t, 0.0] for s, t in st] elif texture.target == PIPE_TEXTURE_3D: depth = texture.get_depth(level) diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 0b8ecd27cb..91037ab223 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -761,7 +761,7 @@ st_Bitmap(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, if (pt) { struct pipe_sampler_view *sv = st_create_texture_sampler_view(st->pipe, pt); - assert(pt->target == PIPE_TEXTURE_2D); + assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT); if (sv) { draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2], -- cgit v1.2.3 From 0ad82b8d28cbb8d7224bf96c43c80fed45321597 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 22 Aug 2010 11:43:01 +0100 Subject: llvmpipe: don't clear unused bins If bins outside the current scene bounds are being corrupted, we'll need to fix that separately. Currently seems ok though. --- src/gallium/drivers/llvmpipe/lp_scene.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index f88a759fe7..15a09b7100 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -163,12 +163,15 @@ lp_scene_reset(struct lp_scene *scene ) /* Free all but last binner command lists: */ - for (i = 0; i < TILES_X; i++) { - for (j = 0; j < TILES_Y; j++) { + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { lp_scene_bin_reset(scene, i, j); } } + /* If there are any bins which weren't cleared by the loop above, + * they will be caught (on debug builds at least) by this assert: + */ assert(lp_scene_is_empty(scene)); /* Free all but last binned data block: -- cgit v1.2.3 From 49a2ea082b16732959b1e8e793d9c3293d8332c0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 22 Aug 2010 12:16:45 +0100 Subject: llvmpipe: remove unused member from lp_fragment_shader_variant_key --- src/gallium/drivers/llvmpipe/lp_state_fs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 37900fc544..78c5b1aee2 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -56,10 +56,6 @@ struct lp_fragment_shader_variant_key unsigned flatshade:1; unsigned occlusion_count:1; - struct { - ubyte colormask; - } cbuf_blend[PIPE_MAX_COLOR_BUFS]; - struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; -- cgit v1.2.3 From 3d4b60f1f7be3dc54951c9c414601062e73ca674 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 22 Aug 2010 12:31:18 +0100 Subject: llvmpipe: reduce size of fragment shader variant key Don't spend as much time comparing them. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 26 ++++++++++++++++++++------ src/gallium/drivers/llvmpipe/lp_state_fs.h | 2 ++ 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index dbca49a2ef..35ef63389c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -808,7 +808,7 @@ generate_variant(struct llvmpipe_context *lp, variant->list_item_local.base = variant; variant->no = shader->variants_created++; - memcpy(&variant->key, key, sizeof *key); + memcpy(&variant->key, key, shader->variant_key_size); if (gallivm_debug & GALLIVM_DEBUG_IR) { debug_printf("llvmpipe: Creating fragment shader #%u variant #%u:\n", @@ -840,6 +840,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { struct lp_fragment_shader *shader; + int nr_samplers; shader = CALLOC_STRUCT(lp_fragment_shader); if (!shader) @@ -854,6 +855,11 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); + nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + + shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, + sampler[nr_samplers]); + if (LP_DEBUG & DEBUG_TGSI) { unsigned attrib; debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); @@ -1027,7 +1033,7 @@ make_variant_key(struct llvmpipe_context *lp, { unsigned i; - memset(key, 0, sizeof *key); + memset(key, 0, shader->variant_key_size); if (lp->framebuffer.zsbuf) { if (lp->depth_stencil->depth.enabled) { @@ -1097,9 +1103,17 @@ make_variant_key(struct llvmpipe_context *lp, } } - for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) - if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) - lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]); + /* This value will be the same for all the variants of a given shader: + */ + key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + + for(i = 0; i < key->nr_samplers; ++i) { + if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + lp_sampler_static_state(&key->sampler[i], + lp->fragment_sampler_views[i], + lp->sampler[i]); + } + } } /** @@ -1118,7 +1132,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { - if(memcmp(&li->base->key, &key, sizeof key) == 0) { + if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) { variant = li->base; break; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 78c5b1aee2..33c480010d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -53,6 +53,7 @@ struct lp_fragment_shader_variant_key struct pipe_blend_state blend; enum pipe_format zsbuf_format; unsigned nr_cbufs:8; + unsigned nr_samplers:8; /* actually derivable from just the shader */ unsigned flatshade:1; unsigned occlusion_count:1; @@ -93,6 +94,7 @@ struct lp_fragment_shader struct lp_fs_variant_list_item variants; /* For debugging/profiling purposes */ + unsigned variant_key_size; unsigned no; unsigned variants_created; unsigned variants_cached; -- cgit v1.2.3 From 22f6026324f63c142925244ff575fefc29a90389 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 25 Aug 2010 15:11:03 +0800 Subject: gallium: Use draw_set_index_buffer and others. Update all drivers to use draw_set_index_buffer, draw_set_mapped_index_buffer, and draw_vbo. Remove draw_set_mapped_element_buffer and draw_set_mapped_element_buffer_range. --- src/gallium/auxiliary/draw/draw_context.c | 42 ---------------------- src/gallium/auxiliary/draw/draw_context.h | 13 ------- src/gallium/auxiliary/draw/draw_pt.c | 8 ++--- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 15 +++----- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 2 +- src/gallium/drivers/i915/i915_context.c | 22 ++++-------- src/gallium/drivers/i915/i915_state.c | 3 +- src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 18 +++------- src/gallium/drivers/llvmpipe/lp_state_vertex.c | 2 +- src/gallium/drivers/nvfx/nvfx_draw.c | 12 +++---- src/gallium/drivers/nvfx/nvfx_state_emit.c | 3 ++ src/gallium/drivers/r300/r300_render.c | 14 ++------ src/gallium/drivers/r300/r300_state.c | 7 +++- src/gallium/drivers/softpipe/sp_draw_arrays.c | 26 ++++---------- src/gallium/drivers/softpipe/sp_state_vertex.c | 2 +- src/gallium/drivers/svga/svga_swtnl_draw.c | 17 ++++----- src/mesa/state_tracker/st_draw_feedback.c | 46 +++++++++++++----------- 17 files changed, 80 insertions(+), 172 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index c2b7a441bd..b39b835f05 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -518,48 +518,6 @@ draw_set_mapped_index_buffer(struct draw_context *draw, } -/** - * Tell the drawing context about the index/element buffer to use - * (ala glDrawElements) - * If no element buffer is to be used (i.e. glDrawArrays) then this - * should be called with eltSize=0 and elements=NULL. - * - * \param draw the drawing context - * \param eltSize size of each element (1, 2 or 4 bytes) - * \param elements the element buffer ptr - */ -void -draw_set_mapped_element_buffer_range( struct draw_context *draw, - unsigned eltSize, - int eltBias, - unsigned min_index, - unsigned max_index, - const void *elements ) -{ - struct pipe_index_buffer ib; - - memset(&ib, 0, sizeof(ib)); - ib.index_size = eltSize; - draw_set_index_buffer(draw, &ib); - - draw->pt.user.elts = elements; - draw->pt.user.eltBias = eltBias; - draw->pt.user.min_index = min_index; - draw->pt.user.max_index = max_index; -} - - -void -draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, - int eltBias, - const void *elements ) -{ - draw_set_mapped_element_buffer_range(draw, - eltSize, eltBias, 0, 0xffffffff, elements); -} - - /* Revamp me please: */ void draw_do_flush( struct draw_context *draw, unsigned flags ) diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index e9f3237dda..ea55320c42 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -166,19 +166,6 @@ void draw_set_index_buffer(struct draw_context *draw, void draw_set_mapped_index_buffer(struct draw_context *draw, const void *elements); -void -draw_set_mapped_element_buffer_range( struct draw_context *draw, - unsigned eltSize, - int eltBias, - unsigned min_index, - unsigned max_index, - const void *elements ); - -void draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, - int eltBias, - const void *elements ); - void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 8db0d73662..f81714d6b4 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -299,7 +299,6 @@ draw_arrays(struct draw_context *draw, unsigned prim, /** * Instanced drawing. - * draw_set_mapped_element_buffer must be called before calling this function. * \sa draw_vbo */ void @@ -321,9 +320,10 @@ draw_arrays_instanced(struct draw_context *draw, info.instance_count = instanceCount; info.indexed = (draw->pt.user.elts != NULL); - info.index_bias = draw->pt.user.eltBias; - info.min_index = draw->pt.user.min_index; - info.max_index = draw->pt.user.max_index; + if (!info.indexed) { + info.min_index = start; + info.max_index = start + count - 1; + } draw_vbo(draw, &info); } diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 4adef5b8c0..a367fa3fe1 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -78,20 +78,13 @@ cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ - if (info->indexed && cell->index_buffer.buffer) { + if (info->indexed && cell->index_buffer.buffer) mapped_indices = cell_resource(cell->index_buffer.buffer)->data; - mapped_indices += cell->index_buffer.offset; - } - draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? - lp->index_buffer.index_size : 0, - info->index_bias, - info->min_index, - info->max_index, - mapped_indices); + draw_set_mapped_index_buffer(draw, mapped_indices); /* draw! */ - draw_arrays(draw, info->mode, info->start, info->count); + draw_vbo(draw, info); /* * unmap vertex/index buffers - will cause draw module to flush @@ -100,7 +93,7 @@ cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) draw_set_mapped_vertex_buffer(draw, i, NULL); } if (mapped_indices) { - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + draw_set_mapped_index_buffer(draw, NULL); } /* diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 4e3701cd0a..a065d68b5a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -102,7 +102,7 @@ cell_set_index_buffer(struct pipe_context *pipe, else memset(&cell->index_buffer, 0, sizeof(cell->index_buffer)); - /* TODO make this more like a state */ + draw_set_index_buffer(cell->draw, ib); } diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 2beb9e3091..847dd6dd47 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -66,18 +66,9 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) /* * Map index buffer, if present */ - if (info->indexed && i915->index_buffer.buffer) { - char *indices = (char *) i915_buffer(i915->index_buffer.buffer)->data; - mapped_indices = (void *) (indices + i915->index_buffer.offset); - } - - draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? - i915->index_buffer.index_size : 0, - info->index_bias, - info->min_index, - info->max_index, - mapped_indices); - + if (info->indexed && i915->index_buffer.buffer) + mapped_indices = i915_buffer(i915->index_buffer.buffer)->data; + draw_set_mapped_index_buffer(draw, mapped_indices); draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, i915->current.constants[PIPE_SHADER_VERTEX], @@ -87,7 +78,7 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) /* * Do the drawing */ - draw_arrays(i915->draw, info->mode, info->start, info->count); + draw_vbo(i915->draw, info); /* * unmap vertex/index buffers @@ -96,9 +87,8 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (mapped_indices) { - draw_set_mapped_element_buffer(draw, 0, 0, NULL); - } + if (mapped_indices) + draw_set_mapped_index_buffer(draw, NULL); } diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 8c53b06931..bbfcff6bc4 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -817,7 +817,8 @@ static void i915_set_index_buffer(struct pipe_context *pipe, else memset(&i915->index_buffer, 0, sizeof(i915->index_buffer)); - /* TODO make this more like a state */ + /* pass-through to draw module */ + draw_set_index_buffer(i915->draw, ib); } static void diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index e73b431cb4..3af5c8d5c5 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -68,25 +68,17 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } /* Map index buffer, if present */ - if (info->indexed && lp->index_buffer.buffer) { - char *indices = (char *) llvmpipe_resource_data(lp->index_buffer.buffer); - mapped_indices = (void *) (indices + lp->index_buffer.offset); - } + if (info->indexed && lp->index_buffer.buffer) + mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer); - draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? - lp->index_buffer.index_size : 0, - info->index_bias, - info->min_index, - info->max_index, - mapped_indices); + draw_set_mapped_index_buffer(draw, mapped_indices); llvmpipe_prepare_vertex_sampling(lp, lp->num_vertex_sampler_views, lp->vertex_sampler_views); /* draw! */ - draw_arrays_instanced(draw, info->mode, info->start, info->count, - info->start_instance, info->instance_count); + draw_vbo(draw, info); /* * unmap vertex/index buffers @@ -95,7 +87,7 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) draw_set_mapped_vertex_buffer(draw, i, NULL); } if (mapped_indices) { - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + draw_set_mapped_index_buffer(draw, NULL); } llvmpipe_cleanup_vertex_sampling(lp); diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index d86e66b4fb..fb29423dd3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -100,7 +100,7 @@ llvmpipe_set_index_buffer(struct pipe_context *pipe, else memset(&llvmpipe->index_buffer, 0, sizeof(llvmpipe->index_buffer)); - /* TODO make this more like a state */ + draw_set_index_buffer(llvmpipe->draw, ib); } void diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 0b17921295..2601d5b8e2 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -239,12 +239,10 @@ nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info draw_set_mapped_vertex_buffer(nvfx->draw, i, map); } - if (info->indexed) { - map = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset; - draw_set_mapped_element_buffer_range(nvfx->draw, nvfx->idxbuf.index_size, info->index_bias, info->min_index, info->max_index, map); - } else { - draw_set_mapped_element_buffer(nvfx->draw, 0, 0, NULL); - } + map = NULL; + if (info->indexed && nvfx->idxbuf.buffer) + map = nvfx_buffer(nvfx->idxbuf.buffer)->data; + draw_set_mapped_index_buffer(nvfx->draw, map); if (nvfx->constbuf[PIPE_SHADER_VERTEX]) { const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX]; @@ -254,7 +252,7 @@ nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info map, nr); } - draw_arrays_instanced(nvfx->draw, info->mode, info->start, info->count, info->start_instance, info->instance_count); + draw_vbo(nvfx->draw, info); draw_flush(nvfx->draw); } diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index cfcb0f7ef6..390bca8cdb 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -335,6 +335,9 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx) draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe); } + if (nvfx->draw_dirty & NVFX_NEW_INDEX) + draw_set_index_buffer(draw, &nvfx->idxbuf); + nvfx_state_validate_common(nvfx); nvfx->draw_dirty = 0; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index e08335a105..20bad2c56f 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -680,18 +680,11 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, if (info->indexed && r300->index_buffer.buffer) { indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, PIPE_TRANSFER_READ, &ib_transfer); - if (indices) - indices = (void *) ((char *) indices + r300->index_buffer.offset); } - draw_set_mapped_element_buffer_range(r300->draw, (indices) ? - r300->index_buffer.index_size : 0, - info->index_bias, - info->min_index, - info->max_index, - indices); + draw_set_mapped_index_buffer(r300->draw, indices); - draw_arrays(r300->draw, info->mode, info->start, count); + draw_vbo(r300->draw, info); /* XXX Not sure whether this is the best fix. * It prevents CS from being rejected and weird assertion failures. */ @@ -707,8 +700,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, if (ib_transfer) { pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer); - draw_set_mapped_element_buffer_range(r300->draw, 0, 0, info->start, - info->start + count - 1, NULL); + draw_set_mapped_index_buffer(r300->draw, NULL); } } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 47e359cd5f..5c225e24f9 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1556,7 +1556,12 @@ static void r300_set_index_buffer(struct pipe_context* pipe, memset(&r300->index_buffer, 0, sizeof(r300->index_buffer)); } - /* TODO make this more like a state */ + if (r300->screen->caps.has_tcl) { + /* TODO make this more like a state */ + } + else { + draw_set_index_buffer(r300->draw, ib); + } } /* Initialize the PSC tables. */ diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 386c8acb8c..01b4ca985d 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -75,14 +75,10 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode) buf = (void*)((int32_t*)buf + offset); draw_set_mapped_vertex_buffer(draw, 0, buf); - draw_set_mapped_element_buffer_range(draw, - 0, 0, - start, - start + count - 1, - NULL); + draw_set_mapped_index_buffer(draw, NULL); /* draw! */ - draw_arrays_instanced(draw, mode, start, count, 0, 1); + draw_arrays(draw, mode, start, count); /* unmap vertex/index buffers - will cause draw module to flush */ draw_set_mapped_vertex_buffer(draw, 0, NULL); @@ -138,28 +134,20 @@ softpipe_draw_vbo(struct pipe_context *pipe, } /* Map index buffer, if present */ - if (info->indexed && sp->index_buffer.buffer) { - char *indices = (char *) softpipe_resource(sp->index_buffer.buffer)->data; - mapped_indices = (void *) (indices + sp->index_buffer.offset); - } + if (info->indexed && sp->index_buffer.buffer) + mapped_indices = softpipe_resource(sp->index_buffer.buffer)->data; - draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? - sp->index_buffer.index_size : 0, - info->index_bias, - info->min_index, - info->max_index, - mapped_indices); + draw_set_mapped_index_buffer(draw, mapped_indices); /* draw! */ - draw_arrays_instanced(draw, info->mode, info->start, info->count, - info->start_instance, info->instance_count); + draw_vbo(draw, info); /* unmap vertex/index buffers - will cause draw module to flush */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); } if (mapped_indices) { - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + draw_set_mapped_index_buffer(draw, NULL); } /* diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 880a7c7cd2..b650fcaea5 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -100,5 +100,5 @@ softpipe_set_index_buffer(struct pipe_context *pipe, else memset(&softpipe->index_buffer, 0, sizeof(softpipe->index_buffer)); - /* TODO make this more like a state */ + draw_set_index_buffer(softpipe->draw, ib); } diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 4f83822b5c..e9eba3b422 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -71,22 +71,17 @@ svga_swtnl_draw_vbo(struct svga_context *svga, draw_set_mapped_vertex_buffer(draw, i, map); } + /* TODO move this to update_swtnl_draw */ + draw_set_index_buffer(draw, &svga->curr.ib); + /* Map index buffer, if present */ map = NULL; if (info->indexed && svga->curr.ib.buffer) { map = pipe_buffer_map(&svga->pipe, svga->curr.ib.buffer, PIPE_TRANSFER_READ, &ib_transfer); - if (map) - map = (const void *) ((const char *) map + svga->curr.ib.offset); } - - draw_set_mapped_element_buffer_range(draw, (map) ? - svga->curr.ib.index_size : 0, - info->index_bias, - info->min_index, - info->max_index, - map); + draw_set_mapped_index_buffer(draw, map); if (svga->curr.cb[PIPE_SHADER_VERTEX]) { map = pipe_buffer_map(&svga->pipe, @@ -100,7 +95,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga, svga->curr.cb[PIPE_SHADER_VERTEX]->width0); } - draw_arrays(draw, info->mode, info->start, info->count); + draw_vbo(draw, info); draw_flush(svga->swtnl.draw); @@ -118,7 +113,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga, if (ib_transfer) { pipe_buffer_unmap(&svga->pipe, svga->curr.ib.buffer, ib_transfer); - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + draw_set_mapped_index_buffer(draw, NULL); } if (svga->curr.cb[PIPE_SHADER_VERTEX]) { diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index 5cf2666334..e0995f8318 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -40,6 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_draw.h" #include "draw/draw_private.h" #include "draw/draw_context.h" @@ -104,14 +105,15 @@ st_feedback_draw_vbo(GLcontext *ctx, struct draw_context *draw = st->draw; const struct st_vertex_program *vp; const struct pipe_shader_state *vs; - struct pipe_resource *index_buffer_handle = 0; struct pipe_vertex_buffer vbuffers[PIPE_MAX_SHADER_INPUTS]; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer ibuffer; struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; struct pipe_transfer *ib_transfer = NULL; struct pipe_transfer *cb_transfer; GLuint attr, i; ubyte *mapped_constants; + const void *mapped_indices = NULL; assert(draw); @@ -204,17 +206,19 @@ st_feedback_draw_vbo(GLcontext *ctx, draw_set_vertex_buffers(draw, vp->num_inputs, vbuffers); draw_set_vertex_elements(draw, vp->num_inputs, velements); + memset(&ibuffer, 0, sizeof(ibuffer)); if (ib) { struct gl_buffer_object *bufobj = ib->obj; - unsigned indexSize; - void *map; switch (ib->type) { case GL_UNSIGNED_INT: - indexSize = 4; + ibuffer.index_size = 4; break; case GL_UNSIGNED_SHORT: - indexSize = 2; + ibuffer.index_size = 2; + break; + case GL_UNSIGNED_BYTE: + ibuffer.index_size = 1; break; default: assert(0); @@ -224,23 +228,20 @@ st_feedback_draw_vbo(GLcontext *ctx, if (bufobj && bufobj->Name) { struct st_buffer_object *stobj = st_buffer_object(bufobj); - index_buffer_handle = stobj->buffer; - - map = pipe_buffer_map(pipe, index_buffer_handle, - PIPE_TRANSFER_READ, &ib_transfer); + pipe_resource_reference(&ibuffer.buffer, stobj->buffer); + ibuffer.offset = pointer_to_offset(ib->ptr); - draw_set_mapped_element_buffer(draw, indexSize, 0, map); + mapped_indices = pipe_buffer_map(pipe, stobj->buffer, + PIPE_TRANSFER_READ, &ib_transfer); } else { - draw_set_mapped_element_buffer(draw, indexSize, 0, (void *) ib->ptr); - ib_transfer = NULL; + /* skip setting ibuffer.buffer as the draw module does not use it */ + mapped_indices = ib->ptr; } - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer(draw, 0, 0, NULL); - } + draw_set_index_buffer(draw, &ibuffer); + draw_set_mapped_index_buffer(draw, mapped_indices); + } /* map constant buffers */ mapped_constants = pipe_buffer_map(pipe, @@ -273,9 +274,14 @@ st_feedback_draw_vbo(GLcontext *ctx, draw_set_mapped_vertex_buffer(draw, i, NULL); } } - if (index_buffer_handle) { - pipe_buffer_unmap(pipe, index_buffer_handle, ib_transfer); - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + + if (ib) { + draw_set_mapped_index_buffer(draw, NULL); + draw_set_index_buffer(draw, NULL); + + if (ib_transfer) + pipe_buffer_unmap(pipe, ibuffer.buffer, ib_transfer); + pipe_resource_reference(&ibuffer.buffer, NULL); } } -- cgit v1.2.3 From f77daaa88290388ec817e1a4756405fddce1e696 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 20 Aug 2010 11:36:53 +0100 Subject: llvmpipe: remove dead code --- src/gallium/drivers/llvmpipe/lp_rast.c | 37 ---------------------------------- 1 file changed, 37 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 3215d0f652..b1c306bbe9 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -316,43 +316,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, } -/** - * Load tile color from the framebuffer surface. - * This is a bin command called during bin processing. - */ -#if 0 -void -lp_rast_load_color(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) -{ - struct lp_rasterizer *rast = task->rast; - unsigned buf; - enum lp_texture_usage usage; - - LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); - - if (scene->has_color_clear) - usage = LP_TEX_USAGE_WRITE_ALL; - else - usage = LP_TEX_USAGE_READ_WRITE; - - /* Get pointers to color tile(s). - * This will convert linear data to tiled if needed. - */ - for (buf = 0; buf < rast->state.nr_cbufs; buf++) { - struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; - struct llvmpipe_texture *lpt; - assert(cbuf); - lpt = llvmpipe_texture(cbuf->texture); - task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, - cbuf->face + cbuf->zslice, - cbuf->level, - usage, - task->x, task->y); - assert(task->color_tiles[buf]); - } -} -#endif /** -- cgit v1.2.3 From b6e03eafe3311142445ca42c1574d3f6998eecc3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 20 Aug 2010 15:14:19 +0100 Subject: llvmpipe: fence debugging, add llvmpipe_finish --- src/gallium/drivers/llvmpipe/lp_context.c | 10 +++++++- src/gallium/drivers/llvmpipe/lp_debug.h | 2 ++ src/gallium/drivers/llvmpipe/lp_fence.c | 16 +++++++++++-- src/gallium/drivers/llvmpipe/lp_fence.h | 1 + src/gallium/drivers/llvmpipe/lp_flush.c | 38 ++++++++++++++---------------- src/gallium/drivers/llvmpipe/lp_flush.h | 13 +++++++--- src/gallium/drivers/llvmpipe/lp_query.c | 25 ++++---------------- src/gallium/drivers/llvmpipe/lp_screen.c | 2 ++ src/gallium/drivers/llvmpipe/lp_setup.c | 5 ++-- src/gallium/drivers/llvmpipe/lp_setup.h | 3 ++- src/gallium/drivers/llvmpipe/lp_state_fs.c | 15 ++---------- src/gallium/drivers/llvmpipe/lp_surface.c | 6 +++-- src/gallium/drivers/llvmpipe/lp_texture.c | 3 ++- 13 files changed, 74 insertions(+), 65 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 7543bd7b2b..086a2d5898 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -85,6 +85,14 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) align_free( llvmpipe ); } +static void +do_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence) +{ + llvmpipe_flush(pipe, flags, fence, __FUNCTION__); +} + struct pipe_context * llvmpipe_create_context( struct pipe_screen *screen, void *priv ) @@ -109,7 +117,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.destroy = llvmpipe_destroy; llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; llvmpipe->pipe.clear = llvmpipe_clear; - llvmpipe->pipe.flush = llvmpipe_flush; + llvmpipe->pipe.flush = do_flush; llvmpipe_init_blend_funcs(llvmpipe); llvmpipe_init_clip_funcs(llvmpipe); diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index 92fb2b3ee5..a928ee38be 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -46,6 +46,8 @@ st_print_current(void); #define DEBUG_SHOW_TILES 0x200 #define DEBUG_SHOW_SUBTILES 0x400 #define DEBUG_COUNTERS 0x800 +#define DEBUG_SCENE 0x1000 +#define DEBUG_FENCE 0x2000 #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c index f9805e5d68..4d549b0750 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -44,6 +44,7 @@ struct lp_fence * lp_fence_create(unsigned rank) { + static int fence_id; struct lp_fence *fence = CALLOC_STRUCT(lp_fence); pipe_reference_init(&fence->reference, 1); @@ -51,8 +52,12 @@ lp_fence_create(unsigned rank) pipe_mutex_init(fence->mutex); pipe_condvar_init(fence->signalled); + fence->id = fence_id++; fence->rank = rank; + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, fence->id); + return fence; } @@ -61,6 +66,9 @@ lp_fence_create(unsigned rank) void lp_fence_destroy(struct lp_fence *fence) { + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, fence->id); + pipe_mutex_destroy(fence->mutex); pipe_condvar_destroy(fence->signalled); FREE(fence); @@ -126,13 +134,17 @@ llvmpipe_fence_finish(struct pipe_screen *screen, void lp_fence_signal(struct lp_fence *fence) { + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, fence->id); + pipe_mutex_lock(fence->mutex); fence->count++; assert(fence->count <= fence->rank); - LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__, - fence->count, fence->rank); + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s count=%u rank=%u\n", __FUNCTION__, + fence->count, fence->rank); pipe_condvar_signal(fence->signalled); diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h index 13358fb99f..b80af5c654 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.h +++ b/src/gallium/drivers/llvmpipe/lp_fence.h @@ -41,6 +41,7 @@ struct pipe_screen; struct lp_fence { struct pipe_reference reference; + unsigned id; pipe_mutex mutex; pipe_condvar signalled; diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 845292f4ab..040fe0ba9a 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -45,14 +45,15 @@ void llvmpipe_flush( struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence ) + struct pipe_fence_handle **fence, + const char *reason) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); draw_flush(llvmpipe->draw); /* ask the setup module to flush */ - lp_setup_flush(llvmpipe->setup, flags, fence); + lp_setup_flush(llvmpipe->setup, flags, fence, reason); /* Enable to dump BMPs of the color/depth buffers each frame */ if (0) { @@ -76,6 +77,17 @@ llvmpipe_flush( struct pipe_context *pipe, } } +void +llvmpipe_finish( struct pipe_context *pipe, + const char *reason ) +{ + struct pipe_fence_handle *fence = NULL; + llvmpipe_flush(pipe, 0, &fence, reason); + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } +} /** * Flush context if necessary. @@ -93,7 +105,8 @@ llvmpipe_flush_resource(struct pipe_context *pipe, unsigned flush_flags, boolean read_only, boolean cpu_access, - boolean do_not_block) + boolean do_not_block, + const char *reason) { unsigned referenced; @@ -106,31 +119,16 @@ llvmpipe_flush_resource(struct pipe_context *pipe, /* * Flush and wait. */ - - struct pipe_fence_handle *fence = NULL; - if (do_not_block) return FALSE; - /* - * Do the unswizzling in parallel. - * - * XXX: Don't abuse the PIPE_FLUSH_FRAME flag for this. - */ - flush_flags |= PIPE_FLUSH_FRAME; - - llvmpipe_flush(pipe, flush_flags, &fence); - - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } + llvmpipe_finish(pipe, reason); } else { /* * Just flush. */ - llvmpipe_flush(pipe, flush_flags, NULL); + llvmpipe_flush(pipe, flush_flags, NULL, reason); } } diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index 7b605681a9..bb538b2bd8 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -34,8 +34,14 @@ struct pipe_context; struct pipe_fence_handle; void -llvmpipe_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence); +llvmpipe_flush(struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence, + const char *reason); + +void +llvmpipe_finish( struct pipe_context *pipe, + const char *reason ); boolean llvmpipe_flush_resource(struct pipe_context *pipe, @@ -45,6 +51,7 @@ llvmpipe_flush_resource(struct pipe_context *pipe, unsigned flush_flags, boolean read_only, boolean cpu_access, - boolean do_not_block); + boolean do_not_block, + const char *reason); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index 02eeaf6487..540eea7fd1 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -35,9 +35,9 @@ #include "util/u_memory.h" #include "lp_context.h" #include "lp_flush.h" +#include "lp_fence.h" #include "lp_query.h" #include "lp_rast.h" -#include "lp_rast_priv.h" #include "lp_state.h" @@ -69,12 +69,7 @@ llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) struct llvmpipe_query *pq = llvmpipe_query(q); /* query might still be in process if we never waited for the result */ if (!pq->done) { - struct pipe_fence_handle *fence = NULL; - llvmpipe_flush(pipe, 0, &fence); - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } + llvmpipe_finish(pipe, __FUNCTION__); } pipe_mutex_destroy(pq->mutex); @@ -93,16 +88,11 @@ llvmpipe_get_query_result(struct pipe_context *pipe, if (!pq->done) { if (wait) { - struct pipe_fence_handle *fence = NULL; - llvmpipe_flush(pipe, 0, &fence); - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } + llvmpipe_finish(pipe, __FUNCTION__); } /* this is a bit inconsequent but should be ok */ else { - llvmpipe_flush(pipe, 0, NULL); + llvmpipe_flush(pipe, 0, NULL, __FUNCTION__); } } @@ -125,12 +115,7 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) * frame of rendering. */ if (pq->binned) { - struct pipe_fence_handle *fence; - llvmpipe_flush(pipe, 0, &fence); - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } + llvmpipe_finish(pipe, __FUNCTION__); } lp_setup_begin_query(llvmpipe->setup, pq); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 6968cda629..14a156378a 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -61,6 +61,8 @@ static const struct debug_named_value lp_debug_flags[] = { { "show_tiles", DEBUG_SHOW_TILES, NULL }, { "show_subtiles", DEBUG_SHOW_SUBTILES, NULL }, { "counters", DEBUG_COUNTERS, NULL }, + { "scene", DEBUG_SCENE, NULL }, + { "fence", DEBUG_FENCE, NULL }, DEBUG_NAMED_VALUE_END }; #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 556e571585..6eede83bfb 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -275,9 +275,10 @@ set_scene_state( struct lp_setup_context *setup, void lp_setup_flush( struct lp_setup_context *setup, unsigned flags, - struct pipe_fence_handle **fence) + struct pipe_fence_handle **fence, + const char *reason) { - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s %s\n", __FUNCTION__, reason); if (setup->scene) { if (fence) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 73b1c85325..a41bb8863b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -85,7 +85,8 @@ lp_setup_fence( struct lp_setup_context *setup ); void lp_setup_flush( struct lp_setup_context *setup, unsigned flags, - struct pipe_fence_handle **fence); + struct pipe_fence_handle **fence, + const char *reason); void diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 35ef63389c..33c1a49efe 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -927,7 +927,6 @@ static void llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - struct pipe_fence_handle *fence = NULL; struct lp_fragment_shader *shader = fs; struct lp_fs_variant_list_item *li; @@ -940,12 +939,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) * Flushing alone might not sufficient we need to wait on it too. */ - llvmpipe_flush(pipe, 0, &fence); - - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } + llvmpipe_finish(pipe, __FUNCTION__); li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { @@ -1148,19 +1142,14 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) unsigned i; if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) { struct pipe_context *pipe = &lp->pipe; - struct pipe_fence_handle *fence = NULL; /* * XXX: we need to flush the context until we have some sort of reference * counting in fragment shaders as they may still be binned * Flushing alone might not be sufficient we need to wait on it too. */ - llvmpipe_flush(pipe, 0, &fence); + llvmpipe_finish(pipe, __FUNCTION__); - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } for (i = 0; i < LP_MAX_SHADER_VARIANTS / 4; i++) { struct lp_fs_variant_list_item *item = last_elem(&lp->fs_variants_list); remove_shader_variant(lp, item->base); diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index f761e82850..63ddc669c2 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -68,14 +68,16 @@ lp_resource_copy(struct pipe_context *pipe, 0, /* flush_flags */ FALSE, /* read_only */ TRUE, /* cpu_access */ - FALSE); /* do_not_block */ + FALSE, + "blit dst"); /* do_not_block */ llvmpipe_flush_resource(pipe, src, subsrc.face, subsrc.level, 0, /* flush_flags */ TRUE, /* read_only */ TRUE, /* cpu_access */ - FALSE); /* do_not_block */ + FALSE, + "blit src"); /* do_not_block */ /* printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n", diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index ff4773fd7c..5832ea2744 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -584,7 +584,8 @@ llvmpipe_get_transfer(struct pipe_context *pipe, 0, /* flush_flags */ read_only, TRUE, /* cpu_access */ - do_not_block)) { + do_not_block, + "transfer dest")) { /* * It would have blocked, but state tracker requested no to. */ -- cgit v1.2.3 From 1e926fe42a9780fd5e6574df2014a75d4af1101d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 20 Aug 2010 15:15:36 +0100 Subject: llvmpipe: wake all threads waiting on a fence --- src/gallium/drivers/llvmpipe/lp_fence.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c index 4d549b0750..1f96dcd81a 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -146,7 +146,9 @@ lp_fence_signal(struct lp_fence *fence) debug_printf("%s count=%u rank=%u\n", __FUNCTION__, fence->count, fence->rank); - pipe_condvar_signal(fence->signalled); + /* Wakeup all threads waiting on the mutex: + */ + pipe_condvar_broadcast(fence->signalled); pipe_mutex_unlock(fence->mutex); } -- cgit v1.2.3 From 5a45e53df4419fde1fe7696f3a9459893363f7c5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 20 Aug 2010 15:25:27 +0100 Subject: llvmpipe: move some fence functions into lp_screen.c --- src/gallium/drivers/llvmpipe/lp_fence.c | 73 ++++++++------------------------ src/gallium/drivers/llvmpipe/lp_fence.h | 12 ++++++ src/gallium/drivers/llvmpipe/lp_screen.c | 49 ++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_setup.c | 2 + 4 files changed, 79 insertions(+), 57 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c index 1f96dcd81a..3a55e76bc3 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -75,58 +75,6 @@ lp_fence_destroy(struct lp_fence *fence) } -/** - * For reference counting. - * This is a Gallium API function. - */ -static void -llvmpipe_fence_reference(struct pipe_screen *screen, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ - struct lp_fence **old = (struct lp_fence **) ptr; - struct lp_fence *f = (struct lp_fence *) fence; - - lp_fence_reference(old, f); -} - - -/** - * Has the fence been executed/finished? - * This is a Gallium API function. - */ -static int -llvmpipe_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) -{ - struct lp_fence *f = (struct lp_fence *) fence; - - return f->count == f->rank; -} - - -/** - * Wait for the fence to finish. - * This is a Gallium API function. - */ -static int -llvmpipe_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *fence_handle, - unsigned flag) -{ - struct lp_fence *fence = (struct lp_fence *) fence_handle; - - pipe_mutex_lock(fence->mutex); - while (fence->count < fence->rank) { - pipe_condvar_wait(fence->signalled, fence->mutex); - } - pipe_mutex_unlock(fence->mutex); - - return 0; -} - - /** * Called by the rendering threads to increment the fence counter. * When the counter == the rank, the fence is finished. @@ -153,11 +101,24 @@ lp_fence_signal(struct lp_fence *fence) pipe_mutex_unlock(fence->mutex); } +boolean +lp_fence_signalled(struct lp_fence *f) +{ + return f->count == f->rank; +} void -llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen) +lp_fence_wait(struct lp_fence *f) { - screen->fence_reference = llvmpipe_fence_reference; - screen->fence_signalled = llvmpipe_fence_signalled; - screen->fence_finish = llvmpipe_fence_finish; + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, f->id); + + pipe_mutex_lock(f->mutex); + assert(f->issued); + while (f->count < f->rank) { + pipe_condvar_wait(f->signalled, f->mutex); + } + pipe_mutex_unlock(f->mutex); } + + diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h index b80af5c654..3c59118780 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.h +++ b/src/gallium/drivers/llvmpipe/lp_fence.h @@ -46,6 +46,7 @@ struct lp_fence pipe_mutex mutex; pipe_condvar signalled; + boolean issued; unsigned rank; unsigned count; }; @@ -58,6 +59,11 @@ lp_fence_create(unsigned rank); void lp_fence_signal(struct lp_fence *fence); +boolean +lp_fence_signalled(struct lp_fence *fence); + +void +lp_fence_wait(struct lp_fence *fence); void llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen); @@ -79,5 +85,11 @@ lp_fence_reference(struct lp_fence **ptr, *ptr = f; } +static INLINE boolean +lp_fence_issued(const struct lp_fence *fence) +{ + return fence->issued; +} + #endif /* LP_FENCE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 14a156378a..9b7e0d51cd 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -317,6 +317,51 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen ) + +/** + * Fence reference counting. + */ +static void +llvmpipe_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct lp_fence **old = (struct lp_fence **) ptr; + struct lp_fence *f = (struct lp_fence *) fence; + + lp_fence_reference(old, f); +} + + +/** + * Has the fence been executed/finished? + */ +static int +llvmpipe_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct lp_fence *f = (struct lp_fence *) fence; + return lp_fence_signalled(f); +} + + +/** + * Wait for the fence to finish. + */ +static int +llvmpipe_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence_handle, + unsigned flag) +{ + struct lp_fence *f = (struct lp_fence *) fence_handle; + + lp_fence_wait(f); + return 0; +} + + + /** * Create a new pipe_screen object * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen). @@ -354,9 +399,11 @@ llvmpipe_create_screen(struct sw_winsys *winsys) screen->base.context_create = llvmpipe_create_context; screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; + screen->base.fence_reference = llvmpipe_fence_reference; + screen->base.fence_signalled = llvmpipe_fence_signalled; + screen->base.fence_finish = llvmpipe_fence_finish; llvmpipe_init_screen_resource_funcs(&screen->base); - llvmpipe_init_screen_fence_funcs(&screen->base); lp_jit_screen_init(screen); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6eede83bfb..5389de8b63 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -288,6 +288,8 @@ lp_setup_flush( struct lp_setup_context *setup, *fence = lp_setup_fence( setup ); } + if (setup->scene->fence) + setup->scene->fence->issued = TRUE; } set_scene_state( setup, SETUP_FLUSHED ); -- cgit v1.2.3 From 98f3ff8f4a761d579ee9b42ee3090635519213a5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 20 Aug 2010 15:45:25 +0100 Subject: llvmpipe: more rasterization counters --- src/gallium/drivers/llvmpipe/lp_perf.c | 21 +++++++++++++++------ src/gallium/drivers/llvmpipe/lp_perf.h | 7 +++++++ src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 7 +++++++ 3 files changed, 29 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c index 083e7e30a5..e22532f25c 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.c +++ b/src/gallium/drivers/llvmpipe/lp_perf.c @@ -46,7 +46,7 @@ lp_print_counters(void) { if (LP_DEBUG & DEBUG_COUNTERS) { unsigned total_64, total_16, total_4; - float p1, p2, p3, p4; + float p1, p2, p3, p5, p6; debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris); debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris); @@ -58,11 +58,15 @@ lp_print_counters(void) p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64; p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64; p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64; - p4 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64; + p5 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64; + p6 = 100.0 * (float) lp_count.nr_shade_64 / (float) total_64; debug_printf("llvmpipe: nr_64x64: %9u\n", total_64); debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64); - debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p4, total_64); + debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p5, total_64); + debug_printf("llvmpipe: nr_pure_shade_opaque: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_opaque_64, 0.0, lp_count.nr_shade_opaque_64); + debug_printf("llvmpipe: nr_shade_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_64, p6, total_64); + debug_printf("llvmpipe: nr_pure_shade: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_64, 0.0, lp_count.nr_shade_64); debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64); debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64); @@ -79,12 +83,17 @@ lp_print_counters(void) debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16); debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16); - total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4); + total_4 = (lp_count.nr_empty_4 + + lp_count.nr_fully_covered_4 + + lp_count.nr_partially_covered_4); p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4; - p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4; + p2 = 100.0 * (float) lp_count.nr_fully_covered_4 / (float) total_4; + p3 = 100.0 * (float) lp_count.nr_partially_covered_4 / (float) total_4; - debug_printf("llvmpipe: nr_4x4: %9u\n", total_4); + debug_printf("llvmpipe: nr_tri_4x4: %9u\n", total_4); + debug_printf("llvmpipe: nr_fully_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_4, p2, total_4); + debug_printf("llvmpipe: nr_partially_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_4, p3, total_4); debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4); diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h index 4774f64550..c28652fc30 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.h +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -44,11 +44,16 @@ struct lp_counters unsigned nr_empty_64; unsigned nr_fully_covered_64; unsigned nr_partially_covered_64; + unsigned nr_pure_shade_opaque_64; + unsigned nr_pure_shade_64; + unsigned nr_shade_64; unsigned nr_shade_opaque_64; unsigned nr_empty_16; unsigned nr_fully_covered_16; unsigned nr_partially_covered_16; unsigned nr_empty_4; + unsigned nr_fully_covered_4; + unsigned nr_partially_covered_4; unsigned nr_non_empty_4; unsigned nr_llvm_compiles; int64_t llvm_compile_time; /**< total, in microseconds */ @@ -66,9 +71,11 @@ extern struct lp_counters lp_count; #ifdef DEBUG #define LP_COUNT(counter) lp_count.counter++ #define LP_COUNT_ADD(counter, incr) lp_count.counter += (incr) +#define LP_COUNT_GET(counter) (lp_count.counter) #else #define LP_COUNT(counter) #define LP_COUNT_ADD(counter, incr) (void) incr +#define LP_COUNT_GET(counter) 0 #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 43f72d8ca8..70a4b64c8d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -102,6 +102,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, assert((partial_mask & inmask) == 0); + LP_COUNT_ADD(nr_empty_4, util_bitcount(0xffff & ~(partial_mask | inmask))); + /* Iterate over partials: */ while (partial_mask) { @@ -114,6 +116,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, partial_mask &= ~(1 << i); + LP_COUNT(nr_partially_covered_4); + for (j = 0; j < NR_PLANES; j++) cx[j] = (c[j] - plane[j].dcdx * ix @@ -133,6 +137,7 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, inmask &= ~(1 << i); + LP_COUNT(nr_fully_covered_4); block_full_4(task, tri, px, py); } } @@ -190,6 +195,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, assert((partial_mask & inmask) == 0); + LP_COUNT_ADD(nr_empty_16, util_bitcount(0xffff & ~(partial_mask | inmask))); + /* Iterate over partials: */ while (partial_mask) { -- cgit v1.2.3 From c25151dd6a06acd93c8bf0d9e79fdcf134ffe818 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 20 Aug 2010 16:18:23 +0100 Subject: llvmpipe: cull zero-area triangles early --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 614a6372b4..b4325390fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -819,9 +819,10 @@ static void triangle_both( struct lp_setup_context *setup, const float fy = v1[0][1] - v2[0][1]; /* det = cross(e,f).z */ - if (ex * fy - ey * fx < 0.0f) + const float det = ex * fy - ey * fx; + if (det < 0.0f) triangle_ccw( setup, v0, v1, v2 ); - else + else if (det > 0.0f) triangle_cw( setup, v0, v1, v2 ); } -- cgit v1.2.3 From d808f7b53ec71a7684bac7e6b536911fc27d5238 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 24 Aug 2010 19:58:54 +0100 Subject: llvmpipe: better triangle debugging --- src/gallium/drivers/llvmpipe/lp_setup_context.h | 12 ++++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 74 ++++++++++++++++++------- 2 files changed, 67 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index a0606f5034..102361cca3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -158,4 +158,16 @@ void lp_setup_update_state( struct lp_setup_context *setup ); void lp_setup_destroy( struct lp_setup_context *setup ); +void +lp_setup_print_triangle(struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]); + +void +lp_setup_print_vertex(struct lp_setup_context *setup, + const char *name, + const float (*v)[4]); + #endif + diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index b4325390fe..d6c837d8d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -357,31 +357,67 @@ alloc_triangle(struct lp_scene *scene, return tri; } +void +lp_setup_print_vertex(struct lp_setup_context *setup, + const char *name, + const float (*v)[4]) +{ + int i, j; + + debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n", + name, + v[0][0], v[0][1], v[0][2], v[0][3]); + + for (i = 0; i < setup->fs.nr_inputs; i++) { + const float *in = v[setup->fs.input[i].src_index]; + + debug_printf(" in[%d] (%s[%d]) %s%s%s%s ", + i, + name, setup->fs.input[i].src_index, + (setup->fs.input[i].usage_mask & 0x1) ? "x" : " ", + (setup->fs.input[i].usage_mask & 0x2) ? "y" : " ", + (setup->fs.input[i].usage_mask & 0x4) ? "z" : " ", + (setup->fs.input[i].usage_mask & 0x8) ? "w" : " "); + + for (j = 0; j < 4; j++) + if (setup->fs.input[i].usage_mask & (1<fs.nr_inputs; i++) { - debug_printf(" v1[%d]: %f %f %f %f\n", i, - v1[i][0], v1[i][1], v1[i][2], v1[i][3]); - } - for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { - debug_printf(" v2[%d]: %f %f %f %f\n", i, - v2[i][0], v2[i][1], v2[i][2], v2[i][3]); - } - for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { - debug_printf(" v3[%d]: %f %f %f %f\n", i, - v3[i][0], v3[i][1], v3[i][2], v3[i][3]); + { + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + const float det = ex * fy - ey * fx; + if (det < 0.0f) + debug_printf(" - ccw\n"); + else if (det > 0.0f) + debug_printf(" - cw\n"); + else + debug_printf(" - zero area\n"); } + + lp_setup_print_vertex(setup, "v0", v0); + lp_setup_print_vertex(setup, "v1", v1); + lp_setup_print_vertex(setup, "v2", v2); } @@ -421,7 +457,7 @@ do_triangle_ccw(struct lp_setup_context *setup, int nr_planes = 3; if (0) - print_triangle(setup, v1, v2, v3); + lp_setup_print_triangle(setup, v1, v2, v3); if (setup->scissor_test) { nr_planes = 7; -- cgit v1.2.3 From 29bcbf5e797a18430285c75abb8a9300c8defe1d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 24 Aug 2010 20:04:08 +0100 Subject: llvmpipe: track drawing region as a single u_rect --- src/gallium/drivers/llvmpipe/lp_setup.c | 29 ++++-- src/gallium/drivers/llvmpipe/lp_setup_context.h | 7 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 113 ++++++++++++------------ 3 files changed, 86 insertions(+), 63 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 5389de8b63..0c6d2de193 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -315,6 +315,11 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup, * scene. */ util_copy_framebuffer_state(&setup->fb, fb); + setup->framebuffer.x0 = 0; + setup->framebuffer.y0 = 0; + setup->framebuffer.x1 = fb->width-1; + setup->framebuffer.y1 = fb->height-1; + setup->dirty |= LP_SETUP_NEW_SCISSOR; } @@ -472,8 +477,12 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup, setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; setup->triangle = first_triangle; - setup->scissor_test = scissor; setup->pixel_offset = gl_rasterization_rules ? 0.5f : 0.0f; + + if (setup->scissor_test != scissor) { + setup->dirty |= LP_SETUP_NEW_SCISSOR; + setup->scissor_test = scissor; + } } @@ -562,10 +571,11 @@ lp_setup_set_scissor( struct lp_setup_context *setup, assert(scissor); - if (memcmp(&setup->scissor.current, scissor, sizeof(*scissor)) != 0) { - setup->scissor.current = *scissor; /* struct copy */ - setup->dirty |= LP_SETUP_NEW_SCISSOR; - } + setup->scissor.x0 = scissor->minx; + setup->scissor.x1 = scissor->maxx-1; + setup->scissor.y0 = scissor->miny; + setup->scissor.y1 = scissor->maxy-1; + setup->dirty |= LP_SETUP_NEW_SCISSOR; } @@ -805,6 +815,15 @@ lp_setup_update_state( struct lp_setup_context *setup ) for (i = 0; i < Elements(setup->fs.current_tex); i++) { if (setup->fs.current_tex[i]) lp_scene_add_resource_reference(scene, setup->fs.current_tex[i]); + if (setup->dirty & LP_SETUP_NEW_SCISSOR) { + setup->draw_region = setup->framebuffer; + if (setup->scissor_test) { + u_rect_possible_intersection(&setup->scissor, + &setup->draw_region); + } + } + + } } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 102361cca3..1a147e0353 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -41,6 +41,7 @@ #include "lp_scene.h" #include "draw/draw_vbuf.h" +#include "util/u_rect.h" #define LP_SETUP_NEW_FS 0x01 #define LP_SETUP_NEW_CONSTANTS 0x02 @@ -92,6 +93,9 @@ struct lp_setup_context float pixel_offset; struct pipe_framebuffer_state fb; + struct u_rect framebuffer; + struct u_rect scissor; + struct u_rect draw_region; /* intersection of fb & scissor */ struct { unsigned flags; @@ -127,9 +131,6 @@ struct lp_setup_context uint8_t *stored; } blend_color; - struct { - struct pipe_scissor_state current; - } scissor; unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index d6c837d8d4..fe5c9358dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -31,6 +31,7 @@ #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_rect.h" #include "lp_perf.h" #include "lp_setup_context.h" #include "lp_rast.h" @@ -450,7 +451,7 @@ do_triangle_ccw(struct lp_setup_context *setup, struct lp_rast_triangle *tri; struct tri_info info; int area; - int minx, maxx, miny, maxy; + struct u_rect bbox; int ix0, ix1, iy0, iy1; unsigned tri_bytes; int i; @@ -466,6 +467,50 @@ do_triangle_ccw(struct lp_setup_context *setup, nr_planes = 3; } + /* x/y positions in fixed point */ + info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset); + info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset); + info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset); + info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset); + info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset); + info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset); + + + + /* Bounding rectangle (in pixels) */ + { + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->pixel_offset != 0) ? 1 : 0; + + bbox.x0 = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.x1 = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y0 = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.y1 = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + + /* Inclusive coordinates: + */ + bbox.x1--; + bbox.y1--; + } + + if (bbox.x1 < bbox.x0 || + bbox.y1 < bbox.y0) { + if (0) debug_printf("empty bounding box\n"); + LP_COUNT(nr_culled_tris); + return; + } + + if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { + if (0) debug_printf("offscreen\n"); + LP_COUNT(nr_culled_tris); + return; + } + + u_rect_find_intersection(&setup->draw_region, &bbox); tri = alloc_triangle(scene, setup->fs.nr_inputs, @@ -483,14 +528,6 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->v[2][1] = v3[0][1]; #endif - /* x/y positions in fixed point */ - info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset); - info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset); - info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset); - info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset); - info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset); - info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset); - tri->plane[0].dcdy = info.x[0] - info.x[1]; tri->plane[1].dcdy = info.x[1] - info.x[2]; tri->plane[2].dcdy = info.x[2] - info.x[0]; @@ -514,40 +551,6 @@ do_triangle_ccw(struct lp_setup_context *setup, return; } - /* Bounding rectangle (in pixels) */ - { - /* Yes this is necessary to accurately calculate bounding boxes - * with the two fill-conventions we support. GL (normally) ends - * up needing a bottom-left fill convention, which requires - * slightly different rounding. - */ - int adj = (setup->pixel_offset != 0) ? 1 : 0; - - minx = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; - maxx = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; - miny = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - maxy = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - } - - if (setup->scissor_test) { - minx = MAX2(minx, setup->scissor.current.minx); - maxx = MIN2(maxx, setup->scissor.current.maxx); - miny = MAX2(miny, setup->scissor.current.miny); - maxy = MIN2(maxy, setup->scissor.current.maxy); - } - else { - minx = MAX2(minx, 0); - miny = MAX2(miny, 0); - maxx = MIN2(maxx, scene->fb.width); - maxy = MIN2(maxy, scene->fb.height); - } - - - if (miny >= maxy || minx >= maxx) { - lp_scene_putback_data( scene, tri_bytes ); - LP_COUNT(nr_culled_tris); - return; - } /* */ @@ -648,25 +651,25 @@ do_triangle_ccw(struct lp_setup_context *setup, if (nr_planes == 7) { tri->plane[3].dcdx = -1; tri->plane[3].dcdy = 0; - tri->plane[3].c = 1-minx; + tri->plane[3].c = 1-bbox.x0; tri->plane[3].ei = 0; tri->plane[3].eo = 1; tri->plane[4].dcdx = 1; tri->plane[4].dcdy = 0; - tri->plane[4].c = maxx; + tri->plane[4].c = bbox.x1+1; tri->plane[4].ei = -1; tri->plane[4].eo = 0; tri->plane[5].dcdx = 0; tri->plane[5].dcdy = 1; - tri->plane[5].c = 1-miny; + tri->plane[5].c = 1-bbox.y0; tri->plane[5].ei = 0; tri->plane[5].eo = 1; tri->plane[6].dcdx = 0; tri->plane[6].dcdy = -1; - tri->plane[6].c = maxy; + tri->plane[6].c = bbox.y1+1; tri->plane[6].ei = -1; tri->plane[6].eo = 0; } @@ -680,10 +683,10 @@ do_triangle_ccw(struct lp_setup_context *setup, /* Convert to tile coordinates, and inclusive ranges: */ if (nr_planes == 3) { - int ix0 = minx / 16; - int iy0 = miny / 16; - int ix1 = (maxx-1) / 16; - int iy1 = (maxy-1) / 16; + int ix0 = bbox.x0 / 16; + int iy0 = bbox.y0 / 16; + int ix1 = bbox.x1 / 16; + int iy1 = bbox.y1 / 16; if (iy0 == iy1 && ix0 == ix1) { @@ -699,10 +702,10 @@ do_triangle_ccw(struct lp_setup_context *setup, } } - ix0 = minx / TILE_SIZE; - iy0 = miny / TILE_SIZE; - ix1 = (maxx-1) / TILE_SIZE; - iy1 = (maxy-1) / TILE_SIZE; + ix0 = bbox.x0 / TILE_SIZE; + iy0 = bbox.y0 / TILE_SIZE; + ix1 = bbox.x1 / TILE_SIZE; + iy1 = bbox.y1 / TILE_SIZE; /* * Clamp to framebuffer size -- cgit v1.2.3 From 14ac294e1a67577d3f2b63e77272ec472a93c224 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 25 Aug 2010 09:55:07 -0700 Subject: llvmpipe: Remove unnecessary header. --- src/gallium/drivers/llvmpipe/lp_query.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index 540eea7fd1..67fd797af2 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -37,7 +37,6 @@ #include "lp_flush.h" #include "lp_fence.h" #include "lp_query.h" -#include "lp_rast.h" #include "lp_state.h" -- cgit v1.2.3 From d4bfd8a24a4c2b246b55888d4983ddcf665b6976 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 25 Aug 2010 18:01:51 +0100 Subject: llvmpipe: fix bad patch application --- src/gallium/drivers/llvmpipe/lp_setup.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 0c6d2de193..9aa6c4bf38 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -815,6 +815,10 @@ lp_setup_update_state( struct lp_setup_context *setup ) for (i = 0; i < Elements(setup->fs.current_tex); i++) { if (setup->fs.current_tex[i]) lp_scene_add_resource_reference(scene, setup->fs.current_tex[i]); + } + } + } + if (setup->dirty & LP_SETUP_NEW_SCISSOR) { setup->draw_region = setup->framebuffer; if (setup->scissor_test) { @@ -823,11 +827,6 @@ lp_setup_update_state( struct lp_setup_context *setup ) } } - - } - } - } - setup->dirty = 0; assert(setup->fs.stored); -- cgit v1.2.3 From 4418a493c2466e734e1ca5ace51535d1dbcf8a46 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 26 Aug 2010 11:45:25 -0600 Subject: llvmpipe: fix PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS query Fixes crashes in glean glsl1 and demos/src/glsl/vert-tex. See comments for details. --- src/gallium/drivers/llvmpipe/lp_screen.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 9b7e0d51cd..1e65a91fc6 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -89,7 +89,14 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return PIPE_MAX_VERTEX_SAMPLERS; + /* At this time, the draw module and llvmpipe driver only + * support vertex shader texture lookups when LLVM is enabled in + * the draw module. + */ + if (debug_get_bool_option("DRAW_USE_LLVM", TRUE)) + return PIPE_MAX_VERTEX_SAMPLERS; + else + return 0; case PIPE_CAP_MAX_COMBINED_SAMPLERS: return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: -- cgit v1.2.3 From 0be0ad5d58806bc12ec2c7bb3c00e7f8c7a6d6c4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 22 Aug 2010 10:57:12 +0100 Subject: llvmpipe: intrinsics version of triangle coeficient calculation Looks nice, but makes almost no impact on performance - maybe a percent or so in isosurf, nothing elsewhere. May be of use later on. --- src/gallium/drivers/llvmpipe/SConscript | 2 + src/gallium/drivers/llvmpipe/lp_setup_coef.c | 258 +++++++++++++++ src/gallium/drivers/llvmpipe/lp_setup_coef.h | 61 ++++ .../drivers/llvmpipe/lp_setup_coef_intrin.c | 208 ++++++++++++ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 348 +++------------------ 5 files changed, 577 insertions(+), 300 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_coef.c create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_coef.h create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5583fca38e..8d57db72cf 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -63,6 +63,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_setup_line.c', 'lp_setup_point.c', 'lp_setup_tri.c', + 'lp_setup_coef.c', + 'lp_setup_coef_intrin.c', 'lp_setup_vbuf.c', 'lp_state_blend.c', 'lp_state_clip.c', diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c new file mode 100644 index 0000000000..95e3e8fffe --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.c @@ -0,0 +1,258 @@ +/************************************************************************** + * + * Copyright 2010, VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for triangles + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_setup_coef.h" +#include "lp_rast.h" +#include "lp_state_fs.h" + +#if !defined(PIPE_ARCH_SSE) + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + */ +static void constant_coef( struct lp_rast_shader_inputs *inputs, + unsigned slot, + const float value, + unsigned i ) +{ + inputs->a0[slot][i] = value; + inputs->dadx[slot][i] = 0.0f; + inputs->dady[slot][i] = 0.0f; +} + + + +static void linear_coef( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + unsigned vert_attr, + unsigned i) +{ + float a0 = info->v0[vert_attr][i]; + float a1 = info->v1[vert_attr][i]; + float a2 = info->v2[vert_attr][i]; + + float da01 = a0 - a1; + float da20 = a2 - a0; + float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20); + float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01); + + inputs->dadx[slot][i] = dadx; + inputs->dady[slot][i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + inputs->a0[slot][i] = a0 - (dadx * info->x0_center + + dady * info->y0_center); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + unsigned vert_attr, + unsigned i) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a0 = info->v0[vert_attr][i] * info->v0[0][3]; + float a1 = info->v1[vert_attr][i] * info->v1[0][3]; + float a2 = info->v2[vert_attr][i] * info->v2[0][3]; + float da01 = a0 - a1; + float da20 = a2 - a0; + float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20; + float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01; + + inputs->dadx[slot][i] = dadx; + inputs->dady[slot][i] = dady; + inputs->a0[slot][i] = a0 - (dadx * info->x0_center + + dady * info->y0_center); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial + * Z and W are copied from position_coef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + unsigned usage_mask) +{ + /*X*/ + if (usage_mask & TGSI_WRITEMASK_X) { + inputs->a0[slot][0] = 0.0; + inputs->dadx[slot][0] = 1.0; + inputs->dady[slot][0] = 0.0; + } + + /*Y*/ + if (usage_mask & TGSI_WRITEMASK_Y) { + inputs->a0[slot][1] = 0.0; + inputs->dadx[slot][1] = 0.0; + inputs->dady[slot][1] = 1.0; + } + + /*Z*/ + if (usage_mask & TGSI_WRITEMASK_Z) { + linear_coef(inputs, info, slot, 0, 2); + } + + /*W*/ + if (usage_mask & TGSI_WRITEMASK_W) { + linear_coef(inputs, info, slot, 0, 3); + } +} + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface is the triangle front facing? + */ +static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, + unsigned slot, + boolean frontface, + unsigned usage_mask) +{ + /* convert TRUE to 1.0 and FALSE to -1.0 */ + if (usage_mask & TGSI_WRITEMASK_X) + constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 ); + + if (usage_mask & TGSI_WRITEMASK_Y) + constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */ + + if (usage_mask & TGSI_WRITEMASK_Z) + constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */ + + if (usage_mask & TGSI_WRITEMASK_W) + constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */ +} + + +/** + * Compute the tri->coef[] array dadx, dady, a0 values. + */ +void lp_setup_tri_coef( struct lp_setup_context *setup, + struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info) +{ + unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; + unsigned slot; + unsigned i; + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; + unsigned usage_mask = setup->fs.input[slot].usage_mask; + + switch (setup->fs.input[slot].interp) { + case LP_INTERP_CONSTANT: + if (setup->flatshade_first) { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(inputs, slot+1, info->v0[vert_attr][i], i); + } + else { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(inputs, slot+1, info->v2[vert_attr][i], i); + } + break; + + case LP_INTERP_LINEAR: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + linear_coef(inputs, info, slot+1, vert_attr, i); + break; + + case LP_INTERP_PERSPECTIVE: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + perspective_coef(inputs, info, slot+1, vert_attr, i); + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0, so all need to ensure that the usage mask is covers all + * usages. + */ + fragcoord_usage_mask |= usage_mask; + break; + + case LP_INTERP_FACING: + setup_facing_coef(inputs, slot+1, info->frontfacing, usage_mask); + break; + + default: + assert(0); + } + } + + /* The internal position input is in slot zero: + */ + setup_fragcoord_coef(inputs, info, 0, fragcoord_usage_mask); +} + +#else +extern void lp_setup_coef_dummy(void); +void lp_setup_coef_dummy(void) +{ +} + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h new file mode 100644 index 0000000000..d68b39c603 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.h @@ -0,0 +1,61 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * The setup code is concerned with point/line/triangle setup and + * putting commands/data into the bins. + */ + + +#ifndef LP_SETUP_COEF_H +#define LP_SETUP_COEF_H + + +struct lp_tri_info { + + float x0_center; + float y0_center; + + /* turn these into an aligned float[4] */ + float dy01_ooa; + float dy20_ooa; + float dx01_ooa; + float dx20_ooa; + + const float (*v0)[4]; + const float (*v1)[4]; + const float (*v2)[4]; + + boolean frontfacing; /* remove eventually */ +}; + +void lp_setup_tri_coef( struct lp_setup_context *setup, + struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info); + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c new file mode 100644 index 0000000000..b477bc2113 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c @@ -0,0 +1,208 @@ +/************************************************************************** + * + * Copyright 2010 VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for triangles + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_setup_coef.h" +#include "lp_rast.h" +#include "lp_state_fs.h" + +#if defined(PIPE_ARCH_SSE) +#include + + +static void constant_coef4( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + const float *attr) +{ + *(__m128 *)inputs->a0[slot] = *(__m128 *)attr; + *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); + *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); +} + + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface is the triangle front facing? + */ +static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot ) +{ + /* XXX: just pass frontface directly to the shader, don't bother + * treating it as an input. + */ + __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0, + 0, 0, 0); + + *(__m128 *)inputs->a0[slot] = a0; + *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); + *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); +} + + + +static void calc_coef4( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + __m128 a0, + __m128 a1, + __m128 a2) +{ + __m128 da01 = _mm_sub_ps(a0, a1); + __m128 da20 = _mm_sub_ps(a2, a0); + + __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa)); + __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa)); + __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa); + + __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa)); + __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa)); + __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa); + + __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center)); + __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center)); + __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0); + __m128 attr_0 = _mm_sub_ps(a0, attr_v0); + + *(__m128 *)inputs->a0[slot] = attr_0; + *(__m128 *)inputs->dadx[slot] = dadx; + *(__m128 *)inputs->dady[slot] = dady; +} + + +static void linear_coef( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + unsigned vert_attr) +{ + __m128 a0 = *(const __m128 *)info->v0[vert_attr]; + __m128 a1 = *(const __m128 *)info->v1[vert_attr]; + __m128 a2 = *(const __m128 *)info->v2[vert_attr]; + + calc_coef4(inputs, info, slot, a0, a1, a2); +} + + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + unsigned vert_attr) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + __m128 a0 = *(const __m128 *)info->v0[vert_attr]; + __m128 a1 = *(const __m128 *)info->v1[vert_attr]; + __m128 a2 = *(const __m128 *)info->v2[vert_attr]; + + __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3])); + __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3])); + __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3])); + + calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow); +} + + + + + +/** + * Compute the inputs-> dadx, dady, a0 values. + */ +void lp_setup_tri_coef( struct lp_setup_context *setup, + struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info) +{ + unsigned slot; + + /* The internal position input is in slot zero: + */ + linear_coef(inputs, info, 0, 0); + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; + + switch (setup->fs.input[slot].interp) { + case LP_INTERP_CONSTANT: + if (setup->flatshade_first) { + constant_coef4(inputs, info, slot+1, info->v0[vert_attr]); + } + else { + constant_coef4(inputs, info, slot+1, info->v2[vert_attr]); + } + break; + + case LP_INTERP_LINEAR: + linear_coef(inputs, info, slot+1, vert_attr); + break; + + case LP_INTERP_PERSPECTIVE: + perspective_coef(inputs, info, slot+1, vert_attr); + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0. + */ + break; + + case LP_INTERP_FACING: + setup_facing_coef(inputs, info, slot+1); + break; + + default: + assert(0); + } + } +} + +#else +extern void lp_setup_coef_dummy(void); +void lp_setup_coef_dummy(void) +{ +} +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index fe5c9358dd..d86fb8652a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -34,33 +34,12 @@ #include "util/u_rect.h" #include "lp_perf.h" #include "lp_setup_context.h" +#include "lp_setup_coef.h" #include "lp_rast.h" #include "lp_state_fs.h" #define NUM_CHANNELS 4 -struct tri_info { - - float pixel_offset; - - /* fixed point vertex coordinates */ - int x[3]; - int y[3]; - - /* float x,y deltas - all from the original coordinates - */ - float dy01, dy20; - float dx01, dx20; - float oneoverarea; - - const float (*v0)[4]; - const float (*v1)[4]; - const float (*v2)[4]; - - boolean frontfacing; -}; - - static INLINE int @@ -77,247 +56,6 @@ fixed_to_float(int a) -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - */ -static void constant_coef( struct lp_rast_triangle *tri, - unsigned slot, - const float value, - unsigned i ) -{ - tri->inputs.a0[slot][i] = value; - tri->inputs.dadx[slot][i] = 0.0f; - tri->inputs.dady[slot][i] = 0.0f; -} - - - -static void linear_coef( struct lp_rast_triangle *tri, - const struct tri_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) -{ - float a0 = info->v0[vert_attr][i]; - float a1 = info->v1[vert_attr][i]; - float a2 = info->v2[vert_attr][i]; - - float da01 = a0 - a1; - float da20 = a2 - a0; - float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea; - float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea; - - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - tri->inputs.a0[slot][i] = (a0 - - (dadx * (info->v0[0][0] - info->pixel_offset) + - dady * (info->v0[0][1] - info->pixel_offset))); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void perspective_coef( struct lp_rast_triangle *tri, - const struct tri_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) -{ - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a0 = info->v0[vert_attr][i] * info->v0[0][3]; - float a1 = info->v1[vert_attr][i] * info->v1[0][3]; - float a2 = info->v2[vert_attr][i] * info->v2[0][3]; - float da01 = a0 - a1; - float da20 = a2 - a0; - float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea; - float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea; - - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a0 - - (dadx * (info->v0[0][0] - info->pixel_offset) + - dady * (info->v0[0][1] - info->pixel_offset))); -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial - * Z and W are copied from position_coef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. - */ -static void -setup_fragcoord_coef(struct lp_rast_triangle *tri, - const struct tri_info *info, - unsigned slot, - unsigned usage_mask) -{ - /*X*/ - if (usage_mask & TGSI_WRITEMASK_X) { - tri->inputs.a0[slot][0] = 0.0; - tri->inputs.dadx[slot][0] = 1.0; - tri->inputs.dady[slot][0] = 0.0; - } - - /*Y*/ - if (usage_mask & TGSI_WRITEMASK_Y) { - tri->inputs.a0[slot][1] = 0.0; - tri->inputs.dadx[slot][1] = 0.0; - tri->inputs.dady[slot][1] = 1.0; - } - - /*Z*/ - if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(tri, info, slot, 0, 2); - } - - /*W*/ - if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(tri, info, slot, 0, 3); - } -} - - -/** - * Setup the fragment input attribute with the front-facing value. - * \param frontface is the triangle front facing? - */ -static void setup_facing_coef( struct lp_rast_triangle *tri, - unsigned slot, - boolean frontface, - unsigned usage_mask) -{ - /* convert TRUE to 1.0 and FALSE to -1.0 */ - if (usage_mask & TGSI_WRITEMASK_X) - constant_coef( tri, slot, 2.0f * frontface - 1.0f, 0 ); - - if (usage_mask & TGSI_WRITEMASK_Y) - constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ - - if (usage_mask & TGSI_WRITEMASK_Z) - constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ - - if (usage_mask & TGSI_WRITEMASK_W) - constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ -} - - -/** - * Compute the tri->coef[] array dadx, dady, a0 values. - */ -static void setup_tri_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - const struct tri_info *info) -{ - unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; - unsigned slot; - unsigned i; - - /* setup interpolation for all the remaining attributes: - */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; - - switch (setup->fs.input[slot].interp) { - case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - constant_coef(tri, slot+1, info->v0[vert_attr][i], i); - } - else { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - constant_coef(tri, slot+1, info->v2[vert_attr][i], i); - } - break; - - case LP_INTERP_LINEAR: - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - linear_coef(tri, info, slot+1, vert_attr, i); - break; - - case LP_INTERP_PERSPECTIVE: - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - perspective_coef(tri, info, slot+1, vert_attr, i); - fragcoord_usage_mask |= TGSI_WRITEMASK_W; - break; - - case LP_INTERP_POSITION: - /* - * The generated pixel interpolators will pick up the coeffs from - * slot 0, so all need to ensure that the usage mask is covers all - * usages. - */ - fragcoord_usage_mask |= usage_mask; - break; - - case LP_INTERP_FACING: - setup_facing_coef(tri, slot+1, info->frontfacing, usage_mask); - break; - - default: - assert(0); - } - } - - /* The internal position input is in slot zero: - */ - setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask); - - if (0) { - for (i = 0; i < NUM_CHANNELS; i++) { - float a0 = tri->inputs.a0 [0][i]; - float dadx = tri->inputs.dadx[0][i]; - float dady = tri->inputs.dady[0][i]; - - debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", - "xyzw"[i], - a0, dadx, dady); - } - - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned usage_mask = setup->fs.input[slot].usage_mask; - for (i = 0; i < NUM_CHANNELS; i++) { - if (usage_mask & (1 << i)) { - float a0 = tri->inputs.a0 [1 + slot][i]; - float dadx = tri->inputs.dadx[1 + slot][i]; - float dady = tri->inputs.dady[1 + slot][i]; - - debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", - slot, - "xyzw"[i], - a0, dadx, dady); - } - } - } - } -} - - @@ -440,16 +178,21 @@ lp_rast_cmd lp_rast_tri_tab[8] = { */ static void do_triangle_ccw(struct lp_setup_context *setup, + const float (*v0)[4], const float (*v1)[4], const float (*v2)[4], - const float (*v3)[4], boolean frontfacing ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); struct lp_fragment_shader_variant *variant = setup->fs.current.variant; struct lp_rast_triangle *tri; - struct tri_info info; + int x[3]; + int y[3]; + float dy01, dy20; + float dx01, dx20; + float oneoverarea; + struct lp_tri_info info; int area; struct u_rect bbox; int ix0, ix1, iy0, iy1; @@ -458,7 +201,7 @@ do_triangle_ccw(struct lp_setup_context *setup, int nr_planes = 3; if (0) - lp_setup_print_triangle(setup, v1, v2, v3); + lp_setup_print_triangle(setup, v0, v1, v2); if (setup->scissor_test) { nr_planes = 7; @@ -468,13 +211,12 @@ do_triangle_ccw(struct lp_setup_context *setup, } /* x/y positions in fixed point */ - info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset); - info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset); - info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset); - info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset); - info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset); - info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset); - + x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); + x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); + x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); + y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); + y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); /* Bounding rectangle (in pixels) */ @@ -486,10 +228,10 @@ do_triangle_ccw(struct lp_setup_context *setup, */ int adj = (setup->pixel_offset != 0) ? 1 : 0; - bbox.x0 = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; - bbox.x1 = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; - bbox.y0 = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - bbox.y1 = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.x0 = (MIN3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.x1 = (MAX3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y0 = (MIN3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.y1 = (MAX3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; /* Inclusive coordinates: */ @@ -520,21 +262,21 @@ do_triangle_ccw(struct lp_setup_context *setup, return; #ifdef DEBUG - tri->v[0][0] = v1[0][0]; - tri->v[1][0] = v2[0][0]; - tri->v[2][0] = v3[0][0]; - tri->v[0][1] = v1[0][1]; - tri->v[1][1] = v2[0][1]; - tri->v[2][1] = v3[0][1]; + tri->v[0][0] = v0[0][0]; + tri->v[1][0] = v1[0][0]; + tri->v[2][0] = v2[0][0]; + tri->v[0][1] = v0[0][1]; + tri->v[1][1] = v1[0][1]; + tri->v[2][1] = v2[0][1]; #endif - tri->plane[0].dcdy = info.x[0] - info.x[1]; - tri->plane[1].dcdy = info.x[1] - info.x[2]; - tri->plane[2].dcdy = info.x[2] - info.x[0]; + tri->plane[0].dcdy = x[0] - x[1]; + tri->plane[1].dcdy = x[1] - x[2]; + tri->plane[2].dcdy = x[2] - x[0]; - tri->plane[0].dcdx = info.y[0] - info.y[1]; - tri->plane[1].dcdx = info.y[1] - info.y[2]; - tri->plane[2].dcdx = info.y[2] - info.y[0]; + tri->plane[0].dcdx = y[0] - y[1]; + tri->plane[1].dcdx = y[1] - y[2]; + tri->plane[2].dcdx = y[2] - y[0]; area = (tri->plane[0].dcdy * tri->plane[2].dcdx - tri->plane[2].dcdy * tri->plane[0].dcdx); @@ -554,20 +296,26 @@ do_triangle_ccw(struct lp_setup_context *setup, /* */ - info.pixel_offset = setup->pixel_offset; - info.v0 = v1; - info.v1 = v2; - info.v2 = v3; - info.dx01 = info.v0[0][0] - info.v1[0][0]; - info.dx20 = info.v2[0][0] - info.v0[0][0]; - info.dy01 = info.v0[0][1] - info.v1[0][1]; - info.dy20 = info.v2[0][1] - info.v0[0][1]; - info.oneoverarea = 1.0f / (info.dx01 * info.dy20 - info.dx20 * info.dy01); + dx01 = v0[0][0] - v1[0][0]; + dy01 = v0[0][1] - v1[0][1]; + dx20 = v2[0][0] - v0[0][0]; + dy20 = v2[0][1] - v0[0][1]; + oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); + + info.v0 = v0; + info.v1 = v1; + info.v2 = v2; info.frontfacing = frontfacing; + info.x0_center = v0[0][0] - setup->pixel_offset; + info.y0_center = v0[0][1] - setup->pixel_offset; + info.dx01_ooa = dx01 * oneoverarea; + info.dx20_ooa = dx20 * oneoverarea; + info.dy01_ooa = dy01 * oneoverarea; + info.dy20_ooa = dy20 * oneoverarea; /* Setup parameter interpolants: */ - setup_tri_coefficients( setup, tri, &info ); + lp_setup_tri_coef( setup, &tri->inputs, &info ); tri->inputs.facing = frontfacing ? 1.0F : -1.0F; tri->inputs.state = setup->fs.stored; @@ -580,7 +328,7 @@ do_triangle_ccw(struct lp_setup_context *setup, /* half-edge constants, will be interated over the whole render * target. */ - plane->c = plane->dcdx * info.x[i] - plane->dcdy * info.y[i]; + plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; /* correct for top-left vs. bottom-left fill convention. * -- cgit v1.2.3 From c95ca04b63eadb61add249531c1041aaf5b525d6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 27 Aug 2010 11:18:11 +0100 Subject: llvmpipe: add lp_setup_coef to makefile --- src/gallium/drivers/llvmpipe/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 2892b62920..dec874623e 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -27,6 +27,8 @@ C_SOURCES = \ lp_scene_queue.c \ lp_screen.c \ lp_setup.c \ + lp_setup_coef.c \ + lp_setup_coef_intrin.c \ lp_setup_line.c \ lp_setup_point.c \ lp_setup_tri.c \ -- cgit v1.2.3 From 5286dd701640976ffc328e8e85fb3830746851a1 Mon Sep 17 00:00:00 2001 From: Hui Qi Tay Date: Mon, 19 Jul 2010 15:23:09 +0100 Subject: llvmpipe: native rasterization for lines Rasterize lines directly by treating them as 4-sided polygons. Still need to check the exact pixel rasteration. --- src/gallium/drivers/llvmpipe/lp_context.c | 2 +- src/gallium/drivers/llvmpipe/lp_rast.h | 4 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 + src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 2 +- src/gallium/drivers/llvmpipe/lp_setup.c | 7 + src/gallium/drivers/llvmpipe/lp_setup.h | 4 + src/gallium/drivers/llvmpipe/lp_setup_context.h | 38 ++ src/gallium/drivers/llvmpipe/lp_setup_line.c | 617 ++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_setup_point.c | 4 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 23 +- src/gallium/drivers/llvmpipe/lp_state_rasterizer.c | 2 + 11 files changed, 685 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 086a2d5898..a6873abbee 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -157,7 +157,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) /* convert points and lines into triangles: */ draw_wide_point_threshold(llvmpipe->draw, 0.0); - draw_wide_line_threshold(llvmpipe->draw, 0.0); + draw_wide_line_threshold(llvmpipe->draw, 10000.0); #if USE_DRAW_STAGE_PSTIPPLE /* Do polygon stipple w/ texture map + frag prog? */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 102e902d02..b4564ef33b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -120,7 +120,7 @@ struct lp_rast_triangle { float v[3][2]; #endif - struct lp_rast_plane plane[7]; /* NOTE: may allocate fewer planes */ + struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */ }; @@ -236,6 +236,8 @@ void lp_rast_triangle_6( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); void lp_rast_triangle_7( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void lp_rast_triangle_8( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); void lp_rast_shade_tile( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 673f67386b..8d729c7481 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -157,6 +157,10 @@ build_mask_linear(int c, int dcdx, int dcdy) #define NR_PLANES 7 #include "lp_rast_tri_tmp.h" +#define TAG(x) x##_8 +#define NR_PLANES 8 +#include "lp_rast_tri_tmp.h" + /* Special case for 3 plane triangle which is contained entirely * within a 16x16 block. diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 70a4b64c8d..0def5f7243 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -32,7 +32,7 @@ /** - * Prototype for a 7 plane rasterizer function. Will codegenerate + * Prototype for a 8 plane rasterizer function. Will codegenerate * several of these. * * XXX: Varients for more/fewer planes. diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 9aa6c4bf38..4c8275665e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -485,7 +485,14 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup, } } +void +lp_setup_set_line_state( struct lp_setup_context *setup, + float line_width) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + setup->line_width = line_width; +} void lp_setup_set_fs_inputs( struct lp_setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index a41bb8863b..693550b8c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -100,6 +100,10 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup, boolean scissor, boolean gl_rasterization_rules ); +void +lp_setup_set_line_state( struct lp_setup_context *setup, + float line_width); + void lp_setup_set_fs_inputs( struct lp_setup_context *setup, const struct lp_shader_input *interp, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 1a147e0353..a4838d59a5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -91,6 +91,7 @@ struct lp_setup_context boolean scissor_test; unsigned cullmode; float pixel_offset; + float line_width; struct pipe_framebuffer_state fb; struct u_rect framebuffer; @@ -170,5 +171,42 @@ lp_setup_print_vertex(struct lp_setup_context *setup, const char *name, const float (*v)[4]); +/** shared code between lp_setup_line and lp_setup_tri */ +extern lp_rast_cmd lp_rast_tri_tab[]; + +void +do_triangle_ccw_whole_tile(struct lp_setup_context *setup, + struct lp_scene *scene, + struct lp_rast_triangle *tri, + int x, int y, + boolean opaque, + int *is_blit); + + +void +lp_setup_tri_coefficients( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + boolean frontface); + +struct lp_rast_triangle * +lp_setup_alloc_triangle(struct lp_scene *scene, + unsigned nr_inputs, + unsigned nr_planes, + unsigned *tri_size); + +void +lp_setup_fragcoord_coef(struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned usage_mask); + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index be41c44e6f..930207ae33 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -29,19 +29,624 @@ * Binning code for lines */ +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" #include "lp_setup_context.h" +#include "lp_rast.h" +#include "lp_state_fs.h" -static void line_nop( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4] ) +#define NUM_CHANNELS 4 + + +static const int step_scissor_minx[16] = { + 0, 1, 0, 1, + 2, 3, 2, 3, + 0, 1, 0, 1, + 2, 3, 2, 3 +}; + +static const int step_scissor_maxx[16] = { + 0, -1, 0, -1, + -2, -3, -2, -3, + 0, -1, 0, -1, + -2, -3, -2, -3 +}; + +static const int step_scissor_miny[16] = { + 0, 0, 1, 1, + 0, 0, 1, 1, + 2, 2, 3, 3, + 2, 2, 3, 3 +}; + +static const int step_scissor_maxy[16] = { + 0, 0, -1, -1, + 0, 0, -1, -1, + -2, -2, -3, -3, + -2, -2, -3, -3 +}; + + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + */ +static void constant_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + unsigned slot, + const float value, + unsigned i ) +{ + tri->inputs.a0[slot][i] = value; + tri->inputs.dadx[slot][i] = 0.0f; + tri->inputs.dady[slot][i] = 0.0f; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void linear_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + unsigned vert_attr, + unsigned i) +{ + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + + float da21 = a1 - a2; + float dadx = da21 * tri->dx * oneoverarea; + float dady = da21 * tri->dy * oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + + tri->inputs.a0[slot][i] = (a1 - + (dadx * (v1[0][0] - setup->pixel_offset) + + dady * (v1[0][1] - setup->pixel_offset))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + unsigned vert_attr, + unsigned i) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + + float da21 = a1 - a2; + float dadx = da21 * tri->dx * oneoverarea; + float dady = da21 * tri->dy * oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + + tri->inputs.a0[slot][i] = (a1 - + (dadx * (v1[0][0] - setup->pixel_offset) + + dady * (v1[0][1] - setup->pixel_offset))); +} + +/** + * Compute the tri->coef[] array dadx, dady, a0 values. + */ +static void setup_line_coefficients( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + const float (*v1)[4], + const float (*v2)[4]) { + unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; + unsigned slot; + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; + unsigned usage_mask = setup->fs.input[slot].usage_mask; + unsigned i; + + switch (setup->fs.input[slot].interp) { + case LP_INTERP_CONSTANT: + if (setup->flatshade_first) { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, tri, slot+1, v1[vert_attr][i], i); + } + else { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, tri, slot+1, v2[vert_attr][i], i); + } + break; + + case LP_INTERP_LINEAR: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i); + break; + + case LP_INTERP_PERSPECTIVE: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i); + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0, so all need to ensure that the usage mask is covers all + * usages. + */ + fragcoord_usage_mask |= usage_mask; + break; + + default: + assert(0); + } + } + + /* The internal position input is in slot zero: + */ + lp_setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v2, + fragcoord_usage_mask); } -void -lp_setup_choose_line( struct lp_setup_context *setup ) + +static INLINE int subpixel_snap( float a ) { - setup->line = line_nop; + return util_iround(FIXED_ONE * a); +} + + +/** + * Print line vertex attribs (for debug). + */ +static void +print_line(struct lp_setup_context *setup, + const float (*v1)[4], + const float (*v2)[4]) +{ + uint i; + + debug_printf("llvmpipe line\n"); + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + debug_printf(" v1[%d]: %f %f %f %f\n", i, + v1[i][0], v1[i][1], v1[i][2], v1[i][3]); + } + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + debug_printf(" v2[%d]: %f %f %f %f\n", i, + v2[i][0], v2[i][1], v2[i][2], v2[i][3]); + } +} + + +static void +lp_setup_line( struct lp_setup_context *setup, + const float (*v1)[4], + const float (*v2)[4]) +{ + struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_rast_triangle *line; + float oneoverarea; + float half_width = setup->line_width / 2; + int minx, maxx, miny, maxy; + int ix0, ix1, iy0, iy1; + unsigned tri_bytes; + int x[4]; + int y[4]; + int i; + int nr_planes = 4; + boolean opaque; + + if (0) + print_line(setup, v1, v2); + + if (setup->scissor_test) { + nr_planes = 8; + } + else { + nr_planes = 4; + } + + line = lp_setup_alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &tri_bytes); + if (!line) + return; + +#ifndef DEBUG + line->v[0][0] = v1[0][0]; + line->v[1][0] = v2[0][0]; + line->v[0][1] = v1[0][1]; + line->v[1][1] = v2[0][1]; +#endif + + /* pre-calculation(based on given vertices) to determine if line is + * more horizontal or more vertical + */ + line->dx = v1[0][0] - v2[0][0]; + line->dy = v1[0][1] - v2[0][1]; + + /* x-major line */ + if (fabsf(line->dx) >= fabsf(line->dy)) { + if (line->dx < 0) { + /* if v2 is to the right of v1, swap pointers */ + const float (*temp)[4] = v1; + v1 = v2; + v2 = temp; + line->dx = -line->dx; + line->dy = -line->dy; + } + + /* x/y positions in fixed point */ + x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset); + x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset); + x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + x[3] = subpixel_snap(v1[0][0] - setup->pixel_offset); + + y[0] = subpixel_snap(v1[0][1] - half_width - setup->pixel_offset); + y[1] = subpixel_snap(v2[0][1] - half_width - setup->pixel_offset); + y[2] = subpixel_snap(v2[0][1] + half_width - setup->pixel_offset); + y[3] = subpixel_snap(v1[0][1] + half_width - setup->pixel_offset); + } + else{ + /* y-major line */ + if (line->dy > 0) { + /* if v2 is on top of v1, swap pointers */ + const float (*temp)[4] = v1; + v1 = v2; + v2 = temp; + line->dx = -line->dx; + line->dy = -line->dy; + } + + x[0] = subpixel_snap(v1[0][0] - half_width - setup->pixel_offset); + x[1] = subpixel_snap(v2[0][0] - half_width - setup->pixel_offset); + x[2] = subpixel_snap(v2[0][0] + half_width - setup->pixel_offset); + x[3] = subpixel_snap(v1[0][0] + half_width - setup->pixel_offset); + + y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset); + y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset); + y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); + y[3] = subpixel_snap(v1[0][1] - setup->pixel_offset); + } + + /* calculate the deltas */ + line->plane[0].dcdy = x[0] - x[1]; + line->plane[1].dcdy = x[1] - x[2]; + line->plane[2].dcdy = x[2] - x[3]; + line->plane[3].dcdy = x[3] - x[0]; + + line->plane[0].dcdx = y[0] - y[1]; + line->plane[1].dcdx = y[1] - y[2]; + line->plane[2].dcdx = y[2] - y[3]; + line->plane[3].dcdx = y[3] - y[0]; + + + LP_COUNT(nr_tris); + + + /* Bounding rectangle (in pixels) */ + { + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->pixel_offset != 0) ? 1 : 0; + + minx = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; + maxx = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; + miny = (MIN4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + maxy = (MAX4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + } + + if (setup->scissor_test) { + minx = MAX2(minx, setup->scissor.current.minx); + maxx = MIN2(maxx, setup->scissor.current.maxx); + miny = MAX2(miny, setup->scissor.current.miny); + maxy = MIN2(maxy, setup->scissor.current.maxy); + } + else { + minx = MAX2(minx, 0); + miny = MAX2(miny, 0); + maxx = MIN2(maxx, scene->fb.width); + maxy = MIN2(maxy, scene->fb.height); + } + + + if (miny >= maxy || minx >= maxx) { + lp_scene_putback_data( scene, tri_bytes ); + return; + } + + oneoverarea = 1.0f / (line->dx * line->dx + line->dy * line->dy); + + /* Setup parameter interpolants: + */ + setup_line_coefficients( setup, line, oneoverarea, v1, v2); + + for (i = 0; i < 4; i++) { + struct lp_rast_plane *plane = &line->plane[i]; + + /* half-edge constants, will be interated over the whole render + * target. + */ + plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; + + + /* correct for top-left vs. bottom-left fill convention. + * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. + */ + if (plane->dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane->c++; + } + else if (plane->dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane->dcdy > 0) plane->c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane->dcdy < 0) plane->c++; + } + } + + plane->dcdx *= FIXED_ONE; + plane->dcdy *= FIXED_ONE; + + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane->eo = 0; + if (plane->dcdx < 0) plane->eo -= plane->dcdx; + if (plane->dcdy > 0) plane->eo += plane->dcdy; + + /* Calculate trivial accept offsets from the above. + */ + plane->ei = plane->dcdy - plane->dcdx - plane->eo; + + plane->step = line->step[i]; + + /* Fill in the inputs.step[][] arrays. + * We've manually unrolled some loops here. + */ +#define SETUP_STEP(j, x, y) \ + line->step[i][j] = y * plane->dcdy - x * plane->dcdx + + SETUP_STEP(0, 0, 0); + SETUP_STEP(1, 1, 0); + SETUP_STEP(2, 0, 1); + SETUP_STEP(3, 1, 1); + + SETUP_STEP(4, 2, 0); + SETUP_STEP(5, 3, 0); + SETUP_STEP(6, 2, 1); + SETUP_STEP(7, 3, 1); + + SETUP_STEP(8, 0, 2); + SETUP_STEP(9, 1, 2); + SETUP_STEP(10, 0, 3); + SETUP_STEP(11, 1, 3); + + SETUP_STEP(12, 2, 2); + SETUP_STEP(13, 3, 2); + SETUP_STEP(14, 2, 3); + SETUP_STEP(15, 3, 3); +#undef STEP + } + + + /* + * When rasterizing scissored tris, use the intersection of the + * triangle bounding box and the scissor rect to generate the + * scissor planes. + * + * This permits us to cut off the triangle "tails" that are present + * in the intermediate recursive levels caused when two of the + * triangles edges don't diverge quickly enough to trivially reject + * exterior blocks from the triangle. + * + * It's not really clear if it's worth worrying about these tails, + * but since we generate the planes for each scissored tri, it's + * free to trim them in this case. + * + * Note that otherwise, the scissor planes only vary in 'C' value, + * and even then only on state-changes. Could alternatively store + * these planes elsewhere. + */ + if (nr_planes == 8) { + line->plane[4].step = step_scissor_maxx; + line->plane[4].dcdx = 1; + line->plane[4].dcdy = 0; + line->plane[4].c = maxx; + line->plane[4].ei = -1; + line->plane[4].eo = 0; + + line->plane[5].step = step_scissor_miny; + line->plane[5].dcdx = 0; + line->plane[5].dcdy = 1; + line->plane[5].c = 1-miny; + line->plane[5].ei = 0; + line->plane[5].eo = 1; + + line->plane[6].step = step_scissor_maxy; + line->plane[6].dcdx = 0; + line->plane[6].dcdy = -1; + line->plane[6].c = maxy; + line->plane[6].ei = -1; + line->plane[6].eo = 0; + + line->plane[7].step = step_scissor_minx; + line->plane[7].dcdx = -1; + line->plane[7].dcdy = 0; + line->plane[7].c = 1-minx; + line->plane[7].ei = 0; + line->plane[7].eo = 1; + } + + + /* + * All fields of 'tri' are now set. The remaining code here is + * concerned with binning. + */ + + /* Convert to tile coordinates, and inclusive ranges: + */ + ix0 = minx / TILE_SIZE; + iy0 = miny / TILE_SIZE; + ix1 = (maxx-1) / TILE_SIZE; + iy1 = (maxy-1) / TILE_SIZE; + + /* + * Clamp to framebuffer size + */ + assert(ix0 == MAX2(ix0, 0)); + assert(iy0 == MAX2(iy0, 0)); + assert(ix1 == MIN2(ix1, scene->tiles_x - 1)); + assert(iy1 == MIN2(iy1, scene->tiles_y - 1)); + + /* Determine which tile(s) intersect the triangle's bounding box + */ + if (iy0 == iy1 && ix0 == ix1) + { + /* Triangle is contained in a single tile: + */ + lp_scene_bin_command( scene, ix0, iy0, + lp_rast_tri_tab[nr_planes], + lp_rast_arg_triangle(line, (1<plane[i].c + + line->plane[i].dcdy * iy0 * TILE_SIZE - + line->plane[i].dcdx * ix0 * TILE_SIZE); + + ei[i] = line->plane[i].ei << TILE_ORDER; + eo[i] = line->plane[i].eo << TILE_ORDER; + xstep[i] = -(line->plane[i].dcdx << TILE_ORDER); + ystep[i] = line->plane[i].dcdy << TILE_ORDER; + } + + + + /* Test tile-sized blocks against the triangle. + * Discard blocks fully outside the tri. If the block is fully + * contained inside the tri, bin an lp_rast_shade_tile command. + * Else, bin a lp_rast_triangle command. + */ + for (y = iy0; y <= iy1; y++) + { + boolean in = FALSE; /* are we inside the triangle? */ + int cx[8]; + + for (i = 0; i < nr_planes; i++) + cx[i] = c[i]; + + for (x = ix0; x <= ix1; x++) + { + int out = 0; + int partial = 0; + + for (i = 0; i < nr_planes; i++) { + int planeout = cx[i] + eo[i]; + int planepartial = cx[i] + ei[i] - 1; + out |= (planeout >> 31); + partial |= (planepartial >> 31) & (1<line = lp_setup_line; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 9f69e6c5ce..709c3e2fd2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -31,7 +31,7 @@ #include "lp_setup_context.h" -static void point_nop( struct lp_setup_context *setup, +static void lp_setup_point( struct lp_setup_context *setup, const float (*v0)[4] ) { } @@ -40,7 +40,7 @@ static void point_nop( struct lp_setup_context *setup, void lp_setup_choose_point( struct lp_setup_context *setup ) { - setup->point = point_nop; + setup->point = lp_setup_point; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index d86fb8652a..212bb3ab90 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -68,11 +68,11 @@ fixed_to_float(int a) * \param nr_inputs number of fragment shader inputs * \return pointer to triangle space */ -static INLINE struct lp_rast_triangle * -alloc_triangle(struct lp_scene *scene, - unsigned nr_inputs, - unsigned nr_planes, - unsigned *tri_size) +struct lp_rast_triangle * +lp_setup_alloc_triangle(struct lp_scene *scene, + unsigned nr_inputs, + unsigned nr_planes, + unsigned *tri_size) { unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); struct lp_rast_triangle *tri; @@ -160,7 +160,7 @@ lp_setup_print_triangle(struct lp_setup_context *setup, } -lp_rast_cmd lp_rast_tri_tab[8] = { +lp_rast_cmd lp_rast_tri_tab[9] = { NULL, /* should be impossible */ lp_rast_triangle_1, lp_rast_triangle_2, @@ -168,7 +168,8 @@ lp_rast_cmd lp_rast_tri_tab[8] = { lp_rast_triangle_4, lp_rast_triangle_5, lp_rast_triangle_6, - lp_rast_triangle_7 + lp_rast_triangle_7, + lp_rast_triangle_8 }; /** @@ -254,10 +255,10 @@ do_triangle_ccw(struct lp_setup_context *setup, u_rect_find_intersection(&setup->draw_region, &bbox); - tri = alloc_triangle(scene, - setup->fs.nr_inputs, - nr_planes, - &tri_bytes); + tri = lp_setup_alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &tri_bytes); if (!tri) return; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index afd3e0b21c..67b985aa24 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -73,6 +73,8 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) llvmpipe->rasterizer->gl_rasterization_rules); lp_setup_set_flatshade_first( llvmpipe->setup, llvmpipe->rasterizer->flatshade_first); + lp_setup_set_line_state( llvmpipe->setup, + llvmpipe->rasterizer->line_width); } llvmpipe->dirty |= LP_NEW_RASTERIZER; -- cgit v1.2.3 From b91553355f848f2174d53429699b116734781ad7 Mon Sep 17 00:00:00 2001 From: Hui Qi Tay Date: Wed, 4 Aug 2010 17:13:39 +0100 Subject: llvmpipe: native line rasterization with correct pixel rasterization Line rasterization that follows diamond exit rule. Can still optimize logic for start/endpoints. --- src/gallium/drivers/llvmpipe/lp_setup_line.c | 242 +++++++++++++++++++++++---- 1 file changed, 213 insertions(+), 29 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 930207ae33..1566203117 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -243,6 +243,11 @@ print_line(struct lp_setup_context *setup, } +static INLINE boolean sign(float x){ + return x >= 0; +} + + static void lp_setup_line( struct lp_setup_context *setup, const float (*v1)[4], @@ -251,7 +256,7 @@ lp_setup_line( struct lp_setup_context *setup, struct lp_scene *scene = lp_setup_get_current_scene(setup); struct lp_rast_triangle *line; float oneoverarea; - float half_width = setup->line_width / 2; + float width = MAX2(1.0, setup->line_width); int minx, maxx, miny, maxy; int ix0, ix1, iy0, iy1; unsigned tri_bytes; @@ -260,7 +265,25 @@ lp_setup_line( struct lp_setup_context *setup, int i; int nr_planes = 4; boolean opaque; - + + /* linewidth should be interpreted as integer */ + int fixed_width = subpixel_snap(round(width)); + + float xdiamond_offset=0; + float ydiamond_offset=0; + float xdiamond_offset_end=0; + float ydiamond_offset_end=0; + + float x1diff; + float y1diff; + float x2diff; + float y2diff; + + boolean draw_start; + boolean draw_end; + boolean will_draw_start; + boolean will_draw_end; + if (0) print_line(setup, v1, v2); @@ -278,21 +301,76 @@ lp_setup_line( struct lp_setup_context *setup, if (!line) return; -#ifndef DEBUG +#ifdef DEBUG line->v[0][0] = v1[0][0]; line->v[1][0] = v2[0][0]; line->v[0][1] = v1[0][1]; line->v[1][1] = v2[0][1]; #endif - /* pre-calculation(based on given vertices) to determine if line is - * more horizontal or more vertical - */ line->dx = v1[0][0] - v2[0][0]; line->dy = v1[0][1] - v2[0][1]; - - /* x-major line */ + +/* X-MAJOR LINE */ if (fabsf(line->dx) >= fabsf(line->dy)) { + + x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; + y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; + x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; + y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; + + if (y2diff==-0.5 && line->dy<0){ + y2diff = 0.5; + } + + /* + * Diamond exit rule test for starting point + */ + if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { + draw_start = TRUE; + } + else if (sign(x1diff) == sign(-line->dx)) { + draw_start = FALSE; + } + else if (sign(-y1diff) != sign(line->dy)) { + draw_start = TRUE; + } + else { + /* do intersection test */ + float yintersect = v1[0][1] + x1diff*((float)line->dy/(float)line->dx); + if (yintersect < ceil(v1[0][1]) && yintersect > floor(v1[0][1])){ + draw_start = TRUE; + } + else draw_start = FALSE; + } + + + /* + * Diamond exit rule test for ending point + */ + if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { + draw_end = FALSE; + } + else if (sign(x2diff) != sign(-line->dx)) { + draw_end = FALSE; + } + else if (sign(-y2diff) == sign(line->dy)) { + draw_end = TRUE; + } + else { + /* do intersection test */ + float yintersect = v2[0][1] + x2diff*((float)line->dy/(float)line->dx); + if (yintersect < ceil(v2[0][1]) && yintersect > floor(v2[0][1])){ + draw_end = TRUE; + } + else draw_end = FALSE; + } + + /* Are we already drawing start/end? + */ + will_draw_start = sign(-x1diff) != sign(line->dx); + will_draw_end = (sign(x2diff) == sign(-line->dx)) || x2diff==0; + if (line->dx < 0) { /* if v2 is to the right of v1, swap pointers */ const float (*temp)[4] = v1; @@ -300,21 +378,102 @@ lp_setup_line( struct lp_setup_context *setup, v2 = temp; line->dx = -line->dx; line->dy = -line->dy; + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + xdiamond_offset_end = - x1diff - 0.5; + ydiamond_offset_end = xdiamond_offset_end*(float)line->dy/(float)line->dx; + + } + if (will_draw_end != draw_end) { + xdiamond_offset = - x2diff - 0.5; + ydiamond_offset = xdiamond_offset*(float)line->dy/(float)line->dx; + } + } - + else{ + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + xdiamond_offset = - x1diff + 0.5; + ydiamond_offset = xdiamond_offset*(float)line->dy/(float)line->dx; + } + if (will_draw_end != draw_end) { + xdiamond_offset_end = - x2diff + 0.5; + ydiamond_offset_end = xdiamond_offset_end*(float)line->dy/(float)line->dx; + } + } + /* x/y positions in fixed point */ - x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset); - x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset); - x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); - x[3] = subpixel_snap(v1[0][0] - setup->pixel_offset); + x[0] = subpixel_snap(v1[0][0] + xdiamond_offset - setup->pixel_offset); + x[1] = subpixel_snap(v2[0][0] + xdiamond_offset_end - setup->pixel_offset); + x[2] = subpixel_snap(v2[0][0] + xdiamond_offset_end - setup->pixel_offset); + x[3] = subpixel_snap(v1[0][0] + xdiamond_offset - setup->pixel_offset); + + y[0] = subpixel_snap(v1[0][1] + ydiamond_offset - setup->pixel_offset) - fixed_width/2; + y[1] = subpixel_snap(v2[0][1] + ydiamond_offset_end - setup->pixel_offset) - fixed_width/2; + y[2] = subpixel_snap(v2[0][1] + ydiamond_offset_end - setup->pixel_offset) + fixed_width/2; + y[3] = subpixel_snap(v1[0][1] + ydiamond_offset - setup->pixel_offset) + fixed_width/2; - y[0] = subpixel_snap(v1[0][1] - half_width - setup->pixel_offset); - y[1] = subpixel_snap(v2[0][1] - half_width - setup->pixel_offset); - y[2] = subpixel_snap(v2[0][1] + half_width - setup->pixel_offset); - y[3] = subpixel_snap(v1[0][1] + half_width - setup->pixel_offset); } + + else{ - /* y-major line */ +/* Y-MAJOR LINE */ + x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; + y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; + x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; + y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; + + if (x2diff==-0.5 && line->dx<0){ + x2diff = 0.5; + } + +/* + * Diamond exit rule test for starting point + */ + if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { + draw_start = TRUE; + } + else if (sign(-y1diff) == sign(line->dy)) { + draw_start = FALSE; + } + else if (sign(x1diff) != sign(-line->dx)) { + draw_start = TRUE; + } + else { + /* do intersection test */ + float xintersect = v1[0][0] + y1diff*((float)line->dx/(float)line->dy); + if (xintersect < ceil(v1[0][0]) && xintersect > floor(v1[0][0])){ + draw_start = TRUE; + } + else draw_start = FALSE; + } + + /* + * Diamond exit rule test for ending point + */ + if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { + draw_end = FALSE; + } + else if (sign(-y2diff) != sign(line->dy) ) { + draw_end = FALSE; + } + else if (sign(x2diff) == sign(-line->dx) ) { + draw_end = TRUE; + } + else { + /* do intersection test */ + float xintersect = v2[0][0] + y2diff*((float)line->dx/(float)line->dy); + if (xintersect < ceil(v2[0][0]) && xintersect > floor(v2[0][0])){ + draw_end = TRUE; + } + else draw_end = FALSE; + } + + /* Are we already drawing start/end? + */ + will_draw_start = sign(y1diff) == sign(line->dy); + will_draw_end = (sign(-y2diff) == sign(line->dy)) || y2diff==0; + if (line->dy > 0) { /* if v2 is on top of v1, swap pointers */ const float (*temp)[4] = v1; @@ -322,19 +481,44 @@ lp_setup_line( struct lp_setup_context *setup, v2 = temp; line->dx = -line->dx; line->dy = -line->dy; + + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + ydiamond_offset_end = - y1diff + 0.5; + xdiamond_offset_end = ydiamond_offset_end*(float)line->dx/(float)line->dy; + } + if (will_draw_end != draw_end) { + ydiamond_offset = - y2diff + 0.5; + xdiamond_offset = ydiamond_offset*(float)line->dx/(float)line->dy; + } + } + + else{ + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + ydiamond_offset = - y1diff - 0.5; + xdiamond_offset = ydiamond_offset*(float)line->dx/(float)line->dy; + + } + if (will_draw_end != draw_end) { + ydiamond_offset_end = - y2diff - 0.5; + xdiamond_offset_end = ydiamond_offset_end*(float)line->dx/(float)line->dy; + } } - x[0] = subpixel_snap(v1[0][0] - half_width - setup->pixel_offset); - x[1] = subpixel_snap(v2[0][0] - half_width - setup->pixel_offset); - x[2] = subpixel_snap(v2[0][0] + half_width - setup->pixel_offset); - x[3] = subpixel_snap(v1[0][0] + half_width - setup->pixel_offset); + /* x/y positions in fixed point */ + x[0] = subpixel_snap(v1[0][0] + xdiamond_offset - setup->pixel_offset) - fixed_width/2; + x[1] = subpixel_snap(v2[0][0] + xdiamond_offset_end - setup->pixel_offset) - fixed_width/2; + x[2] = subpixel_snap(v2[0][0] + xdiamond_offset_end - setup->pixel_offset) + fixed_width/2; + x[3] = subpixel_snap(v1[0][0] + xdiamond_offset - setup->pixel_offset) + fixed_width/2; - y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset); - y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset); - y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); - y[3] = subpixel_snap(v1[0][1] - setup->pixel_offset); + y[0] = subpixel_snap(v1[0][1] + ydiamond_offset - setup->pixel_offset); + y[1] = subpixel_snap(v2[0][1] + ydiamond_offset_end - setup->pixel_offset); + y[2] = subpixel_snap(v2[0][1] + ydiamond_offset_end - setup->pixel_offset); + y[3] = subpixel_snap(v1[0][1] + ydiamond_offset - setup->pixel_offset); } + /* calculate the deltas */ line->plane[0].dcdy = x[0] - x[1]; line->plane[1].dcdy = x[1] - x[2]; @@ -361,8 +545,8 @@ lp_setup_line( struct lp_setup_context *setup, minx = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; maxx = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; - miny = (MIN4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - maxy = (MAX4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + miny = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + maxy = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; } if (setup->scissor_test) { @@ -526,7 +710,7 @@ lp_setup_line( struct lp_setup_context *setup, /* - * All fields of 'tri' are now set. The remaining code here is + * All fields of 'line' are now set. The remaining code here is * concerned with binning. */ -- cgit v1.2.3 From 3783053fa59fceef59fe0356af5c8dbc095e9adf Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 26 Aug 2010 20:09:22 +0100 Subject: llvmpipe: update line rasterization code to current master --- src/gallium/drivers/llvmpipe/lp_rast.h | 5 + src/gallium/drivers/llvmpipe/lp_setup_context.h | 32 +- src/gallium/drivers/llvmpipe/lp_setup_line.c | 531 +++++++++--------------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 33 +- 4 files changed, 227 insertions(+), 374 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index b4564ef33b..37b4fdc31e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -116,6 +116,11 @@ struct lp_rast_triangle { /* inputs for the shader */ struct lp_rast_shader_inputs inputs; + /* XXX: temporarily use these additional fields for line + * coefficient setup + */ + float dx, dy; + #ifdef DEBUG float v[3][2]; #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index a4838d59a5..97919c09d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -171,26 +171,6 @@ lp_setup_print_vertex(struct lp_setup_context *setup, const char *name, const float (*v)[4]); -/** shared code between lp_setup_line and lp_setup_tri */ -extern lp_rast_cmd lp_rast_tri_tab[]; - -void -do_triangle_ccw_whole_tile(struct lp_setup_context *setup, - struct lp_scene *scene, - struct lp_rast_triangle *tri, - int x, int y, - boolean opaque, - int *is_blit); - - -void -lp_setup_tri_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], - boolean frontface); struct lp_rast_triangle * lp_setup_alloc_triangle(struct lp_scene *scene, @@ -199,14 +179,10 @@ lp_setup_alloc_triangle(struct lp_scene *scene, unsigned *tri_size); void -lp_setup_fragcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, - unsigned slot, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], - unsigned usage_mask); +lp_setup_bin_triangle( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + const struct u_rect *bbox, + int nr_planes ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 1566203117..ebf000dd19 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -39,35 +39,6 @@ #define NUM_CHANNELS 4 -static const int step_scissor_minx[16] = { - 0, 1, 0, 1, - 2, 3, 2, 3, - 0, 1, 0, 1, - 2, 3, 2, 3 -}; - -static const int step_scissor_maxx[16] = { - 0, -1, 0, -1, - -2, -3, -2, -3, - 0, -1, 0, -1, - -2, -3, -2, -3 -}; - -static const int step_scissor_miny[16] = { - 0, 0, 1, 1, - 0, 0, 1, 1, - 2, 2, 3, 3, - 2, 2, 3, 3 -}; - -static const int step_scissor_maxy[16] = { - 0, 0, -1, -1, - 0, 0, -1, -1, - -2, -2, -3, -3, - -2, -2, -3, -3 -}; - - /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). @@ -147,6 +118,40 @@ static void perspective_coef( struct lp_setup_context *setup, dady * (v1[0][1] - setup->pixel_offset))); } +static void +setup_fragcoord_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + unsigned usage_mask) +{ + /*X*/ + if (usage_mask & TGSI_WRITEMASK_X) { + tri->inputs.a0[slot][0] = 0.0; + tri->inputs.dadx[slot][0] = 1.0; + tri->inputs.dady[slot][0] = 0.0; + } + + /*Y*/ + if (usage_mask & TGSI_WRITEMASK_Y) { + tri->inputs.a0[slot][1] = 0.0; + tri->inputs.dadx[slot][1] = 0.0; + tri->inputs.dady[slot][1] = 1.0; + } + + /*Z*/ + if (usage_mask & TGSI_WRITEMASK_Z) { + linear_coef(setup, tri, oneoverarea, slot, v1, v2, 0, 2); + } + + /*W*/ + if (usage_mask & TGSI_WRITEMASK_W) { + linear_coef(setup, tri, oneoverarea, slot, v1, v2, 0, 3); + } +} + /** * Compute the tri->coef[] array dadx, dady, a0 values. */ @@ -209,8 +214,8 @@ static void setup_line_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - lp_setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v2, - fragcoord_usage_mask); + setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, + fragcoord_usage_mask); } @@ -248,6 +253,15 @@ static INLINE boolean sign(float x){ } +/* Used on positive floats only: + */ +static INLINE float fracf(float f) +{ + return f - floorf(f); +} + + + static void lp_setup_line( struct lp_setup_context *setup, const float (*v1)[4], @@ -257,27 +271,26 @@ lp_setup_line( struct lp_setup_context *setup, struct lp_rast_triangle *line; float oneoverarea; float width = MAX2(1.0, setup->line_width); - int minx, maxx, miny, maxy; - int ix0, ix1, iy0, iy1; + struct u_rect bbox; unsigned tri_bytes; int x[4]; int y[4]; int i; int nr_planes = 4; - boolean opaque; /* linewidth should be interpreted as integer */ int fixed_width = subpixel_snap(round(width)); - float xdiamond_offset=0; - float ydiamond_offset=0; - float xdiamond_offset_end=0; - float ydiamond_offset_end=0; + float x_offset=0; + float y_offset=0; + float x_offset_end=0; + float y_offset_end=0; float x1diff; float y1diff; float x2diff; float y2diff; + float dx, dy; boolean draw_start; boolean draw_end; @@ -294,32 +307,20 @@ lp_setup_line( struct lp_setup_context *setup, nr_planes = 4; } - line = lp_setup_alloc_triangle(scene, - setup->fs.nr_inputs, - nr_planes, - &tri_bytes); - if (!line) - return; - -#ifdef DEBUG - line->v[0][0] = v1[0][0]; - line->v[1][0] = v2[0][0]; - line->v[0][1] = v1[0][1]; - line->v[1][1] = v2[0][1]; -#endif - line->dx = v1[0][0] - v2[0][0]; - line->dy = v1[0][1] - v2[0][1]; + dx = v1[0][0] - v2[0][0]; + dy = v1[0][1] - v2[0][1]; -/* X-MAJOR LINE */ - if (fabsf(line->dx) >= fabsf(line->dy)) { + /* X-MAJOR LINE */ + if (fabsf(dx) >= fabsf(dy)) { + float dydx = dy / dx; x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; - if (y2diff==-0.5 && line->dy<0){ + if (y2diff==-0.5 && dy<0){ y2diff = 0.5; } @@ -329,19 +330,16 @@ lp_setup_line( struct lp_setup_context *setup, if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { draw_start = TRUE; } - else if (sign(x1diff) == sign(-line->dx)) { + else if (sign(x1diff) == sign(-dx)) { draw_start = FALSE; } - else if (sign(-y1diff) != sign(line->dy)) { + else if (sign(-y1diff) != sign(dy)) { draw_start = TRUE; } else { /* do intersection test */ - float yintersect = v1[0][1] + x1diff*((float)line->dy/(float)line->dx); - if (yintersect < ceil(v1[0][1]) && yintersect > floor(v1[0][1])){ - draw_start = TRUE; - } - else draw_start = FALSE; + float yintersect = fracf(v1[0][1]) + x1diff * dydx; + draw_start = (yintersect < 1.0 && yintersect > 0.0); } @@ -351,101 +349,95 @@ lp_setup_line( struct lp_setup_context *setup, if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { draw_end = FALSE; } - else if (sign(x2diff) != sign(-line->dx)) { + else if (sign(x2diff) != sign(-dx)) { draw_end = FALSE; } - else if (sign(-y2diff) == sign(line->dy)) { + else if (sign(-y2diff) == sign(dy)) { draw_end = TRUE; } else { /* do intersection test */ - float yintersect = v2[0][1] + x2diff*((float)line->dy/(float)line->dx); - if (yintersect < ceil(v2[0][1]) && yintersect > floor(v2[0][1])){ - draw_end = TRUE; - } - else draw_end = FALSE; + float yintersect = fracf(v2[0][1]) + x2diff * dydx; + draw_end = (yintersect < 1.0 && yintersect > 0.0); } /* Are we already drawing start/end? */ - will_draw_start = sign(-x1diff) != sign(line->dx); - will_draw_end = (sign(x2diff) == sign(-line->dx)) || x2diff==0; + will_draw_start = sign(-x1diff) != sign(dx); + will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0; - if (line->dx < 0) { + if (dx < 0) { /* if v2 is to the right of v1, swap pointers */ const float (*temp)[4] = v1; v1 = v2; v2 = temp; - line->dx = -line->dx; - line->dy = -line->dy; + dx = -dx; + dy = -dy; /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { - xdiamond_offset_end = - x1diff - 0.5; - ydiamond_offset_end = xdiamond_offset_end*(float)line->dy/(float)line->dx; + x_offset_end = - x1diff - 0.5; + y_offset_end = x_offset_end * dydx; } if (will_draw_end != draw_end) { - xdiamond_offset = - x2diff - 0.5; - ydiamond_offset = xdiamond_offset*(float)line->dy/(float)line->dx; + x_offset = - x2diff - 0.5; + y_offset = x_offset * dydx; } } else{ /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { - xdiamond_offset = - x1diff + 0.5; - ydiamond_offset = xdiamond_offset*(float)line->dy/(float)line->dx; + x_offset = - x1diff + 0.5; + y_offset = x_offset * dydx; } if (will_draw_end != draw_end) { - xdiamond_offset_end = - x2diff + 0.5; - ydiamond_offset_end = xdiamond_offset_end*(float)line->dy/(float)line->dx; + x_offset_end = - x2diff + 0.5; + y_offset_end = x_offset_end * dydx; } } /* x/y positions in fixed point */ - x[0] = subpixel_snap(v1[0][0] + xdiamond_offset - setup->pixel_offset); - x[1] = subpixel_snap(v2[0][0] + xdiamond_offset_end - setup->pixel_offset); - x[2] = subpixel_snap(v2[0][0] + xdiamond_offset_end - setup->pixel_offset); - x[3] = subpixel_snap(v1[0][0] + xdiamond_offset - setup->pixel_offset); + x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); + x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); + x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); + x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); - y[0] = subpixel_snap(v1[0][1] + ydiamond_offset - setup->pixel_offset) - fixed_width/2; - y[1] = subpixel_snap(v2[0][1] + ydiamond_offset_end - setup->pixel_offset) - fixed_width/2; - y[2] = subpixel_snap(v2[0][1] + ydiamond_offset_end - setup->pixel_offset) + fixed_width/2; - y[3] = subpixel_snap(v1[0][1] + ydiamond_offset - setup->pixel_offset) + fixed_width/2; + y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) - fixed_width/2; + y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2; + y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2; + y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) + fixed_width/2; } + else { + const float dxdy = dx / dy; - - else{ -/* Y-MAJOR LINE */ + /* Y-MAJOR LINE */ x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; - if (x2diff==-0.5 && line->dx<0){ + if (x2diff==-0.5 && dx<0) { x2diff = 0.5; } -/* - * Diamond exit rule test for starting point - */ + /* + * Diamond exit rule test for starting point + */ if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { draw_start = TRUE; } - else if (sign(-y1diff) == sign(line->dy)) { + else if (sign(-y1diff) == sign(dy)) { draw_start = FALSE; } - else if (sign(x1diff) != sign(-line->dx)) { + else if (sign(x1diff) != sign(-dx)) { draw_start = TRUE; } else { /* do intersection test */ - float xintersect = v1[0][0] + y1diff*((float)line->dx/(float)line->dy); - if (xintersect < ceil(v1[0][0]) && xintersect > floor(v1[0][0])){ - draw_start = TRUE; - } - else draw_start = FALSE; + float xintersect = fracf(v1[0][0]) + y1diff * dxdy; + draw_start = (xintersect < 1.0 && xintersect > 0.0); } /* @@ -454,82 +446,67 @@ lp_setup_line( struct lp_setup_context *setup, if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { draw_end = FALSE; } - else if (sign(-y2diff) != sign(line->dy) ) { + else if (sign(-y2diff) != sign(dy) ) { draw_end = FALSE; } - else if (sign(x2diff) == sign(-line->dx) ) { + else if (sign(x2diff) == sign(-dx) ) { draw_end = TRUE; } else { /* do intersection test */ - float xintersect = v2[0][0] + y2diff*((float)line->dx/(float)line->dy); - if (xintersect < ceil(v2[0][0]) && xintersect > floor(v2[0][0])){ - draw_end = TRUE; - } - else draw_end = FALSE; + float xintersect = fracf(v2[0][0]) + y2diff * dxdy; + draw_end = (xintersect < 1.0 && xintersect > 0.0); } /* Are we already drawing start/end? */ - will_draw_start = sign(y1diff) == sign(line->dy); - will_draw_end = (sign(-y2diff) == sign(line->dy)) || y2diff==0; + will_draw_start = sign(y1diff) == sign(dy); + will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0; - if (line->dy > 0) { + if (dy > 0) { /* if v2 is on top of v1, swap pointers */ const float (*temp)[4] = v1; v1 = v2; v2 = temp; - line->dx = -line->dx; - line->dy = -line->dy; + dx = -dx; + dy = -dy; /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { - ydiamond_offset_end = - y1diff + 0.5; - xdiamond_offset_end = ydiamond_offset_end*(float)line->dx/(float)line->dy; + y_offset_end = - y1diff + 0.5; + x_offset_end = y_offset_end * dxdy; } if (will_draw_end != draw_end) { - ydiamond_offset = - y2diff + 0.5; - xdiamond_offset = ydiamond_offset*(float)line->dx/(float)line->dy; + y_offset = - y2diff + 0.5; + x_offset = y_offset * dxdy; } } - - else{ + else { /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { - ydiamond_offset = - y1diff - 0.5; - xdiamond_offset = ydiamond_offset*(float)line->dx/(float)line->dy; + y_offset = - y1diff - 0.5; + x_offset = y_offset * dxdy; } if (will_draw_end != draw_end) { - ydiamond_offset_end = - y2diff - 0.5; - xdiamond_offset_end = ydiamond_offset_end*(float)line->dx/(float)line->dy; + y_offset_end = - y2diff - 0.5; + x_offset_end = y_offset_end * dxdy; } } /* x/y positions in fixed point */ - x[0] = subpixel_snap(v1[0][0] + xdiamond_offset - setup->pixel_offset) - fixed_width/2; - x[1] = subpixel_snap(v2[0][0] + xdiamond_offset_end - setup->pixel_offset) - fixed_width/2; - x[2] = subpixel_snap(v2[0][0] + xdiamond_offset_end - setup->pixel_offset) + fixed_width/2; - x[3] = subpixel_snap(v1[0][0] + xdiamond_offset - setup->pixel_offset) + fixed_width/2; + x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2; + x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2; + x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2; + x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) + fixed_width/2; - y[0] = subpixel_snap(v1[0][1] + ydiamond_offset - setup->pixel_offset); - y[1] = subpixel_snap(v2[0][1] + ydiamond_offset_end - setup->pixel_offset); - y[2] = subpixel_snap(v2[0][1] + ydiamond_offset_end - setup->pixel_offset); - y[3] = subpixel_snap(v1[0][1] + ydiamond_offset - setup->pixel_offset); + y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); + y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset); + y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset); + y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); } - /* calculate the deltas */ - line->plane[0].dcdy = x[0] - x[1]; - line->plane[1].dcdy = x[1] - x[2]; - line->plane[2].dcdy = x[2] - x[3]; - line->plane[3].dcdy = x[3] - x[0]; - - line->plane[0].dcdx = y[0] - y[1]; - line->plane[1].dcdx = y[1] - y[2]; - line->plane[2].dcdx = y[2] - y[3]; - line->plane[3].dcdx = y[3] - y[0]; - LP_COUNT(nr_tris); @@ -543,37 +520,70 @@ lp_setup_line( struct lp_setup_context *setup, */ int adj = (setup->pixel_offset != 0) ? 1 : 0; - minx = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; - maxx = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; - miny = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - maxy = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.x0 = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.x1 = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y0 = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.y1 = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + + /* Inclusive coordinates: + */ + bbox.x1--; + bbox.y1--; } - if (setup->scissor_test) { - minx = MAX2(minx, setup->scissor.current.minx); - maxx = MIN2(maxx, setup->scissor.current.maxx); - miny = MAX2(miny, setup->scissor.current.miny); - maxy = MIN2(maxy, setup->scissor.current.maxy); + if (bbox.x1 < bbox.x0 || + bbox.y1 < bbox.y0) { + if (0) debug_printf("empty bounding box\n"); + LP_COUNT(nr_culled_tris); + return; } - else { - minx = MAX2(minx, 0); - miny = MAX2(miny, 0); - maxx = MIN2(maxx, scene->fb.width); - maxy = MIN2(maxy, scene->fb.height); + + if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { + if (0) debug_printf("offscreen\n"); + LP_COUNT(nr_culled_tris); + return; } + u_rect_find_intersection(&setup->draw_region, &bbox); - if (miny >= maxy || minx >= maxx) { - lp_scene_putback_data( scene, tri_bytes ); + line = lp_setup_alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &tri_bytes); + if (!line) return; - } - oneoverarea = 1.0f / (line->dx * line->dx + line->dy * line->dy); +#ifdef DEBUG + line->v[0][0] = v1[0][0]; + line->v[1][0] = v2[0][0]; + line->v[0][1] = v1[0][1]; + line->v[1][1] = v2[0][1]; +#endif + + line->dx = dx; + line->dy = dy; + + /* calculate the deltas */ + line->plane[0].dcdy = x[0] - x[1]; + line->plane[1].dcdy = x[1] - x[2]; + line->plane[2].dcdy = x[2] - x[3]; + line->plane[3].dcdy = x[3] - x[0]; + + line->plane[0].dcdx = y[0] - y[1]; + line->plane[1].dcdx = y[1] - y[2]; + line->plane[2].dcdx = y[2] - y[3]; + line->plane[3].dcdx = y[3] - y[0]; + + + oneoverarea = 1.0f / (dx * dx + dy * dy); /* Setup parameter interpolants: */ setup_line_coefficients( setup, line, oneoverarea, v1, v2); + line->inputs.facing = 1.0F; + line->inputs.state = setup->fs.stored; + for (i = 0; i < 4; i++) { struct lp_rast_plane *plane = &line->plane[i]; @@ -628,35 +638,6 @@ lp_setup_line( struct lp_setup_context *setup, /* Calculate trivial accept offsets from the above. */ plane->ei = plane->dcdy - plane->dcdx - plane->eo; - - plane->step = line->step[i]; - - /* Fill in the inputs.step[][] arrays. - * We've manually unrolled some loops here. - */ -#define SETUP_STEP(j, x, y) \ - line->step[i][j] = y * plane->dcdy - x * plane->dcdx - - SETUP_STEP(0, 0, 0); - SETUP_STEP(1, 1, 0); - SETUP_STEP(2, 0, 1); - SETUP_STEP(3, 1, 1); - - SETUP_STEP(4, 2, 0); - SETUP_STEP(5, 3, 0); - SETUP_STEP(6, 2, 1); - SETUP_STEP(7, 3, 1); - - SETUP_STEP(8, 0, 2); - SETUP_STEP(9, 1, 2); - SETUP_STEP(10, 0, 3); - SETUP_STEP(11, 1, 3); - - SETUP_STEP(12, 2, 2); - SETUP_STEP(13, 3, 2); - SETUP_STEP(14, 2, 3); - SETUP_STEP(15, 3, 3); -#undef STEP } @@ -679,154 +660,34 @@ lp_setup_line( struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 8) { - line->plane[4].step = step_scissor_maxx; - line->plane[4].dcdx = 1; + line->plane[4].dcdx = -1; line->plane[4].dcdy = 0; - line->plane[4].c = maxx; - line->plane[4].ei = -1; - line->plane[4].eo = 0; - - line->plane[5].step = step_scissor_miny; - line->plane[5].dcdx = 0; - line->plane[5].dcdy = 1; - line->plane[5].c = 1-miny; - line->plane[5].ei = 0; - line->plane[5].eo = 1; - - line->plane[6].step = step_scissor_maxy; - line->plane[6].dcdx = 0; - line->plane[6].dcdy = -1; - line->plane[6].c = maxy; - line->plane[6].ei = -1; - line->plane[6].eo = 0; - - line->plane[7].step = step_scissor_minx; - line->plane[7].dcdx = -1; - line->plane[7].dcdy = 0; - line->plane[7].c = 1-minx; - line->plane[7].ei = 0; - line->plane[7].eo = 1; - } - + line->plane[4].c = 1-bbox.x0; + line->plane[4].ei = 0; + line->plane[4].eo = 1; - /* - * All fields of 'line' are now set. The remaining code here is - * concerned with binning. - */ + line->plane[5].dcdx = 1; + line->plane[5].dcdy = 0; + line->plane[5].c = bbox.x1+1; + line->plane[5].ei = -1; + line->plane[5].eo = 0; - /* Convert to tile coordinates, and inclusive ranges: - */ - ix0 = minx / TILE_SIZE; - iy0 = miny / TILE_SIZE; - ix1 = (maxx-1) / TILE_SIZE; - iy1 = (maxy-1) / TILE_SIZE; - - /* - * Clamp to framebuffer size - */ - assert(ix0 == MAX2(ix0, 0)); - assert(iy0 == MAX2(iy0, 0)); - assert(ix1 == MIN2(ix1, scene->tiles_x - 1)); - assert(iy1 == MIN2(iy1, scene->tiles_y - 1)); - - /* Determine which tile(s) intersect the triangle's bounding box - */ - if (iy0 == iy1 && ix0 == ix1) - { - /* Triangle is contained in a single tile: - */ - lp_scene_bin_command( scene, ix0, iy0, - lp_rast_tri_tab[nr_planes], - lp_rast_arg_triangle(line, (1<plane[6].dcdx = 0; + line->plane[6].dcdy = 1; + line->plane[6].c = 1-bbox.y0; + line->plane[6].ei = 0; + line->plane[6].eo = 1; + + line->plane[7].dcdx = 0; + line->plane[7].dcdy = -1; + line->plane[7].c = bbox.y1+1; + line->plane[7].ei = -1; + line->plane[7].eo = 0; } - else - { - int c[8]; - int ei[8]; - int eo[8]; - int xstep[8]; - int ystep[8]; - int x, y; - int is_blit = -1; /* undetermined */ - - for (i = 0; i < nr_planes; i++) { - c[i] = (line->plane[i].c + - line->plane[i].dcdy * iy0 * TILE_SIZE - - line->plane[i].dcdx * ix0 * TILE_SIZE); - - ei[i] = line->plane[i].ei << TILE_ORDER; - eo[i] = line->plane[i].eo << TILE_ORDER; - xstep[i] = -(line->plane[i].dcdx << TILE_ORDER); - ystep[i] = line->plane[i].dcdy << TILE_ORDER; - } - - - /* Test tile-sized blocks against the triangle. - * Discard blocks fully outside the tri. If the block is fully - * contained inside the tri, bin an lp_rast_shade_tile command. - * Else, bin a lp_rast_triangle command. - */ - for (y = iy0; y <= iy1; y++) - { - boolean in = FALSE; /* are we inside the triangle? */ - int cx[8]; - - for (i = 0; i < nr_planes; i++) - cx[i] = c[i]; - - for (x = ix0; x <= ix1; x++) - { - int out = 0; - int partial = 0; - - for (i = 0; i < nr_planes; i++) { - int planeout = cx[i] + eo[i]; - int planepartial = cx[i] + ei[i] - 1; - out |= (planeout >> 31); - partial |= (planepartial >> 31) & (1<fs.current.variant; struct lp_rast_triangle *tri; int x[3]; int y[3]; @@ -196,7 +194,6 @@ do_triangle_ccw(struct lp_setup_context *setup, struct lp_tri_info info; int area; struct u_rect bbox; - int ix0, ix1, iy0, iy1; unsigned tri_bytes; int i; int nr_planes = 3; @@ -423,6 +420,20 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->plane[6].eo = 0; } + lp_setup_bin_triangle( setup, tri, &bbox, nr_planes ); +} + + +void +lp_setup_bin_triangle( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + const struct u_rect *bbox, + int nr_planes ) +{ + struct lp_scene *scene = setup->scene; + struct lp_fragment_shader_variant *variant = setup->fs.current.variant; + int ix0, ix1, iy0, iy1; + int i; /* * All fields of 'tri' are now set. The remaining code here is @@ -432,10 +443,10 @@ do_triangle_ccw(struct lp_setup_context *setup, /* Convert to tile coordinates, and inclusive ranges: */ if (nr_planes == 3) { - int ix0 = bbox.x0 / 16; - int iy0 = bbox.y0 / 16; - int ix1 = bbox.x1 / 16; - int iy1 = bbox.y1 / 16; + int ix0 = bbox->x0 / 16; + int iy0 = bbox->y0 / 16; + int ix1 = bbox->x1 / 16; + int iy1 = bbox->y1 / 16; if (iy0 == iy1 && ix0 == ix1) { @@ -451,10 +462,10 @@ do_triangle_ccw(struct lp_setup_context *setup, } } - ix0 = bbox.x0 / TILE_SIZE; - iy0 = bbox.y0 / TILE_SIZE; - ix1 = bbox.x1 / TILE_SIZE; - iy1 = bbox.y1 / TILE_SIZE; + ix0 = bbox->x0 / TILE_SIZE; + iy0 = bbox->y0 / TILE_SIZE; + ix1 = bbox->x1 / TILE_SIZE; + iy1 = bbox->y1 / TILE_SIZE; /* * Clamp to framebuffer size -- cgit v1.2.3 From 57d84d9ca4a645ca326b66ff3b82bee0db18ac97 Mon Sep 17 00:00:00 2001 From: Hui Qi Tay Date: Fri, 27 Aug 2010 10:37:09 +0100 Subject: llvmpipe: native point rasterization Conflicts: src/gallium/drivers/llvmpipe/lp_setup_context.h src/gallium/drivers/llvmpipe/lp_setup_line.c src/gallium/drivers/llvmpipe/lp_setup_tri.c --- src/gallium/drivers/llvmpipe/lp_context.c | 6 +- src/gallium/drivers/llvmpipe/lp_context.h | 3 + src/gallium/drivers/llvmpipe/lp_setup.c | 15 ++ src/gallium/drivers/llvmpipe/lp_setup.h | 4 + src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 + src/gallium/drivers/llvmpipe/lp_setup_point.c | 225 ++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_state_derived.c | 10 + src/gallium/drivers/llvmpipe/lp_state_rasterizer.c | 2 + 8 files changed, 262 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index a6873abbee..c52b17b298 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -155,8 +155,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe); draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe); - /* convert points and lines into triangles: */ - draw_wide_point_threshold(llvmpipe->draw, 0.0); + /* convert points and lines into triangles: + * (otherwise, draw points and lines natively) + */ + draw_wide_point_threshold(llvmpipe->draw, 10000.0); draw_wide_line_threshold(llvmpipe->draw, 10000.0); #if USE_DRAW_STAGE_PSTIPPLE diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 50f9091c3c..34fa20e204 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -101,6 +101,9 @@ struct llvmpipe_context { /** Vertex format */ struct vertex_info vertex_info; + + /** Which vertex shader output slot contains point size */ + int psize_slot; /** Fragment shader input interpolation info */ unsigned num_inputs; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 4c8275665e..778bfb4f6e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -494,6 +494,15 @@ lp_setup_set_line_state( struct lp_setup_context *setup, setup->line_width = line_width; } +void +lp_setup_set_point_state( struct lp_setup_context *setup, + float point_size) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + setup->point_size = point_size; +} + void lp_setup_set_fs_inputs( struct lp_setup_context *setup, const struct lp_shader_input *input, @@ -733,6 +742,12 @@ lp_setup_update_state( struct lp_setup_context *setup ) */ { struct llvmpipe_context *lp = llvmpipe_context(scene->pipe); + + /* Will probably need to move this somewhere else, just need + * to know about vertex shader point size attribute. + */ + setup->psize = lp->psize_slot; + if (lp->dirty) { llvmpipe_update_derived(lp); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 693550b8c8..6b041e7071 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -104,6 +104,10 @@ void lp_setup_set_line_state( struct lp_setup_context *setup, float line_width); +void +lp_setup_set_point_state( struct lp_setup_context *setup, + float point_size); + void lp_setup_set_fs_inputs( struct lp_setup_context *setup, const struct lp_shader_input *interp, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 97919c09d4..7d486afbbf 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -92,6 +92,8 @@ struct lp_setup_context unsigned cullmode; float pixel_offset; float line_width; + float point_size; + float psize; struct pipe_framebuffer_state fb; struct u_rect framebuffer; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 709c3e2fd2..07a28fc6e3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2010, VMware Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -30,10 +30,229 @@ */ #include "lp_setup_context.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_rast.h" +#include "lp_state_fs.h" + +#define NUM_CHANNELS 4 + +struct point_info { + /* x,y deltas */ + int dy01, dy12; + int dx01, dx12; + + const float (*v0)[4]; +}; + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + */ +static void constant_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *point, + unsigned slot, + const float value, + unsigned i ) +{ + point->inputs.a0[slot][i] = value; + point->inputs.dadx[slot][i] = 0.0f; + point->inputs.dady[slot][i] = 0.0f; +} + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial + * Z and W are copied from position_coef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_point_fragcoord_coef(struct lp_setup_context *setup, + struct lp_rast_triangle *point, + const struct point_info *info, + unsigned slot, + unsigned usage_mask) +{ + /*X*/ + if (usage_mask & TGSI_WRITEMASK_X) { + point->inputs.a0[slot][0] = 0.0; + point->inputs.dadx[slot][0] = 1.0; + point->inputs.dady[slot][0] = 0.0; + } + + /*Y*/ + if (usage_mask & TGSI_WRITEMASK_Y) { + point->inputs.a0[slot][1] = 0.0; + point->inputs.dadx[slot][1] = 0.0; + point->inputs.dady[slot][1] = 1.0; + } + + /*Z*/ + if (usage_mask & TGSI_WRITEMASK_Z) { + constant_coef(setup, point, slot, info->v0[0][2], 2); + } + + /*W*/ + if (usage_mask & TGSI_WRITEMASK_W) { + constant_coef(setup, point, slot, info->v0[0][3], 3); + } +} + +/** + * Compute the point->coef[] array dadx, dady, a0 values. + */ +static void +setup_point_coefficients( struct lp_setup_context *setup, + struct lp_rast_triangle *point, + const struct point_info *info) +{ + unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; + unsigned slot; + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; + unsigned usage_mask = setup->fs.input[slot].usage_mask; + unsigned i; + + switch (setup->fs.input[slot].interp) { + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0, so all need to ensure that the usage mask is covers all + * usages. + */ + fragcoord_usage_mask |= usage_mask; + break; + + default: + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) + constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i); + } + } + } + + /* The internal position input is in slot zero: + */ + setup_point_fragcoord_coef(setup, point, info, 0, + fragcoord_usage_mask); +} + +static INLINE int +subpixel_snap(float a) +{ + return util_iround(FIXED_ONE * a); +} + static void lp_setup_point( struct lp_setup_context *setup, - const float (*v0)[4] ) + const float (*v0)[4] ) { + /* x/y positions in fixed point */ + const int sizeAttr = setup->psize; + const float size + = sizeAttr > 0 ? v0[sizeAttr][0] + : setup->point_size; + const float half_width = 0.5F * size; + + const int x0 = subpixel_snap(v0[0][0] - half_width - setup->pixel_offset); + const int x1 = subpixel_snap(v0[0][0] - half_width - setup->pixel_offset); + const int x2 = subpixel_snap(v0[0][0] + half_width - setup->pixel_offset); + const int y0 = subpixel_snap(v0[0][1] - half_width - setup->pixel_offset); + const int y1 = subpixel_snap(v0[0][1] + half_width - setup->pixel_offset); + const int y2 = subpixel_snap(v0[0][1] + half_width - setup->pixel_offset); + struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_rast_triangle *point; + unsigned bytes; + struct u_rect bbox; + unsigned nr_planes = 4; + struct point_info info; + + + /* Bounding rectangle (in pixels) */ + { + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->pixel_offset != 0) ? 1 : 0; + + bbox.x0 = (MIN3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.x1 = (MAX3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y0 = (MIN3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.y1 = (MAX3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + + /* Inclusive coordinates: + */ + bbox.x1--; + bbox.y1--; + } + + if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { + if (0) debug_printf("offscreen\n"); + LP_COUNT(nr_culled_tris); + return; + } + + u_rect_find_intersection(&setup->draw_region, &bbox); + + point = lp_setup_alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &bytes); + if (!point) + return; + +#ifdef DEBUG + point->v[0][0] = v0[0][0]; + point->v[0][1] = v0[0][1]; +#endif + + info.v0 = v0; + info.dx01 = x1 - x0; + info.dx12 = x2 - x1; + info.dy01 = y1 - y0; + info.dy12 = y2 - y1; + + /* Setup parameter interpolants: + */ + setup_point_coefficients(setup, point, &info); + + point->inputs.facing = 1.0F; + point->inputs.state = setup->fs.stored; + + { + point->plane[0].dcdx = -1; + point->plane[0].dcdy = 0; + point->plane[0].c = 1-bbox.x0; + point->plane[0].ei = 0; + point->plane[0].eo = 1; + + point->plane[1].dcdx = 1; + point->plane[1].dcdy = 0; + point->plane[1].c = bbox.x1+1; + point->plane[1].ei = -1; + point->plane[1].eo = 0; + + point->plane[2].dcdx = 0; + point->plane[2].dcdy = 1; + point->plane[2].c = 1-bbox.y0; + point->plane[2].ei = 0; + point->plane[2].eo = 1; + + point->plane[3].dcdx = 0; + point->plane[3].dcdy = -1; + point->plane[3].c = bbox.y1+1; + point->plane[3].ei = -1; + point->plane[3].eo = 0; + } + + lp_setup_bin_triangle(setup, point, &bbox, nr_planes); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 77bec4640b..9ef9983307 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -125,6 +125,16 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) inputs[i].src_index = vinfo->num_attribs; draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); } + + /* Figure out if we need pointsize as well. + */ + llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); + if (llvmpipe->psize_slot > 0) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, + llvmpipe->psize_slot); + } + llvmpipe->num_inputs = lpfs->info.num_inputs; draw_compute_vertex_size(vinfo); diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 67b985aa24..f845a0c6e5 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -75,6 +75,8 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) llvmpipe->rasterizer->flatshade_first); lp_setup_set_line_state( llvmpipe->setup, llvmpipe->rasterizer->line_width); + lp_setup_set_point_state( llvmpipe->setup, + llvmpipe->rasterizer->point_size); } llvmpipe->dirty |= LP_NEW_RASTERIZER; -- cgit v1.2.3 From 2cd72dd4590b4510931854ed776c72563603f7ff Mon Sep 17 00:00:00 2001 From: Hui Qi Tay Date: Fri, 27 Aug 2010 10:46:19 +0100 Subject: llvmpipe: native point rasterization with better pixel rasterization A few subpixel_snap and fixed width changes. Conflicts: src/gallium/drivers/llvmpipe/lp_setup_point.c --- src/gallium/drivers/llvmpipe/lp_setup_point.c | 34 ++++++++++++++------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 07a28fc6e3..afbc816fb9 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -157,14 +157,16 @@ static void lp_setup_point( struct lp_setup_context *setup, const float size = sizeAttr > 0 ? v0[sizeAttr][0] : setup->point_size; - const float half_width = 0.5F * size; - - const int x0 = subpixel_snap(v0[0][0] - half_width - setup->pixel_offset); - const int x1 = subpixel_snap(v0[0][0] - half_width - setup->pixel_offset); - const int x2 = subpixel_snap(v0[0][0] + half_width - setup->pixel_offset); - const int y0 = subpixel_snap(v0[0][1] - half_width - setup->pixel_offset); - const int y1 = subpixel_snap(v0[0][1] + half_width - setup->pixel_offset); - const int y2 = subpixel_snap(v0[0][1] + half_width - setup->pixel_offset); + + /* Point size as fixed point integer, remove rounding errors + * and gives minimum width for very small points + */ + int fixed_width = MAX2(FIXED_ONE, + (subpixel_snap(size) + FIXED_ONE/2 - 1) & ~(FIXED_ONE-1)); + + const int x0 = subpixel_snap(v0[0][0] - setup->pixel_offset) - fixed_width/2; + const int y0 = subpixel_snap(v0[0][1] - setup->pixel_offset) - fixed_width/2; + struct lp_scene *scene = lp_setup_get_current_scene(setup); struct lp_rast_triangle *point; unsigned bytes; @@ -182,10 +184,10 @@ static void lp_setup_point( struct lp_setup_context *setup, */ int adj = (setup->pixel_offset != 0) ? 1 : 0; - bbox.x0 = (MIN3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER; - bbox.x1 = (MAX3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER; - bbox.y0 = (MIN3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - bbox.y1 = (MAX3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.x0 = (x0 + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.x1 = (x0 + fixed_width + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.y0 = (y0 + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y1 = (y0 + fixed_width + (FIXED_ONE-1)) >> FIXED_ORDER; /* Inclusive coordinates: */ @@ -214,10 +216,10 @@ static void lp_setup_point( struct lp_setup_context *setup, #endif info.v0 = v0; - info.dx01 = x1 - x0; - info.dx12 = x2 - x1; - info.dy01 = y1 - y0; - info.dy12 = y2 - y1; + info.dx01 = 0; + info.dx12 = fixed_width; + info.dy01 = fixed_width; + info.dy12 = 0; /* Setup parameter interpolants: */ -- cgit v1.2.3 From 29ec116e8f21c65250f1083830b82ff59859496d Mon Sep 17 00:00:00 2001 From: Hui Qi Tay Date: Tue, 10 Aug 2010 11:41:32 +0100 Subject: llvmpipe: point sprites rasterization Point sprites now done in the rasterizer setup code instead of going through the draw module. --- src/gallium/drivers/llvmpipe/lp_context.c | 2 + src/gallium/drivers/llvmpipe/lp_setup.c | 6 ++- src/gallium/drivers/llvmpipe/lp_setup.h | 4 +- src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 + src/gallium/drivers/llvmpipe/lp_setup_point.c | 63 +++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_state_derived.c | 20 +++++-- src/gallium/drivers/llvmpipe/lp_state_rasterizer.c | 6 ++- 7 files changed, 93 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index c52b17b298..39f2c6085e 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -158,6 +158,8 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) /* convert points and lines into triangles: * (otherwise, draw points and lines natively) */ + draw_wide_point_sprites(llvmpipe->draw, FALSE); + draw_enable_point_sprites(llvmpipe->draw, FALSE); draw_wide_point_threshold(llvmpipe->draw, 10000.0); draw_wide_line_threshold(llvmpipe->draw, 10000.0); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 778bfb4f6e..3da9097154 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -496,11 +496,15 @@ lp_setup_set_line_state( struct lp_setup_context *setup, void lp_setup_set_point_state( struct lp_setup_context *setup, - float point_size) + float point_size, + boolean point_size_per_vertex, + uint sprite) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); setup->point_size = point_size; + setup->sprite = sprite; + setup->point_size_per_vertex = point_size_per_vertex; } void diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 6b041e7071..821ebb1087 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -106,7 +106,9 @@ lp_setup_set_line_state( struct lp_setup_context *setup, void lp_setup_set_point_state( struct lp_setup_context *setup, - float point_size); + float point_size, + boolean point_size_per_vertex, + uint sprite); void lp_setup_set_fs_inputs( struct lp_setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 7d486afbbf..877a492c6d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -74,6 +74,7 @@ struct lp_setup_context uint prim; uint vertex_size; uint nr_vertices; + uint sprite; uint vertex_buffer_size; void *vertex_buffer; @@ -89,6 +90,7 @@ struct lp_setup_context boolean flatshade_first; boolean ccw_is_frontface; boolean scissor_test; + boolean point_size_per_vertex; unsigned cullmode; float pixel_offset; float line_width; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index afbc816fb9..6ae318d328 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -36,6 +36,7 @@ #include "lp_setup_context.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "tgsi/tgsi_scan.h" #define NUM_CHANNELS 4 @@ -62,6 +63,49 @@ static void constant_coef( struct lp_setup_context *setup, point->inputs.dady[slot][i] = 0.0f; } +static void perspective_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *point, + const struct point_info *info, + unsigned slot, + unsigned vert_attr, + unsigned i) +{ + if (i == 0) { + float dadx = FIXED_ONE / (float)info->dx12; + float dady = 0.0f; + point->inputs.dadx[slot][i] = dadx; + point->inputs.dady[slot][i] = dady; + point->inputs.a0[slot][i] = (0.5 - + (dadx * ((float)info->v0[0][0] - setup->pixel_offset) + + dady * ((float)info->v0[0][1] - setup->pixel_offset))); + } + + else if (i == 1) { + float dadx = 0.0f; + float dady = FIXED_ONE / (float)info->dx12; + + point->inputs.dadx[slot][i] = dadx; + point->inputs.dady[slot][i] = dady; + point->inputs.a0[slot][i] = (0.5 - + (dadx * ((float)info->v0[0][0] - setup->pixel_offset) + + dady * ((float)info->v0[0][1] - setup->pixel_offset))); + } + + else if (i == 2) { + point->inputs.a0[slot][i] = 0.0f; + point->inputs.dadx[slot][i] = 0.0f; + point->inputs.dady[slot][i] = 0.0f; + } + + else if (i == 3) { + point->inputs.a0[slot][i] = 1.0f; + point->inputs.dadx[slot][i] = 0.0f; + point->inputs.dady[slot][i] = 0.0f; + } + +} + + /** * Special coefficient setup for gl_FragCoord. * X and Y are trivial @@ -128,6 +172,23 @@ setup_point_coefficients( struct lp_setup_context *setup, fragcoord_usage_mask |= usage_mask; break; + case LP_INTERP_PERSPECTIVE: + /* For point sprite textures */ + if (setup->fs.current.variant->shader->info.input_semantic_name[slot] + == TGSI_SEMANTIC_GENERIC) + { + int index = setup->fs.current.variant->shader->info.input_semantic_index[slot]; + + if (setup->sprite & (1 << index)) { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + perspective_coef(setup, point, info, slot+1, vert_attr, i); + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + break; + } + } + + /* Otherwise fallthrough */ default: for (i = 0; i < NUM_CHANNELS; i++) { if (usage_mask & (1 << i)) @@ -155,7 +216,7 @@ static void lp_setup_point( struct lp_setup_context *setup, /* x/y positions in fixed point */ const int sizeAttr = setup->psize; const float size - = sizeAttr > 0 ? v0[sizeAttr][0] + = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0] : setup->point_size; /* Point size as fixed point integer, remove rounding errors diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 9ef9983307..edd723f65f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -74,6 +74,15 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) vs_index = draw_find_shader_output(llvmpipe->draw, lpfs->info.input_semantic_name[i], lpfs->info.input_semantic_index[i]); + if (vs_index < 0) { + /* + * This can happen with sprite coordinates - the vertex + * shader doesn't need to provide an output as we generate + * them internally. However, lets keep pretending that there + * is something there to not confuse other code. + */ + vs_index = 0; + } /* This can be pre-computed, except for flatshade: */ @@ -128,11 +137,12 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) /* Figure out if we need pointsize as well. */ - llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw, - TGSI_SEMANTIC_PSIZE, 0); - if (llvmpipe->psize_slot > 0) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, - llvmpipe->psize_slot); + vs_index = draw_find_shader_output(llvmpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); + + if (vs_index > 0) { + llvmpipe->psize_slot = vinfo->num_attribs; + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); } llvmpipe->num_inputs = lpfs->info.num_inputs; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index f845a0c6e5..0bad7320f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -76,8 +76,10 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) lp_setup_set_line_state( llvmpipe->setup, llvmpipe->rasterizer->line_width); lp_setup_set_point_state( llvmpipe->setup, - llvmpipe->rasterizer->point_size); - } + llvmpipe->rasterizer->point_size, + llvmpipe->rasterizer->point_size_per_vertex, + llvmpipe->rasterizer->sprite_coord_enable); + } llvmpipe->dirty |= LP_NEW_RASTERIZER; } -- cgit v1.2.3 From aea6b415deffd7613d67dc85876afab151b7460e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 27 Aug 2010 11:03:58 +0100 Subject: llvmpipe: eliminate tri->dx, tri->dy values Use an internal struct for line setup information. --- src/gallium/drivers/llvmpipe/lp_rast.h | 5 -- src/gallium/drivers/llvmpipe/lp_setup_line.c | 76 ++++++++++++++-------------- 2 files changed, 39 insertions(+), 42 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 37b4fdc31e..b4564ef33b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -116,11 +116,6 @@ struct lp_rast_triangle { /* inputs for the shader */ struct lp_rast_shader_inputs inputs; - /* XXX: temporarily use these additional fields for line - * coefficient setup - */ - float dx, dy; - #ifdef DEBUG float v[3][2]; #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index ebf000dd19..cf770f521d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -38,6 +38,15 @@ #define NUM_CHANNELS 4 +struct lp_line_info { + + float dx; + float dy; + float oneoverarea; + + const float (*v1)[4]; + const float (*v2)[4]; +}; /** @@ -61,26 +70,24 @@ static void constant_coef( struct lp_setup_context *setup, */ static void linear_coef( struct lp_setup_context *setup, struct lp_rast_triangle *tri, - float oneoverarea, + struct lp_line_info *info, unsigned slot, - const float (*v1)[4], - const float (*v2)[4], unsigned vert_attr, unsigned i) { - float a1 = v1[vert_attr][i]; - float a2 = v2[vert_attr][i]; + float a1 = info->v1[vert_attr][i]; + float a2 = info->v2[vert_attr][i]; float da21 = a1 - a2; - float dadx = da21 * tri->dx * oneoverarea; - float dady = da21 * tri->dy * oneoverarea; + float dadx = da21 * info->dx * info->oneoverarea; + float dady = da21 * info->dy * info->oneoverarea; tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - setup->pixel_offset) + - dady * (v1[0][1] - setup->pixel_offset))); + (dadx * (info->v1[0][0] - setup->pixel_offset) + + dady * (info->v1[0][1] - setup->pixel_offset))); } @@ -94,37 +101,33 @@ static void linear_coef( struct lp_setup_context *setup, */ static void perspective_coef( struct lp_setup_context *setup, struct lp_rast_triangle *tri, - float oneoverarea, + struct lp_line_info *info, unsigned slot, - const float (*v1)[4], - const float (*v2)[4], unsigned vert_attr, unsigned i) { /* premultiply by 1/w (v[0][3] is always 1/w): */ - float a1 = v1[vert_attr][i] * v1[0][3]; - float a2 = v2[vert_attr][i] * v2[0][3]; + float a1 = info->v1[vert_attr][i] * info->v1[0][3]; + float a2 = info->v2[vert_attr][i] * info->v2[0][3]; float da21 = a1 - a2; - float dadx = da21 * tri->dx * oneoverarea; - float dady = da21 * tri->dy * oneoverarea; + float dadx = da21 * info->dx * info->oneoverarea; + float dady = da21 * info->dy * info->oneoverarea; tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - setup->pixel_offset) + - dady * (v1[0][1] - setup->pixel_offset))); + (dadx * (info->v1[0][0] - setup->pixel_offset) + + dady * (info->v1[0][1] - setup->pixel_offset))); } static void setup_fragcoord_coef( struct lp_setup_context *setup, struct lp_rast_triangle *tri, - float oneoverarea, + struct lp_line_info *info, unsigned slot, - const float (*v1)[4], - const float (*v2)[4], unsigned usage_mask) { /*X*/ @@ -143,12 +146,12 @@ setup_fragcoord_coef( struct lp_setup_context *setup, /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(setup, tri, oneoverarea, slot, v1, v2, 0, 2); + linear_coef(setup, tri, info, slot, 0, 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(setup, tri, oneoverarea, slot, v1, v2, 0, 3); + linear_coef(setup, tri, info, slot, 0, 3); } } @@ -157,9 +160,7 @@ setup_fragcoord_coef( struct lp_setup_context *setup, */ static void setup_line_coefficients( struct lp_setup_context *setup, struct lp_rast_triangle *tri, - float oneoverarea, - const float (*v1)[4], - const float (*v2)[4]) + struct lp_line_info *info) { unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; @@ -176,25 +177,25 @@ static void setup_line_coefficients( struct lp_setup_context *setup, if (setup->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, v1[vert_attr][i], i); + constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, v2[vert_attr][i], i); + constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i); } break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i); + linear_coef(setup, tri, info, slot+1, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i); + perspective_coef(setup, tri, info, slot+1, vert_attr, i); fragcoord_usage_mask |= TGSI_WRITEMASK_W; break; @@ -214,7 +215,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, + setup_fragcoord_coef(setup, tri, info, 0, fragcoord_usage_mask); } @@ -269,7 +270,7 @@ lp_setup_line( struct lp_setup_context *setup, { struct lp_scene *scene = lp_setup_get_current_scene(setup); struct lp_rast_triangle *line; - float oneoverarea; + struct lp_line_info info; float width = MAX2(1.0, setup->line_width); struct u_rect bbox; unsigned tri_bytes; @@ -560,9 +561,6 @@ lp_setup_line( struct lp_setup_context *setup, line->v[1][1] = v2[0][1]; #endif - line->dx = dx; - line->dy = dy; - /* calculate the deltas */ line->plane[0].dcdy = x[0] - x[1]; line->plane[1].dcdy = x[1] - x[2]; @@ -575,11 +573,15 @@ lp_setup_line( struct lp_setup_context *setup, line->plane[3].dcdx = y[3] - y[0]; - oneoverarea = 1.0f / (dx * dx + dy * dy); + info.oneoverarea = 1.0f / (dx * dx + dy * dy); + info.dx = dx; + info.dy = dy; + info.v1 = v1; + info.v2 = v2; /* Setup parameter interpolants: */ - setup_line_coefficients( setup, line, oneoverarea, v1, v2); + setup_line_coefficients( setup, line, &info); line->inputs.facing = 1.0F; line->inputs.state = setup->fs.stored; -- cgit v1.2.3 From 55f4eab93cf964a2ffa540fef9485b6f737a6f41 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 27 Aug 2010 13:40:23 +0100 Subject: llvmpipe: use util_iround in place of round Fix mingw build. --- src/gallium/drivers/llvmpipe/lp_setup_line.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index cf770f521d..ce2da55cf4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -280,7 +280,7 @@ lp_setup_line( struct lp_setup_context *setup, int nr_planes = 4; /* linewidth should be interpreted as integer */ - int fixed_width = subpixel_snap(round(width)); + int fixed_width = util_iround(width) * FIXED_ONE; float x_offset=0; float y_offset=0; -- cgit v1.2.3 From f7579f2f28999e585ee9a51635c38d57d25c6193 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 28 Aug 2010 00:29:02 -0700 Subject: llvmpipe: Remove unnecessary header. --- src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c index b477bc2113..73fb70599c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c @@ -35,7 +35,6 @@ #include "lp_setup_context.h" #include "lp_setup_coef.h" #include "lp_rast.h" -#include "lp_state_fs.h" #if defined(PIPE_ARCH_SSE) #include -- cgit v1.2.3 From cf1e4b15a4d21342320fed0fdb19ba6211e4628b Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 28 Aug 2010 14:14:33 -0700 Subject: llvmpipe: Include missing header in lp_flush.c. Include p_screen.h for complete type to pipe_screen. --- src/gallium/drivers/llvmpipe/lp_flush.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 040fe0ba9a..e2c723b7a8 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -31,6 +31,7 @@ #include "pipe/p_defines.h" +#include "pipe/p_screen.h" #include "util/u_string.h" #include "draw/draw_context.h" #include "lp_flush.h" -- cgit v1.2.3 From d8c92a1eea555f8b9d673a3f2a708de5faf8b3cd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 18:19:52 +0100 Subject: llvmpipe: intrinsics versions of build_mask functions --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 78 +++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 8d729c7481..5b3ad6e0a7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -67,7 +67,7 @@ block_full_16(struct lp_rasterizer_task *task, block_full_4(task, tri, x + ix, y + iy); } - +#if !defined(PIPE_ARCH_SSE) static INLINE unsigned build_mask(int c, int dcdx, int dcdy) { @@ -98,6 +98,7 @@ build_mask(int c, int dcdx, int dcdy) return mask; } + static INLINE unsigned build_mask_linear(int c, int dcdx, int dcdy) { @@ -127,6 +128,81 @@ build_mask_linear(int c, int dcdx, int dcdy) return mask; } +#else +#include +#include "util/u_sse.h" + + +static INLINE unsigned +build_mask_linear(int c, int dcdx, int dcdy) +{ + __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3); + __m128i xdcdy = _mm_set1_epi32(dcdy); + + /* Get values across the quad + */ + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + /* pack pairs of results into epi16 + */ + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + + /* pack into epi8, preserving sign bits + */ + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* extract sign bits to create mask + */ + return _mm_movemask_epi8(result); +} + +static INLINE unsigned +build_mask(int c, int dcdx, int dcdy) +{ + __m128i step = _mm_setr_epi32(0, dcdx, dcdy, dcdx + dcdy); + __m128i c0 = _mm_set1_epi32(c); + + /* Get values across the quad + */ + __m128i cstep0 = _mm_add_epi32(c0, step); + + /* Scale up step for moving between quads. This should probably + * be an arithmetic shift left, but there doesn't seem to be + * such a thing in SSE. It's unlikely that the step value is + * going to be large enough to overflow across 4 pixels, though + * if it is that big, rendering will be incorrect anyway. + */ + __m128i step4 = _mm_slli_epi32(step, 1); + + /* Get values for the remaining quads: + */ + __m128i cstep1 = _mm_add_epi32(cstep0, + _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1))); + __m128i cstep2 = _mm_add_epi32(cstep0, + _mm_shuffle_epi32(step4, _MM_SHUFFLE(2,2,2,2))); + __m128i cstep3 = _mm_add_epi32(cstep2, + _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1))); + + /* pack pairs of results into epi16 + */ + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + + /* pack into epi8, preserving sign bits + */ + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* extract sign bits to create mask + */ + return _mm_movemask_epi8(result); +} + +#endif + + #define TAG(x) x##_1 -- cgit v1.2.3 From 0aa3a09ced07e150901cd0f7a7917556a018c252 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 22 Aug 2010 22:56:54 +0100 Subject: llvmpipe: combine linear mask calculation --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 73 ++++++++++++++++++++++++-- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 26 +++++---- 2 files changed, 84 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 5b3ad6e0a7..bdb8d131cc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -128,11 +128,71 @@ build_mask_linear(int c, int dcdx, int dcdy) return mask; } + + +static INLINE void +build_masks(int c, + int cdiff, + int dcdx, + int dcdy, + unsigned *outmask, + unsigned *partmask) +{ + *outmask |= build_mask_linear(c, dcdx, dcdy); + *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy); +} + #else #include #include "util/u_sse.h" +static INLINE void +build_masks(int c, + int cdiff, + int dcdx, + int dcdy, + unsigned *outmask, + unsigned *partmask) +{ + __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3); + __m128i xdcdy = _mm_set1_epi32(dcdy); + + /* Get values across the quad + */ + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + { + __m128i cstep01, cstep23, result; + + cstep01 = _mm_packs_epi32(cstep0, cstep1); + cstep23 = _mm_packs_epi32(cstep2, cstep3); + result = _mm_packs_epi16(cstep01, cstep23); + + *outmask |= _mm_movemask_epi8(result); + } + + + { + __m128i cio4 = _mm_set1_epi32(cdiff); + __m128i cstep01, cstep23, result; + + cstep0 = _mm_add_epi32(cstep0, cio4); + cstep1 = _mm_add_epi32(cstep1, cio4); + cstep2 = _mm_add_epi32(cstep2, cio4); + cstep3 = _mm_add_epi32(cstep3, cio4); + + cstep01 = _mm_packs_epi32(cstep0, cstep1); + cstep23 = _mm_packs_epi32(cstep2, cstep3); + result = _mm_packs_epi16(cstep01, cstep23); + + *partmask |= _mm_movemask_epi8(result); + } +} + + static INLINE unsigned build_mask_linear(int c, int dcdx, int dcdy) { @@ -263,11 +323,14 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, { const int dcdx = -plane[j].dcdx * 4; const int dcdy = plane[j].dcdy * 4; - const int cox = c[j] + plane[j].eo * 4; - const int cio = c[j] + plane[j].ei * 4 - 1; - - outmask |= build_mask_linear(cox, dcdx, dcdy); - partmask |= build_mask_linear(cio, dcdx, dcdy); + const int cox = plane[j].eo * 4; + const int cio = plane[j].ei * 4 - 1; + + build_masks(c[j] + cox, + cio - cox, + dcdx, dcdy, + &outmask, /* sign bits from c[i][0..15] + cox */ + &partmask); /* sign bits from c[i][0..15] + cio */ } } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 0def5f7243..99a0bae45d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -81,11 +81,14 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, for (j = 0; j < NR_PLANES; j++) { const int dcdx = -plane[j].dcdx * 4; const int dcdy = plane[j].dcdy * 4; - const int cox = c[j] + plane[j].eo * 4; - const int cio = c[j] + plane[j].ei * 4 - 1; - - outmask |= build_mask_linear(cox, dcdx, dcdy); - partmask |= build_mask_linear(cio, dcdx, dcdy); + const int cox = plane[j].eo * 4; + const int cio = plane[j].ei * 4 - 1; + + build_masks(c[j] + cox, + cio - cox, + dcdx, dcdy, + &outmask, /* sign bits from c[i][0..15] + cox */ + &partmask); /* sign bits from c[i][0..15] + cio */ } if (outmask == 0xffff) @@ -171,11 +174,14 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, { const int dcdx = -plane[j].dcdx * 16; const int dcdy = plane[j].dcdy * 16; - const int cox = c[j] + plane[j].eo * 16; - const int cio = c[j] + plane[j].ei * 16 - 1; - - outmask |= build_mask_linear(cox, dcdx, dcdy); - partmask |= build_mask_linear(cio, dcdx, dcdy); + const int cox = plane[j].eo * 16; + const int cio = plane[j].ei * 16 - 1; + + build_masks(c[j] + cox, + cio - cox, + dcdx, dcdy, + &outmask, /* sign bits from c[i][0..15] + cox */ + &partmask); /* sign bits from c[i][0..15] + cio */ } j++; -- cgit v1.2.3 From e38d2f716381385e2aad219a3d125065ec0a01bd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 24 Aug 2010 23:05:57 +0100 Subject: llvmpipe: slightly simplify build_mask --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index bdb8d131cc..dbaa8e023a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -229,13 +229,9 @@ build_mask(int c, int dcdx, int dcdy) */ __m128i cstep0 = _mm_add_epi32(c0, step); - /* Scale up step for moving between quads. This should probably - * be an arithmetic shift left, but there doesn't seem to be - * such a thing in SSE. It's unlikely that the step value is - * going to be large enough to overflow across 4 pixels, though - * if it is that big, rendering will be incorrect anyway. + /* Scale up step for moving between quads. */ - __m128i step4 = _mm_slli_epi32(step, 1); + __m128i step4 = _mm_add_epi32(step, step); /* Get values for the remaining quads: */ -- cgit v1.2.3