From c4046d4fda2fe838659bff99bfa17f57f895a943 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 7 Sep 2010 23:13:31 +0100 Subject: llvmpipe: introduce tri_3_4 for tiny triangles --- src/gallium/drivers/llvmpipe/lp_rast.c | 1 + src/gallium/drivers/llvmpipe/lp_rast.h | 16 +-- src/gallium/drivers/llvmpipe/lp_rast_debug.c | 1 + src/gallium/drivers/llvmpipe/lp_rast_priv.h | 4 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 139 +++++++++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 12 +++ 6 files changed, 127 insertions(+), 46 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 36068d74be..d7e6415e13 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -579,6 +579,7 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = lp_rast_triangle_6, lp_rast_triangle_7, lp_rast_triangle_8, + lp_rast_triangle_3_4, lp_rast_triangle_3_16, lp_rast_shade_tile, lp_rast_shade_tile_opaque, diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 3c8dae6b01..5767667935 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -236,13 +236,14 @@ lp_rast_arg_null( void ) #define LP_RAST_OP_TRIANGLE_6 0x7 #define LP_RAST_OP_TRIANGLE_7 0x8 #define LP_RAST_OP_TRIANGLE_8 0x9 -#define LP_RAST_OP_TRIANGLE_3_16 0xa -#define LP_RAST_OP_SHADE_TILE 0xb -#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xc -#define LP_RAST_OP_BEGIN_QUERY 0xd -#define LP_RAST_OP_END_QUERY 0xe - -#define LP_RAST_OP_MAX 0xf +#define LP_RAST_OP_TRIANGLE_3_4 0xa +#define LP_RAST_OP_TRIANGLE_3_16 0xb +#define LP_RAST_OP_SHADE_TILE 0xc +#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xd +#define LP_RAST_OP_BEGIN_QUERY 0xe +#define LP_RAST_OP_END_QUERY 0xf + +#define LP_RAST_OP_MAX 0x10 #define LP_RAST_OP_MASK 0xff void @@ -252,4 +253,5 @@ lp_debug_draw_bins_by_cmd_length( struct lp_scene *scene ); void 
lp_debug_draw_bins_by_coverage( struct lp_scene *scene ); + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c index f2ef21f2a9..9fc78645a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -40,6 +40,7 @@ static const char *cmd_names[LP_RAST_OP_MAX] = "triangle_6", "triangle_7", "triangle_8", + "triangle_3_4", "triangle_3_16", "shade_tile", "shade_tile_opaque", diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 3bcdfd6074..7370119e96 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -288,9 +288,11 @@ void lp_rast_triangle_7( struct lp_rasterizer_task *, void lp_rast_triangle_8( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void lp_rast_triangle_3_4(struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + void lp_rast_triangle_3_16( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); - void lp_debug_bin( const struct cmd_bin *bin ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index ab2d766318..ee5cd71b74 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -112,6 +112,23 @@ build_masks(int c, *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy); } +void +lp_rast_triangle_3_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + union lp_rast_cmd_arg arg2; + arg2.triangle.tri = arg.triangle.tri; + arg2.triangle.plane_mask = (1<<3)-1; /* 0x7 -- low 3 bits set, presumably one per plane */ + lp_rast_triangle_3(task, arg2); /* fallback: defer to the generic 3-plane path */ +} + +void +lp_rast_triangle_3_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + lp_rast_triangle_3_16(task, arg); /* no dedicated 4x4 path in this branch: reuse the 3_16 fallback */ +} + #else #include <emmintrin.h> #include "util/u_sse.h" @@ -189,44 +206,6 @@ build_mask_linear(int c, int dcdx, int dcdy) return _mm_movemask_epi8(result); } - -#endif - - - - -#define TAG(x) x##_1 -#define 
NR_PLANES 1 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_2 -#define NR_PLANES 2 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_3 -#define NR_PLANES 3 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_4 -#define NR_PLANES 4 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_5 -#define NR_PLANES 5 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_6 -#define NR_PLANES 6 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_7 -#define NR_PLANES 7 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_8 -#define NR_PLANES 8 -#include "lp_rast_tri_tmp.h" - static INLINE unsigned sign_bits4(const __m128i *cstep, int cdiff) { @@ -342,3 +321,87 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, block_full_4(task, tri, px, py); } } + + +void +lp_rast_triangle_3_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = tri->plane; + unsigned mask = arg.triangle.plane_mask; /* decoded below as a packed offset: low byte -> x, bits 8 and up -> y */ + const int x = task->x + (mask & 0xff); + const int y = task->y + (mask >> 8); + unsigned j; + + /* Rasterize one 4x4 stamp at (x, y): evaluate each of the 3 planes at its 16 pixels and shade the pixels that survive all of them. + */ + { + unsigned mask = 0xffff; /* 16 coverage bits, one per pixel of the stamp; shadows the outer 'mask' */ + + for (j = 0; j < 3; j++) { + const int cx = (plane[j].c + - plane[j].dcdx * x + + plane[j].dcdy * y); + + const int dcdx = -plane[j].dcdx; + const int dcdy = plane[j].dcdy; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); /* row 0: four steps in x */ + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); /* each following row adds dcdy */ + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); /* saturating packs keep the sign of each value */ + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + __m128i result = _mm_packs_epi16(cstep01, cstep23); /* 16 signed bytes, one per pixel */ + + /* Extract the sign bits and clear the coverage bit of every pixel where this plane's value is negative. + */ + mask &= ~_mm_movemask_epi8(result); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); + } +} + + +#endif + + + + +#define TAG(x) x##_1 +#define NR_PLANES 1 +#include 
"lp_rast_tri_tmp.h" + +#define TAG(x) x##_2 +#define NR_PLANES 2 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_3 +#define NR_PLANES 3 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_4 +#define NR_PLANES 4 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_5 +#define NR_PLANES 5 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_6 +#define NR_PLANES 6 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_7 +#define NR_PLANES 7 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_8 +#define NR_PLANES 8 +#include "lp_rast_tri_tmp.h" + diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index d4ef8f4c9c..2cd10ed420 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -487,6 +487,18 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, (bbox->y1 - (bbox->y0 & ~3))); if (nr_planes == 3) { + if (sz < 4 && dx < 64) + { + /* Triangle is contained in a single 4x4 stamp: + */ + int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); + + return lp_scene_bin_command( scene, + bbox->x0/64, bbox->y0/64, + LP_RAST_OP_TRIANGLE_3_4, + lp_rast_arg_triangle(tri, mask) ); + } + if (sz < 16 && dx < 64) { int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); -- cgit v1.2.3