diff options
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
35 files changed, 1959 insertions, 1225 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index dec874623e..55b877b4ab 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -22,6 +22,7 @@ C_SOURCES = \ lp_perf.c \ lp_query.c \ lp_rast.c \ + lp_rast_debug.c \ lp_rast_tri.c \ lp_scene.c \ lp_scene_queue.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 8d57db72cf..650435f0f1 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -55,6 +55,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_perf.c', 'lp_query.c', 'lp_rast.c', + 'lp_rast_debug.c', 'lp_rast_tri.c', 'lp_scene.c', 'lp_scene_queue.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index 8514030cde..e28efe778f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -44,21 +44,20 @@ void lp_build_alpha_test(LLVMBuilderRef builder, - const struct pipe_alpha_state *state, + unsigned func, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, LLVMValueRef ref) { struct lp_build_context bld; + LLVMValueRef test; lp_build_context_init(&bld, builder, type); - if(state->enabled) { - LLVMValueRef test = lp_build_cmp(&bld, state->func, alpha, ref); + test = lp_build_cmp(&bld, func, alpha, ref); - lp_build_name(test, "alpha_mask"); + lp_build_name(test, "alpha_mask"); - lp_build_mask_update(mask, test); - } + lp_build_mask_update(mask, test); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 0f99fec65e..44603b418c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -44,7 +44,7 @@ struct lp_build_mask_context; void lp_build_alpha_test(LLVMBuilderRef builder, - const struct pipe_alpha_state *state, + unsigned func, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index 09e9833057..b5924cbb7d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -197,7 +197,7 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, swizzled_rgb = rgb; break; case LP_BUILD_BLEND_SWIZZLE_AAAA: - swizzled_rgb = lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle); + swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle); break; default: assert(0); @@ -205,9 +205,8 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, } if (rgb != alpha) { - boolean cond[4] = {0, 0, 0, 0}; - cond[alpha_swizzle] = 1; - swizzled_rgb = lp_build_select_aos(&bld->base, alpha, swizzled_rgb, cond); + swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle, + alpha, swizzled_rgb); } return swizzled_rgb; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 2cf6f38c4b..2a374f8c39 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -75,6 +75,33 @@ */ +/** + * Do one perspective divide per quad. + * + * For perspective interpolation, the final attribute value is given + * + * a' = a/w = a * oow + * + * where + * + * a = a0 + dadx*x + dady*y + * w = w0 + dwdx*x + dwdy*y + * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y) + * + * Instead of computing the division per pixel, with this macro we compute the + * division on the upper left pixel of each quad, and use a linear + * approximation in the remaining pixels, given by: + * + * da'dx = (dadx - dwdx*a)*oow + * da'dy = (dady - dwdy*a)*oow + * + * Ironically, this actually makes things slower -- probably because the + * divide hardware unit is rarely used, whereas the multiply unit is typically + * already saturated. + */ +#define PERSPECTIVE_DIVIDE_PER_QUAD 0 + + static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; static const unsigned char quad_offset_y[4] = {0, 0, 1, 1}; @@ -107,7 +134,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld, LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0); LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0); LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0); - LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; @@ -213,22 +239,22 @@ coeffs_init(struct lp_build_interp_soa_context *bld, a = LLVMBuildFAdd(builder, a, dadq2, ""); +#if PERSPECTIVE_DIVIDE_PER_QUAD /* - * a *= 1 / w - * dadq *= 1 / w + * a *= 1 / w */ if (interp == LP_INTERP_PERSPECTIVE) { LLVMValueRef w = bld->a[0][3]; assert(attrib != 0); assert(bld->mask[0] & TGSI_WRITEMASK_W); - if (!oow) { - oow = lp_build_rcp(coeff_bld, w); - lp_build_name(oow, "oow"); + if (!bld->oow) { + bld->oow = lp_build_rcp(coeff_bld, w); + lp_build_name(bld->oow, "oow"); } - a = lp_build_mul(coeff_bld, a, oow); - dadq = lp_build_mul(coeff_bld, dadq, oow); + a = lp_build_mul(coeff_bld, a, bld->oow); } +#endif attrib_name(a, attrib, chan, ".a"); attrib_name(dadq, attrib, chan, ".dadq"); @@ -250,6 +276,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) { struct lp_build_context *coeff_bld = &bld->coeff_bld; LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index); + LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; @@ -270,6 +297,8 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) a = bld->attribs[0][chan]; } else { + LLVMValueRef dadq; + a = bld->a[attrib][chan]; /* @@ -280,10 +309,46 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) a, coeff_bld->undef, shuffle, ""); /* + * Get the derivatives. + */ + + dadq = bld->dadq[attrib][chan]; + +#if PERSPECTIVE_DIVIDE_PER_QUAD + if (interp == LP_INTERP_PERSPECTIVE) { + LLVMValueRef dwdq = bld->dadq[0][3]; + + if (oow == NULL) { + assert(bld->oow); + oow = LLVMBuildShuffleVector(coeff_bld->builder, + bld->oow, coeff_bld->undef, + shuffle, ""); + } + + dadq = lp_build_sub(coeff_bld, + dadq, + lp_build_mul(coeff_bld, a, dwdq)); + dadq = lp_build_mul(coeff_bld, dadq, oow); + } +#endif + + /* * Add the derivatives */ - a = lp_build_add(coeff_bld, a, bld->dadq[attrib][chan]); + a = lp_build_add(coeff_bld, a, dadq); + +#if !PERSPECTIVE_DIVIDE_PER_QUAD + if (interp == LP_INTERP_PERSPECTIVE) { + if (oow == NULL) { + LLVMValueRef w = bld->attribs[0][3]; + assert(attrib != 0); + assert(bld->mask[0] & TGSI_WRITEMASK_W); + oow = lp_build_rcp(coeff_bld, w); + } + a = lp_build_mul(coeff_bld, a, oow); + } +#endif attrib_name(a, attrib, chan, ""); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 2905513301..3054030f73 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -64,6 +64,8 @@ struct lp_build_interp_soa_context LLVMValueRef a [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef oow; + LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; /* diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index a928ee38be..add43e4fca 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -48,6 +48,7 @@ st_print_current(void); #define DEBUG_COUNTERS 0x800 #define DEBUG_SCENE 0x1000 #define DEBUG_FENCE 0x2000 +#define DEBUG_MEM 0x4000 #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h index c28652fc30..b23a100b87 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.h +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -74,7 +74,7 @@ extern struct lp_counters lp_count; #define LP_COUNT_GET(counter) (lp_count.counter) #else #define LP_COUNT(counter) -#define LP_COUNT_ADD(counter, incr) (void) incr +#define LP_COUNT_ADD(counter, incr) (void)(incr) #define LP_COUNT_GET(counter) 0 #endif diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index 67fd797af2..ff0e207a54 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -54,9 +54,6 @@ llvmpipe_create_query(struct pipe_context *pipe, assert(type == PIPE_QUERY_OCCLUSION_COUNTER); pq = CALLOC_STRUCT( llvmpipe_query ); - if (pq) { - pipe_mutex_init(pq->mutex); - } return (struct pipe_query *) pq; } @@ -66,12 +63,20 @@ static void llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) { struct llvmpipe_query *pq = llvmpipe_query(q); - /* query might still be in process if we never waited for the result */ - if (!pq->done) { - llvmpipe_finish(pipe, __FUNCTION__); + + /* Ideally we would refcount queries & not get destroyed until the + * last scene had finished with us. + */ + if (pq->fence) { + if (!lp_fence_issued(pq->fence)) + llvmpipe_flush(pipe, 0, NULL, __FUNCTION__); + + if (!lp_fence_signalled(pq->fence)) + lp_fence_wait(pq->fence); + + lp_fence_reference(&pq->fence, NULL); } - pipe_mutex_destroy(pq->mutex); FREE(pq); } @@ -84,22 +89,31 @@ llvmpipe_get_query_result(struct pipe_context *pipe, { struct llvmpipe_query *pq = llvmpipe_query(q); uint64_t *result = (uint64_t *)vresult; + int i; + + if (!pq->fence) { + assert(0); /* query not in issued state */ + return FALSE; + } - if (!pq->done) { - if (wait) { - llvmpipe_finish(pipe, __FUNCTION__); - } - /* this is a bit inconsequent but should be ok */ - else { + if (!lp_fence_signalled(pq->fence)) { + if (!lp_fence_issued(pq->fence)) llvmpipe_flush(pipe, 0, NULL, __FUNCTION__); - } + + if (!wait) + return FALSE; + + lp_fence_wait(pq->fence); } - if (pq->done) { - *result = pq->result; + /* Sum the results from each of the threads: + */ + *result = 0; + for (i = 0; i < LP_MAX_THREADS; i++) { + *result += pq->count[i]; } - return pq->done; + return TRUE; } @@ -113,10 +127,12 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) * flush the scene now. Real apps shouldn't re-use a query in a * frame of rendering. */ - if (pq->binned) { + if (pq->fence && !lp_fence_issued(pq->fence)) { llvmpipe_finish(pipe, __FUNCTION__); } + + memset(pq->count, 0, sizeof(pq->count)); lp_setup_begin_query(llvmpipe->setup, pq); llvmpipe->active_query_count++; diff --git a/src/gallium/drivers/llvmpipe/lp_query.h b/src/gallium/drivers/llvmpipe/lp_query.h index 721c41cb5c..e93842a2fd 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.h +++ b/src/gallium/drivers/llvmpipe/lp_query.h @@ -43,13 +43,7 @@ struct llvmpipe_context; struct llvmpipe_query { uint64_t count[LP_MAX_THREADS]; /**< a counter for each thread */ - uint64_t result; /**< total of all counters */ - - pipe_mutex mutex; - unsigned num_tiles, tile_count; - - boolean done; - boolean binned; /**< has this query been binned in the scene? */ + struct lp_fence *fence; /* fence from last scene this was binned in */ }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index b1c306bbe9..d7e6415e13 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -30,6 +30,7 @@ #include "util/u_math.h" #include "util/u_rect.h" #include "util/u_surface.h" +#include "util/u_pack_color.h" #include "lp_scene_queue.h" #include "lp_debug.h" @@ -57,39 +58,12 @@ static void lp_rast_begin( struct lp_rasterizer *rast, struct lp_scene *scene ) { - const struct pipe_framebuffer_state *fb = &scene->fb; - int i; rast->curr_scene = scene; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - rast->state.nr_cbufs = scene->fb.nr_cbufs; - - for (i = 0; i < rast->state.nr_cbufs; i++) { - struct pipe_surface *cbuf = scene->fb.cbufs[i]; - llvmpipe_resource_map(cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice, - LP_TEX_USAGE_READ_WRITE, - LP_TEX_LAYOUT_LINEAR); - } - - if (fb->zsbuf) { - struct pipe_surface *zsbuf = scene->fb.zsbuf; - rast->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level); - rast->zsbuf.blocksize = - util_format_get_blocksize(zsbuf->texture->format); - - rast->zsbuf.map = llvmpipe_resource_map(zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice, - LP_TEX_USAGE_READ_WRITE, - LP_TEX_LAYOUT_NONE); - } - + lp_scene_begin_rasterization( scene ); lp_scene_bin_iter_begin( scene ); } @@ -97,29 +71,7 @@ lp_rast_begin( struct lp_rasterizer *rast, static void lp_rast_end( struct lp_rasterizer *rast ) { - struct lp_scene *scene = rast->curr_scene; - unsigned i; - - /* Unmap color buffers */ - for (i = 0; i < rast->state.nr_cbufs; i++) { - struct pipe_surface *cbuf = scene->fb.cbufs[i]; - llvmpipe_resource_unmap(cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice); - } - - /* Unmap z/stencil buffer */ - if (rast->zsbuf.map) { - struct pipe_surface *zsbuf = scene->fb.zsbuf; - llvmpipe_resource_unmap(zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice); - rast->zsbuf.map = NULL; - } - - lp_scene_reset( rast->curr_scene ); + lp_scene_end_rasterization( rast->curr_scene ); rast->curr_scene = NULL; @@ -138,26 +90,23 @@ lp_rast_end( struct lp_rasterizer *rast ) */ static void lp_rast_tile_begin(struct lp_rasterizer_task *task, - unsigned x, unsigned y) + const struct cmd_bin *bin) { - struct lp_rasterizer *rast = task->rast; - struct lp_scene *scene = rast->curr_scene; + const struct lp_scene *scene = task->scene; enum lp_texture_usage usage; - LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); - - assert(x % TILE_SIZE == 0); - assert(y % TILE_SIZE == 0); + LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, bin->x, bin->y); - task->x = x; - task->y = y; + task->bin = bin; + task->x = bin->x * TILE_SIZE; + task->y = bin->y * TILE_SIZE; /* reset pointers to color tile(s) */ memset(task->color_tiles, 0, sizeof(task->color_tiles)); /* get pointer to depth/stencil tile */ { - struct pipe_surface *zsbuf = rast->curr_scene->fb.zsbuf; + struct pipe_surface *zsbuf = task->scene->fb.zsbuf; if (zsbuf) { struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture); @@ -173,11 +122,14 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, zsbuf->face + zsbuf->zslice, zsbuf->level, usage, - x, y); + task->x, + task->y); /* Get actual pointer to the tile data. Note that depth/stencil * data is tiled differently than color data. */ - task->depth_tile = lp_rast_get_depth_block_pointer(task, x, y); + task->depth_tile = lp_rast_get_depth_block_pointer(task, + task->x, + task->y); assert(task->depth_tile); } @@ -192,11 +144,11 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, * Clear the rasterizer's current color tile. * This is a bin command called during bin processing. */ -void +static void lp_rast_clear_color(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { - struct lp_rasterizer *rast = task->rast; + const struct lp_scene *scene = task->scene; const uint8_t *clear_color = arg.clear_color; unsigned i; @@ -211,7 +163,7 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { /* clear to grayscale value {x, x, x, x} */ - for (i = 0; i < rast->state.nr_cbufs; i++) { + for (i = 0; i < scene->fb.nr_cbufs; i++) { uint8_t *ptr = lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL); memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4); @@ -224,7 +176,7 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, * works. */ const unsigned chunk = TILE_SIZE / 4; - for (i = 0; i < rast->state.nr_cbufs; i++) { + for (i = 0; i < scene->fb.nr_cbufs; i++) { uint8_t *c = lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL); unsigned j; @@ -246,22 +198,25 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, } + + + + /** * Clear the rasterizer's current z/stencil tile. * This is a bin command called during bin processing. */ -void +static void lp_rast_clear_zstencil(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { - struct lp_rasterizer *rast = task->rast; - const struct lp_rast_clearzs *clearzs = arg.clear_zstencil; - unsigned clear_value = clearzs->clearzs_value; - unsigned clear_mask = clearzs->clearzs_mask; + const struct lp_scene *scene = task->scene; + unsigned clear_value = arg.clear_zstencil.value; + unsigned clear_mask = arg.clear_zstencil.mask; const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; - const unsigned block_size = rast->zsbuf.blocksize; - const unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT; + const unsigned block_size = scene->zsbuf.blocksize; + const unsigned dst_stride = scene->zsbuf.stride * TILE_VECTOR_HEIGHT; uint8_t *dst; unsigned i, j; @@ -327,15 +282,13 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, * threading/parallelism. * This is a bin command which is stored in all bins. */ -void -lp_rast_store_linear_color( struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) +static void +lp_rast_store_linear_color( struct lp_rasterizer_task *task ) { - struct lp_rasterizer *rast = task->rast; - struct lp_scene *scene = rast->curr_scene; + const struct lp_scene *scene = task->scene; unsigned buf; - for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + for (buf = 0; buf < scene->fb.nr_cbufs; buf++) { struct pipe_surface *cbuf = scene->fb.cbufs[buf]; const unsigned face_slice = cbuf->face + cbuf->zslice; const unsigned level = cbuf->level; @@ -359,17 +312,22 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task, * completely contained inside a triangle. * This is a bin command called during bin processing. */ -void +static void lp_rast_shade_tile(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { - struct lp_rasterizer *rast = task->rast; + const struct lp_scene *scene = task->scene; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const struct lp_rast_state *state = inputs->state; struct lp_fragment_shader_variant *variant = state->variant; const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; + if (inputs->disable) { + /* This command was partially binned and has been disabled */ + return; + } + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); /* render the whole 64x64 tile in 4x4 chunks */ @@ -380,7 +338,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, unsigned i; /* color buffer */ - for (i = 0; i < rast->state.nr_cbufs; i++) + for (i = 0; i < scene->fb.nr_cbufs; i++) color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, tile_y + y); @@ -410,17 +368,17 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, * completely contained inside a triangle, and the shader is opaque. * This is a bin command called during bin processing. */ -void +static void lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { - struct lp_rasterizer *rast = task->rast; + const struct lp_scene *scene = task->scene; unsigned i; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); /* this will prevent converting the layout from tiled to linear */ - for (i = 0; i < rast->state.nr_cbufs; i++) { + for (i = 0; i < scene->fb.nr_cbufs; i++) { (void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL); } @@ -442,7 +400,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, { const struct lp_rast_state *state = inputs->state; struct lp_fragment_shader_variant *variant = state->variant; - struct lp_rasterizer *rast = task->rast; + const struct lp_scene *scene = task->scene; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; unsigned i; @@ -457,7 +415,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, assert((y % 4) == 0); /* color buffer */ - for (i = 0; i < rast->state.nr_cbufs; i++) { + for (i = 0; i < scene->fb.nr_cbufs; i++) { color[i] = lp_rast_get_color_block_pointer(task, i, x, y); assert(lp_check_alignment(color[i], 16)); } @@ -486,6 +444,38 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, /** + * Begin a new occlusion query. + * This is a bin command put in all bins. + * Called per thread. + */ +static void +lp_rast_begin_query(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + struct llvmpipe_query *pq = arg.query_obj; + + assert(task->query == NULL); + task->vis_counter = 0; + task->query = pq; +} + + +/** + * End the current occlusion query. + * This is a bin command put in all bins. + * Called per thread. + */ +static void +lp_rast_end_query(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + task->query->count[task->thread_index] += task->vis_counter; + task->query = NULL; +} + + + +/** * Set top row and left column of the tile's pixels to white. For debugging. */ static void @@ -546,10 +536,10 @@ lp_rast_tile_end(struct lp_rasterizer_task *task) { #ifdef DEBUG if (LP_DEBUG & (DEBUG_SHOW_SUBTILES | DEBUG_SHOW_TILES)) { - struct lp_rasterizer *rast = task->rast; + const struct lp_scene *scene = task->scene; unsigned buf; - for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + for (buf = 0; buf < scene->fb.nr_cbufs; buf++) { uint8_t *color = lp_rast_get_color_block_pointer(task, buf, task->x, task->y); @@ -563,83 +553,56 @@ lp_rast_tile_end(struct lp_rasterizer_task *task) (void) outline_subtiles; #endif - { + lp_rast_store_linear_color(task); + + if (task->query) { union lp_rast_cmd_arg dummy = {0}; - lp_rast_store_linear_color(task, dummy); + lp_rast_end_query(task, dummy); } /* debug */ memset(task->color_tiles, 0, sizeof(task->color_tiles)); task->depth_tile = NULL; -} - - - -/** - * Signal on a fence. This is called during bin execution/rasterization. - * Called per thread. - */ -void -lp_rast_fence(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) -{ - struct lp_fence *fence = arg.fence; - lp_fence_signal(fence); -} - -/** - * Begin a new occlusion query. - * This is a bin command put in all bins. - * Called per thread. - */ -void -lp_rast_begin_query(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) -{ - /* Reset the per-task counter */ - task->vis_counter = 0; + task->bin = NULL; } - -/** - * End the current occlusion query. - * This is a bin command put in all bins. - * Called per thread. - */ -void -lp_rast_end_query(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) +static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = { - struct llvmpipe_query *pq = arg.query_obj; - - pipe_mutex_lock(pq->mutex); - { - /* Accumulate the visible fragment counter from this tile in - * the query object. - */ - pq->count[task->thread_index] += task->vis_counter; + lp_rast_clear_color, + lp_rast_clear_zstencil, + lp_rast_triangle_1, + lp_rast_triangle_2, + lp_rast_triangle_3, + lp_rast_triangle_4, + lp_rast_triangle_5, + lp_rast_triangle_6, + lp_rast_triangle_7, + lp_rast_triangle_8, + lp_rast_triangle_3_4, + lp_rast_triangle_3_16, + lp_rast_shade_tile, + lp_rast_shade_tile_opaque, + lp_rast_begin_query, + lp_rast_end_query, +}; - /* check if this is the last tile in the scene */ - pq->tile_count++; - if (pq->tile_count == pq->num_tiles) { - uint i; - /* sum the per-thread counters for the query */ - pq->result = 0; - for (i = 0; i < LP_MAX_THREADS; i++) { - pq->result += pq->count[i]; - } +static void +do_rasterize_bin(struct lp_rasterizer_task *task, + const struct cmd_bin *bin) +{ + const struct cmd_block *block; + unsigned k; - /* reset counters (in case this query is re-used in the scene) */ - memset(pq->count, 0, sizeof(pq->count)); + if (0) + lp_debug_bin(bin); - pq->tile_count = 0; - pq->binned = FALSE; - pq->done = TRUE; + for (block = bin->head; block; block = block->next) { + for (k = 0; k < block->count; k++) { + dispatch[block->cmd[k]]( task, block->arg[k] ); } } - pipe_mutex_unlock(pq->mutex); } @@ -652,74 +615,26 @@ lp_rast_end_query(struct lp_rasterizer_task *task, */ static void rasterize_bin(struct lp_rasterizer_task *task, - const struct cmd_bin *bin, - int x, int y) + const struct cmd_bin *bin ) { - const struct cmd_block_list *commands = &bin->commands; - struct cmd_block *block; - unsigned k; + lp_rast_tile_begin( task, bin ); - lp_rast_tile_begin( task, x * TILE_SIZE, y * TILE_SIZE ); - - /* simply execute each of the commands in the block list */ - for (block = commands->head; block; block = block->next) { - for (k = 0; k < block->count; k++) { - block->cmd[k]( task, block->arg[k] ); - } - } + do_rasterize_bin(task, bin); lp_rast_tile_end(task); - /* Free data for this bin. - */ - lp_scene_bin_reset( task->rast->curr_scene, x, y); -} - -#define RAST(x) { lp_rast_##x, #x } - -static struct { - lp_rast_cmd cmd; - const char *name; -} cmd_names[] = -{ - RAST(clear_color), - RAST(clear_zstencil), - RAST(triangle_1), - RAST(triangle_2), - RAST(triangle_3), - RAST(triangle_4), - RAST(triangle_5), - RAST(triangle_6), - RAST(triangle_7), - RAST(shade_tile), - RAST(shade_tile_opaque), - RAST(store_linear_color), - RAST(fence), - RAST(begin_query), - RAST(end_query), -}; - -static void -debug_bin( const struct cmd_bin *bin ) -{ - const struct cmd_block *head = bin->commands.head; - int i, j; - - for (i = 0; i < head->count; i++) { - debug_printf("%d: ", i); - for (j = 0; j < Elements(cmd_names); j++) { - if (head->cmd[i] == cmd_names[j].cmd) { - debug_printf("%s\n", cmd_names[j].name); - break; - } - } - if (j == Elements(cmd_names)) - debug_printf("...other\n"); + /* Debug/Perf flags: + */ + if (bin->head->count == 1) { + if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE) + LP_COUNT(nr_pure_shade_opaque_64); + else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE) + LP_COUNT(nr_pure_shade_64); } - } + /* An empty bin is one that just loads the contents of the tile and * stores them again unchanged. This typically happens when bins have * been flushed for some reason in the middle of a frame, or when @@ -730,12 +645,10 @@ debug_bin( const struct cmd_bin *bin ) static boolean is_empty_bin( const struct cmd_bin *bin ) { - if (0) debug_bin(bin); - return bin->commands.head->count == 0; + return bin->head == NULL; } - /** * Rasterize/execute all bins within a scene. * Called per thread. @@ -744,6 +657,7 @@ static void rasterize_scene(struct lp_rasterizer_task *task, struct lp_scene *scene) { + task->scene = scene; /* loop over scene bins, rasterize each */ #if 0 { @@ -758,19 +672,20 @@ rasterize_scene(struct lp_rasterizer_task *task, #else { struct cmd_bin *bin; - int x, y; assert(scene); - while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { + while ((bin = lp_scene_bin_iter_next(scene))) { if (!is_empty_bin( bin )) - rasterize_bin(task, bin, x, y); + rasterize_bin(task, bin); } } #endif if (scene->fence) { - lp_rast_fence(task, lp_rast_arg_fence(scene->fence)); + lp_fence_signal(scene->fence); } + + task->scene = NULL; } @@ -790,8 +705,6 @@ lp_rast_queue_scene( struct lp_rasterizer *rast, rasterize_scene( &rast->tasks[0], scene ); - lp_scene_reset( scene ); - lp_rast_end( rast ); rast->curr_scene = NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index b4564ef33b..5767667935 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -79,6 +79,8 @@ struct lp_rast_state { */ struct lp_rast_shader_inputs { float facing; /** Positive for front-facing, negative for back-facing */ + boolean disable:1; /** Partially binned, disable this command */ + boolean opaque:1; /** Is opaque */ float (*a0)[4]; float (*dadx)[4]; @@ -87,10 +89,6 @@ struct lp_rast_shader_inputs { const struct lp_rast_state *state; }; -struct lp_rast_clearzs { - unsigned clearzs_value; - unsigned clearzs_mask; -}; struct lp_rast_plane { /* one-pixel sized trivial accept offsets for each plane */ @@ -150,7 +148,10 @@ union lp_rast_cmd_arg { } triangle; const struct lp_rast_state *set_state; uint8_t clear_color[4]; - const struct lp_rast_clearzs *clear_zstencil; + struct { + unsigned value; + unsigned mask; + } clear_zstencil; struct lp_fence *fence; struct llvmpipe_query *query_obj; }; @@ -194,10 +195,20 @@ lp_rast_arg_fence( struct lp_fence *fence ) static INLINE union lp_rast_cmd_arg -lp_rast_arg_clearzs( const struct lp_rast_clearzs *clearzs ) +lp_rast_arg_clearzs( unsigned value, unsigned mask ) { union lp_rast_cmd_arg arg; - arg.clear_zstencil = clearzs; + arg.clear_zstencil.value = value; + arg.clear_zstencil.mask = mask; + return arg; +} + + +static INLINE union lp_rast_cmd_arg +lp_rast_arg_query( struct llvmpipe_query *pq ) +{ + union lp_rast_cmd_arg arg; + arg.query_obj = pq; return arg; } @@ -215,52 +226,32 @@ lp_rast_arg_null( void ) * These get put into bins by the setup code and are called when * the bins are executed. */ +#define LP_RAST_OP_CLEAR_COLOR 0x0 +#define LP_RAST_OP_CLEAR_ZSTENCIL 0x1 +#define LP_RAST_OP_TRIANGLE_1 0x2 +#define LP_RAST_OP_TRIANGLE_2 0x3 +#define LP_RAST_OP_TRIANGLE_3 0x4 +#define LP_RAST_OP_TRIANGLE_4 0x5 +#define LP_RAST_OP_TRIANGLE_5 0x6 +#define LP_RAST_OP_TRIANGLE_6 0x7 +#define LP_RAST_OP_TRIANGLE_7 0x8 +#define LP_RAST_OP_TRIANGLE_8 0x9 +#define LP_RAST_OP_TRIANGLE_3_4 0xa +#define LP_RAST_OP_TRIANGLE_3_16 0xb +#define LP_RAST_OP_SHADE_TILE 0xc +#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xd +#define LP_RAST_OP_BEGIN_QUERY 0xe +#define LP_RAST_OP_END_QUERY 0xf + +#define LP_RAST_OP_MAX 0x10 +#define LP_RAST_OP_MASK 0xff -void lp_rast_clear_color( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - -void lp_rast_clear_zstencil( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - -void lp_rast_triangle_1( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); -void lp_rast_triangle_2( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); -void lp_rast_triangle_3( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); -void lp_rast_triangle_4( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); -void lp_rast_triangle_5( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); -void lp_rast_triangle_6( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); -void lp_rast_triangle_7( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); -void lp_rast_triangle_8( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - -void lp_rast_shade_tile( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - -void lp_rast_shade_tile_opaque( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - -void lp_rast_fence( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - -void lp_rast_store_linear_color( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - - -void lp_rast_begin_query(struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - -void lp_rast_end_query(struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - void -lp_rast_triangle_3_16(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg); +lp_debug_bins( struct lp_scene *scene ); +void +lp_debug_draw_bins_by_cmd_length( struct lp_scene *scene ); +void +lp_debug_draw_bins_by_coverage( struct lp_scene *scene ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c new file mode 100644 index 0000000000..9fc78645a3 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -0,0 +1,410 @@ +#include "util/u_math.h" +#include "lp_rast_priv.h" +#include "lp_state_fs.h" + +static INLINE int u_bit_scan(unsigned *mask) +{ + int i = ffs(*mask) - 1; + *mask &= ~(1 << i); + return i; +} + +struct tile { + int coverage; + int overdraw; + char data[TILE_SIZE][TILE_SIZE]; +}; + +static char get_label( int i ) +{ + static const char *cmd_labels = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + unsigned max_label = (2*26+10); + + if (i < max_label) + return cmd_labels[i]; + else + return '?'; +} + + + +static const char *cmd_names[LP_RAST_OP_MAX] = +{ + "clear_color", + "clear_zstencil", + "triangle_1", + "triangle_2", + "triangle_3", + "triangle_4", + "triangle_5", + "triangle_6", + "triangle_7", + "triangle_8", + "triangle_3_4", + "triangle_3_16", + "shade_tile", + "shade_tile_opaque", + "begin_query", + "end_query", +}; + +static const char *cmd_name(unsigned cmd) +{ + assert(Elements(cmd_names) > cmd); + return cmd_names[cmd]; +} + +static const struct lp_fragment_shader_variant * +get_variant( const struct cmd_block *block, + int k ) +{ + if (block->cmd[k] == LP_RAST_OP_SHADE_TILE || + block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE) + return block->arg[k].shade_tile->state->variant; + + if (block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_2 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_3 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_4 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_5 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_6 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_7) + return block->arg[k].triangle.tri->inputs.state->variant; + + return NULL; +} + + +static boolean +is_blend( const struct cmd_block *block, + int k ) +{ + const struct lp_fragment_shader_variant *variant = get_variant(block, k); + + if (variant) + return variant->key.blend.rt[0].blend_enable; + + return FALSE; +} + + + +static void +debug_bin( const struct cmd_bin *bin ) +{ + const struct cmd_block *head = bin->head; + int i, j = 0; + + debug_printf("bin %d,%d:\n", bin->x, bin->y); + + while (head) { + for (i = 0; i < head->count; i++, j++) { + debug_printf("%d: %s %s\n", j, + cmd_name(head->cmd[i]), + is_blend(head, i) ? "blended" : ""); + } + head = head->next; + } +} + + +static void plot(struct tile *tile, + int x, int y, + char val, + boolean blend) +{ + if (tile->data[x][y] == ' ') + tile->coverage++; + else + tile->overdraw++; + + tile->data[x][y] = val; +} + + + + + + +static int +debug_shade_tile(int x, int y, + const union lp_rast_cmd_arg arg, + struct tile *tile, + char val) +{ + const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + boolean blend = inputs->state->variant->key.blend.rt[0].blend_enable; + unsigned i,j; + + if (inputs->disable) + return 0; + + for (i = 0; i < TILE_SIZE; i++) + for (j = 0; j < TILE_SIZE; j++) + plot(tile, i, j, val, blend); + + return TILE_SIZE * TILE_SIZE; +} + +static int +debug_clear_tile(int x, int y, + const union lp_rast_cmd_arg arg, + struct tile *tile, + char val) +{ + unsigned i,j; + + for (i = 0; i < TILE_SIZE; i++) + for (j = 0; j < TILE_SIZE; j++) + plot(tile, i, j, val, FALSE); + + return TILE_SIZE * TILE_SIZE; + +} + + +static int +debug_triangle(int tilex, int tiley, + const union lp_rast_cmd_arg arg, + struct tile *tile, + char val) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + unsigned plane_mask = arg.triangle.plane_mask; + struct lp_rast_plane plane[8]; + int x, y; + int count = 0; + unsigned i, nr_planes = 0; + boolean blend = tri->inputs.state->variant->key.blend.rt[0].blend_enable; + + if (tri->inputs.disable) { + /* This triangle was partially binned and has been disabled */ + return 0; + } + + while (plane_mask) { + plane[nr_planes] = tri->plane[u_bit_scan(&plane_mask)]; + plane[nr_planes].c = (plane[nr_planes].c + + plane[nr_planes].dcdy * tiley - + plane[nr_planes].dcdx * tilex); + nr_planes++; + } + + for(y = 0; y < TILE_SIZE; y++) + { + for(x = 0; x < TILE_SIZE; x++) + { + for (i = 0; i < nr_planes; i++) + if (plane[i].c <= 0) + goto out; + + plot(tile, x, y, val, blend); + count++; + + out: + for (i = 0; i < nr_planes; i++) + plane[i].c -= plane[i].dcdx; + } + + for (i = 0; i < nr_planes; i++) { + plane[i].c += plane[i].dcdx * TILE_SIZE; + plane[i].c += plane[i].dcdy; + } + } + return count; +} + + + + + +static void +do_debug_bin( struct tile *tile, + const struct cmd_bin *bin, + boolean print_cmds) +{ + unsigned k, j = 0; + const struct cmd_block *block; + + int tx = bin->x * TILE_SIZE; + int ty = bin->y * TILE_SIZE; + + memset(tile->data, ' ', sizeof tile->data); + tile->coverage = 0; + tile->overdraw = 0; + + for (block = bin->head; block; block = block->next) { + for (k = 0; k < block->count; k++, j++) { + boolean blend = is_blend(block, k); + char val = get_label(j); + int count = 0; + + if (print_cmds) + debug_printf("%c: %15s", val, cmd_name(block->cmd[k])); + + if (block->cmd[k] == LP_RAST_OP_CLEAR_COLOR || + block->cmd[k] == LP_RAST_OP_CLEAR_ZSTENCIL) + count = debug_clear_tile(tx, ty, block->arg[k], tile, val); + + if (block->cmd[k] == LP_RAST_OP_SHADE_TILE || + block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE) + count = debug_shade_tile(tx, ty, block->arg[k], tile, val); + + if (block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_2 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_3 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_4 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_5 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_6 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_7) + count = debug_triangle(tx, ty, block->arg[k], tile, val); + + if (print_cmds) { + debug_printf(" % 5d", count); + + if (blend) + debug_printf(" blended"); + + debug_printf("\n"); + } + } + } +} + +void +lp_debug_bin( const struct cmd_bin *bin) +{ + struct tile tile; + int x,y; + + if (bin->head) { + do_debug_bin(&tile, bin, TRUE); + + debug_printf("------------------------------------------------------------------\n"); + for (y = 0; y < TILE_SIZE; y++) { + for (x = 0; x < TILE_SIZE; x++) { + debug_printf("%c", tile.data[y][x]); + } + debug_printf("|\n"); + } + debug_printf("------------------------------------------------------------------\n"); + + debug_printf("each pixel drawn avg %f times\n", + ((float)tile.overdraw + tile.coverage)/(float)tile.coverage); + } +} + + + + + + +/** Return number of bytes used for a single bin */ +static unsigned +lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ) +{ + struct cmd_bin *bin = lp_scene_get_bin((struct lp_scene *) scene, x, y); + const struct cmd_block *cmd; + unsigned size = 0; + for (cmd = bin->head; cmd; cmd = cmd->next) { + size += (cmd->count * + (sizeof(uint8_t) + sizeof(union lp_rast_cmd_arg))); + } + return size; +} + + + +void +lp_debug_draw_bins_by_coverage( struct lp_scene *scene ) +{ + unsigned x, y; + unsigned total = 0; + unsigned possible = 0; + static unsigned long long _total; + static unsigned long long _possible; + + for (x = 0; x < scene->tiles_x; x++) + debug_printf("-"); + debug_printf("\n"); + + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + const char *bits = "0123456789"; + struct tile tile; + + if (bin->head) { + //lp_debug_bin(bin); + + do_debug_bin(&tile, bin, FALSE); + + total += tile.coverage; + possible += 64*64; + + if (tile.coverage == 64*64) + debug_printf("*"); + else if (tile.coverage) { + int bit = tile.coverage/(64.0*64.0)*10; + debug_printf("%c", bits[MIN2(bit,10)]); + } + else + debug_printf("?"); + } + else { + debug_printf(" "); + } + } + debug_printf("|\n"); + } + + for (x = 0; x < scene->tiles_x; x++) + debug_printf("-"); + debug_printf("\n"); + + debug_printf("this tile total: %u possible %u: percentage: %f\n", + total, + possible, + total * 100.0 / (float)possible); + + _total += total; + _possible += possible; + + debug_printf("overall total: %llu possible %llu: percentage: %f\n", + _total, + _possible, + _total * 100.0 / (double)_possible); +} + + +void +lp_debug_draw_bins_by_cmd_length( struct lp_scene *scene ) +{ + unsigned x, y; + + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { + const char *bits = " ...,-~:;=o+xaw*#XAWWWWWWWWWWWWWWWW"; + int sz = lp_scene_bin_size(scene, x, y); + int sz2 = util_unsigned_logbase2(sz); + debug_printf("%c", bits[MIN2(sz2,32)]); + } + debug_printf("\n"); + } +} + + +void +lp_debug_bins( struct lp_scene *scene ) +{ + unsigned x, y; + + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + if (bin->head) { + debug_bin(bin); + } + } + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index fae7f6d3dc..7370119e96 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -69,13 +69,16 @@ extern const struct lp_rast_state *jit_state; struct lp_rasterizer; - +struct cmd_bin; /** * Per-thread rasterization state */ struct lp_rasterizer_task { + const struct cmd_bin *bin; + + struct lp_scene *scene; unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS]; @@ -89,6 +92,7 @@ struct lp_rasterizer_task /* occlude counter for visiable pixels */ uint32_t vis_counter; + struct llvmpipe_query *query; pipe_semaphore work_ready; pipe_semaphore work_done; @@ -104,33 +108,9 @@ struct lp_rasterizer { boolean exit_flag; - /* Framebuffer stuff - */ - struct { - uint8_t *map; - unsigned stride; - unsigned blocksize; - } zsbuf; - - struct { - unsigned nr_cbufs; - unsigned clear_color; - unsigned clear_depth; - char clear_stencil; - } state; - /** The incoming queue of scenes ready to rasterize */ struct lp_scene_queue *full_scenes; - /** - * The outgoing queue of processed scenes to return to setup module - * - * XXX: while scenes are per-context but the rasterizer is - * (potentially) shared, these empty scenes should be returned to - * the context which created them rather than retained here. - */ - /* struct lp_scene_queue *empty_scenes; */ - /** The scene currently being rasterized by the threads */ struct lp_scene *curr_scene; @@ -164,13 +144,13 @@ static INLINE void * lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task, unsigned x, unsigned y) { - const struct lp_rasterizer *rast = task->rast; + const struct lp_scene *scene = task->scene; void *depth; assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); - if (!rast->zsbuf.map) { + if (!scene->zsbuf.map) { /* Either out of memory or no zsbuf. Can't tell without access * to the state. Just use dummy tile memory, but don't print * the oom warning as this most likely because there is no @@ -179,9 +159,9 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task, return lp_dummy_tile; } - depth = (rast->zsbuf.map + - rast->zsbuf.stride * y + - rast->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT); + depth = (scene->zsbuf.map + + scene->zsbuf.stride * y + + scene->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT); assert(lp_check_alignment(depth, 16)); return depth; @@ -195,14 +175,14 @@ static INLINE uint8_t * lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task, unsigned buf, enum lp_texture_usage usage) { - struct lp_rasterizer *rast = task->rast; + const struct lp_scene *scene = task->scene; assert(task->x % TILE_SIZE == 0); assert(task->y % TILE_SIZE == 0); - assert(buf < rast->state.nr_cbufs); + assert(buf < scene->fb.nr_cbufs); if (!task->color_tiles[buf]) { - struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; + struct pipe_surface *cbuf = scene->fb.cbufs[buf]; struct llvmpipe_resource *lpt; assert(cbuf); lpt = llvmpipe_resource(cbuf->texture); @@ -263,7 +243,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y ) { - const struct lp_rasterizer *rast = task->rast; + const struct lp_scene *scene = task->scene; const struct lp_rast_state *state = inputs->state; struct lp_fragment_shader_variant *variant = state->variant; uint8_t *color[PIPE_MAX_COLOR_BUFS]; @@ -271,7 +251,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, unsigned i; /* color buffer */ - for (i = 0; i < rast->state.nr_cbufs; i++) + for (i = 0; i < scene->fb.nr_cbufs; i++) color[i] = lp_rast_get_color_block_pointer(task, i, x, y); depth = lp_rast_get_depth_block_pointer(task, x, y); @@ -291,5 +271,29 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, END_JIT_CALL(); } +void lp_rast_triangle_1( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_2( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_3( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_4( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_5( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_6( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_7( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_8( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_3_4(struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_3_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void +lp_debug_bin( const struct cmd_bin *bin ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index dbaa8e023a..a1f309d4b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -68,36 +68,6 @@ block_full_16(struct lp_rasterizer_task *task, } #if !defined(PIPE_ARCH_SSE) -static INLINE unsigned -build_mask(int c, int dcdx, int dcdy) -{ - int mask = 0; - - int c0 = c; - int c1 = c0 + dcdx; - int c2 = c1 + dcdx; - int c3 = c2 + dcdx; - - mask |= ((c0 + 0 * dcdy) >> 31) & (1 << 0); - mask |= ((c0 + 1 * dcdy) >> 31) & (1 << 2); - mask |= ((c0 + 2 * dcdy) >> 31) & (1 << 8); - mask |= ((c0 + 3 * dcdy) >> 31) & (1 << 10); - mask |= ((c1 + 0 * dcdy) >> 31) & (1 << 1); - mask |= ((c1 + 1 * dcdy) >> 31) & (1 << 3); - mask |= ((c1 + 2 * dcdy) >> 31) & (1 << 9); - mask |= ((c1 + 3 * dcdy) >> 31) & (1 << 11); - mask |= ((c2 + 0 * dcdy) >> 31) & (1 << 4); - mask |= ((c2 + 1 * dcdy) >> 31) & (1 << 6); - mask |= ((c2 + 2 * dcdy) >> 31) & (1 << 12); - mask |= ((c2 + 3 * dcdy) >> 31) & (1 << 14); - mask |= ((c3 + 0 * dcdy) >> 31) & (1 << 5); - mask |= ((c3 + 1 * dcdy) >> 31) & (1 << 7); - mask |= ((c3 + 2 * dcdy) >> 31) & (1 << 13); - mask |= ((c3 + 3 * dcdy) >> 31) & (1 << 15); - - return mask; -} - static INLINE unsigned build_mask_linear(int c, int dcdx, int dcdy) @@ -142,6 +112,23 @@ build_masks(int c, *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy); } +void +lp_rast_triangle_3_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + union lp_rast_cmd_arg arg2; + arg2.triangle.tri = arg.triangle.tri; + arg2.triangle.plane_mask = (1<<3)-1; + lp_rast_triangle_3(task, arg2); +} + +void +lp_rast_triangle_3_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + lp_rast_triangle_3_16(task, arg); +} + #else #include <emmintrin.h> #include "util/u_sse.h" @@ -220,79 +207,28 @@ build_mask_linear(int c, int dcdx, int dcdy) } static INLINE unsigned -build_mask(int c, int dcdx, int dcdy) +sign_bits4(const __m128i *cstep, int cdiff) { - __m128i step = _mm_setr_epi32(0, dcdx, dcdy, dcdx + dcdy); - __m128i c0 = _mm_set1_epi32(c); - - /* Get values across the quad - */ - __m128i cstep0 = _mm_add_epi32(c0, step); - - /* Scale up step for moving between quads. - */ - __m128i step4 = _mm_add_epi32(step, step); - /* Get values for the remaining quads: + /* Adjust the step values */ - __m128i cstep1 = _mm_add_epi32(cstep0, - _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1))); - __m128i cstep2 = _mm_add_epi32(cstep0, - _mm_shuffle_epi32(step4, _MM_SHUFFLE(2,2,2,2))); - __m128i cstep3 = _mm_add_epi32(cstep2, - _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1))); + __m128i cio4 = _mm_set1_epi32(cdiff); + __m128i cstep0 = _mm_add_epi32(cstep[0], cio4); + __m128i cstep1 = _mm_add_epi32(cstep[1], cio4); + __m128i cstep2 = _mm_add_epi32(cstep[2], cio4); + __m128i cstep3 = _mm_add_epi32(cstep[3], cio4); - /* pack pairs of results into epi16 + /* Pack down to epi8 */ __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); - - /* pack into epi8, preserving sign bits - */ __m128i result = _mm_packs_epi16(cstep01, cstep23); - /* extract sign bits to create mask + /* Extract the sign bits */ return _mm_movemask_epi8(result); } -#endif - - - - -#define TAG(x) x##_1 -#define NR_PLANES 1 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_2 -#define NR_PLANES 2 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_3 -#define NR_PLANES 3 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_4 -#define NR_PLANES 4 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_5 -#define NR_PLANES 5 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_6 -#define NR_PLANES 6 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_7 -#define NR_PLANES 7 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_8 -#define NR_PLANES 8 -#include "lp_rast_tri_tmp.h" - /* Special case for 3 plane triangle which is contained entirely * within a 16x16 block. @@ -304,29 +240,32 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, const struct lp_rast_triangle *tri = arg.triangle.tri; const struct lp_rast_plane *plane = tri->plane; unsigned mask = arg.triangle.plane_mask; - const int x = task->x + (mask & 0xf) * 16; - const int y = task->y + (mask >> 4) * 16; + const int x = task->x + (mask & 0xff); + const int y = task->y + (mask >> 8); unsigned outmask, inmask, partmask, partial_mask; unsigned j; - int c[3]; + __m128i cstep4[3][4]; outmask = 0; /* outside one or more trivial reject planes */ partmask = 0; /* outside one or more trivial accept planes */ for (j = 0; j < 3; j++) { - c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; + const int dcdx = -plane[j].dcdx * 4; + const int dcdy = plane[j].dcdy * 4; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); + cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); + cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); + cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); { - const int dcdx = -plane[j].dcdx * 4; - const int dcdy = plane[j].dcdy * 4; + const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; const int cox = plane[j].eo * 4; const int cio = plane[j].ei * 4 - 1; - build_masks(c[j] + cox, - cio - cox, - dcdx, dcdy, - &outmask, /* sign bits from c[i][0..15] + cox */ - &partmask); /* sign bits from c[i][0..15] + cio */ + outmask |= sign_bits4(cstep4[j], c + cox); + partmask |= sign_bits4(cstep4[j], c + cio); } } @@ -352,16 +291,20 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, int iy = (i >> 2) * 4; int px = x + ix; int py = y + iy; - int cx[3]; + unsigned mask = 0xffff; partial_mask &= ~(1 << i); - for (j = 0; j < 3; j++) - cx[j] = (c[j] - - plane[j].dcdx * ix - + plane[j].dcdy * iy); + for (j = 0; j < 3; j++) { + const int cx = (plane[j].c + - plane[j].dcdx * px + + plane[j].dcdy * py) * 4; + + mask &= ~sign_bits4(cstep4[j], cx); + } - do_block_4_3(task, tri, plane, px, py, cx); + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); } /* Iterate over fulls: @@ -378,3 +321,87 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, block_full_4(task, tri, px, py); } } + + +void +lp_rast_triangle_3_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = tri->plane; + unsigned mask = arg.triangle.plane_mask; + const int x = task->x + (mask & 0xff); + const int y = task->y + (mask >> 8); + unsigned j; + + /* Iterate over partials: + */ + { + unsigned mask = 0xffff; + + for (j = 0; j < 3; j++) { + const int cx = (plane[j].c + - plane[j].dcdx * x + + plane[j].dcdy * y); + + const int dcdx = -plane[j].dcdx; + const int dcdy = plane[j].dcdy; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* Extract the sign bits + */ + mask &= ~_mm_movemask_epi8(result); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); + } +} + + +#endif + + + + +#define TAG(x) x##_1 +#define NR_PLANES 1 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_2 +#define NR_PLANES 2 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_3 +#define NR_PLANES 3 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_4 +#define NR_PLANES 4 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_5 +#define NR_PLANES 5 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_6 +#define NR_PLANES 6 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_7 +#define NR_PLANES 7 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_8 +#define NR_PLANES 8 +#include "lp_rast_tri_tmp.h" + diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 99a0bae45d..9830a43ba5 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -50,9 +50,9 @@ TAG(do_block_4)(struct lp_rasterizer_task *task, int j; for (j = 0; j < NR_PLANES; j++) { - mask &= ~build_mask(c[j] - 1, - -plane[j].dcdx, - plane[j].dcdy); + mask &= ~build_mask_linear(c[j] - 1, + -plane[j].dcdx, + plane[j].dcdy); } /* Now pass to the shader: @@ -162,6 +162,11 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, unsigned outmask, inmask, partmask, partial_mask; unsigned j = 0; + if (tri->inputs.disable) { + /* This triangle was partially binned and has been disabled */ + return; + } + outmask = 0; /* outside one or more trivial reject planes */ partmask = 0; /* outside one or more trivial accept planes */ diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 15a09b7100..c0732e4ab7 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -30,17 +30,20 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_simple_list.h" +#include "util/u_format.h" #include "lp_scene.h" -#include "lp_scene_queue.h" #include "lp_fence.h" +#include "lp_debug.h" -/** List of texture references */ -struct texture_ref { - struct pipe_resource *texture; - struct texture_ref *prev, *next; /**< linked list w/ u_simple_list.h */ -}; +#define RESOURCE_REF_SZ 32 +/** List of resource references */ +struct resource_ref { + struct pipe_resource *resource[RESOURCE_REF_SZ]; + int count; + struct resource_ref *next; +}; /** @@ -48,28 +51,16 @@ struct texture_ref { * \param queue the queue to put newly rendered/emptied scenes into */ struct lp_scene * -lp_scene_create( struct pipe_context *pipe, - struct lp_scene_queue *queue ) +lp_scene_create( struct pipe_context *pipe ) { - unsigned i, j; struct lp_scene *scene = CALLOC_STRUCT(lp_scene); if (!scene) return NULL; scene->pipe = pipe; - scene->empty_queue = queue; - - for (i = 0; i < TILES_X; i++) { - for (j = 0; j < TILES_Y; j++) { - struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); - } - } scene->data.head = - scene->data.tail = CALLOC_STRUCT(data_block); - - make_empty_list(&scene->resources); + CALLOC_STRUCT(data_block); pipe_mutex_init(scene->mutex); @@ -83,24 +74,9 @@ lp_scene_create( struct pipe_context *pipe, void lp_scene_destroy(struct lp_scene *scene) { - unsigned i, j; - - lp_scene_reset(scene); - - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) { - struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - assert(bin->commands.head == bin->commands.tail); - FREE(bin->commands.head); - bin->commands.head = NULL; - bin->commands.tail = NULL; - } - - FREE(scene->data.head); - scene->data.head = NULL; - pipe_mutex_destroy(scene->mutex); - + assert(scene->data.head->next == NULL); + FREE(scene->data.head); FREE(scene); } @@ -117,8 +93,7 @@ lp_scene_is_empty(struct lp_scene *scene ) for (y = 0; y < TILES_Y; y++) { for (x = 0; x < TILES_X; x++) { const struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); - const struct cmd_block_list *list = &bin->commands; - if (list->head != list->tail || list->head->count > 0) { + if (bin->head) { return FALSE; } } @@ -127,45 +102,108 @@ lp_scene_is_empty(struct lp_scene *scene ) } -/* Free data for one particular bin. May be called from the - * rasterizer thread(s). +/* Returns true if there has ever been a failed allocation attempt in + * this scene. Used in triangle emit to avoid having to check success + * at each bin. + */ +boolean +lp_scene_is_oom(struct lp_scene *scene) +{ + return scene->alloc_failed; +} + + +/* Remove all commands from a bin. Tries to reuse some of the memory + * allocated to the bin, however. */ void lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) { struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); - struct cmd_block_list *list = &bin->commands; - struct cmd_block *block; - struct cmd_block *tmp; - assert(x < TILES_X); - assert(y < TILES_Y); + bin->head = bin->tail; + if (bin->tail) { + bin->tail->next = NULL; + bin->tail->count = 0; + } +} + - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - FREE(block); +void +lp_scene_begin_rasterization(struct lp_scene *scene) +{ + const struct pipe_framebuffer_state *fb = &scene->fb; + int i; + + //LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + for (i = 0; i < scene->fb.nr_cbufs; i++) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + scene->cbufs[i].stride = llvmpipe_resource_stride(cbuf->texture, + cbuf->level); + + scene->cbufs[i].map = llvmpipe_resource_map(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_LAYOUT_LINEAR); } - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->count = 0; + if (fb->zsbuf) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level); + scene->zsbuf.blocksize = + util_format_get_blocksize(zsbuf->texture->format); + + scene->zsbuf.map = llvmpipe_resource_map(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_LAYOUT_NONE); + } } + + /** - * Free all the temporary data in a scene. May be called from the - * rasterizer thread(s). + * Free all the temporary data in a scene. */ void -lp_scene_reset(struct lp_scene *scene ) +lp_scene_end_rasterization(struct lp_scene *scene ) { - unsigned i, j; + int i, j; + + /* Unmap color buffers */ + for (i = 0; i < scene->fb.nr_cbufs; i++) { + if (scene->cbufs[i].map) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + llvmpipe_resource_unmap(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice); + scene->cbufs[i].map = NULL; + } + } - /* Free all but last binner command lists: + /* Unmap z/stencil buffer */ + if (scene->zsbuf.map) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + llvmpipe_resource_unmap(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice); + scene->zsbuf.map = NULL; + } + + /* Reset all command lists: */ for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { - lp_scene_bin_reset(scene, i, j); + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + bin->head = bin->tail = NULL; } } @@ -174,40 +212,56 @@ lp_scene_reset(struct lp_scene *scene ) */ assert(lp_scene_is_empty(scene)); - /* Free all but last binned data block: + /* Decrement texture ref counts */ { - struct data_block_list *list = &scene->data; - struct data_block *block, *tmp; - - for (block = list->head; block != list->tail; block = tmp) { - tmp = block->next; - FREE(block); + struct resource_ref *ref; + int i, j = 0; + + for (ref = scene->resources; ref; ref = ref->next) { + for (i = 0; i < ref->count; i++) { + if (LP_DEBUG & DEBUG_SETUP) + debug_printf("resource %d: %p %dx%d sz %d\n", + j, + ref->resource[i], + ref->resource[i]->width0, + ref->resource[i]->height0, + llvmpipe_resource_size(ref->resource[i])); + j++; + pipe_resource_reference(&ref->resource[i], NULL); + } } - - assert(list->tail->next == NULL); - list->head = list->tail; - list->head->used = 0; + + if (LP_DEBUG & DEBUG_SETUP) + debug_printf("scene %d resources, sz %d\n", + j, scene->resource_reference_size); } - /* Release texture refs + /* Free all scene data blocks: */ { - struct resource_ref *ref, *next, *ref_list = &scene->resources; - for (ref = ref_list->next; ref != ref_list; ref = next) { - next = next_elem(ref); - pipe_resource_reference(&ref->resource, NULL); - FREE(ref); + struct data_block_list *list = &scene->data; + struct data_block *block, *tmp; + + for (block = list->head->next; block; block = tmp) { + tmp = block->next; + FREE(block); } - make_empty_list(ref_list); + + list->head->next = NULL; + list->head->used = 0; } lp_fence_reference(&scene->fence, NULL); + scene->resources = NULL; scene->scene_size = 0; + scene->resource_reference_size = 0; - scene->has_color_clear = FALSE; scene->has_depthstencil_clear = FALSE; + scene->alloc_failed = FALSE; + + util_unreference_framebuffer_state( &scene->fb ); } @@ -216,12 +270,20 @@ lp_scene_reset(struct lp_scene *scene ) struct cmd_block * -lp_bin_new_cmd_block( struct cmd_block_list *list ) +lp_scene_new_cmd_block( struct lp_scene *scene, + struct cmd_bin *bin ) { - struct cmd_block *block = MALLOC_STRUCT(cmd_block); + struct cmd_block *block = lp_scene_alloc(scene, sizeof(struct cmd_block)); if (block) { - list->tail->next = block; - list->tail = block; + if (bin->tail) { + bin->tail->next = block; + bin->tail = block; + } + else { + bin->head = block; + bin->tail = block; + } + //memset(block, 0, sizeof *block); block->next = NULL; block->count = 0; } @@ -230,16 +292,26 @@ lp_bin_new_cmd_block( struct cmd_block_list *list ) struct data_block * -lp_bin_new_data_block( struct data_block_list *list ) +lp_scene_new_data_block( struct lp_scene *scene ) { - struct data_block *block = MALLOC_STRUCT(data_block); - if (block) { - list->tail->next = block; - list->tail = block; - block->next = NULL; + if (scene->scene_size + DATA_BLOCK_SIZE > LP_SCENE_MAX_SIZE) { + if (0) debug_printf("%s: failed\n", __FUNCTION__); + scene->alloc_failed = TRUE; + return NULL; + } + else { + struct data_block *block = MALLOC_STRUCT(data_block); + if (block == NULL) + return NULL; + + scene->scene_size += sizeof *block; + block->used = 0; + block->next = scene->data.head; + scene->data.head = block; + + return block; } - return block; } @@ -247,7 +319,7 @@ lp_bin_new_data_block( struct data_block_list *list ) * Return number of bytes used for all bin data within a scene. * This does not include resources (textures) referenced by the scene. */ -unsigned +static unsigned lp_scene_data_size( const struct lp_scene *scene ) { unsigned size = 0; @@ -259,36 +331,63 @@ lp_scene_data_size( const struct lp_scene *scene ) } -/** Return number of bytes used for a single bin */ -unsigned -lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ) -{ - struct cmd_bin *bin = lp_scene_get_bin((struct lp_scene *) scene, x, y); - const struct cmd_block *cmd; - unsigned size = 0; - for (cmd = bin->commands.head; cmd; cmd = cmd->next) { - size += (cmd->count * - (sizeof(lp_rast_cmd) + sizeof(union lp_rast_cmd_arg))); - } - return size; -} - /** * Add a reference to a resource by the scene. */ -void +boolean lp_scene_add_resource_reference(struct lp_scene *scene, - struct pipe_resource *resource) + struct pipe_resource *resource, + boolean initializing_scene) { - struct resource_ref *ref = CALLOC_STRUCT(resource_ref); - if (ref) { - struct resource_ref *ref_list = &scene->resources; - pipe_resource_reference(&ref->resource, resource); - insert_at_tail(ref_list, ref); + struct resource_ref *ref, **last = &scene->resources; + int i; + + /* Look at existing resource blocks: + */ + for (ref = scene->resources; ref; ref = ref->next) { + last = &ref->next; + + /* Search for this resource: + */ + for (i = 0; i < ref->count; i++) + if (ref->resource[i] == resource) + return TRUE; + + if (ref->count < RESOURCE_REF_SZ) { + /* If the block is half-empty, then append the reference here. + */ + break; + } + } + + /* Create a new block if no half-empty block was found. + */ + if (!ref) { + assert(*last == NULL); + *last = lp_scene_alloc(scene, sizeof *ref); + if (*last == NULL) + return FALSE; + + ref = *last; + memset(ref, 0, sizeof *ref); } - scene->scene_size += llvmpipe_resource_size(resource); + /* Append the reference to the reference block. + */ + pipe_resource_reference(&ref->resource[ref->count++], resource); + scene->resource_reference_size += llvmpipe_resource_size(resource); + + /* Heuristic to advise scene flushes. This isn't helpful in the + * initial setup of the scene, but after that point flush on the + * next resource added which exceeds 64MB in referenced texture + * data. + */ + if (!initializing_scene && + scene->resource_reference_size >= LP_SCENE_MAX_RESOURCE_SIZE) + return FALSE; + + return TRUE; } @@ -299,12 +398,15 @@ boolean lp_scene_is_resource_referenced(const struct lp_scene *scene, const struct pipe_resource *resource) { - const struct resource_ref *ref_list = &scene->resources; const struct resource_ref *ref; - foreach (ref, ref_list) { - if (ref->resource == resource) - return TRUE; + int i; + + for (ref = scene->resources; ref; ref = ref->next) { + for (i = 0; i < ref->count; i++) + if (ref->resource[i] == resource) + return TRUE; } + return FALSE; } @@ -342,7 +444,7 @@ lp_scene_bin_iter_begin( struct lp_scene *scene ) * of work (a bin) to work on. */ struct cmd_bin * -lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ) +lp_scene_bin_iter_next( struct lp_scene *scene ) { struct cmd_bin *bin = NULL; @@ -359,8 +461,6 @@ lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ) } bin = lp_scene_get_bin(scene, scene->curr_x, scene->curr_y); - *bin_x = scene->curr_x; - *bin_y = scene->curr_y; end: /*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/ @@ -384,34 +484,16 @@ void lp_scene_begin_binning( struct lp_scene *scene, } -void lp_scene_rasterize( struct lp_scene *scene, - struct lp_rasterizer *rast ) +void lp_scene_end_binning( struct lp_scene *scene ) { - if (0) { - unsigned x, y; + if (LP_DEBUG & DEBUG_SCENE) { debug_printf("rasterize scene:\n"); - debug_printf(" data size: %u\n", lp_scene_data_size(scene)); - for (y = 0; y < scene->tiles_y; y++) { - for (x = 0; x < scene->tiles_x; x++) { - debug_printf(" bin %u, %u size: %u\n", x, y, - lp_scene_bin_size(scene, x, y)); - } - } - } - - /* Enqueue the scene for rasterization, then immediately wait for - * it to finish. - */ - lp_rast_queue_scene( rast, scene ); + debug_printf(" scene_size: %u\n", + scene->scene_size); + debug_printf(" data size: %u\n", + lp_scene_data_size(scene)); - /* Currently just wait for the rasterizer to finish. Some - * threading interactions need to be worked out, particularly once - * transfers become per-context: - */ - lp_rast_finish( rast ); - - util_unreference_framebuffer_state( &scene->fb ); - - /* put scene into the empty list */ - lp_scene_enqueue( scene->empty_queue, scene ); + if (0) + lp_debug_bins( scene ); + } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index fa1b311fa1..dbef7692e4 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -38,6 +38,7 @@ #include "os/os_thread.h" #include "lp_tile_soa.h" #include "lp_rast.h" +#include "lp_debug.h" struct lp_scene_queue; @@ -49,58 +50,71 @@ struct lp_scene_queue; #define CMD_BLOCK_MAX 128 -#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) - +#define DATA_BLOCK_SIZE (64 * 1024) + +/* Scene temporary storage is clamped to this size: + */ +#define LP_SCENE_MAX_SIZE (4*1024*1024) + +/* The maximum amount of texture storage referenced by a scene is + * clamped ot this size: + */ +#define LP_SCENE_MAX_RESOURCE_SIZE (64*1024*1024) /* switch to a non-pointer value for this: */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); +typedef void (*lp_rast_cmd_func)( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + struct cmd_block { - lp_rast_cmd cmd[CMD_BLOCK_MAX]; + uint8_t cmd[CMD_BLOCK_MAX]; union lp_rast_cmd_arg arg[CMD_BLOCK_MAX]; unsigned count; struct cmd_block *next; }; +struct cmd_block_list { + struct cmd_block *head; + struct cmd_block *tail; +}; + struct data_block { ubyte data[DATA_BLOCK_SIZE]; unsigned used; struct data_block *next; }; -struct cmd_block_list { - struct cmd_block *head; - struct cmd_block *tail; -}; + /** * For each screen tile we have one of these bins. */ struct cmd_bin { - struct cmd_block_list commands; + ushort x; + ushort y; + struct cmd_block *head; + struct cmd_block *tail; }; /** - * This stores bulk data which is shared by all bins within a scene. + * This stores bulk data which is used for all memory allocations + * within a scene. + * * Examples include triangle data and state data. The commands in * the per-tile bins will point to chunks of data in this structure. + * + * Include the first block of data statically to ensure we can always + * initiate a scene without relying on malloc succeeding. */ struct data_block_list { + struct data_block first; struct data_block *head; - struct data_block *tail; -}; - - -/** List of resource references */ -struct resource_ref { - struct pipe_resource *resource; - struct resource_ref *prev, *next; /**< linked list w/ u_simple_list.h */ }; +struct resource_ref; /** * All bins and bin data are contained here. @@ -114,18 +128,33 @@ struct lp_scene { struct pipe_context *pipe; struct lp_fence *fence; + /* Framebuffer mappings - valid only between begin_rasterization() + * and end_rasterization(). + */ + struct { + uint8_t *map; + unsigned stride; + unsigned blocksize; + } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS]; + /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; /** list of resources referenced by the scene commands */ - struct resource_ref resources; + struct resource_ref *resources; - /** Approx memory used by the scene (in bytes). This includes the - * shared and per-tile bins plus any referenced resources/textures. + /** Total memory used by the scene (in bytes). This sums all the + * data blocks and counts all bins, state, resource references and + * other random allocations within the scene. */ unsigned scene_size; - boolean has_color_clear; + /** Sum of sizes of all resources referenced by the scene. Sums + * all the textures read by the scene: + */ + unsigned resource_reference_size; + + boolean alloc_failed; boolean has_depthstencil_clear; /** @@ -137,38 +166,28 @@ struct lp_scene { int curr_x, curr_y; /**< for iterating over bins */ pipe_mutex mutex; - /* Where to place this scene once it has been rasterized: - */ - struct lp_scene_queue *empty_queue; - struct cmd_bin tile[TILES_X][TILES_Y]; struct data_block_list data; }; -struct lp_scene *lp_scene_create(struct pipe_context *pipe, - struct lp_scene_queue *empty_queue); +struct lp_scene *lp_scene_create(struct pipe_context *pipe); void lp_scene_destroy(struct lp_scene *scene); - - boolean lp_scene_is_empty(struct lp_scene *scene ); - -void lp_scene_reset(struct lp_scene *scene ); +boolean lp_scene_is_oom(struct lp_scene *scene ); -struct data_block *lp_bin_new_data_block( struct data_block_list *list ); +struct data_block *lp_scene_new_data_block( struct lp_scene *scene ); -struct cmd_block *lp_bin_new_cmd_block( struct cmd_block_list *list ); +struct cmd_block *lp_scene_new_cmd_block( struct lp_scene *scene, + struct cmd_bin *bin ); -unsigned lp_scene_data_size( const struct lp_scene *scene ); - -unsigned lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ); - -void lp_scene_add_resource_reference(struct lp_scene *scene, - struct pipe_resource *resource); +boolean lp_scene_add_resource_reference(struct lp_scene *scene, + struct pipe_resource *resource, + boolean initializing_scene); boolean lp_scene_is_resource_referenced(const struct lp_scene *scene, const struct pipe_resource *resource ); @@ -182,21 +201,27 @@ static INLINE void * lp_scene_alloc( struct lp_scene *scene, unsigned size) { struct data_block_list *list = &scene->data; - struct data_block *tail = list->tail; + struct data_block *block = list->head; - if (tail->used + size > DATA_BLOCK_SIZE) { - tail = lp_bin_new_data_block( list ); - if (!tail) { + assert(size <= DATA_BLOCK_SIZE); + assert(block != NULL); + + if (LP_DEBUG & DEBUG_MEM) + debug_printf("alloc %u block %u/%u tot %u/%u\n", + size, block->used, DATA_BLOCK_SIZE, + scene->scene_size, LP_SCENE_MAX_SIZE); + + if (block->used + size > DATA_BLOCK_SIZE) { + block = lp_scene_new_data_block( scene ); + if (!block) { /* out of memory */ return NULL; } } - scene->scene_size += size; - { - ubyte *data = tail->data + tail->used; - tail->used += size; + ubyte *data = block->data + block->used; + block->used += size; return data; } } @@ -210,20 +235,26 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size, unsigned alignment ) { struct data_block_list *list = &scene->data; - struct data_block *tail = list->tail; - - if (tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) { - tail = lp_bin_new_data_block( list ); - if (!tail) + struct data_block *block = list->head; + + assert(block != NULL); + + if (LP_DEBUG & DEBUG_MEM) + debug_printf("alloc %u block %u/%u tot %u/%u\n", + size + alignment - 1, + block->used, DATA_BLOCK_SIZE, + scene->scene_size, LP_SCENE_MAX_SIZE); + + if (block->used + size + alignment - 1 > DATA_BLOCK_SIZE) { + block = lp_scene_new_data_block( scene ); + if (!block) return NULL; } - scene->scene_size += size; - { - ubyte *data = tail->data + tail->used; + ubyte *data = block->data + block->used; unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data; - tail->used += offset + size; + block->used += offset + size; return data + offset; } } @@ -235,9 +266,8 @@ static INLINE void lp_scene_putback_data( struct lp_scene *scene, unsigned size) { struct data_block_list *list = &scene->data; - scene->scene_size -= size; - assert(list->tail->used >= size); - list->tail->used -= size; + assert(list->head && list->head->used >= size); + list->head->used -= size; } @@ -256,55 +286,55 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y); /* Add a command to bin[x][y]. */ -static INLINE void +static INLINE boolean lp_scene_bin_command( struct lp_scene *scene, - unsigned x, unsigned y, - lp_rast_cmd cmd, - union lp_rast_cmd_arg arg ) + unsigned x, unsigned y, + unsigned cmd, + union lp_rast_cmd_arg arg ) { struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); - struct cmd_block_list *list = &bin->commands; - struct cmd_block *tail = list->tail; + struct cmd_block *tail = bin->tail; assert(x < scene->tiles_x); assert(y < scene->tiles_y); + assert(cmd <= LP_RAST_OP_END_QUERY); - if (tail->count == CMD_BLOCK_MAX) { - tail = lp_bin_new_cmd_block( list ); + if (tail == NULL || tail->count == CMD_BLOCK_MAX) { + tail = lp_scene_new_cmd_block( scene, bin ); if (!tail) { - /* out of memory - simply ignore this command (for now) */ - return; + return FALSE; } assert(tail->count == 0); } { unsigned i = tail->count; - tail->cmd[i] = cmd; + tail->cmd[i] = cmd & LP_RAST_OP_MASK; tail->arg[i] = arg; tail->count++; } + + return TRUE; } /* Add a command to all active bins. */ -static INLINE void +static INLINE boolean lp_scene_bin_everywhere( struct lp_scene *scene, - lp_rast_cmd cmd, + unsigned cmd, const union lp_rast_cmd_arg arg ) { unsigned i, j; - for (i = 0; i < scene->tiles_x; i++) - for (j = 0; j < scene->tiles_y; j++) - lp_scene_bin_command( scene, i, j, cmd, arg ); -} - + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + if (!lp_scene_bin_command( scene, i, j, cmd, arg )) + return FALSE; + } + } -void -lp_scene_bin_state_command( struct lp_scene *scene, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ); + return TRUE; +} static INLINE unsigned @@ -318,23 +348,30 @@ void lp_scene_bin_iter_begin( struct lp_scene *scene ); struct cmd_bin * -lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); +lp_scene_bin_iter_next( struct lp_scene *scene ); -void -lp_scene_rasterize( struct lp_scene *scene, - struct lp_rasterizer *rast ); +/* Begin/end binning of a scene + */ void lp_scene_begin_binning( struct lp_scene *scene, struct pipe_framebuffer_state *fb ); +void +lp_scene_end_binning( struct lp_scene *scene ); + + +/* Begin/end rasterization of a scene + */ +void +lp_scene_begin_rasterization(struct lp_scene *scene); + +void +lp_scene_end_rasterization(struct lp_scene *scene ); + + -static INLINE unsigned -lp_scene_get_size(const struct lp_scene *scene) -{ - return scene->scene_size; -} #endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 1e65a91fc6..0d40dc5020 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -33,8 +33,8 @@ #include "util/u_format_s3tc.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" +#include "draw/draw_context.h" -#include "gallivm/lp_bld_limits.h" #include "lp_texture.h" #include "lp_fence.h" #include "lp_jit.h" @@ -63,6 +63,7 @@ static const struct debug_named_value lp_debug_flags[] = { { "counters", DEBUG_COUNTERS, NULL }, { "scene", DEBUG_SCENE, NULL }, { "fence", DEBUG_FENCE, NULL }, + { "mem", DEBUG_MEM, NULL }, DEBUG_NAMED_VALUE_END }; #endif @@ -131,8 +132,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return LP_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return LP_MAX_TEXTURE_2D_LEVELS; - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case PIPE_CAP_INDEP_BLEND_ENABLE: @@ -145,47 +144,29 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 0; - case PIPE_CAP_MAX_VS_INSTRUCTIONS: - case PIPE_CAP_MAX_FS_INSTRUCTIONS: - case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS: - case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS: - case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS: - case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS: - case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS: - case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS: - /* There is no limit in number of instructions beyond available memory */ - return 32768; - case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH: - case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH: - return LP_MAX_TGSI_NESTING; - case PIPE_CAP_MAX_VS_INPUTS: - case PIPE_CAP_MAX_FS_INPUTS: - return PIPE_MAX_ATTRIBS; - case PIPE_CAP_MAX_FS_CONSTS: - case PIPE_CAP_MAX_VS_CONSTS: - /* There is no limit in number of constants beyond available memory */ - return 32768; - case PIPE_CAP_MAX_VS_TEMPS: - case PIPE_CAP_MAX_FS_TEMPS: - return LP_MAX_TGSI_TEMPS; - case PIPE_CAP_MAX_VS_ADDRS: - case PIPE_CAP_MAX_FS_ADDRS: - return LP_MAX_TGSI_ADDRS; - case PIPE_CAP_MAX_VS_PREDS: - case PIPE_CAP_MAX_FS_PREDS: - return LP_MAX_TGSI_PREDS; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 1; - case PIPE_CAP_GEOMETRY_SHADER4: - return 1; case PIPE_CAP_DEPTH_CLAMP: return 0; default: - assert(0); return 0; } } +static int +llvmpipe_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) +{ + switch(shader) + { + case PIPE_SHADER_FRAGMENT: + return tgsi_exec_get_shader_param(param); + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_GEOMETRY: + return draw_get_shader_param(shader, param); + default: + return 0; + } +} static float llvmpipe_get_paramf(struct pipe_screen *screen, enum pipe_cap param) @@ -401,6 +382,7 @@ llvmpipe_create_screen(struct sw_winsys *winsys) screen->base.get_name = llvmpipe_get_name; screen->base.get_vendor = llvmpipe_get_vendor; screen->base.get_param = llvmpipe_get_param; + screen->base.get_shader_param = llvmpipe_get_shader_param; screen->base.get_paramf = llvmpipe_get_paramf; screen->base.is_format_supported = llvmpipe_is_format_supported; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 3da9097154..e6a8196761 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -42,7 +42,6 @@ #include "lp_context.h" #include "lp_memory.h" #include "lp_scene.h" -#include "lp_scene_queue.h" #include "lp_texture.h" #include "lp_debug.h" #include "lp_fence.h" @@ -57,36 +56,31 @@ #include "draw/draw_vbuf.h" -static void set_scene_state( struct lp_setup_context *, enum setup_state ); +static void set_scene_state( struct lp_setup_context *, enum setup_state, + const char *reason); +static boolean try_update_scene_state( struct lp_setup_context *setup ); -struct lp_scene * -lp_setup_get_current_scene(struct lp_setup_context *setup) +static void +lp_setup_get_empty_scene(struct lp_setup_context *setup) { - if (!setup->scene) { - set_scene_state( setup, SETUP_EMPTY ); - } - return setup->scene; -} + assert(setup->scene == NULL); + setup->scene_idx++; + setup->scene_idx %= Elements(setup->scenes); -/** - * Check if the size of the current scene has exceeded the limit. - * If so, flush/render it. - */ -static void -setup_check_scene_size_and_flush(struct lp_setup_context *setup) -{ - if (setup->scene) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); - unsigned size = lp_scene_get_size(scene); + setup->scene = setup->scenes[setup->scene_idx]; - if (size > LP_MAX_SCENE_SIZE) { - /*printf("LLVMPIPE: scene size = %u, flushing.\n", size);*/ - set_scene_state( setup, SETUP_FLUSHED ); - /*assert(lp_scene_get_size(scene) == 0);*/ - } + if (setup->scene->fence) { + if (LP_DEBUG & DEBUG_SETUP) + debug_printf("%s: wait for scene %d\n", + __FUNCTION__, setup->scene->fence->id); + + lp_fence_wait(setup->scene->fence); } + + lp_scene_begin_binning(setup->scene, &setup->fb); + } @@ -96,7 +90,7 @@ first_triangle( struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { - set_scene_state( setup, SETUP_ACTIVE ); + assert(setup->state == SETUP_ACTIVE); lp_setup_choose_triangle( setup ); setup->triangle( setup, v0, v1, v2 ); } @@ -106,7 +100,7 @@ first_line( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4]) { - set_scene_state( setup, SETUP_ACTIVE ); + assert(setup->state == SETUP_ACTIVE); lp_setup_choose_line( setup ); setup->line( setup, v0, v1 ); } @@ -115,12 +109,12 @@ static void first_point( struct lp_setup_context *setup, const float (*v0)[4]) { - set_scene_state( setup, SETUP_ACTIVE ); + assert(setup->state == SETUP_ACTIVE); lp_setup_choose_point( setup ); setup->point( setup, v0 ); } -static void reset_context( struct lp_setup_context *setup ) +static void lp_setup_reset( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -135,8 +129,7 @@ static void reset_context( struct lp_setup_context *setup ) /* Reset some state: */ - setup->clear.flags = 0; - setup->clear.clearzs.clearzs_mask = 0; + memset(&setup->clear, 0, sizeof setup->clear); /* Have an explicit "start-binning" call and get rid of this * pointer twiddling? @@ -151,14 +144,23 @@ static void reset_context( struct lp_setup_context *setup ) static void lp_setup_rasterize_scene( struct lp_setup_context *setup ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_scene *scene = setup->scene; struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen); + lp_scene_end_binning(scene); + + lp_fence_reference(&setup->last_fence, scene->fence); + + if (setup->last_fence) + setup->last_fence->issued = TRUE; + pipe_mutex_lock(screen->rast_mutex); - lp_scene_rasterize(scene, screen->rast); + lp_rast_queue_scene(screen->rast, scene); + lp_rast_finish(screen->rast); pipe_mutex_unlock(screen->rast_mutex); - reset_context( setup ); + lp_scene_end_rasterization(setup->scene); + lp_setup_reset( setup ); LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); } @@ -168,8 +170,30 @@ lp_setup_rasterize_scene( struct lp_setup_context *setup ) static void begin_binning( struct lp_setup_context *setup ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_scene *scene = setup->scene; boolean need_zsload = FALSE; + boolean ok; + unsigned i, j; + + assert(scene); + assert(scene->fence == NULL); + + /* Always create a fence: + */ + scene->fence = lp_fence_create(MAX2(1, setup->num_threads)); + + /* Initialize the bin flags and x/y coords: + */ + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + scene->tile[i][j].x = i; + scene->tile[i][j].y = j; + } + } + + ok = try_update_scene_state(setup); + assert(ok); + if (setup->fb.zsbuf && ((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && util_format_is_depth_and_stencil(setup->fb.zsbuf->format)) @@ -181,10 +205,10 @@ begin_binning( struct lp_setup_context *setup ) if (setup->fb.nr_cbufs) { if (setup->clear.flags & PIPE_CLEAR_COLOR) { - lp_scene_bin_everywhere( scene, - lp_rast_clear_color, - setup->clear.color ); - scene->has_color_clear = TRUE; + ok = lp_scene_bin_everywhere( scene, + LP_RAST_OP_CLEAR_COLOR, + setup->clear.color ); + assert(ok); } } @@ -192,12 +216,27 @@ begin_binning( struct lp_setup_context *setup ) if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) { if (!need_zsload) scene->has_depthstencil_clear = TRUE; - lp_scene_bin_everywhere( scene, - lp_rast_clear_zstencil, - lp_rast_arg_clearzs(&setup->clear.clearzs) ); + ok = lp_scene_bin_everywhere( scene, + LP_RAST_OP_CLEAR_ZSTENCIL, + lp_rast_arg_clearzs( + setup->clear.zsvalue, + setup->clear.zsmask)); + assert(ok); } } + if (setup->active_query) { + ok = lp_scene_bin_everywhere( scene, + LP_RAST_OP_BEGIN_QUERY, + lp_rast_arg_query(setup->active_query) ); + assert(ok); + } + + + setup->clear.flags = 0; + setup->clear.zsmask = 0; + setup->clear.zsvalue = 0; + LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); } @@ -213,51 +252,56 @@ execute_clears( struct lp_setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); begin_binning( setup ); - lp_setup_rasterize_scene( setup ); } +const char *states[] = { + "FLUSHED", + "EMPTY ", + "CLEARED", + "ACTIVE " +}; + static void set_scene_state( struct lp_setup_context *setup, - enum setup_state new_state ) + enum setup_state new_state, + const char *reason) { unsigned old_state = setup->state; if (old_state == new_state) return; - - LP_DBG(DEBUG_SETUP, "%s old %d new %d\n", __FUNCTION__, old_state, new_state); - - switch (new_state) { - case SETUP_EMPTY: - assert(old_state == SETUP_FLUSHED); - assert(setup->scene == NULL); + + if (LP_DEBUG & DEBUG_SCENE) { + debug_printf("%s old %s new %s%s%s\n", + __FUNCTION__, + states[old_state], + states[new_state], + (new_state == SETUP_FLUSHED) ? ": " : "", + (new_state == SETUP_FLUSHED) ? reason : ""); + + if (new_state == SETUP_FLUSHED && setup->scene) + lp_debug_draw_bins_by_cmd_length(setup->scene); + } - /* wait for a free/empty scene - */ - setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE); - assert(lp_scene_is_empty(setup->scene)); - lp_scene_begin_binning(setup->scene, - &setup->fb ); - break; + /* wait for a free/empty scene + */ + if (old_state == SETUP_FLUSHED) + lp_setup_get_empty_scene(setup); + switch (new_state) { case SETUP_CLEARED: - assert(old_state == SETUP_EMPTY); - assert(setup->scene != NULL); break; case SETUP_ACTIVE: - assert(old_state == SETUP_EMPTY || - old_state == SETUP_CLEARED); - assert(setup->scene != NULL); begin_binning( setup ); break; case SETUP_FLUSHED: if (old_state == SETUP_CLEARED) execute_clears( setup ); - else - lp_setup_rasterize_scene( setup ); + + lp_setup_rasterize_scene( setup ); assert(setup->scene == NULL); break; @@ -278,21 +322,11 @@ lp_setup_flush( struct lp_setup_context *setup, struct pipe_fence_handle **fence, const char *reason) { - LP_DBG(DEBUG_SETUP, "%s %s\n", __FUNCTION__, reason); - - if (setup->scene) { - if (fence) { - /* if we're going to flush the setup/rasterization modules, emit - * a fence. - */ - *fence = lp_setup_fence( setup ); - } + set_scene_state( setup, SETUP_FLUSHED, reason ); - if (setup->scene->fence) - setup->scene->fence->issued = TRUE; + if (fence) { + lp_fence_reference((struct lp_fence **)fence, setup->last_fence); } - - set_scene_state( setup, SETUP_FLUSHED ); } @@ -304,7 +338,7 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup, /* Flush any old scene. */ - set_scene_state( setup, SETUP_FLUSHED ); + set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ ); /* * Ensure the old scene is not reused. @@ -323,78 +357,41 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup, } -void -lp_setup_clear( struct lp_setup_context *setup, - const float *color, - double depth, - unsigned stencil, - unsigned flags ) +static boolean +lp_setup_try_clear( struct lp_setup_context *setup, + const float *color, + double depth, + unsigned stencil, + unsigned flags ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); + uint32_t zsmask = 0; + uint32_t zsvalue = 0; + union lp_rast_cmd_arg color_arg; unsigned i; - boolean full_zs_clear = TRUE; - uint32_t mask = 0; LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state); - if (flags & PIPE_CLEAR_COLOR) { - for (i = 0; i < 4; ++i) - setup->clear.color.clear_color[i] = float_to_ubyte(color[i]); + for (i = 0; i < 4; i++) + color_arg.clear_color[i] = float_to_ubyte(color[i]); } if (flags & PIPE_CLEAR_DEPTHSTENCIL) { - if (setup->fb.zsbuf && - ((flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && - util_format_is_depth_and_stencil(setup->fb.zsbuf->format)) - full_zs_clear = FALSE; - - if (full_zs_clear) { - setup->clear.clearzs.clearzs_value = - util_pack_z_stencil(setup->fb.zsbuf->format, - depth, - stencil); - setup->clear.clearzs.clearzs_mask = 0xffffffff; - } - else { - /* hmm */ - uint32_t tmpval; - if (flags & PIPE_CLEAR_DEPTH) { - tmpval = util_pack_z(setup->fb.zsbuf->format, - depth); - switch (setup->fb.zsbuf->format) { - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - mask = 0xffffff; - break; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - mask = 0xffffff00; - break; - default: - assert(0); - } - } - else { - switch (setup->fb.zsbuf->format) { - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - mask = 0xff000000; - tmpval = stencil << 24; - break; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - mask = 0xff; - tmpval = stencil; - break; - default: - assert(0); - tmpval = 0; - } - } - setup->clear.clearzs.clearzs_mask |= mask; - setup->clear.clearzs.clearzs_value = - (setup->clear.clearzs.clearzs_value & ~mask) | (tmpval & mask); - } + unsigned zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; + unsigned smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; + + zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); + + zsmask = util_pack_uint_z_stencil(setup->fb.zsbuf->format, + zmask, + smask); } if (setup->state == SETUP_ACTIVE) { + struct lp_scene *scene = setup->scene; + /* Add the clear to existing scene. In the unusual case where * both color and depth-stencil are being cleared when there's * already been some rendering, we could discard the currently @@ -402,24 +399,18 @@ lp_setup_clear( struct lp_setup_context *setup, * a common usage. */ if (flags & PIPE_CLEAR_COLOR) { - lp_scene_bin_everywhere( scene, - lp_rast_clear_color, - setup->clear.color ); - scene->has_color_clear = TRUE; + if (!lp_scene_bin_everywhere( scene, + LP_RAST_OP_CLEAR_COLOR, + color_arg )) + return FALSE; } if (flags & PIPE_CLEAR_DEPTHSTENCIL) { - if (full_zs_clear) - scene->has_depthstencil_clear = TRUE; - else - setup->clear.clearzs.clearzs_mask = mask; - lp_scene_bin_everywhere( scene, - lp_rast_clear_zstencil, - lp_rast_arg_clearzs(&setup->clear.clearzs) ); - - + if (!lp_scene_bin_everywhere( scene, + LP_RAST_OP_CLEAR_ZSTENCIL, + lp_rast_arg_clearzs(zsvalue, zsmask) )) + return FALSE; } - } else { /* Put ourselves into the 'pre-clear' state, specifically to try @@ -427,42 +418,43 @@ lp_setup_clear( struct lp_setup_context *setup, * buffers which the app or state-tracker might issue * separately. */ - set_scene_state( setup, SETUP_CLEARED ); + set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ ); setup->clear.flags |= flags; + + if (flags & PIPE_CLEAR_DEPTHSTENCIL) { + setup->clear.zsmask |= zsmask; + setup->clear.zsvalue = + (setup->clear.zsvalue & ~zsmask) | (zsvalue & zsmask); + } + + if (flags & PIPE_CLEAR_COLOR) { + memcpy(setup->clear.color.clear_color, + &color_arg, + sizeof color_arg); + } } + + return TRUE; } - -/** - * Emit a fence. - */ -struct pipe_fence_handle * -lp_setup_fence( struct lp_setup_context *setup ) +void +lp_setup_clear( struct lp_setup_context *setup, + const float *color, + double depth, + unsigned stencil, + unsigned flags ) { - if (setup->scene == NULL) - return NULL; - else if (setup->num_threads == 0) - return NULL; - else - { - struct lp_scene *scene = lp_setup_get_current_scene(setup); - const unsigned rank = setup->num_threads; + if (!lp_setup_try_clear( setup, color, depth, stencil, flags )) { + lp_setup_flush(setup, 0, NULL, __FUNCTION__); - set_scene_state( setup, SETUP_ACTIVE ); - - assert(scene->fence == NULL); + if (!lp_setup_try_clear( setup, color, depth, stencil, flags )) + assert(0); + } +} - /* The caller gets a reference, we keep a copy too, so need to - * bump the refcount: - */ - lp_fence_reference(&scene->fence, lp_fence_create(rank)); - LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); - return (struct pipe_fence_handle *) scene->fence; - } -} void @@ -725,58 +717,33 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup, /** * Called by vbuf code when we're about to draw something. */ -void -lp_setup_update_state( struct lp_setup_context *setup ) +static boolean +try_update_scene_state( struct lp_setup_context *setup ) { - struct lp_scene *scene; + boolean new_scene = (setup->fs.stored == NULL); + struct lp_scene *scene = setup->scene; - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - - setup_check_scene_size_and_flush(setup); - - scene = lp_setup_get_current_scene(setup); - - assert(setup->fs.current.variant); - - /* Some of the 'draw' pipeline stages may have changed some driver state. - * Make sure we've processed those state changes before anything else. - * - * XXX this is the only place where llvmpipe_context is used in the - * setup code. This may get refactored/changed... - */ - { - struct llvmpipe_context *lp = llvmpipe_context(scene->pipe); - - /* Will probably need to move this somewhere else, just need - * to know about vertex shader point size attribute. - */ - setup->psize = lp->psize_slot; - - if (lp->dirty) { - llvmpipe_update_derived(lp); - } - assert(lp->dirty == 0); - } + assert(scene); if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) { uint8_t *stored; unsigned i, j; stored = lp_scene_alloc_aligned(scene, 4 * 16, 16); + if (!stored) { + assert(!new_scene); + return FALSE; + } - if (stored) { - /* smear each blend color component across 16 ubyte elements */ - for (i = 0; i < 4; ++i) { - uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); - for (j = 0; j < 16; ++j) - stored[i*16 + j] = c; - } - - setup->blend_color.stored = stored; - - setup->fs.current.jit_context.blend_color = setup->blend_color.stored; + /* smear each blend color component across 16 ubyte elements */ + for (i = 0; i < 4; ++i) { + uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); + for (j = 0; j < 16; ++j) + stored[i*16 + j] = c; } + setup->blend_color.stored = stored; + setup->fs.current.jit_context.blend_color = setup->blend_color.stored; setup->dirty |= LP_SETUP_NEW_FS; } @@ -797,13 +764,16 @@ lp_setup_update_state( struct lp_setup_context *setup ) void *stored; stored = lp_scene_alloc(scene, current_size); - if(stored) { - memcpy(stored, - current_data, - current_size); - setup->constants.stored_size = current_size; - setup->constants.stored_data = stored; + if (!stored) { + assert(!new_scene); + return FALSE; } + + memcpy(stored, + current_data, + current_size); + setup->constants.stored_size = current_size; + setup->constants.stored_data = stored; } } else { @@ -816,31 +786,42 @@ lp_setup_update_state( struct lp_setup_context *setup ) } - if(setup->dirty & LP_SETUP_NEW_FS) { - if(!setup->fs.stored || - memcmp(setup->fs.stored, - &setup->fs.current, - sizeof setup->fs.current) != 0) { + if (setup->dirty & LP_SETUP_NEW_FS) { + if (!setup->fs.stored || + memcmp(setup->fs.stored, + &setup->fs.current, + sizeof setup->fs.current) != 0) + { + struct lp_rast_state *stored; + uint i; + /* The fs state that's been stored in the scene is different from * the new, current state. So allocate a new lp_rast_state object * and append it to the bin's setup data buffer. */ - uint i; - struct lp_rast_state *stored = - (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored); - if(stored) { - memcpy(stored, - &setup->fs.current, - sizeof setup->fs.current); - setup->fs.stored = stored; + stored = (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored); + if (!stored) { + assert(!new_scene); + return FALSE; } + memcpy(stored, + &setup->fs.current, + sizeof setup->fs.current); + setup->fs.stored = stored; + /* The scene now references the textures in the rasterization * state record. Note that now. */ for (i = 0; i < Elements(setup->fs.current_tex); i++) { - if (setup->fs.current_tex[i]) - lp_scene_add_resource_reference(scene, setup->fs.current_tex[i]); + if (setup->fs.current_tex[i]) { + if (!lp_scene_add_resource_reference(scene, + setup->fs.current_tex[i], + new_scene)) { + assert(!new_scene); + return FALSE; + } + } } } } @@ -856,6 +837,47 @@ lp_setup_update_state( struct lp_setup_context *setup ) setup->dirty = 0; assert(setup->fs.stored); + return TRUE; +} + +void +lp_setup_update_state( struct lp_setup_context *setup, + boolean update_scene ) +{ + /* Some of the 'draw' pipeline stages may have changed some driver state. + * Make sure we've processed those state changes before anything else. + * + * XXX this is the only place where llvmpipe_context is used in the + * setup code. This may get refactored/changed... + */ + { + struct llvmpipe_context *lp = llvmpipe_context(setup->pipe); + if (lp->dirty) { + llvmpipe_update_derived(lp); + } + + /* Will probably need to move this somewhere else, just need + * to know about vertex shader point size attribute. + */ + setup->psize = lp->psize_slot; + + assert(lp->dirty == 0); + } + + if (update_scene) + set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ ); + + /* Only call into update_scene_state() if we already have a + * scene: + */ + if (update_scene && setup->scene) { + assert(setup->state == SETUP_ACTIVE); + if (!try_update_scene_state(setup)) { + lp_setup_flush_and_restart(setup); + if (!try_update_scene_state(setup)) + assert(0); + } + } } @@ -867,7 +889,7 @@ lp_setup_destroy( struct lp_setup_context *setup ) { uint i; - reset_context( setup ); + lp_setup_reset( setup ); util_unreference_framebuffer_state(&setup->fb); @@ -878,15 +900,15 @@ lp_setup_destroy( struct lp_setup_context *setup ) pipe_resource_reference(&setup->constants.current, NULL); /* free the scenes in the 'empty' queue */ - while (1) { - struct lp_scene *scene = lp_scene_dequeue(setup->empty_scenes, FALSE); - if (!scene) - break; + for (i = 0; i < Elements(setup->scenes); i++) { + struct lp_scene *scene = setup->scenes[i]; + + if (scene->fence) + lp_fence_wait(scene->fence); + lp_scene_destroy(scene); } - lp_scene_queue_destroy(setup->empty_scenes); - FREE( setup ); } @@ -908,10 +930,11 @@ lp_setup_create( struct pipe_context *pipe, return NULL; lp_setup_init_vbuf(setup); + + /* Used only in update_state(): + */ + setup->pipe = pipe; - setup->empty_scenes = lp_scene_queue_create(); - if (!setup->empty_scenes) - goto fail; setup->num_threads = screen->num_threads; setup->vbuf = draw_vbuf_stage(draw, &setup->base); @@ -923,9 +946,7 @@ lp_setup_create( struct pipe_context *pipe, /* create some empty scenes */ for (i = 0; i < MAX_SCENES; i++) { - setup->scenes[i] = lp_scene_create( pipe, setup->empty_scenes ); - - lp_scene_enqueue(setup->empty_scenes, setup->scenes[i]); + setup->scenes[i] = lp_scene_create( pipe ); } setup->triangle = first_triangle; @@ -940,9 +961,6 @@ fail: if (setup->vbuf) ; - if (setup->empty_scenes) - lp_scene_queue_destroy(setup->empty_scenes); - FREE(setup); return NULL; } @@ -955,22 +973,26 @@ void lp_setup_begin_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) { - struct lp_scene * scene = lp_setup_get_current_scene(setup); - union lp_rast_cmd_arg cmd_arg; - /* init the query to its beginning state */ - pq->done = FALSE; - pq->tile_count = 0; - pq->num_tiles = scene->tiles_x * scene->tiles_y; - assert(pq->num_tiles > 0); + assert(setup->active_query == NULL); + + if (setup->scene) { + if (!lp_scene_bin_everywhere(setup->scene, + LP_RAST_OP_BEGIN_QUERY, + lp_rast_arg_query(pq))) { - memset(pq->count, 0, sizeof(pq->count)); /* reset all counters */ + lp_setup_flush_and_restart(setup); - set_scene_state( setup, SETUP_ACTIVE ); + if (!lp_scene_bin_everywhere(setup->scene, + LP_RAST_OP_BEGIN_QUERY, + lp_rast_arg_query(pq))) { + assert(0); + return; + } + } + } - cmd_arg.query_obj = pq; - lp_scene_bin_everywhere(scene, lp_rast_begin_query, cmd_arg); - pq->binned = TRUE; + setup->active_query = pq; } @@ -980,11 +1002,42 @@ lp_setup_begin_query(struct lp_setup_context *setup, void lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) { - struct lp_scene * scene = lp_setup_get_current_scene(setup); - union lp_rast_cmd_arg cmd_arg; + union lp_rast_cmd_arg dummy = { 0 }; + + assert(setup->active_query == pq); + setup->active_query = NULL; - set_scene_state( setup, SETUP_ACTIVE ); + /* Setup will automatically re-issue any query which carried over a + * scene boundary, and the rasterizer automatically "ends" queries + * which are active at the end of a scene, so there is no need to + * retry this commands on failure. + */ + if (setup->scene) { + /* pq->fence should be the fence of the *last* scene which + * contributed to the query result. + */ + lp_fence_reference(&pq->fence, setup->scene->fence); - cmd_arg.query_obj = pq; - lp_scene_bin_everywhere(scene, lp_rast_end_query, cmd_arg); + if (!lp_scene_bin_everywhere(setup->scene, + LP_RAST_OP_END_QUERY, + dummy)) { + lp_setup_flush(setup, 0, NULL, __FUNCTION__); + } + } + else { + lp_fence_reference(&pq->fence, setup->last_fence); + } } + + +void +lp_setup_flush_and_restart(struct lp_setup_context *setup) +{ + if (0) debug_printf("%s\n", __FUNCTION__); + + assert(setup->state == SETUP_ACTIVE); + set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__); + lp_setup_update_state(setup, TRUE); +} + + diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 821ebb1087..b94061b7d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -65,6 +65,7 @@ struct pipe_framebuffer_state; struct lp_fragment_shader_variant; struct lp_jit_context; struct llvmpipe_query; +struct pipe_fence_handle; struct lp_setup_context * @@ -78,8 +79,6 @@ lp_setup_clear(struct lp_setup_context *setup, unsigned clear_stencil, unsigned flags); -struct pipe_fence_handle * -lp_setup_fence( struct lp_setup_context *setup ); void diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c index 95e3e8fffe..8dc2688ddb 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.c @@ -187,11 +187,32 @@ static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, */ void lp_setup_tri_coef( struct lp_setup_context *setup, struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info) + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean frontfacing) { unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; unsigned i; + struct lp_tri_info info; + float dx01 = v0[0][0] - v1[0][0]; + float dy01 = v0[0][1] - v1[0][1]; + float dx20 = v2[0][0] - v0[0][0]; + float dy20 = v2[0][1] - v0[0][1]; + float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); + + info.v0 = v0; + info.v1 = v1; + info.v2 = v2; + info.frontfacing = frontfacing; + info.x0_center = v0[0][0] - setup->pixel_offset; + info.y0_center = v0[0][1] - setup->pixel_offset; + info.dx01_ooa = dx01 * oneoverarea; + info.dx20_ooa = dx20 * oneoverarea; + info.dy01_ooa = dy01 * oneoverarea; + info.dy20_ooa = dy20 * oneoverarea; + /* setup interpolation for all the remaining attributes: */ @@ -204,25 +225,25 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, if (setup->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(inputs, slot+1, info->v0[vert_attr][i], i); + constant_coef(inputs, slot+1, info.v0[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(inputs, slot+1, info->v2[vert_attr][i], i); + constant_coef(inputs, slot+1, info.v2[vert_attr][i], i); } break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - linear_coef(inputs, info, slot+1, vert_attr, i); + linear_coef(inputs, &info, slot+1, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - perspective_coef(inputs, info, slot+1, vert_attr, i); + perspective_coef(inputs, &info, slot+1, vert_attr, i); fragcoord_usage_mask |= TGSI_WRITEMASK_W; break; @@ -236,7 +257,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, break; case LP_INTERP_FACING: - setup_facing_coef(inputs, slot+1, info->frontfacing, usage_mask); + setup_facing_coef(inputs, slot+1, info.frontfacing, usage_mask); break; default: @@ -246,7 +267,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(inputs, info, 0, fragcoord_usage_mask); + setup_fragcoord_coef(inputs, &info, 0, fragcoord_usage_mask); } #else diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h index d68b39c603..87a3255ccc 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.h @@ -56,6 +56,9 @@ struct lp_tri_info { void lp_setup_tri_coef( struct lp_setup_context *setup, struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info); + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean frontfacing); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c index 73fb70599c..3742fd672b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c @@ -151,13 +151,34 @@ static void perspective_coef( struct lp_rast_shader_inputs *inputs, */ void lp_setup_tri_coef( struct lp_setup_context *setup, struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info) + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean frontfacing) { unsigned slot; + struct lp_tri_info info; + float dx01 = v0[0][0] - v1[0][0]; + float dy01 = v0[0][1] - v1[0][1]; + float dx20 = v2[0][0] - v0[0][0]; + float dy20 = v2[0][1] - v0[0][1]; + float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); + + info.v0 = v0; + info.v1 = v1; + info.v2 = v2; + info.frontfacing = frontfacing; + info.x0_center = v0[0][0] - setup->pixel_offset; + info.y0_center = v0[0][1] - setup->pixel_offset; + info.dx01_ooa = dx01 * oneoverarea; + info.dx20_ooa = dx20 * oneoverarea; + info.dy01_ooa = dy01 * oneoverarea; + info.dy20_ooa = dy20 * oneoverarea; + /* The internal position input is in slot zero: */ - linear_coef(inputs, info, 0, 0); + linear_coef(inputs, &info, 0, 0); /* setup interpolation for all the remaining attributes: */ @@ -167,19 +188,19 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: if (setup->flatshade_first) { - constant_coef4(inputs, info, slot+1, info->v0[vert_attr]); + constant_coef4(inputs, &info, slot+1, info.v0[vert_attr]); } else { - constant_coef4(inputs, info, slot+1, info->v2[vert_attr]); + constant_coef4(inputs, &info, slot+1, info.v2[vert_attr]); } break; case LP_INTERP_LINEAR: - linear_coef(inputs, info, slot+1, vert_attr); + linear_coef(inputs, &info, slot+1, vert_attr); break; case LP_INTERP_PERSPECTIVE: - perspective_coef(inputs, info, slot+1, vert_attr); + perspective_coef(inputs, &info, slot+1, vert_attr); break; case LP_INTERP_POSITION: @@ -190,7 +211,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, break; case LP_INTERP_FACING: - setup_facing_coef(inputs, info, slot+1); + setup_facing_coef(inputs, &info, slot+1); break; default: diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 877a492c6d..80b356476a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -49,8 +49,6 @@ #define LP_SETUP_NEW_SCISSOR 0x08 -struct lp_scene_queue; - /** Max number of scenes */ #define MAX_SCENES 2 @@ -70,6 +68,7 @@ struct lp_setup_context { struct vbuf_render base; + struct pipe_context *pipe; struct vertex_info *vertex_info; uint prim; uint vertex_size; @@ -83,9 +82,12 @@ struct lp_setup_context */ struct draw_stage *vbuf; unsigned num_threads; + unsigned scene_idx; struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */ struct lp_scene *scene; /**< current scene being built */ - struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ + + struct lp_fence *last_fence; + struct llvmpipe_query *active_query; boolean flatshade_first; boolean ccw_is_frontface; @@ -105,12 +107,12 @@ struct lp_setup_context struct { unsigned flags; union lp_rast_cmd_arg color; /**< lp_rast_clear_color() cmd */ - struct lp_rast_clearzs clearzs; /**< lp_rast_clear_zstencil() cmd */ + unsigned zsmask; + unsigned zsvalue; /**< lp_rast_clear_zstencil() cmd */ } clear; enum setup_state { SETUP_FLUSHED, /**< scene is null */ - SETUP_EMPTY, /**< scene exists but has only state changes */ SETUP_CLEARED, /**< scene exists but has only clears */ SETUP_ACTIVE /**< scene exists and has at least one draw/query */ } state; @@ -156,14 +158,15 @@ void lp_setup_choose_triangle( struct lp_setup_context *setup ); void lp_setup_choose_line( struct lp_setup_context *setup ); void lp_setup_choose_point( struct lp_setup_context *setup ); -struct lp_scene *lp_setup_get_current_scene(struct lp_setup_context *setup); - void lp_setup_init_vbuf(struct lp_setup_context *setup); -void lp_setup_update_state( struct lp_setup_context *setup ); +void lp_setup_update_state( struct lp_setup_context *setup, + boolean update_scene); void lp_setup_destroy( struct lp_setup_context *setup ); +void lp_setup_flush_and_restart(struct lp_setup_context *setup); + void lp_setup_print_triangle(struct lp_setup_context *setup, const float (*v0)[4], @@ -182,11 +185,12 @@ lp_setup_alloc_triangle(struct lp_scene *scene, unsigned nr_planes, unsigned *tri_size); -void +boolean lp_setup_bin_triangle( struct lp_setup_context *setup, struct lp_rast_triangle *tri, const struct u_rect *bbox, int nr_planes ); -#endif +void lp_setup_flush_and_restart(struct lp_setup_context *setup); +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index ce2da55cf4..9f090d1992 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -263,12 +263,12 @@ static INLINE float fracf(float f) -static void -lp_setup_line( struct lp_setup_context *setup, +static boolean +try_setup_line( struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_scene *scene = setup->scene; struct lp_rast_triangle *line; struct lp_line_info info; float width = MAX2(1.0, setup->line_width); @@ -536,13 +536,13 @@ lp_setup_line( struct lp_setup_context *setup, bbox.y1 < bbox.y0) { if (0) debug_printf("empty bounding box\n"); LP_COUNT(nr_culled_tris); - return; + return TRUE; } if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { if (0) debug_printf("offscreen\n"); LP_COUNT(nr_culled_tris); - return; + return TRUE; } u_rect_find_intersection(&setup->draw_region, &bbox); @@ -552,7 +552,7 @@ lp_setup_line( struct lp_setup_context *setup, nr_planes, &tri_bytes); if (!line) - return; + return FALSE; #ifdef DEBUG line->v[0][0] = v1[0][0]; @@ -585,6 +585,8 @@ lp_setup_line( struct lp_setup_context *setup, line->inputs.facing = 1.0F; line->inputs.state = setup->fs.stored; + line->inputs.disable = FALSE; + line->inputs.opaque = FALSE; for (i = 0; i < 4; i++) { struct lp_rast_plane *plane = &line->plane[i]; @@ -687,9 +689,23 @@ lp_setup_line( struct lp_setup_context *setup, line->plane[7].eo = 0; } - lp_setup_bin_triangle(setup, line, &bbox, nr_planes); + return lp_setup_bin_triangle(setup, line, &bbox, nr_planes); } - + + +static void lp_setup_line( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4] ) +{ + if (!try_setup_line( setup, v0, v1 )) + { + lp_setup_flush_and_restart(setup); + + if (!try_setup_line( setup, v0, v1 )) + assert(0); + } +} + void lp_setup_choose_line( struct lp_setup_context *setup ) { diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 6ae318d328..5538987151 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -210,8 +210,9 @@ subpixel_snap(float a) } -static void lp_setup_point( struct lp_setup_context *setup, - const float (*v0)[4] ) +static boolean +try_setup_point( struct lp_setup_context *setup, + const float (*v0)[4] ) { /* x/y positions in fixed point */ const int sizeAttr = setup->psize; @@ -228,7 +229,7 @@ static void lp_setup_point( struct lp_setup_context *setup, const int x0 = subpixel_snap(v0[0][0] - setup->pixel_offset) - fixed_width/2; const int y0 = subpixel_snap(v0[0][1] - setup->pixel_offset) - fixed_width/2; - struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_scene *scene = setup->scene; struct lp_rast_triangle *point; unsigned bytes; struct u_rect bbox; @@ -259,7 +260,7 @@ static void lp_setup_point( struct lp_setup_context *setup, if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { if (0) debug_printf("offscreen\n"); LP_COUNT(nr_culled_tris); - return; + return TRUE; } u_rect_find_intersection(&setup->draw_region, &bbox); @@ -269,7 +270,7 @@ static void lp_setup_point( struct lp_setup_context *setup, nr_planes, &bytes); if (!point) - return; + return FALSE; #ifdef DEBUG point->v[0][0] = v0[0][0]; @@ -288,6 +289,8 @@ static void lp_setup_point( struct lp_setup_context *setup, point->inputs.facing = 1.0F; point->inputs.state = setup->fs.stored; + point->inputs.disable = FALSE; + point->inputs.opaque = FALSE; { point->plane[0].dcdx = -1; @@ -315,7 +318,20 @@ static void lp_setup_point( struct lp_setup_context *setup, point->plane[3].eo = 0; } - lp_setup_bin_triangle(setup, point, &bbox, nr_planes); + return lp_setup_bin_triangle(setup, point, &bbox, nr_planes); +} + + +static void lp_setup_point( struct lp_setup_context *setup, + const float (*v0)[4] ) +{ + if (!try_setup_point( setup, v0 )) + { + lp_setup_flush_and_restart(setup); + + if (!try_setup_point( setup, v0 )) + assert(0); + } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 0180d95090..5090f82ab5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -160,44 +160,79 @@ lp_setup_print_triangle(struct lp_setup_context *setup, } -lp_rast_cmd lp_rast_tri_tab[9] = { - NULL, /* should be impossible */ - lp_rast_triangle_1, - lp_rast_triangle_2, - lp_rast_triangle_3, - lp_rast_triangle_4, - lp_rast_triangle_5, - lp_rast_triangle_6, - lp_rast_triangle_7, - lp_rast_triangle_8 +static unsigned +lp_rast_tri_tab[9] = { + 0, /* should be impossible */ + LP_RAST_OP_TRIANGLE_1, + LP_RAST_OP_TRIANGLE_2, + LP_RAST_OP_TRIANGLE_3, + LP_RAST_OP_TRIANGLE_4, + LP_RAST_OP_TRIANGLE_5, + LP_RAST_OP_TRIANGLE_6, + LP_RAST_OP_TRIANGLE_7, + LP_RAST_OP_TRIANGLE_8 }; + + +/** + * The primitive covers the whole tile- shade whole tile. + * + * \param tx, ty the tile position in tiles, not pixels + */ +static boolean +lp_setup_whole_tile(struct lp_setup_context *setup, + const struct lp_rast_shader_inputs *inputs, + int tx, int ty) +{ + struct lp_scene *scene = setup->scene; + + LP_COUNT(nr_fully_covered_64); + + /* if variant is opaque and scissor doesn't effect the tile */ + if (inputs->opaque) { + if (!scene->fb.zsbuf) { + /* + * All previous rendering will be overwritten so reset the bin. + */ + lp_scene_bin_reset( scene, tx, ty ); + } + + LP_COUNT(nr_shade_opaque_64); + return lp_scene_bin_command( scene, tx, ty, + LP_RAST_OP_SHADE_TILE_OPAQUE, + lp_rast_arg_inputs(inputs) ); + } else { + LP_COUNT(nr_shade_64); + return lp_scene_bin_command( scene, tx, ty, + LP_RAST_OP_SHADE_TILE, + lp_rast_arg_inputs(inputs) ); + } +} + + /** * Do basic setup for triangle rasterization and determine which * framebuffer tiles are touched. Put the triangle in the scene's * bins for the tiles which we overlap. */ -static void +static boolean do_triangle_ccw(struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4], boolean frontfacing ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_scene *scene = setup->scene; struct lp_rast_triangle *tri; int x[3]; int y[3]; - float dy01, dy20; - float dx01, dx20; - float oneoverarea; - struct lp_tri_info info; int area; struct u_rect bbox; unsigned tri_bytes; int i; int nr_planes = 3; - + if (0) lp_setup_print_triangle(setup, v0, v1, v2); @@ -241,13 +276,13 @@ do_triangle_ccw(struct lp_setup_context *setup, bbox.y1 < bbox.y0) { if (0) debug_printf("empty bounding box\n"); LP_COUNT(nr_culled_tris); - return; + return TRUE; } if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { if (0) debug_printf("offscreen\n"); LP_COUNT(nr_culled_tris); - return; + return TRUE; } u_rect_find_intersection(&setup->draw_region, &bbox); @@ -257,7 +292,7 @@ do_triangle_ccw(struct lp_setup_context *setup, nr_planes, &tri_bytes); if (!tri) - return; + return FALSE; #ifdef DEBUG tri->v[0][0] = v0[0][0]; @@ -288,37 +323,18 @@ do_triangle_ccw(struct lp_setup_context *setup, if (area <= 0) { lp_scene_putback_data( scene, tri_bytes ); LP_COUNT(nr_culled_tris); - return; + return TRUE; } - - /* - */ - dx01 = v0[0][0] - v1[0][0]; - dy01 = v0[0][1] - v1[0][1]; - dx20 = v2[0][0] - v0[0][0]; - dy20 = v2[0][1] - v0[0][1]; - oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); - - info.v0 = v0; - info.v1 = v1; - info.v2 = v2; - info.frontfacing = frontfacing; - info.x0_center = v0[0][0] - setup->pixel_offset; - info.y0_center = v0[0][1] - setup->pixel_offset; - info.dx01_ooa = dx01 * oneoverarea; - info.dx20_ooa = dx20 * oneoverarea; - info.dy01_ooa = dy01 * oneoverarea; - info.dy20_ooa = dy20 * oneoverarea; - /* Setup parameter interpolants: */ - lp_setup_tri_coef( setup, &tri->inputs, &info ); + lp_setup_tri_coef( setup, &tri->inputs, v0, v1, v2, frontfacing ); tri->inputs.facing = frontfacing ? 1.0F : -1.0F; + tri->inputs.disable = FALSE; + tri->inputs.opaque = setup->fs.current.variant->opaque; tri->inputs.state = setup->fs.stored; - for (i = 0; i < 3; i++) { struct lp_rast_plane *plane = &tri->plane[i]; @@ -420,70 +436,98 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->plane[6].eo = 0; } - lp_setup_bin_triangle( setup, tri, &bbox, nr_planes ); + return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes ); +} + +/* + * Round to nearest less or equal power of two of the input. + * + * Undefined if no bit set exists, so code should check against 0 first. + */ +static INLINE uint32_t +floor_pot(uint32_t n) +{ +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) + if (n == 0) + return 0; + + __asm__("bsr %1,%0" + : "=r" (n) + : "rm" (n)); + return 1 << n; +#else + n |= (n >> 1); + n |= (n >> 2); + n |= (n >> 4); + n |= (n >> 8); + n |= (n >> 16); + return n - (n >> 1); +#endif } -void +boolean lp_setup_bin_triangle( struct lp_setup_context *setup, struct lp_rast_triangle *tri, const struct u_rect *bbox, int nr_planes ) { struct lp_scene *scene = setup->scene; - struct lp_fragment_shader_variant *variant = setup->fs.current.variant; - int ix0, ix1, iy0, iy1; int i; - /* - * All fields of 'tri' are now set. The remaining code here is - * concerned with binning. + /* What is the largest power-of-two boundary this triangle crosses: */ + int dx = floor_pot((bbox->x0 ^ bbox->x1) | + (bbox->y0 ^ bbox->y1)); - /* Convert to tile coordinates, and inclusive ranges: + /* The largest dimension of the rasterized area of the triangle + * (aligned to a 4x4 grid), rounded down to the nearest power of two: */ + int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) | + (bbox->y1 - (bbox->y0 & ~3))); + if (nr_planes == 3) { - int ix0 = bbox->x0 / 16; - int iy0 = bbox->y0 / 16; - int ix1 = bbox->x1 / 16; - int iy1 = bbox->y1 / 16; - - if (iy0 == iy1 && ix0 == ix1) + if (sz < 4 && dx < 64) { + /* Triangle is contained in a single 4x4 stamp: + */ + int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); + + return lp_scene_bin_command( scene, + bbox->x0/64, bbox->y0/64, + LP_RAST_OP_TRIANGLE_3_4, + lp_rast_arg_triangle(tri, mask) ); + } + + if (sz < 16 && dx < 64) + { + int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8); /* Triangle is contained in a single 16x16 block: */ - int mask = (ix0 & 3) | ((iy0 & 3) << 4); - - lp_scene_bin_command( scene, ix0/4, iy0/4, - lp_rast_triangle_3_16, - lp_rast_arg_triangle(tri, mask) ); - return; + return lp_scene_bin_command( scene, + bbox->x0/64, bbox->y0/64, + LP_RAST_OP_TRIANGLE_3_16, + lp_rast_arg_triangle(tri, mask) ); } } - ix0 = bbox->x0 / TILE_SIZE; - iy0 = bbox->y0 / TILE_SIZE; - ix1 = bbox->x1 / TILE_SIZE; - iy1 = bbox->y1 / TILE_SIZE; - - /* - * Clamp to framebuffer size - */ - assert(ix0 == MAX2(ix0, 0)); - assert(iy0 == MAX2(iy0, 0)); - assert(ix1 == MIN2(ix1, scene->tiles_x - 1)); - assert(iy1 == MIN2(iy1, scene->tiles_y - 1)); /* Determine which tile(s) intersect the triangle's bounding box */ - if (iy0 == iy1 && ix0 == ix1) + if (dx < TILE_SIZE) { + int ix0 = bbox->x0 / TILE_SIZE; + int iy0 = bbox->y0 / TILE_SIZE; + + assert(iy0 == bbox->y1 / TILE_SIZE && + ix0 == bbox->x1 / TILE_SIZE); + /* Triangle is contained in a single tile: */ - lp_scene_bin_command( scene, ix0, iy0, - lp_rast_tri_tab[nr_planes], - lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); + return lp_scene_bin_command( scene, ix0, iy0, + lp_rast_tri_tab[nr_planes], + lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); } else { @@ -493,6 +537,11 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, int xstep[7]; int ystep[7]; int x, y; + + int ix0 = bbox->x0 / TILE_SIZE; + int iy0 = bbox->y0 / TILE_SIZE; + int ix1 = bbox->x1 / TILE_SIZE; + int iy1 = bbox->y1 / TILE_SIZE; for (i = 0; i < nr_planes; i++) { c[i] = (tri->plane[i].c + @@ -544,9 +593,10 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, */ int count = util_bitcount(partial); in = TRUE; - lp_scene_bin_command( scene, x, y, - lp_rast_tri_tab[count], - lp_rast_arg_triangle(tri, partial) ); + if (!lp_scene_bin_command( scene, x, y, + lp_rast_tri_tab[count], + lp_rast_arg_triangle(tri, partial) )) + goto fail; LP_COUNT(nr_partially_covered_64); } @@ -554,13 +604,8 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, /* triangle covers the whole tile- shade whole tile */ LP_COUNT(nr_fully_covered_64); in = TRUE; - if (variant->opaque && - !setup->fb.zsbuf) { - lp_scene_bin_reset( scene, x, y ); - } - lp_scene_bin_command( scene, x, y, - lp_rast_shade_tile, - lp_rast_arg_inputs(&tri->inputs) ); + if (!lp_setup_whole_tile(setup, &tri->inputs, x, y)) + goto fail; } /* Iterate cx values across the region: @@ -575,6 +620,16 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, c[i] += ystep[i]; } } + + return TRUE; + +fail: + /* Need to disable any partially binned triangle. This is easier + * than trying to locate all the triangle, shade-tile, etc, + * commands which may have been binned. + */ + tri->inputs.disable = TRUE; + return FALSE; } @@ -586,7 +641,13 @@ static void triangle_cw( struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4] ) { - do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ); + if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface )) + { + lp_setup_flush_and_restart(setup); + + if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface )) + assert(0); + } } @@ -598,7 +659,12 @@ static void triangle_ccw( struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4] ) { - do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); + if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface )) + { + lp_setup_flush_and_restart(setup); + if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface )) + assert(0); + } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 51948f5bf2..6308561f24 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -64,7 +64,7 @@ lp_setup_get_vertex_info(struct vbuf_render *vbr) /* Vertex size/info depends on the latest state. * The draw module may have issued additional state-change commands. */ - lp_setup_update_state(setup); + lp_setup_update_state(setup, FALSE); return setup->vertex_info; } @@ -141,7 +141,7 @@ lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr) const boolean flatshade_first = setup->flatshade_first; unsigned i; - lp_setup_update_state(setup); + lp_setup_update_state(setup, TRUE); switch (setup->prim) { case PIPE_PRIM_POINTS: @@ -338,7 +338,7 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) const boolean flatshade_first = setup->flatshade_first; unsigned i; - lp_setup_update_state(setup); + lp_setup_update_state(setup, TRUE); switch (setup->prim) { case PIPE_PRIM_POINTS: diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 33c1a49efe..8101e2d843 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -186,6 +186,7 @@ generate_quad_mask(LLVMBuilderRef builder, LLVMTypeRef i32t = LLVMInt32Type(); LLVMValueRef bits[4]; LLVMValueRef mask; + int shift; /* * XXX: We'll need a different path for 16 x u8 @@ -197,10 +198,28 @@ generate_quad_mask(LLVMBuilderRef builder, /* * mask_input >>= (quad * 4) */ + + switch (quad) { + case 0: + shift = 0; + break; + case 1: + shift = 2; + break; + case 2: + shift = 8; + break; + case 3: + shift = 10; + break; + default: + assert(0); + shift = 0; + } mask_input = LLVMBuildLShr(builder, mask_input, - LLVMConstInt(i32t, quad * 4, 0), + LLVMConstInt(i32t, shift, 0), ""); /* @@ -211,9 +230,9 @@ generate_quad_mask(LLVMBuilderRef builder, bits[0] = LLVMConstInt(i32t, 1 << 0, 0); bits[1] = LLVMConstInt(i32t, 1 << 1, 0); - bits[2] = LLVMConstInt(i32t, 1 << 2, 0); - bits[3] = LLVMConstInt(i32t, 1 << 3, 0); - + bits[2] = LLVMConstInt(i32t, 1 << 4, 0); + bits[3] = LLVMConstInt(i32t, 1 << 5, 0); + mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), ""); /* @@ -332,14 +351,13 @@ generate_fs(struct llvmpipe_context *lp, lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); /* Alpha test */ - /* XXX: should the alpha reference value be passed separately? */ /* XXX: should only test the final assignment to alpha */ - if(cbuf == 0 && chan == 3) { + if (cbuf == 0 && chan == 3 && key->alpha.enabled) { LLVMValueRef alpha = out; LLVMValueRef alpha_ref_value; alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); - lp_build_alpha_test(builder, &key->alpha, type, + lp_build_alpha_test(builder, key->alpha.func, type, &mask, alpha, alpha_ref_value); } @@ -728,6 +746,9 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("fs variant %p:\n", (void *) key); + for (i = 0; i < key->nr_cbufs; ++i) { + debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i])); + } if (key->depth.enabled) { debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format)); debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE)); @@ -747,7 +768,6 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) if (key->alpha.enabled) { debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE)); - debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); } if (key->blend.logicop_enable) { @@ -791,6 +811,16 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) } +void +lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant) +{ + debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n", + variant->shader->no, variant->no); + tgsi_dump(variant->shader->base.tokens, 0); + dump_fs_variant_key(&variant->key); + debug_printf("variant->opaque = %u\n", variant->opaque); + debug_printf("\n"); +} static struct lp_fragment_shader_variant * generate_variant(struct llvmpipe_context *lp, @@ -798,6 +828,7 @@ generate_variant(struct llvmpipe_context *lp, const struct lp_fragment_shader_variant_key *key) { struct lp_fragment_shader_variant *variant; + boolean fullcolormask; variant = CALLOC_STRUCT(lp_fragment_shader_variant); if(!variant) @@ -810,27 +841,43 @@ generate_variant(struct llvmpipe_context *lp, memcpy(&variant->key, key, shader->variant_key_size); - if (gallivm_debug & GALLIVM_DEBUG_IR) { - debug_printf("llvmpipe: Creating fragment shader #%u variant #%u:\n", - shader->no, variant->no); - tgsi_dump(shader->base.tokens, 0); - dump_fs_variant_key(key); + /* + * Determine whether we are touching all channels in the color buffer. + */ + fullcolormask = FALSE; + if (key->nr_cbufs == 1) { + const struct util_format_description *format_desc; + format_desc = util_format_description(key->cbuf_format[0]); + if ((~key->blend.rt[0].colormask & + util_format_colormask(format_desc)) == 0) { + fullcolormask = TRUE; + } } - generate_fragment(lp, shader, variant, RAST_WHOLE); - generate_fragment(lp, shader, variant, RAST_EDGE_TEST); - - /* TODO: most of these can be relaxed, in particular the colormask */ variant->opaque = !key->blend.logicop_enable && !key->blend.rt[0].blend_enable && - key->blend.rt[0].colormask == 0xf && + fullcolormask && !key->stencil[0].enabled && !key->alpha.enabled && !key->depth.enabled && !shader->info.uses_kill ? TRUE : FALSE; + + if (gallivm_debug & GALLIVM_DEBUG_IR) { + lp_debug_fs_variant(variant); + } + + generate_fragment(lp, shader, variant, RAST_EDGE_TEST); + + if (variant->opaque) { + /* Specialized shader, which doesn't need to read the color buffer. */ + generate_fragment(lp, shader, variant, RAST_WHOLE); + } else { + variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST]; + } + return variant; } @@ -1056,25 +1103,22 @@ make_variant_key(struct llvmpipe_context *lp, key->nr_cbufs = lp->framebuffer.nr_cbufs; for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { + enum pipe_format format = lp->framebuffer.cbufs[i]->format; struct pipe_rt_blend_state *blend_rt = &key->blend.rt[i]; const struct util_format_description *format_desc; - unsigned chan; - format_desc = util_format_description(lp->framebuffer.cbufs[i]->format); + key->cbuf_format[i] = format; + + format_desc = util_format_description(format); assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); blend_rt->colormask = lp->blend->rt[i].colormask; - /* mask out color channels not present in the color buffer. - * Should be simple to incorporate per-cbuf writemasks: + /* + * Mask out color channels not present in the color buffer. */ - for(chan = 0; chan < 4; ++chan) { - enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - - if(swizzle > UTIL_FORMAT_SWIZZLE_W) - blend_rt->colormask &= ~(1 << chan); - } + blend_rt->colormask &= util_format_colormask(format_desc); /* * Our swizzled render tiles always have an alpha channel, but the linear diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 33c480010d..2914e7d7ef 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -49,14 +49,21 @@ struct lp_fragment_shader_variant_key { struct pipe_depth_state depth; struct pipe_stencil_state stencil[2]; - struct pipe_alpha_state alpha; struct pipe_blend_state blend; - enum pipe_format zsbuf_format; + + struct { + unsigned enabled:1; + unsigned func:3; + } alpha; + unsigned nr_cbufs:8; unsigned nr_samplers:8; /* actually derivable from just the shader */ unsigned flatshade:1; unsigned occlusion_count:1; + enum pipe_format zsbuf_format; + enum pipe_format cbuf_format[PIPE_MAX_COLOR_BUFS]; + struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; @@ -101,4 +108,8 @@ struct lp_fragment_shader }; +void +lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant); + + #endif /* LP_STATE_FS_H_ */ diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 63ddc669c2..164242eda6 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -68,16 +68,16 @@ lp_resource_copy(struct pipe_context *pipe, 0, /* flush_flags */ FALSE, /* read_only */ TRUE, /* cpu_access */ - FALSE, - "blit dst"); /* do_not_block */ + FALSE, /* do_not_block */ + "blit dest"); llvmpipe_flush_resource(pipe, src, subsrc.face, subsrc.level, 0, /* flush_flags */ TRUE, /* read_only */ TRUE, /* cpu_access */ - FALSE, - "blit src"); /* do_not_block */ + FALSE, /* do_not_block */ + "blit src"); /* printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n", diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 5832ea2744..a4b9f2590a 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -585,7 +585,7 @@ llvmpipe_get_transfer(struct pipe_context *pipe, read_only, TRUE, /* cpu_access */ do_not_block, - "transfer dest")) { + __FUNCTION__)) { /* * It would have blocked, but state tracker requested no to. */ diff --git a/src/gallium/drivers/llvmpipe/sse_mathfun.h b/src/gallium/drivers/llvmpipe/sse_mathfun.h index 8ac2064b7b..0077f34b5c 100644 --- a/src/gallium/drivers/llvmpipe/sse_mathfun.h +++ b/src/gallium/drivers/llvmpipe/sse_mathfun.h @@ -94,55 +94,6 @@ v4sf sin_ps(v4sf x); v4sf cos_ps(v4sf x); void sincos_ps(v4sf x, v4sf *s, v4sf *c); -#if defined (__MINGW32__) - -/* the ugly part below: many versions of gcc used to be completely buggy with respect to some intrinsics - The movehl_ps is fixed in mingw 3.4.5, but I found out that all the _mm_cmp* intrinsics were completely - broken on my mingw gcc 3.4.5 ... - - Note that the bug on _mm_cmp* does occur only at -O0 optimization level -*/ - -inline __m128 my_movehl_ps(__m128 a, const __m128 b) { - asm ( - "movhlps %2,%0\n\t" - : "=x" (a) - : "0" (a), "x"(b) - ); - return a; } -#warning "redefined _mm_movehl_ps (see gcc bug 21179)" -#define _mm_movehl_ps my_movehl_ps - -inline __m128 my_cmplt_ps(__m128 a, const __m128 b) { - asm ( - "cmpltps %2,%0\n\t" - : "=x" (a) - : "0" (a), "x"(b) - ); - return a; - } -inline __m128 my_cmpgt_ps(__m128 a, const __m128 b) { - asm ( - "cmpnleps %2,%0\n\t" - : "=x" (a) - : "0" (a), "x"(b) - ); - return a; -} -inline __m128 my_cmpeq_ps(__m128 a, const __m128 b) { - asm ( - "cmpeqps %2,%0\n\t" - : "=x" (a) - : "0" (a), "x"(b) - ); - return a; -} -#warning "redefined _mm_cmpxx_ps functions..." -#define _mm_cmplt_ps my_cmplt_ps -#define _mm_cmpgt_ps my_cmpgt_ps -#define _mm_cmpeq_ps my_cmpeq_ps -#endif - #ifndef USE_SSE2 typedef union xmm_mm_union { __m128 xmm; |