summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/llvmpipe
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile1
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c83
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_debug.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_query.c52
-rw-r--r--src/gallium/drivers/llvmpipe/lp_query.h8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c351
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h93
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_debug.c410
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_priv.h74
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c241
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.c372
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.h221
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c52
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c599
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef.c35
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef.h5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c35
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h24
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_line.c32
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_point.c28
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c248
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_vbuf.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c100
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.h15
-rw-r--r--src/gallium/drivers/llvmpipe/lp_surface.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c2
-rw-r--r--src/gallium/drivers/llvmpipe/sse_mathfun.h49
35 files changed, 1959 insertions, 1225 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index dec874623e..55b877b4ab 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -22,6 +22,7 @@ C_SOURCES = \
lp_perf.c \
lp_query.c \
lp_rast.c \
+ lp_rast_debug.c \
lp_rast_tri.c \
lp_scene.c \
lp_scene_queue.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 8d57db72cf..650435f0f1 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -55,6 +55,7 @@ llvmpipe = env.ConvenienceLibrary(
'lp_perf.c',
'lp_query.c',
'lp_rast.c',
+ 'lp_rast_debug.c',
'lp_rast_tri.c',
'lp_scene.c',
'lp_scene_queue.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
index 8514030cde..e28efe778f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
@@ -44,21 +44,20 @@
void
lp_build_alpha_test(LLVMBuilderRef builder,
- const struct pipe_alpha_state *state,
+ unsigned func,
struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef alpha,
LLVMValueRef ref)
{
struct lp_build_context bld;
+ LLVMValueRef test;
lp_build_context_init(&bld, builder, type);
- if(state->enabled) {
- LLVMValueRef test = lp_build_cmp(&bld, state->func, alpha, ref);
+ test = lp_build_cmp(&bld, func, alpha, ref);
- lp_build_name(test, "alpha_mask");
+ lp_build_name(test, "alpha_mask");
- lp_build_mask_update(mask, test);
- }
+ lp_build_mask_update(mask, test);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
index 0f99fec65e..44603b418c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -44,7 +44,7 @@ struct lp_build_mask_context;
void
lp_build_alpha_test(LLVMBuilderRef builder,
- const struct pipe_alpha_state *state,
+ unsigned func,
struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef alpha,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index 09e9833057..b5924cbb7d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -197,7 +197,7 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
swizzled_rgb = rgb;
break;
case LP_BUILD_BLEND_SWIZZLE_AAAA:
- swizzled_rgb = lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
+ swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle);
break;
default:
assert(0);
@@ -205,9 +205,8 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
}
if (rgb != alpha) {
- boolean cond[4] = {0, 0, 0, 0};
- cond[alpha_swizzle] = 1;
- swizzled_rgb = lp_build_select_aos(&bld->base, alpha, swizzled_rgb, cond);
+ swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle,
+ alpha, swizzled_rgb);
}
return swizzled_rgb;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 2cf6f38c4b..2a374f8c39 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -75,6 +75,33 @@
*/
+/**
+ * Do one perspective divide per quad.
+ *
+ * For perspective interpolation, the final attribute value is given by
+ *
+ * a' = a/w = a * oow
+ *
+ * where
+ *
+ * a = a0 + dadx*x + dady*y
+ * w = w0 + dwdx*x + dwdy*y
+ * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
+ *
+ * Instead of computing the division per pixel, with this macro we compute the
+ * division on the upper left pixel of each quad, and use a linear
+ * approximation in the remaining pixels, given by:
+ *
+ * da'dx = (dadx - dwdx*a)*oow
+ * da'dy = (dady - dwdy*a)*oow
+ *
+ * Ironically, this actually makes things slower -- probably because the
+ * divide hardware unit is rarely used, whereas the multiply unit is typically
+ * already saturated.
+ */
+#define PERSPECTIVE_DIVIDE_PER_QUAD 0
+
+
static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
@@ -107,7 +134,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
- LLVMValueRef oow = NULL;
unsigned attrib;
unsigned chan;
@@ -213,22 +239,22 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
a = LLVMBuildFAdd(builder, a, dadq2, "");
+#if PERSPECTIVE_DIVIDE_PER_QUAD
/*
- * a *= 1 / w
- * dadq *= 1 / w
+ * a *= 1 / w
*/
if (interp == LP_INTERP_PERSPECTIVE) {
LLVMValueRef w = bld->a[0][3];
assert(attrib != 0);
assert(bld->mask[0] & TGSI_WRITEMASK_W);
- if (!oow) {
- oow = lp_build_rcp(coeff_bld, w);
- lp_build_name(oow, "oow");
+ if (!bld->oow) {
+ bld->oow = lp_build_rcp(coeff_bld, w);
+ lp_build_name(bld->oow, "oow");
}
- a = lp_build_mul(coeff_bld, a, oow);
- dadq = lp_build_mul(coeff_bld, dadq, oow);
+ a = lp_build_mul(coeff_bld, a, bld->oow);
}
+#endif
attrib_name(a, attrib, chan, ".a");
attrib_name(dadq, attrib, chan, ".dadq");
@@ -250,6 +276,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
{
struct lp_build_context *coeff_bld = &bld->coeff_bld;
LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
+ LLVMValueRef oow = NULL;
unsigned attrib;
unsigned chan;
@@ -270,6 +297,8 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
a = bld->attribs[0][chan];
}
else {
+ LLVMValueRef dadq;
+
a = bld->a[attrib][chan];
/*
@@ -280,10 +309,46 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
a, coeff_bld->undef, shuffle, "");
/*
+ * Get the derivatives.
+ */
+
+ dadq = bld->dadq[attrib][chan];
+
+#if PERSPECTIVE_DIVIDE_PER_QUAD
+ if (interp == LP_INTERP_PERSPECTIVE) {
+ LLVMValueRef dwdq = bld->dadq[0][3];
+
+ if (oow == NULL) {
+ assert(bld->oow);
+ oow = LLVMBuildShuffleVector(coeff_bld->builder,
+ bld->oow, coeff_bld->undef,
+ shuffle, "");
+ }
+
+ dadq = lp_build_sub(coeff_bld,
+ dadq,
+ lp_build_mul(coeff_bld, a, dwdq));
+ dadq = lp_build_mul(coeff_bld, dadq, oow);
+ }
+#endif
+
+ /*
* Add the derivatives
*/
- a = lp_build_add(coeff_bld, a, bld->dadq[attrib][chan]);
+ a = lp_build_add(coeff_bld, a, dadq);
+
+#if !PERSPECTIVE_DIVIDE_PER_QUAD
+ if (interp == LP_INTERP_PERSPECTIVE) {
+ if (oow == NULL) {
+ LLVMValueRef w = bld->attribs[0][3];
+ assert(attrib != 0);
+ assert(bld->mask[0] & TGSI_WRITEMASK_W);
+ oow = lp_build_rcp(coeff_bld, w);
+ }
+ a = lp_build_mul(coeff_bld, a, oow);
+ }
+#endif
attrib_name(a, attrib, chan, "");
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 2905513301..3054030f73 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -64,6 +64,8 @@ struct lp_build_interp_soa_context
LLVMValueRef a [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ LLVMValueRef oow;
+
LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
/*
diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h
index a928ee38be..add43e4fca 100644
--- a/src/gallium/drivers/llvmpipe/lp_debug.h
+++ b/src/gallium/drivers/llvmpipe/lp_debug.h
@@ -48,6 +48,7 @@ st_print_current(void);
#define DEBUG_COUNTERS 0x800
#define DEBUG_SCENE 0x1000
#define DEBUG_FENCE 0x2000
+#define DEBUG_MEM 0x4000
#ifdef DEBUG
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h
index c28652fc30..b23a100b87 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.h
+++ b/src/gallium/drivers/llvmpipe/lp_perf.h
@@ -74,7 +74,7 @@ extern struct lp_counters lp_count;
#define LP_COUNT_GET(counter) (lp_count.counter)
#else
#define LP_COUNT(counter)
-#define LP_COUNT_ADD(counter, incr) (void) incr
+#define LP_COUNT_ADD(counter, incr) (void)(incr)
#define LP_COUNT_GET(counter) 0
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
index 67fd797af2..ff0e207a54 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -54,9 +54,6 @@ llvmpipe_create_query(struct pipe_context *pipe,
assert(type == PIPE_QUERY_OCCLUSION_COUNTER);
pq = CALLOC_STRUCT( llvmpipe_query );
- if (pq) {
- pipe_mutex_init(pq->mutex);
- }
return (struct pipe_query *) pq;
}
@@ -66,12 +63,20 @@ static void
llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
{
struct llvmpipe_query *pq = llvmpipe_query(q);
- /* query might still be in process if we never waited for the result */
- if (!pq->done) {
- llvmpipe_finish(pipe, __FUNCTION__);
+
+ /* Ideally we would refcount queries & not get destroyed until the
+ * last scene had finished with us.
+ */
+ if (pq->fence) {
+ if (!lp_fence_issued(pq->fence))
+ llvmpipe_flush(pipe, 0, NULL, __FUNCTION__);
+
+ if (!lp_fence_signalled(pq->fence))
+ lp_fence_wait(pq->fence);
+
+ lp_fence_reference(&pq->fence, NULL);
}
- pipe_mutex_destroy(pq->mutex);
FREE(pq);
}
@@ -84,22 +89,31 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
{
struct llvmpipe_query *pq = llvmpipe_query(q);
uint64_t *result = (uint64_t *)vresult;
+ int i;
+
+ if (!pq->fence) {
+ assert(0); /* query not in issued state */
+ return FALSE;
+ }
- if (!pq->done) {
- if (wait) {
- llvmpipe_finish(pipe, __FUNCTION__);
- }
- /* this is a bit inconsequent but should be ok */
- else {
+ if (!lp_fence_signalled(pq->fence)) {
+ if (!lp_fence_issued(pq->fence))
llvmpipe_flush(pipe, 0, NULL, __FUNCTION__);
- }
+
+ if (!wait)
+ return FALSE;
+
+ lp_fence_wait(pq->fence);
}
- if (pq->done) {
- *result = pq->result;
+ /* Sum the results from each of the threads:
+ */
+ *result = 0;
+ for (i = 0; i < LP_MAX_THREADS; i++) {
+ *result += pq->count[i];
}
- return pq->done;
+ return TRUE;
}
@@ -113,10 +127,12 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
* flush the scene now. Real apps shouldn't re-use a query in a
* frame of rendering.
*/
- if (pq->binned) {
+ if (pq->fence && !lp_fence_issued(pq->fence)) {
llvmpipe_finish(pipe, __FUNCTION__);
}
+
+ memset(pq->count, 0, sizeof(pq->count));
lp_setup_begin_query(llvmpipe->setup, pq);
llvmpipe->active_query_count++;
diff --git a/src/gallium/drivers/llvmpipe/lp_query.h b/src/gallium/drivers/llvmpipe/lp_query.h
index 721c41cb5c..e93842a2fd 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.h
+++ b/src/gallium/drivers/llvmpipe/lp_query.h
@@ -43,13 +43,7 @@ struct llvmpipe_context;
struct llvmpipe_query {
uint64_t count[LP_MAX_THREADS]; /**< a counter for each thread */
- uint64_t result; /**< total of all counters */
-
- pipe_mutex mutex;
- unsigned num_tiles, tile_count;
-
- boolean done;
- boolean binned; /**< has this query been binned in the scene? */
+ struct lp_fence *fence; /* fence from last scene this was binned in */
};
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index b1c306bbe9..d7e6415e13 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -30,6 +30,7 @@
#include "util/u_math.h"
#include "util/u_rect.h"
#include "util/u_surface.h"
+#include "util/u_pack_color.h"
#include "lp_scene_queue.h"
#include "lp_debug.h"
@@ -57,39 +58,12 @@ static void
lp_rast_begin( struct lp_rasterizer *rast,
struct lp_scene *scene )
{
- const struct pipe_framebuffer_state *fb = &scene->fb;
- int i;
rast->curr_scene = scene;
LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
- rast->state.nr_cbufs = scene->fb.nr_cbufs;
-
- for (i = 0; i < rast->state.nr_cbufs; i++) {
- struct pipe_surface *cbuf = scene->fb.cbufs[i];
- llvmpipe_resource_map(cbuf->texture,
- cbuf->face,
- cbuf->level,
- cbuf->zslice,
- LP_TEX_USAGE_READ_WRITE,
- LP_TEX_LAYOUT_LINEAR);
- }
-
- if (fb->zsbuf) {
- struct pipe_surface *zsbuf = scene->fb.zsbuf;
- rast->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level);
- rast->zsbuf.blocksize =
- util_format_get_blocksize(zsbuf->texture->format);
-
- rast->zsbuf.map = llvmpipe_resource_map(zsbuf->texture,
- zsbuf->face,
- zsbuf->level,
- zsbuf->zslice,
- LP_TEX_USAGE_READ_WRITE,
- LP_TEX_LAYOUT_NONE);
- }
-
+ lp_scene_begin_rasterization( scene );
lp_scene_bin_iter_begin( scene );
}
@@ -97,29 +71,7 @@ lp_rast_begin( struct lp_rasterizer *rast,
static void
lp_rast_end( struct lp_rasterizer *rast )
{
- struct lp_scene *scene = rast->curr_scene;
- unsigned i;
-
- /* Unmap color buffers */
- for (i = 0; i < rast->state.nr_cbufs; i++) {
- struct pipe_surface *cbuf = scene->fb.cbufs[i];
- llvmpipe_resource_unmap(cbuf->texture,
- cbuf->face,
- cbuf->level,
- cbuf->zslice);
- }
-
- /* Unmap z/stencil buffer */
- if (rast->zsbuf.map) {
- struct pipe_surface *zsbuf = scene->fb.zsbuf;
- llvmpipe_resource_unmap(zsbuf->texture,
- zsbuf->face,
- zsbuf->level,
- zsbuf->zslice);
- rast->zsbuf.map = NULL;
- }
-
- lp_scene_reset( rast->curr_scene );
+ lp_scene_end_rasterization( rast->curr_scene );
rast->curr_scene = NULL;
@@ -138,26 +90,23 @@ lp_rast_end( struct lp_rasterizer *rast )
*/
static void
lp_rast_tile_begin(struct lp_rasterizer_task *task,
- unsigned x, unsigned y)
+ const struct cmd_bin *bin)
{
- struct lp_rasterizer *rast = task->rast;
- struct lp_scene *scene = rast->curr_scene;
+ const struct lp_scene *scene = task->scene;
enum lp_texture_usage usage;
- LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
-
- assert(x % TILE_SIZE == 0);
- assert(y % TILE_SIZE == 0);
+ LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, bin->x, bin->y);
- task->x = x;
- task->y = y;
+ task->bin = bin;
+ task->x = bin->x * TILE_SIZE;
+ task->y = bin->y * TILE_SIZE;
/* reset pointers to color tile(s) */
memset(task->color_tiles, 0, sizeof(task->color_tiles));
/* get pointer to depth/stencil tile */
{
- struct pipe_surface *zsbuf = rast->curr_scene->fb.zsbuf;
+ struct pipe_surface *zsbuf = task->scene->fb.zsbuf;
if (zsbuf) {
struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture);
@@ -173,11 +122,14 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
zsbuf->face + zsbuf->zslice,
zsbuf->level,
usage,
- x, y);
+ task->x,
+ task->y);
/* Get actual pointer to the tile data. Note that depth/stencil
* data is tiled differently than color data.
*/
- task->depth_tile = lp_rast_get_depth_block_pointer(task, x, y);
+ task->depth_tile = lp_rast_get_depth_block_pointer(task,
+ task->x,
+ task->y);
assert(task->depth_tile);
}
@@ -192,11 +144,11 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
* Clear the rasterizer's current color tile.
* This is a bin command called during bin processing.
*/
-void
+static void
lp_rast_clear_color(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
- struct lp_rasterizer *rast = task->rast;
+ const struct lp_scene *scene = task->scene;
const uint8_t *clear_color = arg.clear_color;
unsigned i;
@@ -211,7 +163,7 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
clear_color[1] == clear_color[2] &&
clear_color[2] == clear_color[3]) {
/* clear to grayscale value {x, x, x, x} */
- for (i = 0; i < rast->state.nr_cbufs; i++) {
+ for (i = 0; i < scene->fb.nr_cbufs; i++) {
uint8_t *ptr =
lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
@@ -224,7 +176,7 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
* works.
*/
const unsigned chunk = TILE_SIZE / 4;
- for (i = 0; i < rast->state.nr_cbufs; i++) {
+ for (i = 0; i < scene->fb.nr_cbufs; i++) {
uint8_t *c =
lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
unsigned j;
@@ -246,22 +198,25 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
}
+
+
+
+
/**
* Clear the rasterizer's current z/stencil tile.
* This is a bin command called during bin processing.
*/
-void
+static void
lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
- struct lp_rasterizer *rast = task->rast;
- const struct lp_rast_clearzs *clearzs = arg.clear_zstencil;
- unsigned clear_value = clearzs->clearzs_value;
- unsigned clear_mask = clearzs->clearzs_mask;
+ const struct lp_scene *scene = task->scene;
+ unsigned clear_value = arg.clear_zstencil.value;
+ unsigned clear_mask = arg.clear_zstencil.mask;
const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT;
const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT;
- const unsigned block_size = rast->zsbuf.blocksize;
- const unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT;
+ const unsigned block_size = scene->zsbuf.blocksize;
+ const unsigned dst_stride = scene->zsbuf.stride * TILE_VECTOR_HEIGHT;
uint8_t *dst;
unsigned i, j;
@@ -327,15 +282,13 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
* threading/parallelism.
* This is a bin command which is stored in all bins.
*/
-void
-lp_rast_store_linear_color( struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+static void
+lp_rast_store_linear_color( struct lp_rasterizer_task *task )
{
- struct lp_rasterizer *rast = task->rast;
- struct lp_scene *scene = rast->curr_scene;
+ const struct lp_scene *scene = task->scene;
unsigned buf;
- for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
+ for (buf = 0; buf < scene->fb.nr_cbufs; buf++) {
struct pipe_surface *cbuf = scene->fb.cbufs[buf];
const unsigned face_slice = cbuf->face + cbuf->zslice;
const unsigned level = cbuf->level;
@@ -359,17 +312,22 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task,
* completely contained inside a triangle.
* This is a bin command called during bin processing.
*/
-void
+static void
lp_rast_shade_tile(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
- struct lp_rasterizer *rast = task->rast;
+ const struct lp_scene *scene = task->scene;
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
const unsigned tile_x = task->x, tile_y = task->y;
unsigned x, y;
+ if (inputs->disable) {
+ /* This command was partially binned and has been disabled */
+ return;
+ }
+
LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
/* render the whole 64x64 tile in 4x4 chunks */
@@ -380,7 +338,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
unsigned i;
/* color buffer */
- for (i = 0; i < rast->state.nr_cbufs; i++)
+ for (i = 0; i < scene->fb.nr_cbufs; i++)
color[i] = lp_rast_get_color_block_pointer(task, i,
tile_x + x, tile_y + y);
@@ -410,17 +368,17 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
* completely contained inside a triangle, and the shader is opaque.
* This is a bin command called during bin processing.
*/
-void
+static void
lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
- struct lp_rasterizer *rast = task->rast;
+ const struct lp_scene *scene = task->scene;
unsigned i;
LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
/* this will prevent converting the layout from tiled to linear */
- for (i = 0; i < rast->state.nr_cbufs; i++) {
+ for (i = 0; i < scene->fb.nr_cbufs; i++) {
(void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
}
@@ -442,7 +400,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
{
const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
- struct lp_rasterizer *rast = task->rast;
+ const struct lp_scene *scene = task->scene;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
void *depth;
unsigned i;
@@ -457,7 +415,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
assert((y % 4) == 0);
/* color buffer */
- for (i = 0; i < rast->state.nr_cbufs; i++) {
+ for (i = 0; i < scene->fb.nr_cbufs; i++) {
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
assert(lp_check_alignment(color[i], 16));
}
@@ -486,6 +444,38 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
/**
+ * Begin a new occlusion query.
+ * This is a bin command put in all bins.
+ * Called per thread.
+ */
+static void
+lp_rast_begin_query(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ struct llvmpipe_query *pq = arg.query_obj;
+
+ /* Expects that no query is already active on this task. */
+ assert(task->query == NULL);
+ /* Start this task's counter from zero for the new query. */
+ task->vis_counter = 0;
+ /* Remember the query so lp_rast_end_query() can credit it later. */
+ task->query = pq;
+}
+
+
+/**
+ * End the current occlusion query.
+ * This is a bin command put in all bins.
+ * Called per thread.
+ */
+static void
+lp_rast_end_query(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ /* 'arg' is not used: the query comes from task->query, which is
+ * dereferenced unconditionally and so must be non-NULL here.
+ */
+ /* Fold this task's vis_counter into the query's slot for this thread. */
+ task->query->count[task->thread_index] += task->vis_counter;
+ /* Mark the task as having no active query. */
+ task->query = NULL;
+}
+
+
+
+/**
* Set top row and left column of the tile's pixels to white. For debugging.
*/
static void
@@ -546,10 +536,10 @@ lp_rast_tile_end(struct lp_rasterizer_task *task)
{
#ifdef DEBUG
if (LP_DEBUG & (DEBUG_SHOW_SUBTILES | DEBUG_SHOW_TILES)) {
- struct lp_rasterizer *rast = task->rast;
+ const struct lp_scene *scene = task->scene;
unsigned buf;
- for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
+ for (buf = 0; buf < scene->fb.nr_cbufs; buf++) {
uint8_t *color = lp_rast_get_color_block_pointer(task, buf,
task->x, task->y);
@@ -563,83 +553,56 @@ lp_rast_tile_end(struct lp_rasterizer_task *task)
(void) outline_subtiles;
#endif
- {
+ lp_rast_store_linear_color(task);
+
+ if (task->query) {
union lp_rast_cmd_arg dummy = {0};
- lp_rast_store_linear_color(task, dummy);
+ lp_rast_end_query(task, dummy);
}
/* debug */
memset(task->color_tiles, 0, sizeof(task->color_tiles));
task->depth_tile = NULL;
-}
-
-
-
-/**
- * Signal on a fence. This is called during bin execution/rasterization.
- * Called per thread.
- */
-void
-lp_rast_fence(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- struct lp_fence *fence = arg.fence;
- lp_fence_signal(fence);
-}
-
-/**
- * Begin a new occlusion query.
- * This is a bin command put in all bins.
- * Called per thread.
- */
-void
-lp_rast_begin_query(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- /* Reset the per-task counter */
- task->vis_counter = 0;
+ task->bin = NULL;
}
-
-/**
- * End the current occlusion query.
- * This is a bin command put in all bins.
- * Called per thread.
- */
-void
-lp_rast_end_query(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
{
- struct llvmpipe_query *pq = arg.query_obj;
-
- pipe_mutex_lock(pq->mutex);
- {
- /* Accumulate the visible fragment counter from this tile in
- * the query object.
- */
- pq->count[task->thread_index] += task->vis_counter;
+ lp_rast_clear_color,
+ lp_rast_clear_zstencil,
+ lp_rast_triangle_1,
+ lp_rast_triangle_2,
+ lp_rast_triangle_3,
+ lp_rast_triangle_4,
+ lp_rast_triangle_5,
+ lp_rast_triangle_6,
+ lp_rast_triangle_7,
+ lp_rast_triangle_8,
+ lp_rast_triangle_3_4,
+ lp_rast_triangle_3_16,
+ lp_rast_shade_tile,
+ lp_rast_shade_tile_opaque,
+ lp_rast_begin_query,
+ lp_rast_end_query,
+};
- /* check if this is the last tile in the scene */
- pq->tile_count++;
- if (pq->tile_count == pq->num_tiles) {
- uint i;
- /* sum the per-thread counters for the query */
- pq->result = 0;
- for (i = 0; i < LP_MAX_THREADS; i++) {
- pq->result += pq->count[i];
- }
+static void
+do_rasterize_bin(struct lp_rasterizer_task *task,
+ const struct cmd_bin *bin)
+{
+ const struct cmd_block *block;
+ unsigned k;
- /* reset counters (in case this query is re-used in the scene) */
- memset(pq->count, 0, sizeof(pq->count));
+ if (0)
+ lp_debug_bin(bin);
- pq->tile_count = 0;
- pq->binned = FALSE;
- pq->done = TRUE;
+ for (block = bin->head; block; block = block->next) {
+ for (k = 0; k < block->count; k++) {
+ dispatch[block->cmd[k]]( task, block->arg[k] );
}
}
- pipe_mutex_unlock(pq->mutex);
}
@@ -652,74 +615,26 @@ lp_rast_end_query(struct lp_rasterizer_task *task,
*/
static void
rasterize_bin(struct lp_rasterizer_task *task,
- const struct cmd_bin *bin,
- int x, int y)
+ const struct cmd_bin *bin )
{
- const struct cmd_block_list *commands = &bin->commands;
- struct cmd_block *block;
- unsigned k;
+ lp_rast_tile_begin( task, bin );
- lp_rast_tile_begin( task, x * TILE_SIZE, y * TILE_SIZE );
-
- /* simply execute each of the commands in the block list */
- for (block = commands->head; block; block = block->next) {
- for (k = 0; k < block->count; k++) {
- block->cmd[k]( task, block->arg[k] );
- }
- }
+ do_rasterize_bin(task, bin);
lp_rast_tile_end(task);
- /* Free data for this bin.
- */
- lp_scene_bin_reset( task->rast->curr_scene, x, y);
-}
-
-#define RAST(x) { lp_rast_##x, #x }
-
-static struct {
- lp_rast_cmd cmd;
- const char *name;
-} cmd_names[] =
-{
- RAST(clear_color),
- RAST(clear_zstencil),
- RAST(triangle_1),
- RAST(triangle_2),
- RAST(triangle_3),
- RAST(triangle_4),
- RAST(triangle_5),
- RAST(triangle_6),
- RAST(triangle_7),
- RAST(shade_tile),
- RAST(shade_tile_opaque),
- RAST(store_linear_color),
- RAST(fence),
- RAST(begin_query),
- RAST(end_query),
-};
-
-static void
-debug_bin( const struct cmd_bin *bin )
-{
- const struct cmd_block *head = bin->commands.head;
- int i, j;
-
- for (i = 0; i < head->count; i++) {
- debug_printf("%d: ", i);
- for (j = 0; j < Elements(cmd_names); j++) {
- if (head->cmd[i] == cmd_names[j].cmd) {
- debug_printf("%s\n", cmd_names[j].name);
- break;
- }
- }
- if (j == Elements(cmd_names))
- debug_printf("...other\n");
+ /* Debug/Perf flags:
+ */
+ if (bin->head->count == 1) {
+ if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
+ LP_COUNT(nr_pure_shade_opaque_64);
+ else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE)
+ LP_COUNT(nr_pure_shade_64);
}
-
}
+
/* An empty bin is one that just loads the contents of the tile and
* stores them again unchanged. This typically happens when bins have
* been flushed for some reason in the middle of a frame, or when
@@ -730,12 +645,10 @@ debug_bin( const struct cmd_bin *bin )
static boolean
is_empty_bin( const struct cmd_bin *bin )
{
- if (0) debug_bin(bin);
- return bin->commands.head->count == 0;
+ return bin->head == NULL;
}
-
/**
* Rasterize/execute all bins within a scene.
* Called per thread.
@@ -744,6 +657,7 @@ static void
rasterize_scene(struct lp_rasterizer_task *task,
struct lp_scene *scene)
{
+ task->scene = scene;
/* loop over scene bins, rasterize each */
#if 0
{
@@ -758,19 +672,20 @@ rasterize_scene(struct lp_rasterizer_task *task,
#else
{
struct cmd_bin *bin;
- int x, y;
assert(scene);
- while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) {
+ while ((bin = lp_scene_bin_iter_next(scene))) {
if (!is_empty_bin( bin ))
- rasterize_bin(task, bin, x, y);
+ rasterize_bin(task, bin);
}
}
#endif
if (scene->fence) {
- lp_rast_fence(task, lp_rast_arg_fence(scene->fence));
+ lp_fence_signal(scene->fence);
}
+
+ task->scene = NULL;
}
@@ -790,8 +705,6 @@ lp_rast_queue_scene( struct lp_rasterizer *rast,
rasterize_scene( &rast->tasks[0], scene );
- lp_scene_reset( scene );
-
lp_rast_end( rast );
rast->curr_scene = NULL;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index b4564ef33b..5767667935 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -79,6 +79,8 @@ struct lp_rast_state {
*/
struct lp_rast_shader_inputs {
float facing; /** Positive for front-facing, negative for back-facing */
+ boolean disable:1; /** Partially binned, disable this command */
+ boolean opaque:1; /** Is opaque */
float (*a0)[4];
float (*dadx)[4];
@@ -87,10 +89,6 @@ struct lp_rast_shader_inputs {
const struct lp_rast_state *state;
};
-struct lp_rast_clearzs {
- unsigned clearzs_value;
- unsigned clearzs_mask;
-};
struct lp_rast_plane {
/* one-pixel sized trivial accept offsets for each plane */
@@ -150,7 +148,10 @@ union lp_rast_cmd_arg {
} triangle;
const struct lp_rast_state *set_state;
uint8_t clear_color[4];
- const struct lp_rast_clearzs *clear_zstencil;
+ struct {
+ unsigned value;
+ unsigned mask;
+ } clear_zstencil;
struct lp_fence *fence;
struct llvmpipe_query *query_obj;
};
@@ -194,10 +195,20 @@ lp_rast_arg_fence( struct lp_fence *fence )
static INLINE union lp_rast_cmd_arg
-lp_rast_arg_clearzs( const struct lp_rast_clearzs *clearzs )
+lp_rast_arg_clearzs( unsigned value, unsigned mask )
{
union lp_rast_cmd_arg arg;
- arg.clear_zstencil = clearzs;
+ arg.clear_zstencil.value = value;
+ arg.clear_zstencil.mask = mask;
+ return arg;
+}
+
+
+/**
+ * Build a command argument that carries a query object pointer.
+ * Only the query_obj member of the returned union is assigned.
+ */
+static INLINE union lp_rast_cmd_arg
+lp_rast_arg_query( struct llvmpipe_query *pq )
+{
+ union lp_rast_cmd_arg arg;
+ arg.query_obj = pq;
+ return arg;
+}
@@ -215,52 +226,32 @@ lp_rast_arg_null( void )
* These get put into bins by the setup code and are called when
* the bins are executed.
*/
+#define LP_RAST_OP_CLEAR_COLOR 0x0
+#define LP_RAST_OP_CLEAR_ZSTENCIL 0x1
+#define LP_RAST_OP_TRIANGLE_1 0x2
+#define LP_RAST_OP_TRIANGLE_2 0x3
+#define LP_RAST_OP_TRIANGLE_3 0x4
+#define LP_RAST_OP_TRIANGLE_4 0x5
+#define LP_RAST_OP_TRIANGLE_5 0x6
+#define LP_RAST_OP_TRIANGLE_6 0x7
+#define LP_RAST_OP_TRIANGLE_7 0x8
+#define LP_RAST_OP_TRIANGLE_8 0x9
+#define LP_RAST_OP_TRIANGLE_3_4 0xa
+#define LP_RAST_OP_TRIANGLE_3_16 0xb
+#define LP_RAST_OP_SHADE_TILE 0xc
+#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xd
+#define LP_RAST_OP_BEGIN_QUERY 0xe
+#define LP_RAST_OP_END_QUERY 0xf
+
+#define LP_RAST_OP_MAX 0x10
+#define LP_RAST_OP_MASK 0xff
-void lp_rast_clear_color( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
-void lp_rast_clear_zstencil( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
-void lp_rast_triangle_1( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-void lp_rast_triangle_2( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-void lp_rast_triangle_3( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-void lp_rast_triangle_4( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-void lp_rast_triangle_5( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-void lp_rast_triangle_6( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-void lp_rast_triangle_7( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-void lp_rast_triangle_8( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
-void lp_rast_shade_tile( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
-void lp_rast_shade_tile_opaque( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
-void lp_rast_fence( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
-void lp_rast_store_linear_color( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
-
-void lp_rast_begin_query(struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
-void lp_rast_end_query(struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
void
-lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg);
+lp_debug_bins( struct lp_scene *scene );
+void
+lp_debug_draw_bins_by_cmd_length( struct lp_scene *scene );
+void
+lp_debug_draw_bins_by_coverage( struct lp_scene *scene );
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
new file mode 100644
index 0000000000..9fc78645a3
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
@@ -0,0 +1,410 @@
+#include "util/u_math.h"
+#include "lp_rast_priv.h"
+#include "lp_state_fs.h"
+
+/**
+ * Find the lowest set bit in *mask, clear it and return its index.
+ *
+ * Returns -1 and leaves *mask untouched when no bit is set.  The shift
+ * is done on an unsigned constant so that clearing bit 31 is well
+ * defined, and the empty-mask case no longer shifts by -1.
+ */
+static INLINE int u_bit_scan(unsigned *mask)
+{
+   int i = ffs(*mask) - 1;
+
+   if (i < 0)
+      return -1;
+
+   *mask &= ~(1u << i);
+   return i;
+}
+
+/**
+ * Scratch picture of one bin.  data[][] holds one label character per
+ * pixel (' ' while untouched); coverage counts pixels written for the
+ * first time and overdraw counts writes to already-written pixels.
+ */
+struct tile {
+ int coverage;
+ int overdraw;
+ char data[TILE_SIZE][TILE_SIZE];
+};
+
+/**
+ * Map a command index to a one-character label: digits first, then
+ * lower-case letters, then upper-case letters.  Any index outside that
+ * range is shown as '?'.
+ */
+static char get_label( int i )
+{
+   static const char *alphabet =
+      "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+   const unsigned nr_labels = 10 + 26 + 26;
+
+   /* The unsigned comparison also sends negative indices to '?'. */
+   return ((unsigned) i < nr_labels) ? alphabet[i] : '?';
+}
+
+
+
+/**
+ * Printable names of the rasterizer opcodes, indexed by LP_RAST_OP_*
+ * value.  The order of the entries must follow the opcode numbering.
+ */
+static const char *cmd_names[LP_RAST_OP_MAX] =
+{
+ "clear_color",
+ "clear_zstencil",
+ "triangle_1",
+ "triangle_2",
+ "triangle_3",
+ "triangle_4",
+ "triangle_5",
+ "triangle_6",
+ "triangle_7",
+ "triangle_8",
+ "triangle_3_4",
+ "triangle_3_16",
+ "shade_tile",
+ "shade_tile_opaque",
+ "begin_query",
+ "end_query",
+};
+
+/**
+ * Return the printable name of a rasterizer opcode.
+ *
+ * An out-of-range opcode still trips the assert in debug builds.  When
+ * asserts are compiled out it yields "unknown" instead of reading past
+ * the end of cmd_names[]; the same placeholder covers a table slot that
+ * has no name.
+ */
+static const char *cmd_name(unsigned cmd)
+{
+   assert(Elements(cmd_names) > cmd);
+
+   if (cmd >= Elements(cmd_names) || !cmd_names[cmd])
+      return "unknown";
+
+   return cmd_names[cmd];
+}
+
+/**
+ * Return the fragment shader variant used by command k of a block, or
+ * NULL for commands that do not shade (clears, queries).
+ *
+ * Every triangle opcode is covered, including the 8-plane and the
+ * 3_4 / 3_16 variants: they all carry the triangle in arg.triangle.tri.
+ */
+static const struct lp_fragment_shader_variant *
+get_variant( const struct cmd_block *block,
+             int k )
+{
+   switch (block->cmd[k]) {
+   case LP_RAST_OP_SHADE_TILE:
+   case LP_RAST_OP_SHADE_TILE_OPAQUE:
+      return block->arg[k].shade_tile->state->variant;
+
+   case LP_RAST_OP_TRIANGLE_1:
+   case LP_RAST_OP_TRIANGLE_2:
+   case LP_RAST_OP_TRIANGLE_3:
+   case LP_RAST_OP_TRIANGLE_4:
+   case LP_RAST_OP_TRIANGLE_5:
+   case LP_RAST_OP_TRIANGLE_6:
+   case LP_RAST_OP_TRIANGLE_7:
+   case LP_RAST_OP_TRIANGLE_8:
+   case LP_RAST_OP_TRIANGLE_3_4:
+   case LP_RAST_OP_TRIANGLE_3_16:
+      return block->arg[k].triangle.tri->inputs.state->variant;
+
+   default:
+      return NULL;
+   }
+}
+
+
+/**
+ * Tell whether command k of a block shades with blending enabled on
+ * render target 0.  Commands without a shader variant are not blended.
+ */
+static boolean
+is_blend( const struct cmd_block *block,
+          int k )
+{
+   const struct lp_fragment_shader_variant *fs = get_variant(block, k);
+
+   if (!fs)
+      return FALSE;
+
+   return fs->key.blend.rt[0].blend_enable;
+}
+
+
+
+/**
+ * Print the command list of one bin, one command per line, numbered
+ * consecutively across all of the bin's command blocks.
+ */
+static void
+debug_bin( const struct cmd_bin *bin )
+{
+   const struct cmd_block *block;
+   int idx = 0;
+
+   debug_printf("bin %d,%d:\n", bin->x, bin->y);
+
+   for (block = bin->head; block; block = block->next) {
+      int k;
+
+      for (k = 0; k < block->count; k++) {
+         debug_printf("%d: %s %s\n", idx,
+                      cmd_name(block->cmd[k]),
+                      is_blend(block, k) ? "blended" : "");
+         idx++;
+      }
+   }
+}
+
+
+/**
+ * Record that pixel (x, y) of the tile was written by the command
+ * labelled val, updating the coverage / overdraw counters.
+ *
+ * The picture is stored row-major, data[y][x], which is the order in
+ * which lp_debug_bin() prints it; indexing it as data[x][y] produced a
+ * transposed picture.
+ *
+ * \param blend  currently unused
+ */
+static void plot(struct tile *tile,
+                 int x, int y,
+                 char val,
+                 boolean blend)
+{
+   (void) blend;
+
+   if (tile->data[y][x] == ' ')
+      tile->coverage++;
+   else
+      tile->overdraw++;
+
+   tile->data[y][x] = val;
+}
+
+
+
+
+
+
+/**
+ * Plot a whole-tile shade command: every pixel of the tile receives
+ * the label val, unless the shader inputs have been disabled.
+ *
+ * \return number of pixels plotted
+ */
+static int
+debug_shade_tile(int x, int y,
+                 const union lp_rast_cmd_arg arg,
+                 struct tile *tile,
+                 char val)
+{
+   const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
+   const boolean blend = inputs->state->variant->key.blend.rt[0].blend_enable;
+   unsigned px, py;
+
+   if (inputs->disable)
+      return 0;
+
+   for (px = 0; px < TILE_SIZE; px++) {
+      for (py = 0; py < TILE_SIZE; py++) {
+         plot(tile, px, py, val, blend);
+      }
+   }
+
+   return TILE_SIZE * TILE_SIZE;
+}
+
+/**
+ * Plot a clear command: every pixel of the tile receives the label val.
+ *
+ * \return number of pixels plotted
+ */
+static int
+debug_clear_tile(int x, int y,
+                 const union lp_rast_cmd_arg arg,
+                 struct tile *tile,
+                 char val)
+{
+   unsigned px = 0;
+
+   while (px < TILE_SIZE) {
+      unsigned py;
+
+      for (py = 0; py < TILE_SIZE; py++)
+         plot(tile, px, py, val, FALSE);
+
+      px++;
+   }
+
+   return TILE_SIZE * TILE_SIZE;
+}
+
+
+/**
+ * Plot the pixels of one tile that a binned triangle covers.
+ *
+ * The planes selected by arg.triangle.plane_mask are copied and
+ * evaluated at the tile origin (tilex, tiley); the tile is then walked
+ * pixel by pixel, stepping the plane values incrementally.  A pixel is
+ * plotted only when every selected plane value is > 0.
+ *
+ * \return number of pixels plotted
+ */
+static int
+debug_triangle(int tilex, int tiley,
+ const union lp_rast_cmd_arg arg,
+ struct tile *tile,
+ char val)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ unsigned plane_mask = arg.triangle.plane_mask;
+ struct lp_rast_plane plane[8];
+ int x, y;
+ int count = 0;
+ unsigned i, nr_planes = 0;
+ boolean blend = tri->inputs.state->variant->key.blend.rt[0].blend_enable;
+
+ if (tri->inputs.disable) {
+ /* This triangle was partially binned and has been disabled */
+ return 0;
+ }
+
+ /* Gather the planes named by the mask and move each one's c value
+ * to the tile's top-left pixel.
+ */
+ while (plane_mask) {
+ plane[nr_planes] = tri->plane[u_bit_scan(&plane_mask)];
+ plane[nr_planes].c = (plane[nr_planes].c +
+ plane[nr_planes].dcdy * tiley -
+ plane[nr_planes].dcdx * tilex);
+ nr_planes++;
+ }
+
+ for(y = 0; y < TILE_SIZE; y++)
+ {
+ for(x = 0; x < TILE_SIZE; x++)
+ {
+ /* Any plane value <= 0 puts this pixel outside the triangle. */
+ for (i = 0; i < nr_planes; i++)
+ if (plane[i].c <= 0)
+ goto out;
+
+ plot(tile, x, y, val, blend);
+ count++;
+
+ out:
+ /* Step every plane one pixel to the right. */
+ for (i = 0; i < nr_planes; i++)
+ plane[i].c -= plane[i].dcdx;
+ }
+
+ /* Undo the TILE_SIZE x-steps of this row and move down one row. */
+ for (i = 0; i < nr_planes; i++) {
+ plane[i].c += plane[i].dcdx * TILE_SIZE;
+ plane[i].c += plane[i].dcdy;
+ }
+ }
+ return count;
+}
+
+
+
+
+
+/**
+ * Replay the commands of one bin into a character picture of the tile.
+ *
+ * The tile is reset to blanks, then each clear, shade-tile and triangle
+ * command plots its label over the pixels it touches, so that coverage
+ * and overdraw can be read back from the tile afterwards.  8-plane
+ * triangles are replayed like the 1..7-plane ones; previously they were
+ * skipped and reported as covering 0 pixels.
+ *
+ * \param tile        picture and counters to fill in
+ * \param bin         bin whose command list is replayed
+ * \param print_cmds  if set, print one line per command with its label,
+ *                    name, plotted pixel count and blend state
+ */
+static void
+do_debug_bin( struct tile *tile,
+              const struct cmd_bin *bin,
+              boolean print_cmds)
+{
+   unsigned k, j = 0;
+   const struct cmd_block *block;
+
+   int tx = bin->x * TILE_SIZE;
+   int ty = bin->y * TILE_SIZE;
+
+   memset(tile->data, ' ', sizeof tile->data);
+   tile->coverage = 0;
+   tile->overdraw = 0;
+
+   for (block = bin->head; block; block = block->next) {
+      for (k = 0; k < block->count; k++, j++) {
+         boolean blend = is_blend(block, k);
+         char val = get_label(j);
+         int count = 0;
+
+         if (print_cmds)
+            debug_printf("%c: %15s", val, cmd_name(block->cmd[k]));
+
+         switch (block->cmd[k]) {
+         case LP_RAST_OP_CLEAR_COLOR:
+         case LP_RAST_OP_CLEAR_ZSTENCIL:
+            count = debug_clear_tile(tx, ty, block->arg[k], tile, val);
+            break;
+
+         case LP_RAST_OP_SHADE_TILE:
+         case LP_RAST_OP_SHADE_TILE_OPAQUE:
+            count = debug_shade_tile(tx, ty, block->arg[k], tile, val);
+            break;
+
+         case LP_RAST_OP_TRIANGLE_1:
+         case LP_RAST_OP_TRIANGLE_2:
+         case LP_RAST_OP_TRIANGLE_3:
+         case LP_RAST_OP_TRIANGLE_4:
+         case LP_RAST_OP_TRIANGLE_5:
+         case LP_RAST_OP_TRIANGLE_6:
+         case LP_RAST_OP_TRIANGLE_7:
+         case LP_RAST_OP_TRIANGLE_8:
+            count = debug_triangle(tx, ty, block->arg[k], tile, val);
+            break;
+
+         default:
+            /* Queries plot nothing.  The 3_4 / 3_16 triangle variants
+             * use plane_mask to hold a position rather than a set of
+             * planes, so debug_triangle() cannot replay them.
+             */
+            break;
+         }
+
+         if (print_cmds) {
+            debug_printf(" % 5d", count);
+
+            if (blend)
+               debug_printf(" blended");
+
+            debug_printf("\n");
+         }
+      }
+   }
+}
+
+/**
+ * Dump one bin: its command list followed by a character picture of
+ * the tile and the average number of times each covered pixel was
+ * drawn.  Bins without commands print nothing.
+ *
+ * The average is only computed when at least one pixel was covered;
+ * previously a bin whose commands plotted nothing divided by zero.
+ */
+void
+lp_debug_bin( const struct cmd_bin *bin)
+{
+   struct tile tile;
+   int x,y;
+
+   if (!bin->head)
+      return;
+
+   do_debug_bin(&tile, bin, TRUE);
+
+   debug_printf("------------------------------------------------------------------\n");
+   for (y = 0; y < TILE_SIZE; y++) {
+      for (x = 0; x < TILE_SIZE; x++) {
+         debug_printf("%c", tile.data[y][x]);
+      }
+      debug_printf("|\n");
+   }
+   debug_printf("------------------------------------------------------------------\n");
+
+   if (tile.coverage)
+      debug_printf("each pixel drawn avg %f times\n",
+                   ((float)tile.overdraw + tile.coverage)/(float)tile.coverage);
+   else
+      debug_printf("no pixels drawn\n");
+}
+
+
+
+
+
+
+/**
+ * Return the number of bytes taken by the commands of bin (x, y):
+ * one opcode byte plus one argument union per queued command.
+ */
+static unsigned
+lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y )
+{
+   const size_t bytes_per_cmd = sizeof(uint8_t) + sizeof(union lp_rast_cmd_arg);
+   struct cmd_bin *bin = lp_scene_get_bin((struct lp_scene *) scene, x, y);
+   const struct cmd_block *block = bin->head;
+   unsigned total = 0;
+
+   while (block) {
+      total += block->count * bytes_per_cmd;
+      block = block->next;
+   }
+
+   return total;
+}
+
+
+
+/**
+ * Print a map of the scene with one character per bin showing how much
+ * of the tile its commands cover: ' ' for a bin without commands, '?'
+ * for commands that plot nothing, '0'..'9' for partial coverage in
+ * tenths and '*' for a fully covered tile.  Totals for this scene and
+ * running totals over all calls are printed afterwards.
+ *
+ * The tile area is derived from TILE_SIZE rather than a literal 64*64,
+ * the digit index is clamped to the last digit, and the percentages
+ * are reported as 0 when there is nothing to divide by.
+ */
+void
+lp_debug_draw_bins_by_coverage( struct lp_scene *scene )
+{
+   static const char bits[] = "0123456789";
+   static unsigned long long running_total;
+   static unsigned long long running_possible;
+   const unsigned tile_pixels = TILE_SIZE * TILE_SIZE;
+   unsigned x, y;
+   unsigned total = 0;
+   unsigned possible = 0;
+
+   for (x = 0; x < scene->tiles_x; x++)
+      debug_printf("-");
+   debug_printf("\n");
+
+   for (y = 0; y < scene->tiles_y; y++) {
+      for (x = 0; x < scene->tiles_x; x++) {
+         struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+         struct tile tile;
+
+         if (!bin->head) {
+            debug_printf(" ");
+            continue;
+         }
+
+         do_debug_bin(&tile, bin, FALSE);
+
+         total += tile.coverage;
+         possible += tile_pixels;
+
+         if ((unsigned) tile.coverage == tile_pixels)
+            debug_printf("*");
+         else if (tile.coverage) {
+            /* Coverage in tenths of the tile, 0..9. */
+            int bit = (int) ((unsigned) tile.coverage * 10u / tile_pixels);
+            debug_printf("%c", bits[MIN2(bit, 9)]);
+         }
+         else
+            debug_printf("?");
+      }
+      debug_printf("|\n");
+   }
+
+   for (x = 0; x < scene->tiles_x; x++)
+      debug_printf("-");
+   debug_printf("\n");
+
+   debug_printf("this tile total: %u possible %u: percentage: %f\n",
+                total,
+                possible,
+                possible ? total * 100.0 / (float)possible : 0.0);
+
+   running_total += total;
+   running_possible += possible;
+
+   debug_printf("overall total: %llu possible %llu: percentage: %f\n",
+                running_total,
+                running_possible,
+                running_possible ? running_total * 100.0 / (double)running_possible : 0.0);
+}
+
+
+/**
+ * Print a map of the scene with one character per bin whose "weight"
+ * grows with the base-2 logarithm of the bin's command list size.
+ */
+void
+lp_debug_draw_bins_by_cmd_length( struct lp_scene *scene )
+{
+   unsigned row, col;
+
+   for (row = 0; row < scene->tiles_y; row++) {
+      for (col = 0; col < scene->tiles_x; col++) {
+         const char *ramp = " ...,-~:;=o+xaw*#XAWWWWWWWWWWWWWWWW";
+         const int bytes = lp_scene_bin_size(scene, col, row);
+         const int magnitude = util_unsigned_logbase2(bytes);
+
+         debug_printf("%c", ramp[MIN2(magnitude,32)]);
+      }
+      debug_printf("\n");
+   }
+}
+
+
+/**
+ * Print the command list of every bin in the scene that has commands,
+ * walking the bins row by row.
+ */
+void
+lp_debug_bins( struct lp_scene *scene )
+{
+   unsigned row, col;
+
+   for (row = 0; row < scene->tiles_y; row++) {
+      for (col = 0; col < scene->tiles_x; col++) {
+         struct cmd_bin *bin = lp_scene_get_bin(scene, col, row);
+
+         if (!bin->head)
+            continue;
+
+         debug_bin(bin);
+      }
+   }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index fae7f6d3dc..7370119e96 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -69,13 +69,16 @@ extern const struct lp_rast_state *jit_state;
struct lp_rasterizer;
-
+struct cmd_bin;
/**
* Per-thread rasterization state
*/
struct lp_rasterizer_task
{
+ const struct cmd_bin *bin;
+
+ struct lp_scene *scene;
unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */
uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS];
@@ -89,6 +92,7 @@ struct lp_rasterizer_task
/* occlusion counter for visible pixels */
uint32_t vis_counter;
+ struct llvmpipe_query *query;
pipe_semaphore work_ready;
pipe_semaphore work_done;
@@ -104,33 +108,9 @@ struct lp_rasterizer
{
boolean exit_flag;
- /* Framebuffer stuff
- */
- struct {
- uint8_t *map;
- unsigned stride;
- unsigned blocksize;
- } zsbuf;
-
- struct {
- unsigned nr_cbufs;
- unsigned clear_color;
- unsigned clear_depth;
- char clear_stencil;
- } state;
-
/** The incoming queue of scenes ready to rasterize */
struct lp_scene_queue *full_scenes;
- /**
- * The outgoing queue of processed scenes to return to setup module
- *
- * XXX: while scenes are per-context but the rasterizer is
- * (potentially) shared, these empty scenes should be returned to
- * the context which created them rather than retained here.
- */
- /* struct lp_scene_queue *empty_scenes; */
-
/** The scene currently being rasterized by the threads */
struct lp_scene *curr_scene;
@@ -164,13 +144,13 @@ static INLINE void *
lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
unsigned x, unsigned y)
{
- const struct lp_rasterizer *rast = task->rast;
+ const struct lp_scene *scene = task->scene;
void *depth;
assert((x % TILE_VECTOR_WIDTH) == 0);
assert((y % TILE_VECTOR_HEIGHT) == 0);
- if (!rast->zsbuf.map) {
+ if (!scene->zsbuf.map) {
/* Either out of memory or no zsbuf. Can't tell without access
* to the state. Just use dummy tile memory, but don't print
* the oom warning as this most likely because there is no
@@ -179,9 +159,9 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
return lp_dummy_tile;
}
- depth = (rast->zsbuf.map +
- rast->zsbuf.stride * y +
- rast->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT);
+ depth = (scene->zsbuf.map +
+ scene->zsbuf.stride * y +
+ scene->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT);
assert(lp_check_alignment(depth, 16));
return depth;
@@ -195,14 +175,14 @@ static INLINE uint8_t *
lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
unsigned buf, enum lp_texture_usage usage)
{
- struct lp_rasterizer *rast = task->rast;
+ const struct lp_scene *scene = task->scene;
assert(task->x % TILE_SIZE == 0);
assert(task->y % TILE_SIZE == 0);
- assert(buf < rast->state.nr_cbufs);
+ assert(buf < scene->fb.nr_cbufs);
if (!task->color_tiles[buf]) {
- struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
+ struct pipe_surface *cbuf = scene->fb.cbufs[buf];
struct llvmpipe_resource *lpt;
assert(cbuf);
lpt = llvmpipe_resource(cbuf->texture);
@@ -263,7 +243,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y )
{
- const struct lp_rasterizer *rast = task->rast;
+ const struct lp_scene *scene = task->scene;
const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
@@ -271,7 +251,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
unsigned i;
/* color buffer */
- for (i = 0; i < rast->state.nr_cbufs; i++)
+ for (i = 0; i < scene->fb.nr_cbufs; i++)
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
depth = lp_rast_get_depth_block_pointer(task, x, y);
@@ -291,5 +271,29 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
END_JIT_CALL();
}
+void lp_rast_triangle_1( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_2( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_3( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_4( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_5( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_6( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_7( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_8( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_3_4(struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_3_16( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void
+lp_debug_bin( const struct cmd_bin *bin );
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index dbaa8e023a..a1f309d4b0 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -68,36 +68,6 @@ block_full_16(struct lp_rasterizer_task *task,
}
#if !defined(PIPE_ARCH_SSE)
-static INLINE unsigned
-build_mask(int c, int dcdx, int dcdy)
-{
- int mask = 0;
-
- int c0 = c;
- int c1 = c0 + dcdx;
- int c2 = c1 + dcdx;
- int c3 = c2 + dcdx;
-
- mask |= ((c0 + 0 * dcdy) >> 31) & (1 << 0);
- mask |= ((c0 + 1 * dcdy) >> 31) & (1 << 2);
- mask |= ((c0 + 2 * dcdy) >> 31) & (1 << 8);
- mask |= ((c0 + 3 * dcdy) >> 31) & (1 << 10);
- mask |= ((c1 + 0 * dcdy) >> 31) & (1 << 1);
- mask |= ((c1 + 1 * dcdy) >> 31) & (1 << 3);
- mask |= ((c1 + 2 * dcdy) >> 31) & (1 << 9);
- mask |= ((c1 + 3 * dcdy) >> 31) & (1 << 11);
- mask |= ((c2 + 0 * dcdy) >> 31) & (1 << 4);
- mask |= ((c2 + 1 * dcdy) >> 31) & (1 << 6);
- mask |= ((c2 + 2 * dcdy) >> 31) & (1 << 12);
- mask |= ((c2 + 3 * dcdy) >> 31) & (1 << 14);
- mask |= ((c3 + 0 * dcdy) >> 31) & (1 << 5);
- mask |= ((c3 + 1 * dcdy) >> 31) & (1 << 7);
- mask |= ((c3 + 2 * dcdy) >> 31) & (1 << 13);
- mask |= ((c3 + 3 * dcdy) >> 31) & (1 << 15);
-
- return mask;
-}
-
static INLINE unsigned
build_mask_linear(int c, int dcdx, int dcdy)
@@ -142,6 +112,23 @@ build_masks(int c,
*partmask |= build_mask_linear(c + cdiff, dcdx, dcdy);
}
+/**
+ * Non-SSE build: there is no dedicated 16x16 path, so hand the triangle
+ * to the generic 3-plane rasterizer with all three planes enabled.
+ */
+void
+lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
+                      const union lp_rast_cmd_arg arg)
+{
+   union lp_rast_cmd_arg generic;
+
+   generic.triangle.tri = arg.triangle.tri;
+   /* Low three bits set: planes 0, 1 and 2. */
+   generic.triangle.plane_mask = (1<<3)-1;
+
+   lp_rast_triangle_3(task, generic);
+}
+
+/**
+ * Non-SSE build: there is no dedicated 4x4 path either; forward to
+ * lp_rast_triangle_3_16() with the argument unchanged.
+ */
+void
+lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ lp_rast_triangle_3_16(task, arg);
+}
+
#else
#include <emmintrin.h>
#include "util/u_sse.h"
@@ -220,79 +207,28 @@ build_mask_linear(int c, int dcdx, int dcdy)
}
static INLINE unsigned
-build_mask(int c, int dcdx, int dcdy)
+sign_bits4(const __m128i *cstep, int cdiff)
{
- __m128i step = _mm_setr_epi32(0, dcdx, dcdy, dcdx + dcdy);
- __m128i c0 = _mm_set1_epi32(c);
-
- /* Get values across the quad
- */
- __m128i cstep0 = _mm_add_epi32(c0, step);
-
- /* Scale up step for moving between quads.
- */
- __m128i step4 = _mm_add_epi32(step, step);
- /* Get values for the remaining quads:
+ /* Adjust the step values
*/
- __m128i cstep1 = _mm_add_epi32(cstep0,
- _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1)));
- __m128i cstep2 = _mm_add_epi32(cstep0,
- _mm_shuffle_epi32(step4, _MM_SHUFFLE(2,2,2,2)));
- __m128i cstep3 = _mm_add_epi32(cstep2,
- _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1)));
+ __m128i cio4 = _mm_set1_epi32(cdiff);
+ __m128i cstep0 = _mm_add_epi32(cstep[0], cio4);
+ __m128i cstep1 = _mm_add_epi32(cstep[1], cio4);
+ __m128i cstep2 = _mm_add_epi32(cstep[2], cio4);
+ __m128i cstep3 = _mm_add_epi32(cstep[3], cio4);
- /* pack pairs of results into epi16
+ /* Pack down to epi8
*/
__m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
__m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
-
- /* pack into epi8, preserving sign bits
- */
__m128i result = _mm_packs_epi16(cstep01, cstep23);
- /* extract sign bits to create mask
+ /* Extract the sign bits
*/
return _mm_movemask_epi8(result);
}
-#endif
-
-
-
-
-#define TAG(x) x##_1
-#define NR_PLANES 1
-#include "lp_rast_tri_tmp.h"
-
-#define TAG(x) x##_2
-#define NR_PLANES 2
-#include "lp_rast_tri_tmp.h"
-
-#define TAG(x) x##_3
-#define NR_PLANES 3
-#include "lp_rast_tri_tmp.h"
-
-#define TAG(x) x##_4
-#define NR_PLANES 4
-#include "lp_rast_tri_tmp.h"
-
-#define TAG(x) x##_5
-#define NR_PLANES 5
-#include "lp_rast_tri_tmp.h"
-
-#define TAG(x) x##_6
-#define NR_PLANES 6
-#include "lp_rast_tri_tmp.h"
-
-#define TAG(x) x##_7
-#define NR_PLANES 7
-#include "lp_rast_tri_tmp.h"
-
-#define TAG(x) x##_8
-#define NR_PLANES 8
-#include "lp_rast_tri_tmp.h"
-
/* Special case for 3 plane triangle which is contained entirely
* within a 16x16 block.
@@ -304,29 +240,32 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
const struct lp_rast_triangle *tri = arg.triangle.tri;
const struct lp_rast_plane *plane = tri->plane;
unsigned mask = arg.triangle.plane_mask;
- const int x = task->x + (mask & 0xf) * 16;
- const int y = task->y + (mask >> 4) * 16;
+ const int x = task->x + (mask & 0xff);
+ const int y = task->y + (mask >> 8);
unsigned outmask, inmask, partmask, partial_mask;
unsigned j;
- int c[3];
+ __m128i cstep4[3][4];
outmask = 0; /* outside one or more trivial reject planes */
partmask = 0; /* outside one or more trivial accept planes */
for (j = 0; j < 3; j++) {
- c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
+ const int dcdx = -plane[j].dcdx * 4;
+ const int dcdy = plane[j].dcdy * 4;
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3);
+ cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy);
+ cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy);
+ cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy);
{
- const int dcdx = -plane[j].dcdx * 4;
- const int dcdy = plane[j].dcdy * 4;
+ const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
const int cox = plane[j].eo * 4;
const int cio = plane[j].ei * 4 - 1;
- build_masks(c[j] + cox,
- cio - cox,
- dcdx, dcdy,
- &outmask, /* sign bits from c[i][0..15] + cox */
- &partmask); /* sign bits from c[i][0..15] + cio */
+ outmask |= sign_bits4(cstep4[j], c + cox);
+ partmask |= sign_bits4(cstep4[j], c + cio);
}
}
@@ -352,16 +291,20 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
int iy = (i >> 2) * 4;
int px = x + ix;
int py = y + iy;
- int cx[3];
+ unsigned mask = 0xffff;
partial_mask &= ~(1 << i);
- for (j = 0; j < 3; j++)
- cx[j] = (c[j]
- - plane[j].dcdx * ix
- + plane[j].dcdy * iy);
+ for (j = 0; j < 3; j++) {
+ const int cx = (plane[j].c
+ - plane[j].dcdx * px
+ + plane[j].dcdy * py) * 4;
+
+ mask &= ~sign_bits4(cstep4[j], cx);
+ }
- do_block_4_3(task, tri, plane, px, py, cx);
+ if (mask)
+ lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask);
}
/* Iterate over fulls:
@@ -378,3 +321,87 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, px, py);
}
}
+
+
+/**
+ * Special case for a 3-plane triangle which is contained entirely
+ * within a single 4x4 block (SSE2 build).
+ *
+ * arg.triangle.plane_mask does not select planes here: its low 8 bits
+ * hold the block's x offset inside the tile and the bits above hold
+ * the y offset.
+ *
+ * Each plane is evaluated at the 16 pixels of the block and the sign
+ * bits are gathered into a 16-bit mask.  The plane value is biased by
+ * -1 so that the sign test rejects c <= 0, which is the rule the
+ * generic do_block_4() path applies (it passes c - 1); without the
+ * bias, pixels lying exactly on an edge were shaded here but not there.
+ */
+void
+lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
+                     const union lp_rast_cmd_arg arg)
+{
+   const struct lp_rast_triangle *tri = arg.triangle.tri;
+   const struct lp_rast_plane *plane = tri->plane;
+   const unsigned pos = arg.triangle.plane_mask;
+   const int x = task->x + (pos & 0xff);
+   const int y = task->y + (pos >> 8);
+   unsigned mask = 0xffff;
+   unsigned j;
+
+   for (j = 0; j < 3; j++) {
+      const int cx = (plane[j].c - 1
+                      - plane[j].dcdx * x
+                      + plane[j].dcdy * y);
+
+      const int dcdx = -plane[j].dcdx;
+      const int dcdy = plane[j].dcdy;
+      __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+      /* One vector per row of the block, four pixels each. */
+      __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3);
+      __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
+      __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+      __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+      /* Saturating packs keep the sign of every value. */
+      __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
+      __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+      __m128i result = _mm_packs_epi16(cstep01, cstep23);
+
+      /* Extract the sign bits: a set bit means "outside this plane".
+       */
+      mask &= ~_mm_movemask_epi8(result);
+   }
+
+   if (mask)
+      lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+}
+
+
+#endif
+
+
+
+
+#define TAG(x) x##_1
+#define NR_PLANES 1
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_2
+#define NR_PLANES 2
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_3
+#define NR_PLANES 3
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_4
+#define NR_PLANES 4
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_5
+#define NR_PLANES 5
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_6
+#define NR_PLANES 6
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_7
+#define NR_PLANES 7
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_8
+#define NR_PLANES 8
+#include "lp_rast_tri_tmp.h"
+
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index 99a0bae45d..9830a43ba5 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -50,9 +50,9 @@ TAG(do_block_4)(struct lp_rasterizer_task *task,
int j;
for (j = 0; j < NR_PLANES; j++) {
- mask &= ~build_mask(c[j] - 1,
- -plane[j].dcdx,
- plane[j].dcdy);
+ mask &= ~build_mask_linear(c[j] - 1,
+ -plane[j].dcdx,
+ plane[j].dcdy);
}
/* Now pass to the shader:
@@ -162,6 +162,11 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
unsigned outmask, inmask, partmask, partial_mask;
unsigned j = 0;
+ if (tri->inputs.disable) {
+ /* This triangle was partially binned and has been disabled */
+ return;
+ }
+
outmask = 0; /* outside one or more trivial reject planes */
partmask = 0; /* outside one or more trivial accept planes */
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index 15a09b7100..c0732e4ab7 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -30,17 +30,20 @@
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_simple_list.h"
+#include "util/u_format.h"
#include "lp_scene.h"
-#include "lp_scene_queue.h"
#include "lp_fence.h"
+#include "lp_debug.h"
-/** List of texture references */
-struct texture_ref {
- struct pipe_resource *texture;
- struct texture_ref *prev, *next; /**< linked list w/ u_simple_list.h */
-};
+#define RESOURCE_REF_SZ 32
+/** List of resource references */
+struct resource_ref {
+ struct pipe_resource *resource[RESOURCE_REF_SZ];
+ int count;
+ struct resource_ref *next;
+};
/**
@@ -48,28 +51,16 @@ struct texture_ref {
* \param queue the queue to put newly rendered/emptied scenes into
*/
struct lp_scene *
-lp_scene_create( struct pipe_context *pipe,
- struct lp_scene_queue *queue )
+lp_scene_create( struct pipe_context *pipe )
{
- unsigned i, j;
struct lp_scene *scene = CALLOC_STRUCT(lp_scene);
if (!scene)
return NULL;
scene->pipe = pipe;
- scene->empty_queue = queue;
-
- for (i = 0; i < TILES_X; i++) {
- for (j = 0; j < TILES_Y; j++) {
- struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
- bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block);
- }
- }
scene->data.head =
- scene->data.tail = CALLOC_STRUCT(data_block);
-
- make_empty_list(&scene->resources);
+ CALLOC_STRUCT(data_block);
pipe_mutex_init(scene->mutex);
@@ -83,24 +74,9 @@ lp_scene_create( struct pipe_context *pipe,
void
lp_scene_destroy(struct lp_scene *scene)
{
- unsigned i, j;
-
- lp_scene_reset(scene);
-
- for (i = 0; i < TILES_X; i++)
- for (j = 0; j < TILES_Y; j++) {
- struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
- assert(bin->commands.head == bin->commands.tail);
- FREE(bin->commands.head);
- bin->commands.head = NULL;
- bin->commands.tail = NULL;
- }
-
- FREE(scene->data.head);
- scene->data.head = NULL;
-
pipe_mutex_destroy(scene->mutex);
-
+ assert(scene->data.head->next == NULL);
+ FREE(scene->data.head);
FREE(scene);
}
@@ -117,8 +93,7 @@ lp_scene_is_empty(struct lp_scene *scene )
for (y = 0; y < TILES_Y; y++) {
for (x = 0; x < TILES_X; x++) {
const struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
- const struct cmd_block_list *list = &bin->commands;
- if (list->head != list->tail || list->head->count > 0) {
+ if (bin->head) {
return FALSE;
}
}
@@ -127,45 +102,108 @@ lp_scene_is_empty(struct lp_scene *scene )
}
-/* Free data for one particular bin. May be called from the
- * rasterizer thread(s).
+/* Returns true if there has ever been a failed allocation attempt in
+ * this scene. Used in triangle emit to avoid having to check success
+ * at each bin.
+ */
+boolean
+lp_scene_is_oom(struct lp_scene *scene)
+{
+ return scene->alloc_failed;
+}
+
+
+/* Remove all commands from a bin. Tries to reuse some of the memory
+ * allocated to the bin, however.
*/
void
lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y)
{
struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
- struct cmd_block_list *list = &bin->commands;
- struct cmd_block *block;
- struct cmd_block *tmp;
- assert(x < TILES_X);
- assert(y < TILES_Y);
+ bin->head = bin->tail;
+ if (bin->tail) {
+ bin->tail->next = NULL;
+ bin->tail->count = 0;
+ }
+}
+
- for (block = list->head; block != list->tail; block = tmp) {
- tmp = block->next;
- FREE(block);
+void
+lp_scene_begin_rasterization(struct lp_scene *scene)
+{
+ const struct pipe_framebuffer_state *fb = &scene->fb;
+ int i;
+
+ //LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
+
+ for (i = 0; i < scene->fb.nr_cbufs; i++) {
+ struct pipe_surface *cbuf = scene->fb.cbufs[i];
+ scene->cbufs[i].stride = llvmpipe_resource_stride(cbuf->texture,
+ cbuf->level);
+
+ scene->cbufs[i].map = llvmpipe_resource_map(cbuf->texture,
+ cbuf->face,
+ cbuf->level,
+ cbuf->zslice,
+ LP_TEX_USAGE_READ_WRITE,
+ LP_TEX_LAYOUT_LINEAR);
}
- assert(list->tail->next == NULL);
- list->head = list->tail;
- list->head->count = 0;
+ if (fb->zsbuf) {
+ struct pipe_surface *zsbuf = scene->fb.zsbuf;
+ scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level);
+ scene->zsbuf.blocksize =
+ util_format_get_blocksize(zsbuf->texture->format);
+
+ scene->zsbuf.map = llvmpipe_resource_map(zsbuf->texture,
+ zsbuf->face,
+ zsbuf->level,
+ zsbuf->zslice,
+ LP_TEX_USAGE_READ_WRITE,
+ LP_TEX_LAYOUT_NONE);
+ }
}
+
+
/**
- * Free all the temporary data in a scene. May be called from the
- * rasterizer thread(s).
+ * Free all the temporary data in a scene.
*/
void
-lp_scene_reset(struct lp_scene *scene )
+lp_scene_end_rasterization(struct lp_scene *scene )
{
- unsigned i, j;
+ int i, j;
+
+ /* Unmap color buffers */
+ for (i = 0; i < scene->fb.nr_cbufs; i++) {
+ if (scene->cbufs[i].map) {
+ struct pipe_surface *cbuf = scene->fb.cbufs[i];
+ llvmpipe_resource_unmap(cbuf->texture,
+ cbuf->face,
+ cbuf->level,
+ cbuf->zslice);
+ scene->cbufs[i].map = NULL;
+ }
+ }
- /* Free all but last binner command lists:
+ /* Unmap z/stencil buffer */
+ if (scene->zsbuf.map) {
+ struct pipe_surface *zsbuf = scene->fb.zsbuf;
+ llvmpipe_resource_unmap(zsbuf->texture,
+ zsbuf->face,
+ zsbuf->level,
+ zsbuf->zslice);
+ scene->zsbuf.map = NULL;
+ }
+
+ /* Reset all command lists:
*/
for (i = 0; i < scene->tiles_x; i++) {
for (j = 0; j < scene->tiles_y; j++) {
- lp_scene_bin_reset(scene, i, j);
+ struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
+ bin->head = bin->tail = NULL;
}
}
@@ -174,40 +212,56 @@ lp_scene_reset(struct lp_scene *scene )
*/
assert(lp_scene_is_empty(scene));
- /* Free all but last binned data block:
+ /* Decrement texture ref counts
*/
{
- struct data_block_list *list = &scene->data;
- struct data_block *block, *tmp;
-
- for (block = list->head; block != list->tail; block = tmp) {
- tmp = block->next;
- FREE(block);
+ struct resource_ref *ref;
+ int i, j = 0;
+
+ for (ref = scene->resources; ref; ref = ref->next) {
+ for (i = 0; i < ref->count; i++) {
+ if (LP_DEBUG & DEBUG_SETUP)
+ debug_printf("resource %d: %p %dx%d sz %d\n",
+ j,
+ ref->resource[i],
+ ref->resource[i]->width0,
+ ref->resource[i]->height0,
+ llvmpipe_resource_size(ref->resource[i]));
+ j++;
+ pipe_resource_reference(&ref->resource[i], NULL);
+ }
}
-
- assert(list->tail->next == NULL);
- list->head = list->tail;
- list->head->used = 0;
+
+ if (LP_DEBUG & DEBUG_SETUP)
+ debug_printf("scene %d resources, sz %d\n",
+ j, scene->resource_reference_size);
}
- /* Release texture refs
+ /* Free all scene data blocks:
*/
{
- struct resource_ref *ref, *next, *ref_list = &scene->resources;
- for (ref = ref_list->next; ref != ref_list; ref = next) {
- next = next_elem(ref);
- pipe_resource_reference(&ref->resource, NULL);
- FREE(ref);
+ struct data_block_list *list = &scene->data;
+ struct data_block *block, *tmp;
+
+ for (block = list->head->next; block; block = tmp) {
+ tmp = block->next;
+ FREE(block);
}
- make_empty_list(ref_list);
+
+ list->head->next = NULL;
+ list->head->used = 0;
}
lp_fence_reference(&scene->fence, NULL);
+ scene->resources = NULL;
scene->scene_size = 0;
+ scene->resource_reference_size = 0;
- scene->has_color_clear = FALSE;
scene->has_depthstencil_clear = FALSE;
+ scene->alloc_failed = FALSE;
+
+ util_unreference_framebuffer_state( &scene->fb );
}
@@ -216,12 +270,20 @@ lp_scene_reset(struct lp_scene *scene )
struct cmd_block *
-lp_bin_new_cmd_block( struct cmd_block_list *list )
+lp_scene_new_cmd_block( struct lp_scene *scene,
+ struct cmd_bin *bin )
{
- struct cmd_block *block = MALLOC_STRUCT(cmd_block);
+ struct cmd_block *block = lp_scene_alloc(scene, sizeof(struct cmd_block));
if (block) {
- list->tail->next = block;
- list->tail = block;
+ if (bin->tail) {
+ bin->tail->next = block;
+ bin->tail = block;
+ }
+ else {
+ bin->head = block;
+ bin->tail = block;
+ }
+ //memset(block, 0, sizeof *block);
block->next = NULL;
block->count = 0;
}
@@ -230,16 +292,26 @@ lp_bin_new_cmd_block( struct cmd_block_list *list )
struct data_block *
-lp_bin_new_data_block( struct data_block_list *list )
+lp_scene_new_data_block( struct lp_scene *scene )
{
- struct data_block *block = MALLOC_STRUCT(data_block);
- if (block) {
- list->tail->next = block;
- list->tail = block;
- block->next = NULL;
+ if (scene->scene_size + DATA_BLOCK_SIZE > LP_SCENE_MAX_SIZE) {
+ if (0) debug_printf("%s: failed\n", __FUNCTION__);
+ scene->alloc_failed = TRUE;
+ return NULL;
+ }
+ else {
+ struct data_block *block = MALLOC_STRUCT(data_block);
+ if (block == NULL)
+ return NULL;
+
+ scene->scene_size += sizeof *block;
+
block->used = 0;
+ block->next = scene->data.head;
+ scene->data.head = block;
+
+ return block;
}
- return block;
}
@@ -247,7 +319,7 @@ lp_bin_new_data_block( struct data_block_list *list )
* Return number of bytes used for all bin data within a scene.
* This does not include resources (textures) referenced by the scene.
*/
-unsigned
+static unsigned
lp_scene_data_size( const struct lp_scene *scene )
{
unsigned size = 0;
@@ -259,36 +331,63 @@ lp_scene_data_size( const struct lp_scene *scene )
}
-/** Return number of bytes used for a single bin */
-unsigned
-lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y )
-{
- struct cmd_bin *bin = lp_scene_get_bin((struct lp_scene *) scene, x, y);
- const struct cmd_block *cmd;
- unsigned size = 0;
- for (cmd = bin->commands.head; cmd; cmd = cmd->next) {
- size += (cmd->count *
- (sizeof(lp_rast_cmd) + sizeof(union lp_rast_cmd_arg)));
- }
- return size;
-}
-
/**
* Add a reference to a resource by the scene.
*/
-void
+boolean
lp_scene_add_resource_reference(struct lp_scene *scene,
- struct pipe_resource *resource)
+ struct pipe_resource *resource,
+ boolean initializing_scene)
{
- struct resource_ref *ref = CALLOC_STRUCT(resource_ref);
- if (ref) {
- struct resource_ref *ref_list = &scene->resources;
- pipe_resource_reference(&ref->resource, resource);
- insert_at_tail(ref_list, ref);
+ struct resource_ref *ref, **last = &scene->resources;
+ int i;
+
+ /* Look at existing resource blocks:
+ */
+ for (ref = scene->resources; ref; ref = ref->next) {
+ last = &ref->next;
+
+ /* Search for this resource:
+ */
+ for (i = 0; i < ref->count; i++)
+ if (ref->resource[i] == resource)
+ return TRUE;
+
+ if (ref->count < RESOURCE_REF_SZ) {
+ /* If the block is half-empty, then append the reference here.
+ */
+ break;
+ }
+ }
+
+ /* Create a new block if no half-empty block was found.
+ */
+ if (!ref) {
+ assert(*last == NULL);
+ *last = lp_scene_alloc(scene, sizeof *ref);
+ if (*last == NULL)
+ return FALSE;
+
+ ref = *last;
+ memset(ref, 0, sizeof *ref);
}
- scene->scene_size += llvmpipe_resource_size(resource);
+ /* Append the reference to the reference block.
+ */
+ pipe_resource_reference(&ref->resource[ref->count++], resource);
+ scene->resource_reference_size += llvmpipe_resource_size(resource);
+
+ /* Heuristic to advise scene flushes. This isn't helpful in the
+ * initial setup of the scene, but after that point flush on the
+ * next resource added which exceeds 64MB in referenced texture
+ * data.
+ */
+ if (!initializing_scene &&
+ scene->resource_reference_size >= LP_SCENE_MAX_RESOURCE_SIZE)
+ return FALSE;
+
+ return TRUE;
}
@@ -299,12 +398,15 @@ boolean
lp_scene_is_resource_referenced(const struct lp_scene *scene,
const struct pipe_resource *resource)
{
- const struct resource_ref *ref_list = &scene->resources;
const struct resource_ref *ref;
- foreach (ref, ref_list) {
- if (ref->resource == resource)
- return TRUE;
+ int i;
+
+ for (ref = scene->resources; ref; ref = ref->next) {
+ for (i = 0; i < ref->count; i++)
+ if (ref->resource[i] == resource)
+ return TRUE;
}
+
return FALSE;
}
@@ -342,7 +444,7 @@ lp_scene_bin_iter_begin( struct lp_scene *scene )
* of work (a bin) to work on.
*/
struct cmd_bin *
-lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y )
+lp_scene_bin_iter_next( struct lp_scene *scene )
{
struct cmd_bin *bin = NULL;
@@ -359,8 +461,6 @@ lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y )
}
bin = lp_scene_get_bin(scene, scene->curr_x, scene->curr_y);
- *bin_x = scene->curr_x;
- *bin_y = scene->curr_y;
end:
/*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/
@@ -384,34 +484,16 @@ void lp_scene_begin_binning( struct lp_scene *scene,
}
-void lp_scene_rasterize( struct lp_scene *scene,
- struct lp_rasterizer *rast )
+void lp_scene_end_binning( struct lp_scene *scene )
{
- if (0) {
- unsigned x, y;
+ if (LP_DEBUG & DEBUG_SCENE) {
debug_printf("rasterize scene:\n");
- debug_printf(" data size: %u\n", lp_scene_data_size(scene));
- for (y = 0; y < scene->tiles_y; y++) {
- for (x = 0; x < scene->tiles_x; x++) {
- debug_printf(" bin %u, %u size: %u\n", x, y,
- lp_scene_bin_size(scene, x, y));
- }
- }
- }
-
- /* Enqueue the scene for rasterization, then immediately wait for
- * it to finish.
- */
- lp_rast_queue_scene( rast, scene );
+ debug_printf(" scene_size: %u\n",
+ scene->scene_size);
+ debug_printf(" data size: %u\n",
+ lp_scene_data_size(scene));
- /* Currently just wait for the rasterizer to finish. Some
- * threading interactions need to be worked out, particularly once
- * transfers become per-context:
- */
- lp_rast_finish( rast );
-
- util_unreference_framebuffer_state( &scene->fb );
-
- /* put scene into the empty list */
- lp_scene_enqueue( scene->empty_queue, scene );
+ if (0)
+ lp_debug_bins( scene );
+ }
}
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index fa1b311fa1..dbef7692e4 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -38,6 +38,7 @@
#include "os/os_thread.h"
#include "lp_tile_soa.h"
#include "lp_rast.h"
+#include "lp_debug.h"
struct lp_scene_queue;
@@ -49,58 +50,71 @@ struct lp_scene_queue;
#define CMD_BLOCK_MAX 128
-#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *))
-
+#define DATA_BLOCK_SIZE (64 * 1024)
+
+/* Scene temporary storage is clamped to this size:
+ */
+#define LP_SCENE_MAX_SIZE (4*1024*1024)
+
+/* The maximum amount of texture storage referenced by a scene is
+ * clamped to this size:
+ */
+#define LP_SCENE_MAX_RESOURCE_SIZE (64*1024*1024)
/* switch to a non-pointer value for this:
*/
-typedef void (*lp_rast_cmd)( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
+typedef void (*lp_rast_cmd_func)( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
struct cmd_block {
- lp_rast_cmd cmd[CMD_BLOCK_MAX];
+ uint8_t cmd[CMD_BLOCK_MAX];
union lp_rast_cmd_arg arg[CMD_BLOCK_MAX];
unsigned count;
struct cmd_block *next;
};
+struct cmd_block_list {
+ struct cmd_block *head;
+ struct cmd_block *tail;
+};
+
struct data_block {
ubyte data[DATA_BLOCK_SIZE];
unsigned used;
struct data_block *next;
};
-struct cmd_block_list {
- struct cmd_block *head;
- struct cmd_block *tail;
-};
+
/**
* For each screen tile we have one of these bins.
*/
struct cmd_bin {
- struct cmd_block_list commands;
+ ushort x;
+ ushort y;
+ struct cmd_block *head;
+ struct cmd_block *tail;
};
/**
- * This stores bulk data which is shared by all bins within a scene.
+ * This stores bulk data which is used for all memory allocations
+ * within a scene.
+ *
* Examples include triangle data and state data. The commands in
* the per-tile bins will point to chunks of data in this structure.
+ *
+ * Include the first block of data statically to ensure we can always
+ * initiate a scene without relying on malloc succeeding.
*/
struct data_block_list {
+ struct data_block first;
struct data_block *head;
- struct data_block *tail;
-};
-
-
-/** List of resource references */
-struct resource_ref {
- struct pipe_resource *resource;
- struct resource_ref *prev, *next; /**< linked list w/ u_simple_list.h */
};
+struct resource_ref;
/**
* All bins and bin data are contained here.
@@ -114,18 +128,33 @@ struct lp_scene {
struct pipe_context *pipe;
struct lp_fence *fence;
+ /* Framebuffer mappings - valid only between begin_rasterization()
+ * and end_rasterization().
+ */
+ struct {
+ uint8_t *map;
+ unsigned stride;
+ unsigned blocksize;
+ } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
+
/** the framebuffer to render the scene into */
struct pipe_framebuffer_state fb;
/** list of resources referenced by the scene commands */
- struct resource_ref resources;
+ struct resource_ref *resources;
- /** Approx memory used by the scene (in bytes). This includes the
- * shared and per-tile bins plus any referenced resources/textures.
+ /** Total memory used by the scene (in bytes). This sums all the
+ * data blocks and counts all bins, state, resource references and
+ * other random allocations within the scene.
*/
unsigned scene_size;
- boolean has_color_clear;
+ /** Sum of sizes of all resources referenced by the scene. Sums
+ * all the textures read by the scene:
+ */
+ unsigned resource_reference_size;
+
+ boolean alloc_failed;
boolean has_depthstencil_clear;
/**
@@ -137,38 +166,28 @@ struct lp_scene {
int curr_x, curr_y; /**< for iterating over bins */
pipe_mutex mutex;
- /* Where to place this scene once it has been rasterized:
- */
- struct lp_scene_queue *empty_queue;
-
struct cmd_bin tile[TILES_X][TILES_Y];
struct data_block_list data;
};
-struct lp_scene *lp_scene_create(struct pipe_context *pipe,
- struct lp_scene_queue *empty_queue);
+struct lp_scene *lp_scene_create(struct pipe_context *pipe);
void lp_scene_destroy(struct lp_scene *scene);
-
-
boolean lp_scene_is_empty(struct lp_scene *scene );
-
-void lp_scene_reset(struct lp_scene *scene );
+boolean lp_scene_is_oom(struct lp_scene *scene );
-struct data_block *lp_bin_new_data_block( struct data_block_list *list );
+struct data_block *lp_scene_new_data_block( struct lp_scene *scene );
-struct cmd_block *lp_bin_new_cmd_block( struct cmd_block_list *list );
+struct cmd_block *lp_scene_new_cmd_block( struct lp_scene *scene,
+ struct cmd_bin *bin );
-unsigned lp_scene_data_size( const struct lp_scene *scene );
-
-unsigned lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y );
-
-void lp_scene_add_resource_reference(struct lp_scene *scene,
- struct pipe_resource *resource);
+boolean lp_scene_add_resource_reference(struct lp_scene *scene,
+ struct pipe_resource *resource,
+ boolean initializing_scene);
boolean lp_scene_is_resource_referenced(const struct lp_scene *scene,
const struct pipe_resource *resource );
@@ -182,21 +201,27 @@ static INLINE void *
lp_scene_alloc( struct lp_scene *scene, unsigned size)
{
struct data_block_list *list = &scene->data;
- struct data_block *tail = list->tail;
+ struct data_block *block = list->head;
- if (tail->used + size > DATA_BLOCK_SIZE) {
- tail = lp_bin_new_data_block( list );
- if (!tail) {
+ assert(size <= DATA_BLOCK_SIZE);
+ assert(block != NULL);
+
+ if (LP_DEBUG & DEBUG_MEM)
+ debug_printf("alloc %u block %u/%u tot %u/%u\n",
+ size, block->used, DATA_BLOCK_SIZE,
+ scene->scene_size, LP_SCENE_MAX_SIZE);
+
+ if (block->used + size > DATA_BLOCK_SIZE) {
+ block = lp_scene_new_data_block( scene );
+ if (!block) {
/* out of memory */
return NULL;
}
}
- scene->scene_size += size;
-
{
- ubyte *data = tail->data + tail->used;
- tail->used += size;
+ ubyte *data = block->data + block->used;
+ block->used += size;
return data;
}
}
@@ -210,20 +235,26 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size,
unsigned alignment )
{
struct data_block_list *list = &scene->data;
- struct data_block *tail = list->tail;
-
- if (tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) {
- tail = lp_bin_new_data_block( list );
- if (!tail)
+ struct data_block *block = list->head;
+
+ assert(block != NULL);
+
+ if (LP_DEBUG & DEBUG_MEM)
+ debug_printf("alloc %u block %u/%u tot %u/%u\n",
+ size + alignment - 1,
+ block->used, DATA_BLOCK_SIZE,
+ scene->scene_size, LP_SCENE_MAX_SIZE);
+
+ if (block->used + size + alignment - 1 > DATA_BLOCK_SIZE) {
+ block = lp_scene_new_data_block( scene );
+ if (!block)
return NULL;
}
- scene->scene_size += size;
-
{
- ubyte *data = tail->data + tail->used;
+ ubyte *data = block->data + block->used;
unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data;
- tail->used += offset + size;
+ block->used += offset + size;
return data + offset;
}
}
@@ -235,9 +266,8 @@ static INLINE void
lp_scene_putback_data( struct lp_scene *scene, unsigned size)
{
struct data_block_list *list = &scene->data;
- scene->scene_size -= size;
- assert(list->tail->used >= size);
- list->tail->used -= size;
+ assert(list->head && list->head->used >= size);
+ list->head->used -= size;
}
@@ -256,55 +286,55 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y);
/* Add a command to bin[x][y].
*/
-static INLINE void
+static INLINE boolean
lp_scene_bin_command( struct lp_scene *scene,
- unsigned x, unsigned y,
- lp_rast_cmd cmd,
- union lp_rast_cmd_arg arg )
+ unsigned x, unsigned y,
+ unsigned cmd,
+ union lp_rast_cmd_arg arg )
{
struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
- struct cmd_block_list *list = &bin->commands;
- struct cmd_block *tail = list->tail;
+ struct cmd_block *tail = bin->tail;
assert(x < scene->tiles_x);
assert(y < scene->tiles_y);
+ assert(cmd <= LP_RAST_OP_END_QUERY);
- if (tail->count == CMD_BLOCK_MAX) {
- tail = lp_bin_new_cmd_block( list );
+ if (tail == NULL || tail->count == CMD_BLOCK_MAX) {
+ tail = lp_scene_new_cmd_block( scene, bin );
if (!tail) {
- /* out of memory - simply ignore this command (for now) */
- return;
+ return FALSE;
}
assert(tail->count == 0);
}
{
unsigned i = tail->count;
- tail->cmd[i] = cmd;
+ tail->cmd[i] = cmd & LP_RAST_OP_MASK;
tail->arg[i] = arg;
tail->count++;
}
+
+ return TRUE;
}
/* Add a command to all active bins.
*/
-static INLINE void
+static INLINE boolean
lp_scene_bin_everywhere( struct lp_scene *scene,
- lp_rast_cmd cmd,
+ unsigned cmd,
const union lp_rast_cmd_arg arg )
{
unsigned i, j;
- for (i = 0; i < scene->tiles_x; i++)
- for (j = 0; j < scene->tiles_y; j++)
- lp_scene_bin_command( scene, i, j, cmd, arg );
-}
-
+ for (i = 0; i < scene->tiles_x; i++) {
+ for (j = 0; j < scene->tiles_y; j++) {
+ if (!lp_scene_bin_command( scene, i, j, cmd, arg ))
+ return FALSE;
+ }
+ }
-void
-lp_scene_bin_state_command( struct lp_scene *scene,
- lp_rast_cmd cmd,
- const union lp_rast_cmd_arg arg );
+ return TRUE;
+}
static INLINE unsigned
@@ -318,23 +348,30 @@ void
lp_scene_bin_iter_begin( struct lp_scene *scene );
struct cmd_bin *
-lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y );
+lp_scene_bin_iter_next( struct lp_scene *scene );
-void
-lp_scene_rasterize( struct lp_scene *scene,
- struct lp_rasterizer *rast );
+/* Begin/end binning of a scene
+ */
void
lp_scene_begin_binning( struct lp_scene *scene,
struct pipe_framebuffer_state *fb );
+void
+lp_scene_end_binning( struct lp_scene *scene );
+
+
+/* Begin/end rasterization of a scene
+ */
+void
+lp_scene_begin_rasterization(struct lp_scene *scene);
+
+void
+lp_scene_end_rasterization(struct lp_scene *scene );
+
+
-static INLINE unsigned
-lp_scene_get_size(const struct lp_scene *scene)
-{
- return scene->scene_size;
-}
#endif /* LP_BIN_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 1e65a91fc6..0d40dc5020 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -33,8 +33,8 @@
#include "util/u_format_s3tc.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
+#include "draw/draw_context.h"
-#include "gallivm/lp_bld_limits.h"
#include "lp_texture.h"
#include "lp_fence.h"
#include "lp_jit.h"
@@ -63,6 +63,7 @@ static const struct debug_named_value lp_debug_flags[] = {
{ "counters", DEBUG_COUNTERS, NULL },
{ "scene", DEBUG_SCENE, NULL },
{ "fence", DEBUG_FENCE, NULL },
+ { "mem", DEBUG_MEM, NULL },
DEBUG_NAMED_VALUE_END
};
#endif
@@ -131,8 +132,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return LP_MAX_TEXTURE_3D_LEVELS;
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return LP_MAX_TEXTURE_2D_LEVELS;
- case PIPE_CAP_TGSI_CONT_SUPPORTED:
- return 1;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
return 1;
case PIPE_CAP_INDEP_BLEND_ENABLE:
@@ -145,47 +144,29 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
return 0;
- case PIPE_CAP_MAX_VS_INSTRUCTIONS:
- case PIPE_CAP_MAX_FS_INSTRUCTIONS:
- case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS:
- case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS:
- case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS:
- case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS:
- case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS:
- case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS:
- /* There is no limit in number of instructions beyond available memory */
- return 32768;
- case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH:
- case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH:
- return LP_MAX_TGSI_NESTING;
- case PIPE_CAP_MAX_VS_INPUTS:
- case PIPE_CAP_MAX_FS_INPUTS:
- return PIPE_MAX_ATTRIBS;
- case PIPE_CAP_MAX_FS_CONSTS:
- case PIPE_CAP_MAX_VS_CONSTS:
- /* There is no limit in number of constants beyond available memory */
- return 32768;
- case PIPE_CAP_MAX_VS_TEMPS:
- case PIPE_CAP_MAX_FS_TEMPS:
- return LP_MAX_TGSI_TEMPS;
- case PIPE_CAP_MAX_VS_ADDRS:
- case PIPE_CAP_MAX_FS_ADDRS:
- return LP_MAX_TGSI_ADDRS;
- case PIPE_CAP_MAX_VS_PREDS:
- case PIPE_CAP_MAX_FS_PREDS:
- return LP_MAX_TGSI_PREDS;
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
return 1;
- case PIPE_CAP_GEOMETRY_SHADER4:
- return 1;
case PIPE_CAP_DEPTH_CLAMP:
return 0;
default:
- assert(0);
return 0;
}
}
+static int
+llvmpipe_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+{
+ switch(shader)
+ {
+ case PIPE_SHADER_FRAGMENT:
+ return tgsi_exec_get_shader_param(param);
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_GEOMETRY:
+ return draw_get_shader_param(shader, param);
+ default:
+ return 0;
+ }
+}
static float
llvmpipe_get_paramf(struct pipe_screen *screen, enum pipe_cap param)
@@ -401,6 +382,7 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
screen->base.get_name = llvmpipe_get_name;
screen->base.get_vendor = llvmpipe_get_vendor;
screen->base.get_param = llvmpipe_get_param;
+ screen->base.get_shader_param = llvmpipe_get_shader_param;
screen->base.get_paramf = llvmpipe_get_paramf;
screen->base.is_format_supported = llvmpipe_is_format_supported;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 3da9097154..e6a8196761 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -42,7 +42,6 @@
#include "lp_context.h"
#include "lp_memory.h"
#include "lp_scene.h"
-#include "lp_scene_queue.h"
#include "lp_texture.h"
#include "lp_debug.h"
#include "lp_fence.h"
@@ -57,36 +56,31 @@
#include "draw/draw_vbuf.h"
-static void set_scene_state( struct lp_setup_context *, enum setup_state );
+static void set_scene_state( struct lp_setup_context *, enum setup_state,
+ const char *reason);
+static boolean try_update_scene_state( struct lp_setup_context *setup );
-struct lp_scene *
-lp_setup_get_current_scene(struct lp_setup_context *setup)
+static void
+lp_setup_get_empty_scene(struct lp_setup_context *setup)
{
- if (!setup->scene) {
- set_scene_state( setup, SETUP_EMPTY );
- }
- return setup->scene;
-}
+ assert(setup->scene == NULL);
+ setup->scene_idx++;
+ setup->scene_idx %= Elements(setup->scenes);
-/**
- * Check if the size of the current scene has exceeded the limit.
- * If so, flush/render it.
- */
-static void
-setup_check_scene_size_and_flush(struct lp_setup_context *setup)
-{
- if (setup->scene) {
- struct lp_scene *scene = lp_setup_get_current_scene(setup);
- unsigned size = lp_scene_get_size(scene);
+ setup->scene = setup->scenes[setup->scene_idx];
- if (size > LP_MAX_SCENE_SIZE) {
- /*printf("LLVMPIPE: scene size = %u, flushing.\n", size);*/
- set_scene_state( setup, SETUP_FLUSHED );
- /*assert(lp_scene_get_size(scene) == 0);*/
- }
+ if (setup->scene->fence) {
+ if (LP_DEBUG & DEBUG_SETUP)
+ debug_printf("%s: wait for scene %d\n",
+ __FUNCTION__, setup->scene->fence->id);
+
+ lp_fence_wait(setup->scene->fence);
}
+
+ lp_scene_begin_binning(setup->scene, &setup->fb);
+
}
@@ -96,7 +90,7 @@ first_triangle( struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4])
{
- set_scene_state( setup, SETUP_ACTIVE );
+ assert(setup->state == SETUP_ACTIVE);
lp_setup_choose_triangle( setup );
setup->triangle( setup, v0, v1, v2 );
}
@@ -106,7 +100,7 @@ first_line( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4])
{
- set_scene_state( setup, SETUP_ACTIVE );
+ assert(setup->state == SETUP_ACTIVE);
lp_setup_choose_line( setup );
setup->line( setup, v0, v1 );
}
@@ -115,12 +109,12 @@ static void
first_point( struct lp_setup_context *setup,
const float (*v0)[4])
{
- set_scene_state( setup, SETUP_ACTIVE );
+ assert(setup->state == SETUP_ACTIVE);
lp_setup_choose_point( setup );
setup->point( setup, v0 );
}
-static void reset_context( struct lp_setup_context *setup )
+static void lp_setup_reset( struct lp_setup_context *setup )
{
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
@@ -135,8 +129,7 @@ static void reset_context( struct lp_setup_context *setup )
/* Reset some state:
*/
- setup->clear.flags = 0;
- setup->clear.clearzs.clearzs_mask = 0;
+ memset(&setup->clear, 0, sizeof setup->clear);
/* Have an explicit "start-binning" call and get rid of this
* pointer twiddling?
@@ -151,14 +144,23 @@ static void reset_context( struct lp_setup_context *setup )
static void
lp_setup_rasterize_scene( struct lp_setup_context *setup )
{
- struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_scene *scene = setup->scene;
struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen);
+ lp_scene_end_binning(scene);
+
+ lp_fence_reference(&setup->last_fence, scene->fence);
+
+ if (setup->last_fence)
+ setup->last_fence->issued = TRUE;
+
pipe_mutex_lock(screen->rast_mutex);
- lp_scene_rasterize(scene, screen->rast);
+ lp_rast_queue_scene(screen->rast, scene);
+ lp_rast_finish(screen->rast);
pipe_mutex_unlock(screen->rast_mutex);
- reset_context( setup );
+ lp_scene_end_rasterization(setup->scene);
+ lp_setup_reset( setup );
LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
}
@@ -168,8 +170,30 @@ lp_setup_rasterize_scene( struct lp_setup_context *setup )
static void
begin_binning( struct lp_setup_context *setup )
{
- struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_scene *scene = setup->scene;
boolean need_zsload = FALSE;
+ boolean ok;
+ unsigned i, j;
+
+ assert(scene);
+ assert(scene->fence == NULL);
+
+ /* Always create a fence:
+ */
+ scene->fence = lp_fence_create(MAX2(1, setup->num_threads));
+
+ /* Initialize the bin flags and x/y coords:
+ */
+ for (i = 0; i < scene->tiles_x; i++) {
+ for (j = 0; j < scene->tiles_y; j++) {
+ scene->tile[i][j].x = i;
+ scene->tile[i][j].y = j;
+ }
+ }
+
+ ok = try_update_scene_state(setup);
+ assert(ok);
+
if (setup->fb.zsbuf &&
((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
util_format_is_depth_and_stencil(setup->fb.zsbuf->format))
@@ -181,10 +205,10 @@ begin_binning( struct lp_setup_context *setup )
if (setup->fb.nr_cbufs) {
if (setup->clear.flags & PIPE_CLEAR_COLOR) {
- lp_scene_bin_everywhere( scene,
- lp_rast_clear_color,
- setup->clear.color );
- scene->has_color_clear = TRUE;
+ ok = lp_scene_bin_everywhere( scene,
+ LP_RAST_OP_CLEAR_COLOR,
+ setup->clear.color );
+ assert(ok);
}
}
@@ -192,12 +216,27 @@ begin_binning( struct lp_setup_context *setup )
if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) {
if (!need_zsload)
scene->has_depthstencil_clear = TRUE;
- lp_scene_bin_everywhere( scene,
- lp_rast_clear_zstencil,
- lp_rast_arg_clearzs(&setup->clear.clearzs) );
+ ok = lp_scene_bin_everywhere( scene,
+ LP_RAST_OP_CLEAR_ZSTENCIL,
+ lp_rast_arg_clearzs(
+ setup->clear.zsvalue,
+ setup->clear.zsmask));
+ assert(ok);
}
}
+ if (setup->active_query) {
+ ok = lp_scene_bin_everywhere( scene,
+ LP_RAST_OP_BEGIN_QUERY,
+ lp_rast_arg_query(setup->active_query) );
+ assert(ok);
+ }
+
+
+ setup->clear.flags = 0;
+ setup->clear.zsmask = 0;
+ setup->clear.zsvalue = 0;
+
LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__);
}
@@ -213,51 +252,56 @@ execute_clears( struct lp_setup_context *setup )
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
begin_binning( setup );
- lp_setup_rasterize_scene( setup );
}
+const char *states[] = {
+ "FLUSHED",
+ "EMPTY ",
+ "CLEARED",
+ "ACTIVE "
+};
+
static void
set_scene_state( struct lp_setup_context *setup,
- enum setup_state new_state )
+ enum setup_state new_state,
+ const char *reason)
{
unsigned old_state = setup->state;
if (old_state == new_state)
return;
-
- LP_DBG(DEBUG_SETUP, "%s old %d new %d\n", __FUNCTION__, old_state, new_state);
-
- switch (new_state) {
- case SETUP_EMPTY:
- assert(old_state == SETUP_FLUSHED);
- assert(setup->scene == NULL);
+
+ if (LP_DEBUG & DEBUG_SCENE) {
+ debug_printf("%s old %s new %s%s%s\n",
+ __FUNCTION__,
+ states[old_state],
+ states[new_state],
+ (new_state == SETUP_FLUSHED) ? ": " : "",
+ (new_state == SETUP_FLUSHED) ? reason : "");
+
+ if (new_state == SETUP_FLUSHED && setup->scene)
+ lp_debug_draw_bins_by_cmd_length(setup->scene);
+ }
- /* wait for a free/empty scene
- */
- setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE);
- assert(lp_scene_is_empty(setup->scene));
- lp_scene_begin_binning(setup->scene,
- &setup->fb );
- break;
+ /* wait for a free/empty scene
+ */
+ if (old_state == SETUP_FLUSHED)
+ lp_setup_get_empty_scene(setup);
+ switch (new_state) {
case SETUP_CLEARED:
- assert(old_state == SETUP_EMPTY);
- assert(setup->scene != NULL);
break;
case SETUP_ACTIVE:
- assert(old_state == SETUP_EMPTY ||
- old_state == SETUP_CLEARED);
- assert(setup->scene != NULL);
begin_binning( setup );
break;
case SETUP_FLUSHED:
if (old_state == SETUP_CLEARED)
execute_clears( setup );
- else
- lp_setup_rasterize_scene( setup );
+
+ lp_setup_rasterize_scene( setup );
assert(setup->scene == NULL);
break;
@@ -278,21 +322,11 @@ lp_setup_flush( struct lp_setup_context *setup,
struct pipe_fence_handle **fence,
const char *reason)
{
- LP_DBG(DEBUG_SETUP, "%s %s\n", __FUNCTION__, reason);
-
- if (setup->scene) {
- if (fence) {
- /* if we're going to flush the setup/rasterization modules, emit
- * a fence.
- */
- *fence = lp_setup_fence( setup );
- }
+ set_scene_state( setup, SETUP_FLUSHED, reason );
- if (setup->scene->fence)
- setup->scene->fence->issued = TRUE;
+ if (fence) {
+ lp_fence_reference((struct lp_fence **)fence, setup->last_fence);
}
-
- set_scene_state( setup, SETUP_FLUSHED );
}
@@ -304,7 +338,7 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup,
/* Flush any old scene.
*/
- set_scene_state( setup, SETUP_FLUSHED );
+ set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ );
/*
* Ensure the old scene is not reused.
@@ -323,78 +357,41 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup,
}
-void
-lp_setup_clear( struct lp_setup_context *setup,
- const float *color,
- double depth,
- unsigned stencil,
- unsigned flags )
+static boolean
+lp_setup_try_clear( struct lp_setup_context *setup,
+ const float *color,
+ double depth,
+ unsigned stencil,
+ unsigned flags )
{
- struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ uint32_t zsmask = 0;
+ uint32_t zsvalue = 0;
+ union lp_rast_cmd_arg color_arg;
unsigned i;
- boolean full_zs_clear = TRUE;
- uint32_t mask = 0;
LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
-
if (flags & PIPE_CLEAR_COLOR) {
- for (i = 0; i < 4; ++i)
- setup->clear.color.clear_color[i] = float_to_ubyte(color[i]);
+ for (i = 0; i < 4; i++)
+ color_arg.clear_color[i] = float_to_ubyte(color[i]);
}
if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
- if (setup->fb.zsbuf &&
- ((flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
- util_format_is_depth_and_stencil(setup->fb.zsbuf->format))
- full_zs_clear = FALSE;
-
- if (full_zs_clear) {
- setup->clear.clearzs.clearzs_value =
- util_pack_z_stencil(setup->fb.zsbuf->format,
- depth,
- stencil);
- setup->clear.clearzs.clearzs_mask = 0xffffffff;
- }
- else {
- /* hmm */
- uint32_t tmpval;
- if (flags & PIPE_CLEAR_DEPTH) {
- tmpval = util_pack_z(setup->fb.zsbuf->format,
- depth);
- switch (setup->fb.zsbuf->format) {
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- mask = 0xffffff;
- break;
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- mask = 0xffffff00;
- break;
- default:
- assert(0);
- }
- }
- else {
- switch (setup->fb.zsbuf->format) {
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- mask = 0xff000000;
- tmpval = stencil << 24;
- break;
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- mask = 0xff;
- tmpval = stencil;
- break;
- default:
- assert(0);
- tmpval = 0;
- }
- }
- setup->clear.clearzs.clearzs_mask |= mask;
- setup->clear.clearzs.clearzs_value =
- (setup->clear.clearzs.clearzs_value & ~mask) | (tmpval & mask);
- }
+ unsigned zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
+ unsigned smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
+
+ zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format,
+ depth,
+ stencil);
+
+ zsmask = util_pack_uint_z_stencil(setup->fb.zsbuf->format,
+ zmask,
+ smask);
}
if (setup->state == SETUP_ACTIVE) {
+ struct lp_scene *scene = setup->scene;
+
/* Add the clear to existing scene. In the unusual case where
* both color and depth-stencil are being cleared when there's
* already been some rendering, we could discard the currently
@@ -402,24 +399,18 @@ lp_setup_clear( struct lp_setup_context *setup,
* a common usage.
*/
if (flags & PIPE_CLEAR_COLOR) {
- lp_scene_bin_everywhere( scene,
- lp_rast_clear_color,
- setup->clear.color );
- scene->has_color_clear = TRUE;
+ if (!lp_scene_bin_everywhere( scene,
+ LP_RAST_OP_CLEAR_COLOR,
+ color_arg ))
+ return FALSE;
}
if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
- if (full_zs_clear)
- scene->has_depthstencil_clear = TRUE;
- else
- setup->clear.clearzs.clearzs_mask = mask;
- lp_scene_bin_everywhere( scene,
- lp_rast_clear_zstencil,
- lp_rast_arg_clearzs(&setup->clear.clearzs) );
-
-
+ if (!lp_scene_bin_everywhere( scene,
+ LP_RAST_OP_CLEAR_ZSTENCIL,
+ lp_rast_arg_clearzs(zsvalue, zsmask) ))
+ return FALSE;
}
-
}
else {
/* Put ourselves into the 'pre-clear' state, specifically to try
@@ -427,42 +418,43 @@ lp_setup_clear( struct lp_setup_context *setup,
* buffers which the app or state-tracker might issue
* separately.
*/
- set_scene_state( setup, SETUP_CLEARED );
+ set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ );
setup->clear.flags |= flags;
+
+ if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
+ setup->clear.zsmask |= zsmask;
+ setup->clear.zsvalue =
+ (setup->clear.zsvalue & ~zsmask) | (zsvalue & zsmask);
+ }
+
+ if (flags & PIPE_CLEAR_COLOR) {
+ memcpy(setup->clear.color.clear_color,
+ &color_arg,
+ sizeof color_arg);
+ }
}
+
+ return TRUE;
}
-
-/**
- * Emit a fence.
- */
-struct pipe_fence_handle *
-lp_setup_fence( struct lp_setup_context *setup )
+void
+lp_setup_clear( struct lp_setup_context *setup,
+ const float *color,
+ double depth,
+ unsigned stencil,
+ unsigned flags )
{
- if (setup->scene == NULL)
- return NULL;
- else if (setup->num_threads == 0)
- return NULL;
- else
- {
- struct lp_scene *scene = lp_setup_get_current_scene(setup);
- const unsigned rank = setup->num_threads;
+ if (!lp_setup_try_clear( setup, color, depth, stencil, flags )) {
+ lp_setup_flush(setup, 0, NULL, __FUNCTION__);
- set_scene_state( setup, SETUP_ACTIVE );
-
- assert(scene->fence == NULL);
+ if (!lp_setup_try_clear( setup, color, depth, stencil, flags ))
+ assert(0);
+ }
+}
- /* The caller gets a reference, we keep a copy too, so need to
- * bump the refcount:
- */
- lp_fence_reference(&scene->fence, lp_fence_create(rank));
- LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank);
- return (struct pipe_fence_handle *) scene->fence;
- }
-}
void
@@ -725,58 +717,33 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup,
/**
* Called by vbuf code when we're about to draw something.
*/
-void
-lp_setup_update_state( struct lp_setup_context *setup )
+static boolean
+try_update_scene_state( struct lp_setup_context *setup )
{
- struct lp_scene *scene;
+ boolean new_scene = (setup->fs.stored == NULL);
+ struct lp_scene *scene = setup->scene;
- LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
-
- setup_check_scene_size_and_flush(setup);
-
- scene = lp_setup_get_current_scene(setup);
-
- assert(setup->fs.current.variant);
-
- /* Some of the 'draw' pipeline stages may have changed some driver state.
- * Make sure we've processed those state changes before anything else.
- *
- * XXX this is the only place where llvmpipe_context is used in the
- * setup code. This may get refactored/changed...
- */
- {
- struct llvmpipe_context *lp = llvmpipe_context(scene->pipe);
-
- /* Will probably need to move this somewhere else, just need
- * to know about vertex shader point size attribute.
- */
- setup->psize = lp->psize_slot;
-
- if (lp->dirty) {
- llvmpipe_update_derived(lp);
- }
- assert(lp->dirty == 0);
- }
+ assert(scene);
if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) {
uint8_t *stored;
unsigned i, j;
stored = lp_scene_alloc_aligned(scene, 4 * 16, 16);
+ if (!stored) {
+ assert(!new_scene);
+ return FALSE;
+ }
- if (stored) {
- /* smear each blend color component across 16 ubyte elements */
- for (i = 0; i < 4; ++i) {
- uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]);
- for (j = 0; j < 16; ++j)
- stored[i*16 + j] = c;
- }
-
- setup->blend_color.stored = stored;
-
- setup->fs.current.jit_context.blend_color = setup->blend_color.stored;
+ /* smear each blend color component across 16 ubyte elements */
+ for (i = 0; i < 4; ++i) {
+ uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]);
+ for (j = 0; j < 16; ++j)
+ stored[i*16 + j] = c;
}
+ setup->blend_color.stored = stored;
+ setup->fs.current.jit_context.blend_color = setup->blend_color.stored;
setup->dirty |= LP_SETUP_NEW_FS;
}
@@ -797,13 +764,16 @@ lp_setup_update_state( struct lp_setup_context *setup )
void *stored;
stored = lp_scene_alloc(scene, current_size);
- if(stored) {
- memcpy(stored,
- current_data,
- current_size);
- setup->constants.stored_size = current_size;
- setup->constants.stored_data = stored;
+ if (!stored) {
+ assert(!new_scene);
+ return FALSE;
}
+
+ memcpy(stored,
+ current_data,
+ current_size);
+ setup->constants.stored_size = current_size;
+ setup->constants.stored_data = stored;
}
}
else {
@@ -816,31 +786,42 @@ lp_setup_update_state( struct lp_setup_context *setup )
}
- if(setup->dirty & LP_SETUP_NEW_FS) {
- if(!setup->fs.stored ||
- memcmp(setup->fs.stored,
- &setup->fs.current,
- sizeof setup->fs.current) != 0) {
+ if (setup->dirty & LP_SETUP_NEW_FS) {
+ if (!setup->fs.stored ||
+ memcmp(setup->fs.stored,
+ &setup->fs.current,
+ sizeof setup->fs.current) != 0)
+ {
+ struct lp_rast_state *stored;
+ uint i;
+
/* The fs state that's been stored in the scene is different from
* the new, current state. So allocate a new lp_rast_state object
* and append it to the bin's setup data buffer.
*/
- uint i;
- struct lp_rast_state *stored =
- (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored);
- if(stored) {
- memcpy(stored,
- &setup->fs.current,
- sizeof setup->fs.current);
- setup->fs.stored = stored;
+ stored = (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored);
+ if (!stored) {
+ assert(!new_scene);
+ return FALSE;
}
+ memcpy(stored,
+ &setup->fs.current,
+ sizeof setup->fs.current);
+ setup->fs.stored = stored;
+
/* The scene now references the textures in the rasterization
* state record. Note that now.
*/
for (i = 0; i < Elements(setup->fs.current_tex); i++) {
- if (setup->fs.current_tex[i])
- lp_scene_add_resource_reference(scene, setup->fs.current_tex[i]);
+ if (setup->fs.current_tex[i]) {
+ if (!lp_scene_add_resource_reference(scene,
+ setup->fs.current_tex[i],
+ new_scene)) {
+ assert(!new_scene);
+ return FALSE;
+ }
+ }
}
}
}
@@ -856,6 +837,47 @@ lp_setup_update_state( struct lp_setup_context *setup )
setup->dirty = 0;
assert(setup->fs.stored);
+ return TRUE;
+}
+
+void
+lp_setup_update_state( struct lp_setup_context *setup,
+ boolean update_scene )
+{
+ /* Some of the 'draw' pipeline stages may have changed some driver state.
+ * Make sure we've processed those state changes before anything else.
+ *
+ * XXX this is the only place where llvmpipe_context is used in the
+ * setup code. This may get refactored/changed...
+ */
+ {
+ struct llvmpipe_context *lp = llvmpipe_context(setup->pipe);
+ if (lp->dirty) {
+ llvmpipe_update_derived(lp);
+ }
+
+ /* Will probably need to move this somewhere else, just need
+ * to know about vertex shader point size attribute.
+ */
+ setup->psize = lp->psize_slot;
+
+ assert(lp->dirty == 0);
+ }
+
+ if (update_scene)
+ set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ );
+
+ /* Only call into try_update_scene_state() if we already have a
+ * scene:
+ */
+ if (update_scene && setup->scene) {
+ assert(setup->state == SETUP_ACTIVE);
+ if (!try_update_scene_state(setup)) {
+ lp_setup_flush_and_restart(setup);
+ if (!try_update_scene_state(setup))
+ assert(0);
+ }
+ }
}
@@ -867,7 +889,7 @@ lp_setup_destroy( struct lp_setup_context *setup )
{
uint i;
- reset_context( setup );
+ lp_setup_reset( setup );
util_unreference_framebuffer_state(&setup->fb);
@@ -878,15 +900,15 @@ lp_setup_destroy( struct lp_setup_context *setup )
pipe_resource_reference(&setup->constants.current, NULL);
/* free the scenes in the 'empty' queue */
- while (1) {
- struct lp_scene *scene = lp_scene_dequeue(setup->empty_scenes, FALSE);
- if (!scene)
- break;
+ for (i = 0; i < Elements(setup->scenes); i++) {
+ struct lp_scene *scene = setup->scenes[i];
+
+ if (scene->fence)
+ lp_fence_wait(scene->fence);
+
lp_scene_destroy(scene);
}
- lp_scene_queue_destroy(setup->empty_scenes);
-
FREE( setup );
}
@@ -908,10 +930,11 @@ lp_setup_create( struct pipe_context *pipe,
return NULL;
lp_setup_init_vbuf(setup);
+
+ /* Used only in update_state():
+ */
+ setup->pipe = pipe;
- setup->empty_scenes = lp_scene_queue_create();
- if (!setup->empty_scenes)
- goto fail;
setup->num_threads = screen->num_threads;
setup->vbuf = draw_vbuf_stage(draw, &setup->base);
@@ -923,9 +946,7 @@ lp_setup_create( struct pipe_context *pipe,
/* create some empty scenes */
for (i = 0; i < MAX_SCENES; i++) {
- setup->scenes[i] = lp_scene_create( pipe, setup->empty_scenes );
-
- lp_scene_enqueue(setup->empty_scenes, setup->scenes[i]);
+ setup->scenes[i] = lp_scene_create( pipe );
}
setup->triangle = first_triangle;
@@ -940,9 +961,6 @@ fail:
if (setup->vbuf)
;
- if (setup->empty_scenes)
- lp_scene_queue_destroy(setup->empty_scenes);
-
FREE(setup);
return NULL;
}
@@ -955,22 +973,26 @@ void
lp_setup_begin_query(struct lp_setup_context *setup,
struct llvmpipe_query *pq)
{
- struct lp_scene * scene = lp_setup_get_current_scene(setup);
- union lp_rast_cmd_arg cmd_arg;
-
/* init the query to its beginning state */
- pq->done = FALSE;
- pq->tile_count = 0;
- pq->num_tiles = scene->tiles_x * scene->tiles_y;
- assert(pq->num_tiles > 0);
+ assert(setup->active_query == NULL);
+
+ if (setup->scene) {
+ if (!lp_scene_bin_everywhere(setup->scene,
+ LP_RAST_OP_BEGIN_QUERY,
+ lp_rast_arg_query(pq))) {
- memset(pq->count, 0, sizeof(pq->count)); /* reset all counters */
+ lp_setup_flush_and_restart(setup);
- set_scene_state( setup, SETUP_ACTIVE );
+ if (!lp_scene_bin_everywhere(setup->scene,
+ LP_RAST_OP_BEGIN_QUERY,
+ lp_rast_arg_query(pq))) {
+ assert(0);
+ return;
+ }
+ }
+ }
- cmd_arg.query_obj = pq;
- lp_scene_bin_everywhere(scene, lp_rast_begin_query, cmd_arg);
- pq->binned = TRUE;
+ setup->active_query = pq;
}
@@ -980,11 +1002,42 @@ lp_setup_begin_query(struct lp_setup_context *setup,
void
lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
{
- struct lp_scene * scene = lp_setup_get_current_scene(setup);
- union lp_rast_cmd_arg cmd_arg;
+ union lp_rast_cmd_arg dummy = { 0 };
+
+ assert(setup->active_query == pq);
+ setup->active_query = NULL;
- set_scene_state( setup, SETUP_ACTIVE );
+ /* Setup will automatically re-issue any query which carried over a
+ * scene boundary, and the rasterizer automatically "ends" queries
+ * which are active at the end of a scene, so there is no need to
+ * retry this command on failure.
+ */
+ if (setup->scene) {
+ /* pq->fence should be the fence of the *last* scene which
+ * contributed to the query result.
+ */
+ lp_fence_reference(&pq->fence, setup->scene->fence);
- cmd_arg.query_obj = pq;
- lp_scene_bin_everywhere(scene, lp_rast_end_query, cmd_arg);
+ if (!lp_scene_bin_everywhere(setup->scene,
+ LP_RAST_OP_END_QUERY,
+ dummy)) {
+ lp_setup_flush(setup, 0, NULL, __FUNCTION__);
+ }
+ }
+ else {
+ lp_fence_reference(&pq->fence, setup->last_fence);
+ }
}
+
+
+void
+lp_setup_flush_and_restart(struct lp_setup_context *setup)
+{
+ if (0) debug_printf("%s\n", __FUNCTION__);
+
+ assert(setup->state == SETUP_ACTIVE);
+ set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__);
+ lp_setup_update_state(setup, TRUE);
+}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index 821ebb1087..b94061b7d4 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -65,6 +65,7 @@ struct pipe_framebuffer_state;
struct lp_fragment_shader_variant;
struct lp_jit_context;
struct llvmpipe_query;
+struct pipe_fence_handle;
struct lp_setup_context *
@@ -78,8 +79,6 @@ lp_setup_clear(struct lp_setup_context *setup,
unsigned clear_stencil,
unsigned flags);
-struct pipe_fence_handle *
-lp_setup_fence( struct lp_setup_context *setup );
void
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c
index 95e3e8fffe..8dc2688ddb 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_coef.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.c
@@ -187,11 +187,32 @@ static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
*/
void lp_setup_tri_coef( struct lp_setup_context *setup,
struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info)
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean frontfacing)
{
unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
unsigned slot;
unsigned i;
+ struct lp_tri_info info;
+ float dx01 = v0[0][0] - v1[0][0];
+ float dy01 = v0[0][1] - v1[0][1];
+ float dx20 = v2[0][0] - v0[0][0];
+ float dy20 = v2[0][1] - v0[0][1];
+ float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
+
+ info.v0 = v0;
+ info.v1 = v1;
+ info.v2 = v2;
+ info.frontfacing = frontfacing;
+ info.x0_center = v0[0][0] - setup->pixel_offset;
+ info.y0_center = v0[0][1] - setup->pixel_offset;
+ info.dx01_ooa = dx01 * oneoverarea;
+ info.dx20_ooa = dx20 * oneoverarea;
+ info.dy01_ooa = dy01 * oneoverarea;
+ info.dy20_ooa = dy20 * oneoverarea;
+
/* setup interpolation for all the remaining attributes:
*/
@@ -204,25 +225,25 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
if (setup->flatshade_first) {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(inputs, slot+1, info->v0[vert_attr][i], i);
+ constant_coef(inputs, slot+1, info.v0[vert_attr][i], i);
}
else {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(inputs, slot+1, info->v2[vert_attr][i], i);
+ constant_coef(inputs, slot+1, info.v2[vert_attr][i], i);
}
break;
case LP_INTERP_LINEAR:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- linear_coef(inputs, info, slot+1, vert_attr, i);
+ linear_coef(inputs, &info, slot+1, vert_attr, i);
break;
case LP_INTERP_PERSPECTIVE:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- perspective_coef(inputs, info, slot+1, vert_attr, i);
+ perspective_coef(inputs, &info, slot+1, vert_attr, i);
fragcoord_usage_mask |= TGSI_WRITEMASK_W;
break;
@@ -236,7 +257,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
break;
case LP_INTERP_FACING:
- setup_facing_coef(inputs, slot+1, info->frontfacing, usage_mask);
+ setup_facing_coef(inputs, slot+1, info.frontfacing, usage_mask);
break;
default:
@@ -246,7 +267,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
/* The internal position input is in slot zero:
*/
- setup_fragcoord_coef(inputs, info, 0, fragcoord_usage_mask);
+ setup_fragcoord_coef(inputs, &info, 0, fragcoord_usage_mask);
}
#else
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h
index d68b39c603..87a3255ccc 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_coef.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.h
@@ -56,6 +56,9 @@ struct lp_tri_info {
void lp_setup_tri_coef( struct lp_setup_context *setup,
struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info);
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean frontfacing);
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
index 73fb70599c..3742fd672b 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
@@ -151,13 +151,34 @@ static void perspective_coef( struct lp_rast_shader_inputs *inputs,
*/
void lp_setup_tri_coef( struct lp_setup_context *setup,
struct lp_rast_shader_inputs *inputs,
- const struct lp_tri_info *info)
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean frontfacing)
{
unsigned slot;
+ struct lp_tri_info info;
+ float dx01 = v0[0][0] - v1[0][0];
+ float dy01 = v0[0][1] - v1[0][1];
+ float dx20 = v2[0][0] - v0[0][0];
+ float dy20 = v2[0][1] - v0[0][1];
+ float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
+
+ info.v0 = v0;
+ info.v1 = v1;
+ info.v2 = v2;
+ info.frontfacing = frontfacing;
+ info.x0_center = v0[0][0] - setup->pixel_offset;
+ info.y0_center = v0[0][1] - setup->pixel_offset;
+ info.dx01_ooa = dx01 * oneoverarea;
+ info.dx20_ooa = dx20 * oneoverarea;
+ info.dy01_ooa = dy01 * oneoverarea;
+ info.dy20_ooa = dy20 * oneoverarea;
+
/* The internal position input is in slot zero:
*/
- linear_coef(inputs, info, 0, 0);
+ linear_coef(inputs, &info, 0, 0);
/* setup interpolation for all the remaining attributes:
*/
@@ -167,19 +188,19 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
switch (setup->fs.input[slot].interp) {
case LP_INTERP_CONSTANT:
if (setup->flatshade_first) {
- constant_coef4(inputs, info, slot+1, info->v0[vert_attr]);
+ constant_coef4(inputs, &info, slot+1, info.v0[vert_attr]);
}
else {
- constant_coef4(inputs, info, slot+1, info->v2[vert_attr]);
+ constant_coef4(inputs, &info, slot+1, info.v2[vert_attr]);
}
break;
case LP_INTERP_LINEAR:
- linear_coef(inputs, info, slot+1, vert_attr);
+ linear_coef(inputs, &info, slot+1, vert_attr);
break;
case LP_INTERP_PERSPECTIVE:
- perspective_coef(inputs, info, slot+1, vert_attr);
+ perspective_coef(inputs, &info, slot+1, vert_attr);
break;
case LP_INTERP_POSITION:
@@ -190,7 +211,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
break;
case LP_INTERP_FACING:
- setup_facing_coef(inputs, info, slot+1);
+ setup_facing_coef(inputs, &info, slot+1);
break;
default:
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 877a492c6d..80b356476a 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -49,8 +49,6 @@
#define LP_SETUP_NEW_SCISSOR 0x08
-struct lp_scene_queue;
-
/** Max number of scenes */
#define MAX_SCENES 2
@@ -70,6 +68,7 @@ struct lp_setup_context
{
struct vbuf_render base;
+ struct pipe_context *pipe;
struct vertex_info *vertex_info;
uint prim;
uint vertex_size;
@@ -83,9 +82,12 @@ struct lp_setup_context
*/
struct draw_stage *vbuf;
unsigned num_threads;
+ unsigned scene_idx;
struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */
struct lp_scene *scene; /**< current scene being built */
- struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */
+
+ struct lp_fence *last_fence;
+ struct llvmpipe_query *active_query;
boolean flatshade_first;
boolean ccw_is_frontface;
@@ -105,12 +107,12 @@ struct lp_setup_context
struct {
unsigned flags;
union lp_rast_cmd_arg color; /**< lp_rast_clear_color() cmd */
- struct lp_rast_clearzs clearzs; /**< lp_rast_clear_zstencil() cmd */
+ unsigned zsmask;
+ unsigned zsvalue; /**< lp_rast_clear_zstencil() cmd */
} clear;
enum setup_state {
SETUP_FLUSHED, /**< scene is null */
- SETUP_EMPTY, /**< scene exists but has only state changes */
SETUP_CLEARED, /**< scene exists but has only clears */
SETUP_ACTIVE /**< scene exists and has at least one draw/query */
} state;
@@ -156,14 +158,15 @@ void lp_setup_choose_triangle( struct lp_setup_context *setup );
void lp_setup_choose_line( struct lp_setup_context *setup );
void lp_setup_choose_point( struct lp_setup_context *setup );
-struct lp_scene *lp_setup_get_current_scene(struct lp_setup_context *setup);
-
void lp_setup_init_vbuf(struct lp_setup_context *setup);
-void lp_setup_update_state( struct lp_setup_context *setup );
+void lp_setup_update_state( struct lp_setup_context *setup,
+ boolean update_scene);
void lp_setup_destroy( struct lp_setup_context *setup );
+void lp_setup_flush_and_restart(struct lp_setup_context *setup);
+
void
lp_setup_print_triangle(struct lp_setup_context *setup,
const float (*v0)[4],
@@ -182,11 +185,12 @@ lp_setup_alloc_triangle(struct lp_scene *scene,
unsigned nr_planes,
unsigned *tri_size);
-void
+boolean
lp_setup_bin_triangle( struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
const struct u_rect *bbox,
int nr_planes );
-#endif
+void lp_setup_flush_and_restart(struct lp_setup_context *setup);
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index ce2da55cf4..9f090d1992 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -263,12 +263,12 @@ static INLINE float fracf(float f)
-static void
-lp_setup_line( struct lp_setup_context *setup,
+static boolean
+try_setup_line( struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4])
{
- struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_scene *scene = setup->scene;
struct lp_rast_triangle *line;
struct lp_line_info info;
float width = MAX2(1.0, setup->line_width);
@@ -536,13 +536,13 @@ lp_setup_line( struct lp_setup_context *setup,
bbox.y1 < bbox.y0) {
if (0) debug_printf("empty bounding box\n");
LP_COUNT(nr_culled_tris);
- return;
+ return TRUE;
}
if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
if (0) debug_printf("offscreen\n");
LP_COUNT(nr_culled_tris);
- return;
+ return TRUE;
}
u_rect_find_intersection(&setup->draw_region, &bbox);
@@ -552,7 +552,7 @@ lp_setup_line( struct lp_setup_context *setup,
nr_planes,
&tri_bytes);
if (!line)
- return;
+ return FALSE;
#ifdef DEBUG
line->v[0][0] = v1[0][0];
@@ -585,6 +585,8 @@ lp_setup_line( struct lp_setup_context *setup,
line->inputs.facing = 1.0F;
line->inputs.state = setup->fs.stored;
+ line->inputs.disable = FALSE;
+ line->inputs.opaque = FALSE;
for (i = 0; i < 4; i++) {
struct lp_rast_plane *plane = &line->plane[i];
@@ -687,9 +689,23 @@ lp_setup_line( struct lp_setup_context *setup,
line->plane[7].eo = 0;
}
- lp_setup_bin_triangle(setup, line, &bbox, nr_planes);
+ return lp_setup_bin_triangle(setup, line, &bbox, nr_planes);
}
-
+
+
+static void lp_setup_line( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4] )
+{
+ if (!try_setup_line( setup, v0, v1 ))
+ {
+ lp_setup_flush_and_restart(setup);
+
+ if (!try_setup_line( setup, v0, v1 ))
+ assert(0);
+ }
+}
+
void lp_setup_choose_line( struct lp_setup_context *setup )
{
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index 6ae318d328..5538987151 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -210,8 +210,9 @@ subpixel_snap(float a)
}
-static void lp_setup_point( struct lp_setup_context *setup,
- const float (*v0)[4] )
+static boolean
+try_setup_point( struct lp_setup_context *setup,
+ const float (*v0)[4] )
{
/* x/y positions in fixed point */
const int sizeAttr = setup->psize;
@@ -228,7 +229,7 @@ static void lp_setup_point( struct lp_setup_context *setup,
const int x0 = subpixel_snap(v0[0][0] - setup->pixel_offset) - fixed_width/2;
const int y0 = subpixel_snap(v0[0][1] - setup->pixel_offset) - fixed_width/2;
- struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_scene *scene = setup->scene;
struct lp_rast_triangle *point;
unsigned bytes;
struct u_rect bbox;
@@ -259,7 +260,7 @@ static void lp_setup_point( struct lp_setup_context *setup,
if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
if (0) debug_printf("offscreen\n");
LP_COUNT(nr_culled_tris);
- return;
+ return TRUE;
}
u_rect_find_intersection(&setup->draw_region, &bbox);
@@ -269,7 +270,7 @@ static void lp_setup_point( struct lp_setup_context *setup,
nr_planes,
&bytes);
if (!point)
- return;
+ return FALSE;
#ifdef DEBUG
point->v[0][0] = v0[0][0];
@@ -288,6 +289,8 @@ static void lp_setup_point( struct lp_setup_context *setup,
point->inputs.facing = 1.0F;
point->inputs.state = setup->fs.stored;
+ point->inputs.disable = FALSE;
+ point->inputs.opaque = FALSE;
{
point->plane[0].dcdx = -1;
@@ -315,7 +318,20 @@ static void lp_setup_point( struct lp_setup_context *setup,
point->plane[3].eo = 0;
}
- lp_setup_bin_triangle(setup, point, &bbox, nr_planes);
+ return lp_setup_bin_triangle(setup, point, &bbox, nr_planes);
+}
+
+
+static void lp_setup_point( struct lp_setup_context *setup,
+ const float (*v0)[4] )
+{
+ if (!try_setup_point( setup, v0 ))
+ {
+ lp_setup_flush_and_restart(setup);
+
+ if (!try_setup_point( setup, v0 ))
+ assert(0);
+ }
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 0180d95090..5090f82ab5 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -160,44 +160,79 @@ lp_setup_print_triangle(struct lp_setup_context *setup,
}
-lp_rast_cmd lp_rast_tri_tab[9] = {
- NULL, /* should be impossible */
- lp_rast_triangle_1,
- lp_rast_triangle_2,
- lp_rast_triangle_3,
- lp_rast_triangle_4,
- lp_rast_triangle_5,
- lp_rast_triangle_6,
- lp_rast_triangle_7,
- lp_rast_triangle_8
+static unsigned
+lp_rast_tri_tab[9] = {
+ 0, /* should be impossible */
+ LP_RAST_OP_TRIANGLE_1,
+ LP_RAST_OP_TRIANGLE_2,
+ LP_RAST_OP_TRIANGLE_3,
+ LP_RAST_OP_TRIANGLE_4,
+ LP_RAST_OP_TRIANGLE_5,
+ LP_RAST_OP_TRIANGLE_6,
+ LP_RAST_OP_TRIANGLE_7,
+ LP_RAST_OP_TRIANGLE_8
};
+
+
+/**
+ * The primitive covers the whole tile- shade whole tile.
+ *
+ * \param tx, ty the tile position in tiles, not pixels
+ */
+static boolean
+lp_setup_whole_tile(struct lp_setup_context *setup,
+ const struct lp_rast_shader_inputs *inputs,
+ int tx, int ty)
+{
+ struct lp_scene *scene = setup->scene;
+
+ LP_COUNT(nr_fully_covered_64);
+
+ /* if variant is opaque and scissor doesn't affect the tile */
+ if (inputs->opaque) {
+ if (!scene->fb.zsbuf) {
+ /*
+ * All previous rendering will be overwritten so reset the bin.
+ */
+ lp_scene_bin_reset( scene, tx, ty );
+ }
+
+ LP_COUNT(nr_shade_opaque_64);
+ return lp_scene_bin_command( scene, tx, ty,
+ LP_RAST_OP_SHADE_TILE_OPAQUE,
+ lp_rast_arg_inputs(inputs) );
+ } else {
+ LP_COUNT(nr_shade_64);
+ return lp_scene_bin_command( scene, tx, ty,
+ LP_RAST_OP_SHADE_TILE,
+ lp_rast_arg_inputs(inputs) );
+ }
+}
+
+
/**
* Do basic setup for triangle rasterization and determine which
* framebuffer tiles are touched. Put the triangle in the scene's
* bins for the tiles which we overlap.
*/
-static void
+static boolean
do_triangle_ccw(struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4],
boolean frontfacing )
{
- struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_scene *scene = setup->scene;
struct lp_rast_triangle *tri;
int x[3];
int y[3];
- float dy01, dy20;
- float dx01, dx20;
- float oneoverarea;
- struct lp_tri_info info;
int area;
struct u_rect bbox;
unsigned tri_bytes;
int i;
int nr_planes = 3;
-
+
if (0)
lp_setup_print_triangle(setup, v0, v1, v2);
@@ -241,13 +276,13 @@ do_triangle_ccw(struct lp_setup_context *setup,
bbox.y1 < bbox.y0) {
if (0) debug_printf("empty bounding box\n");
LP_COUNT(nr_culled_tris);
- return;
+ return TRUE;
}
if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
if (0) debug_printf("offscreen\n");
LP_COUNT(nr_culled_tris);
- return;
+ return TRUE;
}
u_rect_find_intersection(&setup->draw_region, &bbox);
@@ -257,7 +292,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
nr_planes,
&tri_bytes);
if (!tri)
- return;
+ return FALSE;
#ifdef DEBUG
tri->v[0][0] = v0[0][0];
@@ -288,37 +323,18 @@ do_triangle_ccw(struct lp_setup_context *setup,
if (area <= 0) {
lp_scene_putback_data( scene, tri_bytes );
LP_COUNT(nr_culled_tris);
- return;
+ return TRUE;
}
-
- /*
- */
- dx01 = v0[0][0] - v1[0][0];
- dy01 = v0[0][1] - v1[0][1];
- dx20 = v2[0][0] - v0[0][0];
- dy20 = v2[0][1] - v0[0][1];
- oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
-
- info.v0 = v0;
- info.v1 = v1;
- info.v2 = v2;
- info.frontfacing = frontfacing;
- info.x0_center = v0[0][0] - setup->pixel_offset;
- info.y0_center = v0[0][1] - setup->pixel_offset;
- info.dx01_ooa = dx01 * oneoverarea;
- info.dx20_ooa = dx20 * oneoverarea;
- info.dy01_ooa = dy01 * oneoverarea;
- info.dy20_ooa = dy20 * oneoverarea;
-
/* Setup parameter interpolants:
*/
- lp_setup_tri_coef( setup, &tri->inputs, &info );
+ lp_setup_tri_coef( setup, &tri->inputs, v0, v1, v2, frontfacing );
tri->inputs.facing = frontfacing ? 1.0F : -1.0F;
+ tri->inputs.disable = FALSE;
+ tri->inputs.opaque = setup->fs.current.variant->opaque;
tri->inputs.state = setup->fs.stored;
-
for (i = 0; i < 3; i++) {
struct lp_rast_plane *plane = &tri->plane[i];
@@ -420,70 +436,98 @@ do_triangle_ccw(struct lp_setup_context *setup,
tri->plane[6].eo = 0;
}
- lp_setup_bin_triangle( setup, tri, &bbox, nr_planes );
+ return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes );
+}
+
+/*
+ * Round the input down to the nearest power of two (result <= input).
+ *
+ * Undefined if no bit set exists, so code should check against 0 first.
+ */
+static INLINE uint32_t
+floor_pot(uint32_t n)
+{
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
+ if (n == 0)
+ return 0;
+
+ __asm__("bsr %1,%0"
+ : "=r" (n)
+ : "rm" (n));
+ return 1 << n;
+#else
+ n |= (n >> 1);
+ n |= (n >> 2);
+ n |= (n >> 4);
+ n |= (n >> 8);
+ n |= (n >> 16);
+ return n - (n >> 1);
+#endif
}
-void
+boolean
lp_setup_bin_triangle( struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
const struct u_rect *bbox,
int nr_planes )
{
struct lp_scene *scene = setup->scene;
- struct lp_fragment_shader_variant *variant = setup->fs.current.variant;
- int ix0, ix1, iy0, iy1;
int i;
- /*
- * All fields of 'tri' are now set. The remaining code here is
- * concerned with binning.
+ /* What is the largest power-of-two boundary this triangle crosses:
*/
+ int dx = floor_pot((bbox->x0 ^ bbox->x1) |
+ (bbox->y0 ^ bbox->y1));
- /* Convert to tile coordinates, and inclusive ranges:
+ /* The largest dimension of the rasterized area of the triangle
+ * (aligned to a 4x4 grid), rounded down to the nearest power of two:
*/
+ int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) |
+ (bbox->y1 - (bbox->y0 & ~3)));
+
if (nr_planes == 3) {
- int ix0 = bbox->x0 / 16;
- int iy0 = bbox->y0 / 16;
- int ix1 = bbox->x1 / 16;
- int iy1 = bbox->y1 / 16;
-
- if (iy0 == iy1 && ix0 == ix1)
+ if (sz < 4 && dx < 64)
{
+ /* Triangle is contained in a single 4x4 stamp:
+ */
+ int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8);
+
+ return lp_scene_bin_command( scene,
+ bbox->x0/64, bbox->y0/64,
+ LP_RAST_OP_TRIANGLE_3_4,
+ lp_rast_arg_triangle(tri, mask) );
+ }
+
+ if (sz < 16 && dx < 64)
+ {
+ int mask = (bbox->x0 & 63 & ~3) | ((bbox->y0 & 63 & ~3) << 8);
/* Triangle is contained in a single 16x16 block:
*/
- int mask = (ix0 & 3) | ((iy0 & 3) << 4);
-
- lp_scene_bin_command( scene, ix0/4, iy0/4,
- lp_rast_triangle_3_16,
- lp_rast_arg_triangle(tri, mask) );
- return;
+ return lp_scene_bin_command( scene,
+ bbox->x0/64, bbox->y0/64,
+ LP_RAST_OP_TRIANGLE_3_16,
+ lp_rast_arg_triangle(tri, mask) );
}
}
- ix0 = bbox->x0 / TILE_SIZE;
- iy0 = bbox->y0 / TILE_SIZE;
- ix1 = bbox->x1 / TILE_SIZE;
- iy1 = bbox->y1 / TILE_SIZE;
-
- /*
- * Clamp to framebuffer size
- */
- assert(ix0 == MAX2(ix0, 0));
- assert(iy0 == MAX2(iy0, 0));
- assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
- assert(iy1 == MIN2(iy1, scene->tiles_y - 1));
/* Determine which tile(s) intersect the triangle's bounding box
*/
- if (iy0 == iy1 && ix0 == ix1)
+ if (dx < TILE_SIZE)
{
+ int ix0 = bbox->x0 / TILE_SIZE;
+ int iy0 = bbox->y0 / TILE_SIZE;
+
+ assert(iy0 == bbox->y1 / TILE_SIZE &&
+ ix0 == bbox->x1 / TILE_SIZE);
+
/* Triangle is contained in a single tile:
*/
- lp_scene_bin_command( scene, ix0, iy0,
- lp_rast_tri_tab[nr_planes],
- lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
+ return lp_scene_bin_command( scene, ix0, iy0,
+ lp_rast_tri_tab[nr_planes],
+ lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
}
else
{
@@ -493,6 +537,11 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
int xstep[7];
int ystep[7];
int x, y;
+
+ int ix0 = bbox->x0 / TILE_SIZE;
+ int iy0 = bbox->y0 / TILE_SIZE;
+ int ix1 = bbox->x1 / TILE_SIZE;
+ int iy1 = bbox->y1 / TILE_SIZE;
for (i = 0; i < nr_planes; i++) {
c[i] = (tri->plane[i].c +
@@ -544,9 +593,10 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
*/
int count = util_bitcount(partial);
in = TRUE;
- lp_scene_bin_command( scene, x, y,
- lp_rast_tri_tab[count],
- lp_rast_arg_triangle(tri, partial) );
+ if (!lp_scene_bin_command( scene, x, y,
+ lp_rast_tri_tab[count],
+ lp_rast_arg_triangle(tri, partial) ))
+ goto fail;
LP_COUNT(nr_partially_covered_64);
}
@@ -554,13 +604,8 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
/* triangle covers the whole tile- shade whole tile */
LP_COUNT(nr_fully_covered_64);
in = TRUE;
- if (variant->opaque &&
- !setup->fb.zsbuf) {
- lp_scene_bin_reset( scene, x, y );
- }
- lp_scene_bin_command( scene, x, y,
- lp_rast_shade_tile,
- lp_rast_arg_inputs(&tri->inputs) );
+ if (!lp_setup_whole_tile(setup, &tri->inputs, x, y))
+ goto fail;
}
/* Iterate cx values across the region:
@@ -575,6 +620,16 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
c[i] += ystep[i];
}
}
+
+ return TRUE;
+
+fail:
+ /* Need to disable any partially binned triangle. This is easier
+ * than trying to locate all the triangle, shade-tile, etc,
+ * commands which may have been binned.
+ */
+ tri->inputs.disable = TRUE;
+ return FALSE;
}
@@ -586,7 +641,13 @@ static void triangle_cw( struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4] )
{
- do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface );
+ if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ))
+ {
+ lp_setup_flush_and_restart(setup);
+
+ if (!do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ))
+ assert(0);
+ }
}
@@ -598,7 +659,12 @@ static void triangle_ccw( struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4] )
{
- do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
+ if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ))
+ {
+ lp_setup_flush_and_restart(setup);
+ if (!do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ))
+ assert(0);
+ }
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
index 51948f5bf2..6308561f24 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -64,7 +64,7 @@ lp_setup_get_vertex_info(struct vbuf_render *vbr)
/* Vertex size/info depends on the latest state.
* The draw module may have issued additional state-change commands.
*/
- lp_setup_update_state(setup);
+ lp_setup_update_state(setup, FALSE);
return setup->vertex_info;
}
@@ -141,7 +141,7 @@ lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr)
const boolean flatshade_first = setup->flatshade_first;
unsigned i;
- lp_setup_update_state(setup);
+ lp_setup_update_state(setup, TRUE);
switch (setup->prim) {
case PIPE_PRIM_POINTS:
@@ -338,7 +338,7 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
const boolean flatshade_first = setup->flatshade_first;
unsigned i;
- lp_setup_update_state(setup);
+ lp_setup_update_state(setup, TRUE);
switch (setup->prim) {
case PIPE_PRIM_POINTS:
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 33c1a49efe..8101e2d843 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -186,6 +186,7 @@ generate_quad_mask(LLVMBuilderRef builder,
LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef bits[4];
LLVMValueRef mask;
+ int shift;
/*
* XXX: We'll need a different path for 16 x u8
@@ -197,10 +198,28 @@ generate_quad_mask(LLVMBuilderRef builder,
/*
* mask_input >>= (quad * 4)
*/
+
+ switch (quad) {
+ case 0:
+ shift = 0;
+ break;
+ case 1:
+ shift = 2;
+ break;
+ case 2:
+ shift = 8;
+ break;
+ case 3:
+ shift = 10;
+ break;
+ default:
+ assert(0);
+ shift = 0;
+ }
mask_input = LLVMBuildLShr(builder,
mask_input,
- LLVMConstInt(i32t, quad * 4, 0),
+ LLVMConstInt(i32t, shift, 0),
"");
/*
@@ -211,9 +230,9 @@ generate_quad_mask(LLVMBuilderRef builder,
bits[0] = LLVMConstInt(i32t, 1 << 0, 0);
bits[1] = LLVMConstInt(i32t, 1 << 1, 0);
- bits[2] = LLVMConstInt(i32t, 1 << 2, 0);
- bits[3] = LLVMConstInt(i32t, 1 << 3, 0);
-
+ bits[2] = LLVMConstInt(i32t, 1 << 4, 0);
+ bits[3] = LLVMConstInt(i32t, 1 << 5, 0);
+
mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), "");
/*
@@ -332,14 +351,13 @@ generate_fs(struct llvmpipe_context *lp,
lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
/* Alpha test */
- /* XXX: should the alpha reference value be passed separately? */
/* XXX: should only test the final assignment to alpha */
- if(cbuf == 0 && chan == 3) {
+ if (cbuf == 0 && chan == 3 && key->alpha.enabled) {
LLVMValueRef alpha = out;
LLVMValueRef alpha_ref_value;
alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
- lp_build_alpha_test(builder, &key->alpha, type,
+ lp_build_alpha_test(builder, key->alpha.func, type,
&mask, alpha, alpha_ref_value);
}
@@ -728,6 +746,9 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
debug_printf("fs variant %p:\n", (void *) key);
+ for (i = 0; i < key->nr_cbufs; ++i) {
+ debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
+ }
if (key->depth.enabled) {
debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format));
debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE));
@@ -747,7 +768,6 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
if (key->alpha.enabled) {
debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE));
- debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
}
if (key->blend.logicop_enable) {
@@ -791,6 +811,16 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
}
+void
+lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant)
+{
+ debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n",
+ variant->shader->no, variant->no);
+ tgsi_dump(variant->shader->base.tokens, 0);
+ dump_fs_variant_key(&variant->key);
+ debug_printf("variant->opaque = %u\n", variant->opaque);
+ debug_printf("\n");
+}
static struct lp_fragment_shader_variant *
generate_variant(struct llvmpipe_context *lp,
@@ -798,6 +828,7 @@ generate_variant(struct llvmpipe_context *lp,
const struct lp_fragment_shader_variant_key *key)
{
struct lp_fragment_shader_variant *variant;
+ boolean fullcolormask;
variant = CALLOC_STRUCT(lp_fragment_shader_variant);
if(!variant)
@@ -810,27 +841,43 @@ generate_variant(struct llvmpipe_context *lp,
memcpy(&variant->key, key, shader->variant_key_size);
- if (gallivm_debug & GALLIVM_DEBUG_IR) {
- debug_printf("llvmpipe: Creating fragment shader #%u variant #%u:\n",
- shader->no, variant->no);
- tgsi_dump(shader->base.tokens, 0);
- dump_fs_variant_key(key);
+ /*
+ * Determine whether we are touching all channels in the color buffer.
+ */
+ fullcolormask = FALSE;
+ if (key->nr_cbufs == 1) {
+ const struct util_format_description *format_desc;
+ format_desc = util_format_description(key->cbuf_format[0]);
+ if ((~key->blend.rt[0].colormask &
+ util_format_colormask(format_desc)) == 0) {
+ fullcolormask = TRUE;
+ }
}
- generate_fragment(lp, shader, variant, RAST_WHOLE);
- generate_fragment(lp, shader, variant, RAST_EDGE_TEST);
-
- /* TODO: most of these can be relaxed, in particular the colormask */
variant->opaque =
!key->blend.logicop_enable &&
!key->blend.rt[0].blend_enable &&
- key->blend.rt[0].colormask == 0xf &&
+ fullcolormask &&
!key->stencil[0].enabled &&
!key->alpha.enabled &&
!key->depth.enabled &&
!shader->info.uses_kill
? TRUE : FALSE;
+
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ lp_debug_fs_variant(variant);
+ }
+
+ generate_fragment(lp, shader, variant, RAST_EDGE_TEST);
+
+ if (variant->opaque) {
+ /* Specialized shader, which doesn't need to read the color buffer. */
+ generate_fragment(lp, shader, variant, RAST_WHOLE);
+ } else {
+ variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
+ }
+
return variant;
}
@@ -1056,25 +1103,22 @@ make_variant_key(struct llvmpipe_context *lp,
key->nr_cbufs = lp->framebuffer.nr_cbufs;
for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
+ enum pipe_format format = lp->framebuffer.cbufs[i]->format;
struct pipe_rt_blend_state *blend_rt = &key->blend.rt[i];
const struct util_format_description *format_desc;
- unsigned chan;
- format_desc = util_format_description(lp->framebuffer.cbufs[i]->format);
+ key->cbuf_format[i] = format;
+
+ format_desc = util_format_description(format);
assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
blend_rt->colormask = lp->blend->rt[i].colormask;
- /* mask out color channels not present in the color buffer.
- * Should be simple to incorporate per-cbuf writemasks:
+ /*
+ * Mask out color channels not present in the color buffer.
*/
- for(chan = 0; chan < 4; ++chan) {
- enum util_format_swizzle swizzle = format_desc->swizzle[chan];
-
- if(swizzle > UTIL_FORMAT_SWIZZLE_W)
- blend_rt->colormask &= ~(1 << chan);
- }
+ blend_rt->colormask &= util_format_colormask(format_desc);
/*
* Our swizzled render tiles always have an alpha channel, but the linear
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 33c480010d..2914e7d7ef 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -49,14 +49,21 @@ struct lp_fragment_shader_variant_key
{
struct pipe_depth_state depth;
struct pipe_stencil_state stencil[2];
- struct pipe_alpha_state alpha;
struct pipe_blend_state blend;
- enum pipe_format zsbuf_format;
+
+ struct {
+ unsigned enabled:1;
+ unsigned func:3;
+ } alpha;
+
unsigned nr_cbufs:8;
unsigned nr_samplers:8; /* actually derivable from just the shader */
unsigned flatshade:1;
unsigned occlusion_count:1;
+ enum pipe_format zsbuf_format;
+ enum pipe_format cbuf_format[PIPE_MAX_COLOR_BUFS];
+
struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
};
@@ -101,4 +108,8 @@ struct lp_fragment_shader
};
+void
+lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant);
+
+
#endif /* LP_STATE_FS_H_ */
diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c
index 63ddc669c2..164242eda6 100644
--- a/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -68,16 +68,16 @@ lp_resource_copy(struct pipe_context *pipe,
0, /* flush_flags */
FALSE, /* read_only */
TRUE, /* cpu_access */
- FALSE,
- "blit dst"); /* do_not_block */
+ FALSE, /* do_not_block */
+ "blit dest");
llvmpipe_flush_resource(pipe,
src, subsrc.face, subsrc.level,
0, /* flush_flags */
TRUE, /* read_only */
TRUE, /* cpu_access */
- FALSE,
- "blit src"); /* do_not_block */
+ FALSE, /* do_not_block */
+ "blit src");
/*
printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n",
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 5832ea2744..a4b9f2590a 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -585,7 +585,7 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
read_only,
TRUE, /* cpu_access */
do_not_block,
- "transfer dest")) {
+ __FUNCTION__)) {
/*
* It would have blocked, but state tracker requested no to.
*/
diff --git a/src/gallium/drivers/llvmpipe/sse_mathfun.h b/src/gallium/drivers/llvmpipe/sse_mathfun.h
index 8ac2064b7b..0077f34b5c 100644
--- a/src/gallium/drivers/llvmpipe/sse_mathfun.h
+++ b/src/gallium/drivers/llvmpipe/sse_mathfun.h
@@ -94,55 +94,6 @@ v4sf sin_ps(v4sf x);
v4sf cos_ps(v4sf x);
void sincos_ps(v4sf x, v4sf *s, v4sf *c);
-#if defined (__MINGW32__)
-
-/* the ugly part below: many versions of gcc used to be completely buggy with respect to some intrinsics
- The movehl_ps is fixed in mingw 3.4.5, but I found out that all the _mm_cmp* intrinsics were completely
- broken on my mingw gcc 3.4.5 ...
-
- Note that the bug on _mm_cmp* does occur only at -O0 optimization level
-*/
-
-inline __m128 my_movehl_ps(__m128 a, const __m128 b) {
- asm (
- "movhlps %2,%0\n\t"
- : "=x" (a)
- : "0" (a), "x"(b)
- );
- return a; }
-#warning "redefined _mm_movehl_ps (see gcc bug 21179)"
-#define _mm_movehl_ps my_movehl_ps
-
-inline __m128 my_cmplt_ps(__m128 a, const __m128 b) {
- asm (
- "cmpltps %2,%0\n\t"
- : "=x" (a)
- : "0" (a), "x"(b)
- );
- return a;
- }
-inline __m128 my_cmpgt_ps(__m128 a, const __m128 b) {
- asm (
- "cmpnleps %2,%0\n\t"
- : "=x" (a)
- : "0" (a), "x"(b)
- );
- return a;
-}
-inline __m128 my_cmpeq_ps(__m128 a, const __m128 b) {
- asm (
- "cmpeqps %2,%0\n\t"
- : "=x" (a)
- : "0" (a), "x"(b)
- );
- return a;
-}
-#warning "redefined _mm_cmpxx_ps functions..."
-#define _mm_cmplt_ps my_cmplt_ps
-#define _mm_cmpgt_ps my_cmpgt_ps
-#define _mm_cmpeq_ps my_cmpeq_ps
-#endif
-
#ifndef USE_SSE2
typedef union xmm_mm_union {
__m128 xmm;