summary refs log tree commit diff
path: root/src/gallium/drivers/llvmpipe
diff options
context:
space:
mode:
author Christoph Bumiller <e0425955@student.tuwien.ac.at> 2010-09-02 18:31:49 +0200
committer Christoph Bumiller <e0425955@student.tuwien.ac.at> 2010-09-02 18:31:49 +0200
commit 222d2f2ac2c7d93cbc0643082c78278ad2c8cfce (patch)
tree b79152c238022b2a901201c22e5809ac520732bf /src/gallium/drivers/llvmpipe
parent 443abc80db9e1a288ce770e76cccd43664348098 (diff)
parent e73c5501b2fe20290d1b691c85a5d82ac3a0431c (diff)
Merge remote branch 'origin/master' into nv50-compiler
Conflicts: src/gallium/drivers/nv50/nv50_program.c
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
-rw-r--r-- src/gallium/drivers/llvmpipe/Makefile 2
-rw-r--r-- src/gallium/drivers/llvmpipe/SConscript 2
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_context.c 20
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_context.h 3
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_debug.h 2
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_draw_arrays.c 18
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_fence.c 93
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_fence.h 13
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_flush.c 39
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_flush.h 13
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_perf.c 21
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_perf.h 7
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_query.c 26
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast.c 37
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast.h 8
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast_tri.c 223
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h 35
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_scene.c 7
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_screen.c 61
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup.c 61
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup.h 13
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup_coef.c 258
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup_coef.h 61
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c 207
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup_context.h 37
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup_line.c 664
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup_point.c 292
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup_tri.c 568
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_derived.c 20
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_fs.c 41
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_fs.h 6
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_rasterizer.c 8
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_vertex.c 2
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_surface.c 6
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_texture.c 4
35 files changed, 2274 insertions, 604 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 2892b62920..dec874623e 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -27,6 +27,8 @@ C_SOURCES = \
lp_scene_queue.c \
lp_screen.c \
lp_setup.c \
+ lp_setup_coef.c \
+ lp_setup_coef_intrin.c \
lp_setup_line.c \
lp_setup_point.c \
lp_setup_tri.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 5583fca38e..8d57db72cf 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -63,6 +63,8 @@ llvmpipe = env.ConvenienceLibrary(
'lp_setup_line.c',
'lp_setup_point.c',
'lp_setup_tri.c',
+ 'lp_setup_coef.c',
+ 'lp_setup_coef_intrin.c',
'lp_setup_vbuf.c',
'lp_state_blend.c',
'lp_state_clip.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 7543bd7b2b..39f2c6085e 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -85,6 +85,14 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
align_free( llvmpipe );
}
+static void
+do_flush( struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ llvmpipe_flush(pipe, flags, fence, __FUNCTION__);
+}
+
struct pipe_context *
llvmpipe_create_context( struct pipe_screen *screen, void *priv )
@@ -109,7 +117,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
llvmpipe->pipe.destroy = llvmpipe_destroy;
llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state;
llvmpipe->pipe.clear = llvmpipe_clear;
- llvmpipe->pipe.flush = llvmpipe_flush;
+ llvmpipe->pipe.flush = do_flush;
llvmpipe_init_blend_funcs(llvmpipe);
llvmpipe_init_clip_funcs(llvmpipe);
@@ -147,9 +155,13 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe);
draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe);
- /* convert points and lines into triangles: */
- draw_wide_point_threshold(llvmpipe->draw, 0.0);
- draw_wide_line_threshold(llvmpipe->draw, 0.0);
+ /* convert points and lines into triangles:
+ * (otherwise, draw points and lines natively)
+ */
+ draw_wide_point_sprites(llvmpipe->draw, FALSE);
+ draw_enable_point_sprites(llvmpipe->draw, FALSE);
+ draw_wide_point_threshold(llvmpipe->draw, 10000.0);
+ draw_wide_line_threshold(llvmpipe->draw, 10000.0);
#if USE_DRAW_STAGE_PSTIPPLE
/* Do polygon stipple w/ texture map + frag prog? */
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 50f9091c3c..34fa20e204 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -101,6 +101,9 @@ struct llvmpipe_context {
/** Vertex format */
struct vertex_info vertex_info;
+
+ /** Which vertex shader output slot contains point size */
+ int psize_slot;
/** Fragment shader input interpolation info */
unsigned num_inputs;
diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h
index 92fb2b3ee5..a928ee38be 100644
--- a/src/gallium/drivers/llvmpipe/lp_debug.h
+++ b/src/gallium/drivers/llvmpipe/lp_debug.h
@@ -46,6 +46,8 @@ st_print_current(void);
#define DEBUG_SHOW_TILES 0x200
#define DEBUG_SHOW_SUBTILES 0x400
#define DEBUG_COUNTERS 0x800
+#define DEBUG_SCENE 0x1000
+#define DEBUG_FENCE 0x2000
#ifdef DEBUG
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index e73b431cb4..3af5c8d5c5 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -68,25 +68,17 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
/* Map index buffer, if present */
- if (info->indexed && lp->index_buffer.buffer) {
- char *indices = (char *) llvmpipe_resource_data(lp->index_buffer.buffer);
- mapped_indices = (void *) (indices + lp->index_buffer.offset);
- }
+ if (info->indexed && lp->index_buffer.buffer)
+ mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer);
- draw_set_mapped_element_buffer_range(draw, (mapped_indices) ?
- lp->index_buffer.index_size : 0,
- info->index_bias,
- info->min_index,
- info->max_index,
- mapped_indices);
+ draw_set_mapped_index_buffer(draw, mapped_indices);
llvmpipe_prepare_vertex_sampling(lp,
lp->num_vertex_sampler_views,
lp->vertex_sampler_views);
/* draw! */
- draw_arrays_instanced(draw, info->mode, info->start, info->count,
- info->start_instance, info->instance_count);
+ draw_vbo(draw, info);
/*
* unmap vertex/index buffers
@@ -95,7 +87,7 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
draw_set_mapped_vertex_buffer(draw, i, NULL);
}
if (mapped_indices) {
- draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+ draw_set_mapped_index_buffer(draw, NULL);
}
llvmpipe_cleanup_vertex_sampling(lp);
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c
index f9805e5d68..3a55e76bc3 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.c
+++ b/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -44,6 +44,7 @@
struct lp_fence *
lp_fence_create(unsigned rank)
{
+ static int fence_id;
struct lp_fence *fence = CALLOC_STRUCT(lp_fence);
pipe_reference_init(&fence->reference, 1);
@@ -51,8 +52,12 @@ lp_fence_create(unsigned rank)
pipe_mutex_init(fence->mutex);
pipe_condvar_init(fence->signalled);
+ fence->id = fence_id++;
fence->rank = rank;
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, fence->id);
+
return fence;
}
@@ -61,6 +66,9 @@ lp_fence_create(unsigned rank)
void
lp_fence_destroy(struct lp_fence *fence)
{
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, fence->id);
+
pipe_mutex_destroy(fence->mutex);
pipe_condvar_destroy(fence->signalled);
FREE(fence);
@@ -68,82 +76,49 @@ lp_fence_destroy(struct lp_fence *fence)
/**
- * For reference counting.
- * This is a Gallium API function.
- */
-static void
-llvmpipe_fence_reference(struct pipe_screen *screen,
- struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *fence)
-{
- struct lp_fence **old = (struct lp_fence **) ptr;
- struct lp_fence *f = (struct lp_fence *) fence;
-
- lp_fence_reference(old, f);
-}
-
-
-/**
- * Has the fence been executed/finished?
- * This is a Gallium API function.
- */
-static int
-llvmpipe_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence,
- unsigned flag)
-{
- struct lp_fence *f = (struct lp_fence *) fence;
-
- return f->count == f->rank;
-}
-
-
-/**
- * Wait for the fence to finish.
- * This is a Gallium API function.
- */
-static int
-llvmpipe_fence_finish(struct pipe_screen *screen,
- struct pipe_fence_handle *fence_handle,
- unsigned flag)
-{
- struct lp_fence *fence = (struct lp_fence *) fence_handle;
-
- pipe_mutex_lock(fence->mutex);
- while (fence->count < fence->rank) {
- pipe_condvar_wait(fence->signalled, fence->mutex);
- }
- pipe_mutex_unlock(fence->mutex);
-
- return 0;
-}
-
-
-/**
* Called by the rendering threads to increment the fence counter.
* When the counter == the rank, the fence is finished.
*/
void
lp_fence_signal(struct lp_fence *fence)
{
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, fence->id);
+
pipe_mutex_lock(fence->mutex);
fence->count++;
assert(fence->count <= fence->rank);
- LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__,
- fence->count, fence->rank);
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s count=%u rank=%u\n", __FUNCTION__,
+ fence->count, fence->rank);
- pipe_condvar_signal(fence->signalled);
+ /* Wake up all threads waiting on the condition variable:
+ */
+ pipe_condvar_broadcast(fence->signalled);
pipe_mutex_unlock(fence->mutex);
}
+boolean
+lp_fence_signalled(struct lp_fence *f)
+{
+ return f->count == f->rank;
+}
void
-llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen)
+lp_fence_wait(struct lp_fence *f)
{
- screen->fence_reference = llvmpipe_fence_reference;
- screen->fence_signalled = llvmpipe_fence_signalled;
- screen->fence_finish = llvmpipe_fence_finish;
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, f->id);
+
+ pipe_mutex_lock(f->mutex);
+ assert(f->issued);
+ while (f->count < f->rank) {
+ pipe_condvar_wait(f->signalled, f->mutex);
+ }
+ pipe_mutex_unlock(f->mutex);
}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h
index 13358fb99f..3c59118780 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.h
+++ b/src/gallium/drivers/llvmpipe/lp_fence.h
@@ -41,10 +41,12 @@ struct pipe_screen;
struct lp_fence
{
struct pipe_reference reference;
+ unsigned id;
pipe_mutex mutex;
pipe_condvar signalled;
+ boolean issued;
unsigned rank;
unsigned count;
};
@@ -57,6 +59,11 @@ lp_fence_create(unsigned rank);
void
lp_fence_signal(struct lp_fence *fence);
+boolean
+lp_fence_signalled(struct lp_fence *fence);
+
+void
+lp_fence_wait(struct lp_fence *fence);
void
llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
@@ -78,5 +85,11 @@ lp_fence_reference(struct lp_fence **ptr,
*ptr = f;
}
+static INLINE boolean
+lp_fence_issued(const struct lp_fence *fence)
+{
+ return fence->issued;
+}
+
#endif /* LP_FENCE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index 845292f4ab..e2c723b7a8 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -31,6 +31,7 @@
#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
#include "util/u_string.h"
#include "draw/draw_context.h"
#include "lp_flush.h"
@@ -45,14 +46,15 @@
void
llvmpipe_flush( struct pipe_context *pipe,
unsigned flags,
- struct pipe_fence_handle **fence )
+ struct pipe_fence_handle **fence,
+ const char *reason)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
draw_flush(llvmpipe->draw);
/* ask the setup module to flush */
- lp_setup_flush(llvmpipe->setup, flags, fence);
+ lp_setup_flush(llvmpipe->setup, flags, fence, reason);
/* Enable to dump BMPs of the color/depth buffers each frame */
if (0) {
@@ -76,6 +78,17 @@ llvmpipe_flush( struct pipe_context *pipe,
}
}
+void
+llvmpipe_finish( struct pipe_context *pipe,
+ const char *reason )
+{
+ struct pipe_fence_handle *fence = NULL;
+ llvmpipe_flush(pipe, 0, &fence, reason);
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+}
/**
* Flush context if necessary.
@@ -93,7 +106,8 @@ llvmpipe_flush_resource(struct pipe_context *pipe,
unsigned flush_flags,
boolean read_only,
boolean cpu_access,
- boolean do_not_block)
+ boolean do_not_block,
+ const char *reason)
{
unsigned referenced;
@@ -106,31 +120,16 @@ llvmpipe_flush_resource(struct pipe_context *pipe,
/*
* Flush and wait.
*/
-
- struct pipe_fence_handle *fence = NULL;
-
if (do_not_block)
return FALSE;
- /*
- * Do the unswizzling in parallel.
- *
- * XXX: Don't abuse the PIPE_FLUSH_FRAME flag for this.
- */
- flush_flags |= PIPE_FLUSH_FRAME;
-
- llvmpipe_flush(pipe, flush_flags, &fence);
-
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
+ llvmpipe_finish(pipe, reason);
} else {
/*
* Just flush.
*/
- llvmpipe_flush(pipe, flush_flags, NULL);
+ llvmpipe_flush(pipe, flush_flags, NULL, reason);
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h
index 7b605681a9..bb538b2bd8 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.h
+++ b/src/gallium/drivers/llvmpipe/lp_flush.h
@@ -34,8 +34,14 @@ struct pipe_context;
struct pipe_fence_handle;
void
-llvmpipe_flush(struct pipe_context *pipe, unsigned flags,
- struct pipe_fence_handle **fence);
+llvmpipe_flush(struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence,
+ const char *reason);
+
+void
+llvmpipe_finish( struct pipe_context *pipe,
+ const char *reason );
boolean
llvmpipe_flush_resource(struct pipe_context *pipe,
@@ -45,6 +51,7 @@ llvmpipe_flush_resource(struct pipe_context *pipe,
unsigned flush_flags,
boolean read_only,
boolean cpu_access,
- boolean do_not_block);
+ boolean do_not_block,
+ const char *reason);
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c
index 083e7e30a5..e22532f25c 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.c
+++ b/src/gallium/drivers/llvmpipe/lp_perf.c
@@ -46,7 +46,7 @@ lp_print_counters(void)
{
if (LP_DEBUG & DEBUG_COUNTERS) {
unsigned total_64, total_16, total_4;
- float p1, p2, p3, p4;
+ float p1, p2, p3, p5, p6;
debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
@@ -58,11 +58,15 @@ lp_print_counters(void)
p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64;
p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64;
p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64;
- p4 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64;
+ p5 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64;
+ p6 = 100.0 * (float) lp_count.nr_shade_64 / (float) total_64;
debug_printf("llvmpipe: nr_64x64: %9u\n", total_64);
debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
- debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p4, total_64);
+ debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p5, total_64);
+ debug_printf("llvmpipe: nr_pure_shade_opaque: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_opaque_64, 0.0, lp_count.nr_shade_opaque_64);
+ debug_printf("llvmpipe: nr_shade_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_64, p6, total_64);
+ debug_printf("llvmpipe: nr_pure_shade: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_64, 0.0, lp_count.nr_shade_64);
debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
@@ -79,12 +83,17 @@ lp_print_counters(void)
debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
- total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4);
+ total_4 = (lp_count.nr_empty_4 +
+ lp_count.nr_fully_covered_4 +
+ lp_count.nr_partially_covered_4);
p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4;
- p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4;
+ p2 = 100.0 * (float) lp_count.nr_fully_covered_4 / (float) total_4;
+ p3 = 100.0 * (float) lp_count.nr_partially_covered_4 / (float) total_4;
- debug_printf("llvmpipe: nr_4x4: %9u\n", total_4);
+ debug_printf("llvmpipe: nr_tri_4x4: %9u\n", total_4);
+ debug_printf("llvmpipe: nr_fully_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_4, p2, total_4);
+ debug_printf("llvmpipe: nr_partially_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_4, p3, total_4);
debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4);
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h
index 4774f64550..c28652fc30 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.h
+++ b/src/gallium/drivers/llvmpipe/lp_perf.h
@@ -44,11 +44,16 @@ struct lp_counters
unsigned nr_empty_64;
unsigned nr_fully_covered_64;
unsigned nr_partially_covered_64;
+ unsigned nr_pure_shade_opaque_64;
+ unsigned nr_pure_shade_64;
+ unsigned nr_shade_64;
unsigned nr_shade_opaque_64;
unsigned nr_empty_16;
unsigned nr_fully_covered_16;
unsigned nr_partially_covered_16;
unsigned nr_empty_4;
+ unsigned nr_fully_covered_4;
+ unsigned nr_partially_covered_4;
unsigned nr_non_empty_4;
unsigned nr_llvm_compiles;
int64_t llvm_compile_time; /**< total, in microseconds */
@@ -66,9 +71,11 @@ extern struct lp_counters lp_count;
#ifdef DEBUG
#define LP_COUNT(counter) lp_count.counter++
#define LP_COUNT_ADD(counter, incr) lp_count.counter += (incr)
+#define LP_COUNT_GET(counter) (lp_count.counter)
#else
#define LP_COUNT(counter)
#define LP_COUNT_ADD(counter, incr) (void) incr
+#define LP_COUNT_GET(counter) 0
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
index 02eeaf6487..67fd797af2 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -35,9 +35,8 @@
#include "util/u_memory.h"
#include "lp_context.h"
#include "lp_flush.h"
+#include "lp_fence.h"
#include "lp_query.h"
-#include "lp_rast.h"
-#include "lp_rast_priv.h"
#include "lp_state.h"
@@ -69,12 +68,7 @@ llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
struct llvmpipe_query *pq = llvmpipe_query(q);
/* query might still be in process if we never waited for the result */
if (!pq->done) {
- struct pipe_fence_handle *fence = NULL;
- llvmpipe_flush(pipe, 0, &fence);
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
+ llvmpipe_finish(pipe, __FUNCTION__);
}
pipe_mutex_destroy(pq->mutex);
@@ -93,16 +87,11 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
if (!pq->done) {
if (wait) {
- struct pipe_fence_handle *fence = NULL;
- llvmpipe_flush(pipe, 0, &fence);
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
+ llvmpipe_finish(pipe, __FUNCTION__);
}
/* this is a bit inconsistent but should be ok */
else {
- llvmpipe_flush(pipe, 0, NULL);
+ llvmpipe_flush(pipe, 0, NULL, __FUNCTION__);
}
}
@@ -125,12 +114,7 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
* frame of rendering.
*/
if (pq->binned) {
- struct pipe_fence_handle *fence;
- llvmpipe_flush(pipe, 0, &fence);
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
+ llvmpipe_finish(pipe, __FUNCTION__);
}
lp_setup_begin_query(llvmpipe->setup, pq);
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 3215d0f652..b1c306bbe9 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -316,43 +316,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
}
-/**
- * Load tile color from the framebuffer surface.
- * This is a bin command called during bin processing.
- */
-#if 0
-void
-lp_rast_load_color(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- struct lp_rasterizer *rast = task->rast;
- unsigned buf;
- enum lp_texture_usage usage;
-
- LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y);
-
- if (scene->has_color_clear)
- usage = LP_TEX_USAGE_WRITE_ALL;
- else
- usage = LP_TEX_USAGE_READ_WRITE;
-
- /* Get pointers to color tile(s).
- * This will convert linear data to tiled if needed.
- */
- for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
- struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
- struct llvmpipe_texture *lpt;
- assert(cbuf);
- lpt = llvmpipe_texture(cbuf->texture);
- task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
- cbuf->face + cbuf->zslice,
- cbuf->level,
- usage,
- task->x, task->y);
- assert(task->color_tiles[buf]);
- }
-}
-#endif
/**
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 44319a0ad6..b4564ef33b 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -120,7 +120,7 @@ struct lp_rast_triangle {
float v[3][2];
#endif
- struct lp_rast_plane plane[7]; /* NOTE: may allocate fewer planes */
+ struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */
};
@@ -236,6 +236,8 @@ void lp_rast_triangle_6( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_triangle_7( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+void lp_rast_triangle_8( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
void lp_rast_shade_tile( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
@@ -256,5 +258,9 @@ void lp_rast_begin_query(struct lp_rasterizer_task *,
void lp_rast_end_query(struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+void
+lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg);
+
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 980c18c024..dbaa8e023a 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -67,7 +67,7 @@ block_full_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, x + ix, y + iy);
}
-
+#if !defined(PIPE_ARCH_SSE)
static INLINE unsigned
build_mask(int c, int dcdx, int dcdy)
{
@@ -98,6 +98,7 @@ build_mask(int c, int dcdx, int dcdy)
return mask;
}
+
static INLINE unsigned
build_mask_linear(int c, int dcdx, int dcdy)
{
@@ -129,6 +130,137 @@ build_mask_linear(int c, int dcdx, int dcdy)
}
+static INLINE void
+build_masks(int c,
+ int cdiff,
+ int dcdx,
+ int dcdy,
+ unsigned *outmask,
+ unsigned *partmask)
+{
+ *outmask |= build_mask_linear(c, dcdx, dcdy);
+ *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy);
+}
+
+#else
+#include <emmintrin.h>
+#include "util/u_sse.h"
+
+
+static INLINE void
+build_masks(int c,
+ int cdiff,
+ int dcdx,
+ int dcdy,
+ unsigned *outmask,
+ unsigned *partmask)
+{
+ __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ /* Get values across the quad
+ */
+ __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
+ __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+ __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+ {
+ __m128i cstep01, cstep23, result;
+
+ cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ cstep23 = _mm_packs_epi32(cstep2, cstep3);
+ result = _mm_packs_epi16(cstep01, cstep23);
+
+ *outmask |= _mm_movemask_epi8(result);
+ }
+
+
+ {
+ __m128i cio4 = _mm_set1_epi32(cdiff);
+ __m128i cstep01, cstep23, result;
+
+ cstep0 = _mm_add_epi32(cstep0, cio4);
+ cstep1 = _mm_add_epi32(cstep1, cio4);
+ cstep2 = _mm_add_epi32(cstep2, cio4);
+ cstep3 = _mm_add_epi32(cstep3, cio4);
+
+ cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ cstep23 = _mm_packs_epi32(cstep2, cstep3);
+ result = _mm_packs_epi16(cstep01, cstep23);
+
+ *partmask |= _mm_movemask_epi8(result);
+ }
+}
+
+
+static INLINE unsigned
+build_mask_linear(int c, int dcdx, int dcdy)
+{
+ __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ /* Get values across the quad
+ */
+ __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
+ __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+ __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+ /* pack pairs of results into epi16
+ */
+ __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+
+ /* pack into epi8, preserving sign bits
+ */
+ __m128i result = _mm_packs_epi16(cstep01, cstep23);
+
+ /* extract sign bits to create mask
+ */
+ return _mm_movemask_epi8(result);
+}
+
+static INLINE unsigned
+build_mask(int c, int dcdx, int dcdy)
+{
+ __m128i step = _mm_setr_epi32(0, dcdx, dcdy, dcdx + dcdy);
+ __m128i c0 = _mm_set1_epi32(c);
+
+ /* Get values across the quad
+ */
+ __m128i cstep0 = _mm_add_epi32(c0, step);
+
+ /* Scale up step for moving between quads.
+ */
+ __m128i step4 = _mm_add_epi32(step, step);
+
+ /* Get values for the remaining quads:
+ */
+ __m128i cstep1 = _mm_add_epi32(cstep0,
+ _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1)));
+ __m128i cstep2 = _mm_add_epi32(cstep0,
+ _mm_shuffle_epi32(step4, _MM_SHUFFLE(2,2,2,2)));
+ __m128i cstep3 = _mm_add_epi32(cstep2,
+ _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1)));
+
+ /* pack pairs of results into epi16
+ */
+ __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+
+ /* pack into epi8, preserving sign bits
+ */
+ __m128i result = _mm_packs_epi16(cstep01, cstep23);
+
+ /* extract sign bits to create mask
+ */
+ return _mm_movemask_epi8(result);
+}
+
+#endif
+
+
+
+
#define TAG(x) x##_1
#define NR_PLANES 1
#include "lp_rast_tri_tmp.h"
@@ -157,3 +289,92 @@ build_mask_linear(int c, int dcdx, int dcdy)
#define NR_PLANES 7
#include "lp_rast_tri_tmp.h"
+#define TAG(x) x##_8
+#define NR_PLANES 8
+#include "lp_rast_tri_tmp.h"
+
+
+/* Special case for 3 plane triangle which is contained entirely
+ * within a 16x16 block.
+ */
+void
+lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = tri->plane;
+ unsigned mask = arg.triangle.plane_mask;
+ const int x = task->x + (mask & 0xf) * 16;
+ const int y = task->y + (mask >> 4) * 16;
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned j;
+ int c[3];
+
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ for (j = 0; j < 3; j++) {
+ c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
+
+ {
+ const int dcdx = -plane[j].dcdx * 4;
+ const int dcdy = plane[j].dcdy * 4;
+ const int cox = plane[j].eo * 4;
+ const int cio = plane[j].ei * 4 - 1;
+
+ build_masks(c[j] + cox,
+ cio - cox,
+ dcdx, dcdy,
+ &outmask, /* sign bits from c[i][0..15] + cox */
+ &partmask); /* sign bits from c[i][0..15] + cio */
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+ int cx[3];
+
+ partial_mask &= ~(1 << i);
+
+ for (j = 0; j < 3; j++)
+ cx[j] = (c[j]
+ - plane[j].dcdx * ix
+ + plane[j].dcdy * iy);
+
+ do_block_4_3(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+
+ inmask &= ~(1 << i);
+
+ block_full_4(task, tri, px, py);
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index 43f72d8ca8..99a0bae45d 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -32,7 +32,7 @@
/**
- * Prototype for a 7 plane rasterizer function. Will codegenerate
+ * Prototype for an 8 plane rasterizer function. Will codegenerate
* several of these.
*
* XXX: Variants for more/fewer planes.
@@ -81,11 +81,14 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
for (j = 0; j < NR_PLANES; j++) {
const int dcdx = -plane[j].dcdx * 4;
const int dcdy = plane[j].dcdy * 4;
- const int cox = c[j] + plane[j].eo * 4;
- const int cio = c[j] + plane[j].ei * 4 - 1;
-
- outmask |= build_mask_linear(cox, dcdx, dcdy);
- partmask |= build_mask_linear(cio, dcdx, dcdy);
+ const int cox = plane[j].eo * 4;
+ const int cio = plane[j].ei * 4 - 1;
+
+ build_masks(c[j] + cox,
+ cio - cox,
+ dcdx, dcdy,
+ &outmask, /* sign bits from c[i][0..15] + cox */
+ &partmask); /* sign bits from c[i][0..15] + cio */
}
if (outmask == 0xffff)
@@ -102,6 +105,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
assert((partial_mask & inmask) == 0);
+ LP_COUNT_ADD(nr_empty_4, util_bitcount(0xffff & ~(partial_mask | inmask)));
+
/* Iterate over partials:
*/
while (partial_mask) {
@@ -114,6 +119,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
partial_mask &= ~(1 << i);
+ LP_COUNT(nr_partially_covered_4);
+
for (j = 0; j < NR_PLANES; j++)
cx[j] = (c[j]
- plane[j].dcdx * ix
@@ -133,6 +140,7 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
inmask &= ~(1 << i);
+ LP_COUNT(nr_fully_covered_4);
block_full_4(task, tri, px, py);
}
}
@@ -166,11 +174,14 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
{
const int dcdx = -plane[j].dcdx * 16;
const int dcdy = plane[j].dcdy * 16;
- const int cox = c[j] + plane[j].eo * 16;
- const int cio = c[j] + plane[j].ei * 16 - 1;
-
- outmask |= build_mask_linear(cox, dcdx, dcdy);
- partmask |= build_mask_linear(cio, dcdx, dcdy);
+ const int cox = plane[j].eo * 16;
+ const int cio = plane[j].ei * 16 - 1;
+
+ build_masks(c[j] + cox,
+ cio - cox,
+ dcdx, dcdy,
+ &outmask, /* sign bits from c[i][0..15] + cox */
+ &partmask); /* sign bits from c[i][0..15] + cio */
}
j++;
@@ -190,6 +201,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
assert((partial_mask & inmask) == 0);
+ LP_COUNT_ADD(nr_empty_16, util_bitcount(0xffff & ~(partial_mask | inmask)));
+
/* Iterate over partials:
*/
while (partial_mask) {
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index f88a759fe7..15a09b7100 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -163,12 +163,15 @@ lp_scene_reset(struct lp_scene *scene )
/* Free all but last binner command lists:
*/
- for (i = 0; i < TILES_X; i++) {
- for (j = 0; j < TILES_Y; j++) {
+ for (i = 0; i < scene->tiles_x; i++) {
+ for (j = 0; j < scene->tiles_y; j++) {
lp_scene_bin_reset(scene, i, j);
}
}
+ /* If there are any bins which weren't cleared by the loop above,
+ * they will be caught (on debug builds at least) by this assert:
+ */
assert(lp_scene_is_empty(scene));
/* Free all but last binned data block:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 167cb2ee2e..1e65a91fc6 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -61,6 +61,8 @@ static const struct debug_named_value lp_debug_flags[] = {
{ "show_tiles", DEBUG_SHOW_TILES, NULL },
{ "show_subtiles", DEBUG_SHOW_SUBTILES, NULL },
{ "counters", DEBUG_COUNTERS, NULL },
+ { "scene", DEBUG_SCENE, NULL },
+ { "fence", DEBUG_FENCE, NULL },
DEBUG_NAMED_VALUE_END
};
#endif
@@ -87,7 +89,14 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return PIPE_MAX_SAMPLERS;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return PIPE_MAX_VERTEX_SAMPLERS;
+ /* At this time, the draw module and llvmpipe driver only
+ * support vertex shader texture lookups when LLVM is enabled in
+ * the draw module.
+ */
+ if (debug_get_bool_option("DRAW_USE_LLVM", TRUE))
+ return PIPE_MAX_VERTEX_SAMPLERS;
+ else
+ return 0;
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS;
case PIPE_CAP_NPOT_TEXTURES:
@@ -230,6 +239,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
assert(target == PIPE_BUFFER ||
target == PIPE_TEXTURE_1D ||
target == PIPE_TEXTURE_2D ||
+ target == PIPE_TEXTURE_RECT ||
target == PIPE_TEXTURE_3D ||
target == PIPE_TEXTURE_CUBE);
@@ -314,6 +324,51 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen )
+
+/**
+ * Fence reference counting.
+ *
+ * Installed as the screen's fence_reference hook.  Casts the opaque
+ * gallium fence handles to llvmpipe's struct lp_fence and forwards them
+ * to lp_fence_reference(), which does the actual work.
+ *
+ * \param screen  not used by this function
+ * \param ptr     address of the fence reference to update
+ * \param fence   fence handle passed through to lp_fence_reference()
+ */
+static void
+llvmpipe_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+ struct lp_fence **old = (struct lp_fence **) ptr;
+ struct lp_fence *f = (struct lp_fence *) fence;
+
+ lp_fence_reference(old, f);
+}
+
+
+/**
+ * Has the fence been executed/finished?
+ *
+ * Installed as the screen's fence_signalled hook.  Casts the handle to
+ * struct lp_fence and returns whatever lp_fence_signalled() reports.
+ *
+ * \param screen  not used by this function
+ * \param fence   fence to query
+ * \param flag    not used by this function
+ * \return the result of lp_fence_signalled() for this fence
+ */
+static int
+llvmpipe_fence_signalled(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ struct lp_fence *f = (struct lp_fence *) fence;
+ return lp_fence_signalled(f);
+}
+
+
+/**
+ * Wait for the fence to finish.
+ *
+ * Installed as the screen's fence_finish hook.  Casts the handle to
+ * struct lp_fence and calls lp_fence_wait() on it.
+ *
+ * \param screen        not used by this function
+ * \param fence_handle  fence to wait for
+ * \param flag          not used by this function
+ * \return always 0
+ */
+static int
+llvmpipe_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence_handle,
+ unsigned flag)
+{
+ struct lp_fence *f = (struct lp_fence *) fence_handle;
+
+ lp_fence_wait(f);
+ return 0;
+}
+
+
+
/**
* Create a new pipe_screen object
* Note: we're not presently subclassing pipe_screen (no llvmpipe_screen).
@@ -351,9 +406,11 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
screen->base.context_create = llvmpipe_create_context;
screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
+ screen->base.fence_reference = llvmpipe_fence_reference;
+ screen->base.fence_signalled = llvmpipe_fence_signalled;
+ screen->base.fence_finish = llvmpipe_fence_finish;
llvmpipe_init_screen_resource_funcs(&screen->base);
- llvmpipe_init_screen_fence_funcs(&screen->base);
lp_jit_screen_init(screen);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 556e571585..3da9097154 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -275,9 +275,10 @@ set_scene_state( struct lp_setup_context *setup,
void
lp_setup_flush( struct lp_setup_context *setup,
unsigned flags,
- struct pipe_fence_handle **fence)
+ struct pipe_fence_handle **fence,
+ const char *reason)
{
- LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+ LP_DBG(DEBUG_SETUP, "%s %s\n", __FUNCTION__, reason);
if (setup->scene) {
if (fence) {
@@ -287,6 +288,8 @@ lp_setup_flush( struct lp_setup_context *setup,
*fence = lp_setup_fence( setup );
}
+ if (setup->scene->fence)
+ setup->scene->fence->issued = TRUE;
}
set_scene_state( setup, SETUP_FLUSHED );
@@ -312,6 +315,11 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup,
* scene.
*/
util_copy_framebuffer_state(&setup->fb, fb);
+ setup->framebuffer.x0 = 0;
+ setup->framebuffer.y0 = 0;
+ setup->framebuffer.x1 = fb->width-1;
+ setup->framebuffer.y1 = fb->height-1;
+ setup->dirty |= LP_SETUP_NEW_SCISSOR;
}
@@ -469,11 +477,35 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup,
setup->ccw_is_frontface = ccw_is_frontface;
setup->cullmode = cull_mode;
setup->triangle = first_triangle;
- setup->scissor_test = scissor;
setup->pixel_offset = gl_rasterization_rules ? 0.5f : 0.0f;
+
+ if (setup->scissor_test != scissor) {
+ setup->dirty |= LP_SETUP_NEW_SCISSOR;
+ setup->scissor_test = scissor;
+ }
}
+void
+lp_setup_set_line_state( struct lp_setup_context *setup,
+ float line_width)
+{ /* Record the line width in the setup context; lp_setup_line() reads setup->line_width when it sets up a line. */
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); /* trace the call when DEBUG_SETUP output is enabled */
+ setup->line_width = line_width;
+}
+/**
+ * Record the point rasterization state in the setup context.
+ *
+ * The three values are only stored here (in setup->point_size,
+ * setup->point_size_per_vertex and setup->sprite); nothing else is
+ * recomputed and no dirty flag is raised.
+ *
+ * \param point_size             point size to store
+ * \param point_size_per_vertex  stored as-is; presumably selects a
+ *                               per-vertex point size -- confirm in the
+ *                               point setup code
+ * \param sprite                 stored as-is; presumably the point sprite
+ *                               setting -- confirm in the point setup code
+ */
+void
+lp_setup_set_point_state( struct lp_setup_context *setup,
+ float point_size,
+ boolean point_size_per_vertex,
+ uint sprite)
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ setup->point_size = point_size;
+ setup->sprite = sprite;
+ setup->point_size_per_vertex = point_size_per_vertex;
+}
void
lp_setup_set_fs_inputs( struct lp_setup_context *setup,
@@ -559,10 +591,11 @@ lp_setup_set_scissor( struct lp_setup_context *setup,
assert(scissor);
- if (memcmp(&setup->scissor.current, scissor, sizeof(*scissor)) != 0) {
- setup->scissor.current = *scissor; /* struct copy */
- setup->dirty |= LP_SETUP_NEW_SCISSOR;
- }
+ setup->scissor.x0 = scissor->minx;
+ setup->scissor.x1 = scissor->maxx-1;
+ setup->scissor.y0 = scissor->miny;
+ setup->scissor.y1 = scissor->maxy-1;
+ setup->dirty |= LP_SETUP_NEW_SCISSOR;
}
@@ -713,6 +746,12 @@ lp_setup_update_state( struct lp_setup_context *setup )
*/
{
struct llvmpipe_context *lp = llvmpipe_context(scene->pipe);
+
+ /* Will probably need to move this somewhere else, just need
+ * to know about vertex shader point size attribute.
+ */
+ setup->psize = lp->psize_slot;
+
if (lp->dirty) {
llvmpipe_update_derived(lp);
}
@@ -806,6 +845,14 @@ lp_setup_update_state( struct lp_setup_context *setup )
}
}
+ if (setup->dirty & LP_SETUP_NEW_SCISSOR) {
+ setup->draw_region = setup->framebuffer;
+ if (setup->scissor_test) {
+ u_rect_possible_intersection(&setup->scissor,
+ &setup->draw_region);
+ }
+ }
+
setup->dirty = 0;
assert(setup->fs.stored);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index 73b1c85325..821ebb1087 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -85,7 +85,8 @@ lp_setup_fence( struct lp_setup_context *setup );
void
lp_setup_flush( struct lp_setup_context *setup,
unsigned flags,
- struct pipe_fence_handle **fence);
+ struct pipe_fence_handle **fence,
+ const char *reason);
void
@@ -99,6 +100,16 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup,
boolean scissor,
boolean gl_rasterization_rules );
+void
+lp_setup_set_line_state( struct lp_setup_context *setup,
+ float line_width);
+
+void
+lp_setup_set_point_state( struct lp_setup_context *setup,
+ float point_size,
+ boolean point_size_per_vertex,
+ uint sprite);
+
void
lp_setup_set_fs_inputs( struct lp_setup_context *setup,
const struct lp_shader_input *interp,
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c
new file mode 100644
index 0000000000..95e3e8fffe
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.c
@@ -0,0 +1,258 @@
+/**************************************************************************
+ *
+ * Copyright 2010, VMware.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
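+/*
+ * Attribute interpolation coefficient setup for triangles (plain C
+ * implementation, compiled when PIPE_ARCH_SSE is not defined)
+ */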
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_perf.h"
+#include "lp_setup_context.h"
+#include "lp_setup_coef.h"
+#include "lp_rast.h"
+#include "lp_state_fs.h"
+
+#if !defined(PIPE_ARCH_SSE)
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ *
+ * Writes one channel of one input slot: a0 receives the constant and
+ * both gradients are set to zero.
+ *
+ * \param inputs  coefficient arrays to fill
+ * \param slot    input slot to write
+ * \param value   the constant attribute value
+ * \param i       channel index within the slot
+ */
+static void constant_coef( struct lp_rast_shader_inputs *inputs,
+ unsigned slot,
+ const float value,
+ unsigned i )
+{
+ inputs->a0[slot][i] = value;
+ inputs->dadx[slot][i] = 0.0f;
+ inputs->dady[slot][i] = 0.0f;
+}
+
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated coefficient,
+ * for one channel of one triangle attribute.
+ *
+ * The gradients are built from the attribute differences across two
+ * triangle edges combined with the pre-scaled edge deltas held in
+ * \p info; a0 is then extrapolated back from vertex 0.
+ *
+ * \param inputs     coefficient arrays to fill
+ * \param info       per-triangle setup data (vertices and edge terms)
+ * \param slot       destination input slot
+ * \param vert_attr  index of the source vertex attribute
+ * \param i          channel index
+ */
+static void linear_coef( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ float a0 = info->v0[vert_attr][i];
+ float a1 = info->v1[vert_attr][i];
+ float a2 = info->v2[vert_attr][i];
+
+ float da01 = a0 - a1;
+ float da20 = a2 - a0;
+ float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20);
+ float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01);
+
+ inputs->dadx[slot][i] = dadx;
+ inputs->dady[slot][i] = dady;
+
+ /* calculate a0 as the value which would be sampled for the
+ * fragment at (0,0), taking into account that we want to sample at
+ * pixel centers, in other words (0.5, 0.5).
+ *
+ * this is neat but unfortunately not a good way to do things for
+ * triangles with very large values of dadx or dady as it will
+ * result in the subtraction and re-addition from a0 of a very
+ * large number, which means we'll end up losing a lot of the
+ * fractional bits and precision from a0. the way to fix this is
+ * to define a0 as the sample at a pixel center somewhere near vmin
+ * instead - i'll switch to this later.
+ */
+ inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
+ dady * info->y0_center);
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ *
+ * Apart from the 1/w premultiplication the arithmetic is the same as
+ * in linear_coef().
+ *
+ * \param inputs     coefficient arrays to fill
+ * \param info       per-triangle setup data (vertices and edge terms)
+ * \param slot       destination input slot
+ * \param vert_attr  index of the source vertex attribute
+ * \param i          channel index
+ */
+static void perspective_coef( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ float a0 = info->v0[vert_attr][i] * info->v0[0][3];
+ float a1 = info->v1[vert_attr][i] * info->v1[0][3];
+ float a2 = info->v2[vert_attr][i] * info->v2[0][3];
+ float da01 = a0 - a1;
+ float da20 = a2 - a0;
+ float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20;
+ float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01;
+
+ inputs->dadx[slot][i] = dadx;
+ inputs->dady[slot][i] = dady;
+ inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
+ dady * info->y0_center);
+}
+
+
+/**
+ * Special coefficient setup for gl_FragCoord.
+ * X and Y are trivial: a0 is zero and the gradient is 1 along the
+ * matching axis and 0 along the other.
+ * Z and W are set up with linear_coef() from channels 2 and 3 of vertex
+ * attribute 0 (the position).
+ * Only the channels selected by usage_mask are written.
+ *
+ * \param inputs      coefficient arrays to fill
+ * \param info        per-triangle setup data
+ * \param slot        destination input slot
+ * \param usage_mask  TGSI_WRITEMASK_x bits of the channels to set up
+ */
+static void
+setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ unsigned usage_mask)
+{
+ /*X*/
+ if (usage_mask & TGSI_WRITEMASK_X) {
+ inputs->a0[slot][0] = 0.0;
+ inputs->dadx[slot][0] = 1.0;
+ inputs->dady[slot][0] = 0.0;
+ }
+
+ /*Y*/
+ if (usage_mask & TGSI_WRITEMASK_Y) {
+ inputs->a0[slot][1] = 0.0;
+ inputs->dadx[slot][1] = 0.0;
+ inputs->dady[slot][1] = 1.0;
+ }
+
+ /*Z*/
+ if (usage_mask & TGSI_WRITEMASK_Z) {
+ linear_coef(inputs, info, slot, 0, 2);
+ }
+
+ /*W*/
+ if (usage_mask & TGSI_WRITEMASK_W) {
+ linear_coef(inputs, info, slot, 0, 3);
+ }
+}
+
+
+/**
+ * Setup the fragment input attribute with the front-facing value.
+ *
+ * The X channel carries the facing as a constant (1.0 for front, -1.0
+ * for back); Y, Z and W are filled with constant 0.0.  Only channels
+ * selected by usage_mask are written.
+ *
+ * \param inputs      coefficient arrays to fill
+ * \param slot        destination input slot
+ * \param frontface   is the triangle front facing?
+ * \param usage_mask  TGSI_WRITEMASK_x bits of the channels to set up
+ */
+static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
+ unsigned slot,
+ boolean frontface,
+ unsigned usage_mask)
+{
+ /* convert TRUE to 1.0 and FALSE to -1.0 */
+ if (usage_mask & TGSI_WRITEMASK_X)
+ constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 );
+
+ if (usage_mask & TGSI_WRITEMASK_Y)
+ constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */
+
+ if (usage_mask & TGSI_WRITEMASK_Z)
+ constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */
+
+ if (usage_mask & TGSI_WRITEMASK_W)
+ constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */
+}
+
+
+/**
+ * Compute the dadx, dady and a0 values in \p inputs for every fragment
+ * shader input of a triangle.
+ *
+ * Fragment shader input N is written to coefficient slot N+1; slot 0 is
+ * reserved for the position and is filled last, once the set of position
+ * channels that are needed is known.
+ *
+ * \param setup   setup context; supplies the fragment shader input
+ *                layout (setup->fs) and the flatshade_first flag
+ * \param inputs  coefficient arrays to fill
+ * \param info    per-triangle setup data
+ */
+void lp_setup_tri_coef( struct lp_setup_context *setup,
+ struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info)
+{
+ unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
+ unsigned slot;
+ unsigned i;
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+ unsigned vert_attr = setup->fs.input[slot].src_index;
+ unsigned usage_mask = setup->fs.input[slot].usage_mask;
+
+ switch (setup->fs.input[slot].interp) {
+ case LP_INTERP_CONSTANT:
+ if (setup->flatshade_first) { /* the constant comes from v0 */
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(inputs, slot+1, info->v0[vert_attr][i], i);
+ }
+ else { /* otherwise the constant comes from v2 */
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(inputs, slot+1, info->v2[vert_attr][i], i);
+ }
+ break;
+
+ case LP_INTERP_LINEAR:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ linear_coef(inputs, info, slot+1, vert_attr, i);
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ perspective_coef(inputs, info, slot+1, vert_attr, i);
+ fragcoord_usage_mask |= TGSI_WRITEMASK_W; /* perspective inputs also need W set up in slot 0 */
+ break;
+
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0, so we need to ensure that its usage mask covers all
+ * usages.
+ */
+ fragcoord_usage_mask |= usage_mask;
+ break;
+
+ case LP_INTERP_FACING:
+ setup_facing_coef(inputs, slot+1, info->frontfacing, usage_mask);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ /* The internal position input is in slot zero:
+ */
+ setup_fragcoord_coef(inputs, info, 0, fragcoord_usage_mask);
+}
+
+#else /* PIPE_ARCH_SSE is defined: lp_setup_coef_intrin.c provides lp_setup_tri_coef() */
+extern void lp_setup_coef_dummy(void);
+void lp_setup_coef_dummy(void) /* empty placeholder; presumably keeps this file from compiling to nothing -- confirm */
+{
+}
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h
new file mode 100644
index 0000000000..d68b39c603
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.h
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * The setup code is concerned with point/line/triangle setup and
+ * putting commands/data into the bins.
+ */
+
+
+#ifndef LP_SETUP_COEF_H
+#define LP_SETUP_COEF_H
+
+/**
+ * Per-triangle data consumed by lp_setup_tri_coef().
+ *
+ * NOTE(review): the "_ooa" suffix presumably means "one over area",
+ * i.e. edge deltas pre-multiplied by the reciprocal triangle area --
+ * confirm against the code that fills this struct.
+ */
+struct lp_tri_info {
+
+ float x0_center; /* x term subtracted (times dadx) when extrapolating a0 from vertex 0 */
+ float y0_center; /* y term subtracted (times dady) when extrapolating a0 from vertex 0 */
+
+ /* turn these into an aligned float[4] */
+ float dy01_ooa; /* pre-scaled edge terms used to form dadx ... */
+ float dy20_ooa;
+ float dx01_ooa; /* ... and dady */
+ float dx20_ooa;
+
+ const float (*v0)[4]; /* vertex attribute arrays; [0] is the position, whose [3] is 1/w */
+ const float (*v1)[4];
+ const float (*v2)[4];
+
+ boolean frontfacing; /* remove eventually */
+};
+/**
+ * Fill \p inputs with the a0, dadx and dady coefficients of every
+ * fragment shader input of the triangle described by \p info.
+ * Implemented in lp_setup_coef.c, or in lp_setup_coef_intrin.c when
+ * PIPE_ARCH_SSE is defined.
+ */
+void lp_setup_tri_coef( struct lp_setup_context *setup,
+ struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info);
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
new file mode 100644
index 0000000000..73fb70599c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
@@ -0,0 +1,207 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
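+/*
+ * Attribute interpolation coefficient setup for triangles (SSE
+ * intrinsics implementation, compiled when PIPE_ARCH_SSE is defined)
+ */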
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_perf.h"
+#include "lp_setup_context.h"
+#include "lp_setup_coef.h"
+#include "lp_rast.h"
+
+#if defined(PIPE_ARCH_SSE)
+#include <emmintrin.h>
+
+
+/**
+ * Flat-shaded (constant) coefficients for all four channels of a slot:
+ * a0 is loaded from \p attr and both gradients are zeroed.
+ *
+ * The load goes through a const-qualified __m128 pointer so the const
+ * qualifier of \p attr is not cast away (matching linear_coef()).
+ *
+ * NOTE(review): the __m128 dereferences presumably require \p attr and
+ * the inputs arrays to be 16-byte aligned -- confirm with the callers.
+ *
+ * \param inputs  coefficient arrays to fill
+ * \param info    not used by this function
+ * \param slot    destination input slot
+ * \param attr    the four channel values of the provoking vertex
+ */
+static void constant_coef4( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ const float *attr)
+{
+ *(__m128 *)inputs->a0[slot] = *(const __m128 *)attr;
+ *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
+ *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
+}
+
+
+
+/**
+ * Setup the fragment input attribute with the front-facing value.
+ *
+ * All four channels of the slot are written: X holds 1.0 when
+ * info->frontfacing is set and -1.0 otherwise, Y/Z/W hold 0, and both
+ * gradients are zero.
+ *
+ * \param inputs  coefficient arrays to fill
+ * \param info    per-triangle setup data; only frontfacing is read
+ * \param slot    destination input slot
+ */
+static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot )
+{
+ /* XXX: just pass frontface directly to the shader, don't bother
+ * treating it as an input.
+ */
+ __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0,
+ 0, 0, 0);
+
+ *(__m128 *)inputs->a0[slot] = a0;
+ *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
+ *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
+}
+
+
+/**
+ * Compute a0, dadx and dady for all four channels of one slot at once.
+ *
+ * This is the four-wide form of the per-channel arithmetic used by the
+ * plain C linear_coef(): gradients from the attribute differences and
+ * the pre-scaled edge terms in \p info, then a0 extrapolated from the
+ * first vertex using x0_center / y0_center.
+ *
+ * \param inputs      coefficient arrays to fill
+ * \param info        per-triangle setup data
+ * \param slot        destination input slot
+ * \param a0, a1, a2  the attribute's four channels at vertices 0, 1, 2
+ */
+static void calc_coef4( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ __m128 a0,
+ __m128 a1,
+ __m128 a2)
+{
+ __m128 da01 = _mm_sub_ps(a0, a1);
+ __m128 da20 = _mm_sub_ps(a2, a0);
+
+ __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa));
+ __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa));
+ __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa);
+
+ __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa));
+ __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa));
+ __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa);
+
+ __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center));
+ __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center));
+ __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0);
+ __m128 attr_0 = _mm_sub_ps(a0, attr_v0); /* a0 extrapolated back from the first vertex */
+
+ *(__m128 *)inputs->a0[slot] = attr_0;
+ *(__m128 *)inputs->dadx[slot] = dadx;
+ *(__m128 *)inputs->dady[slot] = dady;
+}
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated attribute, all
+ * four channels at once.
+ *
+ * Loads the attribute from each of the three vertices as a __m128 and
+ * hands them to calc_coef4().
+ *
+ * \param inputs     coefficient arrays to fill
+ * \param info       per-triangle setup data
+ * \param slot       destination input slot
+ * \param vert_attr  index of the source vertex attribute
+ */
+static void linear_coef( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ unsigned vert_attr)
+{
+ __m128 a0 = *(const __m128 *)info->v0[vert_attr];
+ __m128 a1 = *(const __m128 *)info->v1[vert_attr];
+ __m128 a2 = *(const __m128 *)info->v2[vert_attr];
+
+ calc_coef4(inputs, info, slot, a0, a1, a2);
+}
+
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ *
+ * All four channels are processed at once; each vertex's channels are
+ * scaled by that vertex's 1/w and the result is passed to calc_coef4().
+ *
+ * \param inputs     coefficient arrays to fill
+ * \param info       per-triangle setup data
+ * \param slot       destination input slot
+ * \param vert_attr  index of the source vertex attribute
+ */
+static void perspective_coef( struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info,
+ unsigned slot,
+ unsigned vert_attr)
+{
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ __m128 a0 = *(const __m128 *)info->v0[vert_attr];
+ __m128 a1 = *(const __m128 *)info->v1[vert_attr];
+ __m128 a2 = *(const __m128 *)info->v2[vert_attr];
+
+ __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3]));
+ __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3]));
+ __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3]));
+
+ calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow);
+}
+
+
+
+
+
+/**
+ * Compute the inputs-> dadx, dady, a0 values.
+ *
+ * SSE implementation: every slot is written four channels at a time, so
+ * the per-input usage masks are not consulted here.  Slot 0 always
+ * receives the linearly interpolated position (vertex attribute 0), and
+ * fragment shader input N is written to slot N+1.
+ *
+ * \param setup   setup context; supplies the fragment shader input
+ *                layout (setup->fs) and the flatshade_first flag
+ * \param inputs  coefficient arrays to fill
+ * \param info    per-triangle setup data
+ */
+void lp_setup_tri_coef( struct lp_setup_context *setup,
+ struct lp_rast_shader_inputs *inputs,
+ const struct lp_tri_info *info)
+{
+ unsigned slot;
+
+ /* The internal position input is in slot zero:
+ */
+ linear_coef(inputs, info, 0, 0);
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+ unsigned vert_attr = setup->fs.input[slot].src_index;
+
+ switch (setup->fs.input[slot].interp) {
+ case LP_INTERP_CONSTANT:
+ if (setup->flatshade_first) { /* the constant comes from v0 */
+ constant_coef4(inputs, info, slot+1, info->v0[vert_attr]);
+ }
+ else { /* otherwise the constant comes from v2 */
+ constant_coef4(inputs, info, slot+1, info->v2[vert_attr]);
+ }
+ break;
+
+ case LP_INTERP_LINEAR:
+ linear_coef(inputs, info, slot+1, vert_attr);
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ perspective_coef(inputs, info, slot+1, vert_attr);
+ break;
+
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0.
+ */
+ break;
+
+ case LP_INTERP_FACING:
+ setup_facing_coef(inputs, info, slot+1);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+}
+
+#else /* PIPE_ARCH_SSE is not defined: lp_setup_coef.c provides lp_setup_tri_coef() */
+extern void lp_setup_coef_dummy(void);
+void lp_setup_coef_dummy(void) /* empty placeholder; presumably keeps this file from compiling to nothing -- confirm */
+{
+}
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index a0606f5034..877a492c6d 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -41,6 +41,7 @@
#include "lp_scene.h"
#include "draw/draw_vbuf.h"
+#include "util/u_rect.h"
#define LP_SETUP_NEW_FS 0x01
#define LP_SETUP_NEW_CONSTANTS 0x02
@@ -73,6 +74,7 @@ struct lp_setup_context
uint prim;
uint vertex_size;
uint nr_vertices;
+ uint sprite;
uint vertex_buffer_size;
void *vertex_buffer;
@@ -88,10 +90,17 @@ struct lp_setup_context
boolean flatshade_first;
boolean ccw_is_frontface;
boolean scissor_test;
+ boolean point_size_per_vertex;
unsigned cullmode;
float pixel_offset;
+ float line_width;
+ float point_size;
+ float psize;
struct pipe_framebuffer_state fb;
+ struct u_rect framebuffer;
+ struct u_rect scissor;
+ struct u_rect draw_region; /* intersection of fb & scissor */
struct {
unsigned flags;
@@ -127,9 +136,6 @@ struct lp_setup_context
uint8_t *stored;
} blend_color;
- struct {
- struct pipe_scissor_state current;
- } scissor;
unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */
@@ -158,4 +164,29 @@ void lp_setup_update_state( struct lp_setup_context *setup );
void lp_setup_destroy( struct lp_setup_context *setup );
+void
+lp_setup_print_triangle(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4]);
+
+void
+lp_setup_print_vertex(struct lp_setup_context *setup,
+ const char *name,
+ const float (*v)[4]);
+
+
+struct lp_rast_triangle *
+lp_setup_alloc_triangle(struct lp_scene *scene,
+ unsigned nr_inputs,
+ unsigned nr_planes,
+ unsigned *tri_size);
+
+void
+lp_setup_bin_triangle( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ const struct u_rect *bbox,
+ int nr_planes );
+
#endif
+
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index be41c44e6f..ce2da55cf4 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -29,19 +29,671 @@
* Binning code for lines
*/
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_perf.h"
#include "lp_setup_context.h"
+#include "lp_rast.h"
+#include "lp_state_fs.h"
-static void line_nop( struct lp_setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4] )
+#define NUM_CHANNELS 4
+/**
+ * Per-line data consumed by the coefficient helpers in this file.
+ */
+struct lp_line_info {
+
+ float dx; /* multiplied with oneoverarea to form dadx */
+ float dy; /* multiplied with oneoverarea to form dady */
+ float oneoverarea; /* scale applied to both gradients; presumably a reciprocal length/area term -- confirm where it is filled in */
+
+ const float (*v1)[4]; /* first vertex; [0] is the position, whose [3] is 1/w */
+ const float (*v2)[4]; /* second vertex, same layout */
+};
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ *
+ * Writes one channel of one slot in tri->inputs: a0 receives the
+ * constant and both gradients are set to zero.
+ *
+ * \param setup  not used by this function
+ * \param tri    primitive whose input coefficients are written
+ * \param slot   destination input slot
+ * \param value  the constant attribute value
+ * \param i      channel index
+ */
+static void constant_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ unsigned slot,
+ const float value,
+ unsigned i )
+{
+ tri->inputs.a0[slot][i] = value;
+ tri->inputs.dadx[slot][i] = 0.0f;
+ tri->inputs.dady[slot][i] = 0.0f;
+}
+
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated coefficient,
+ * for a line.
+ *
+ * The gradients are the attribute difference between the two end points
+ * scaled by info->dx (resp. info->dy) and info->oneoverarea.  a0 is
+ * extrapolated back from the first vertex, whose position is shifted by
+ * setup->pixel_offset.
+ *
+ * \param setup      supplies pixel_offset
+ * \param tri        primitive whose input coefficients are written
+ * \param info       per-line setup data
+ * \param slot       destination input slot
+ * \param vert_attr  index of the source vertex attribute
+ * \param i          channel index
+ */
+static void linear_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ struct lp_line_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ float a1 = info->v1[vert_attr][i];
+ float a2 = info->v2[vert_attr][i];
+
+ float da21 = a1 - a2; /* note: despite the name this is v1 minus v2 */
+ float dadx = da21 * info->dx * info->oneoverarea;
+ float dady = da21 * info->dy * info->oneoverarea;
+
+ tri->inputs.dadx[slot][i] = dadx;
+ tri->inputs.dady[slot][i] = dady;
+
+ tri->inputs.a0[slot][i] = (a1 -
+ (dadx * (info->v1[0][0] - setup->pixel_offset) +
+ dady * (info->v1[0][1] - setup->pixel_offset)));
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a line.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ *
+ * Apart from the 1/w premultiplication the arithmetic is the same as
+ * in linear_coef().
+ *
+ * \param setup      supplies pixel_offset
+ * \param tri        primitive whose input coefficients are written
+ * \param info       per-line setup data
+ * \param slot       destination input slot
+ * \param vert_attr  index of the source vertex attribute
+ * \param i          channel index
+ */
+static void perspective_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ struct lp_line_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ float a1 = info->v1[vert_attr][i] * info->v1[0][3];
+ float a2 = info->v2[vert_attr][i] * info->v2[0][3];
+
+ float da21 = a1 - a2; /* note: despite the name this is v1 minus v2 */
+ float dadx = da21 * info->dx * info->oneoverarea;
+ float dady = da21 * info->dy * info->oneoverarea;
+
+ tri->inputs.dadx[slot][i] = dadx;
+ tri->inputs.dady[slot][i] = dady;
+
+ tri->inputs.a0[slot][i] = (a1 -
+ (dadx * (info->v1[0][0] - setup->pixel_offset) +
+ dady * (info->v1[0][1] - setup->pixel_offset)));
+}
+/**
+ * Special coefficient setup for the position input of a line.
+ * X and Y are trivial: a0 is zero and the gradient is 1 along the
+ * matching axis and 0 along the other.
+ * Z and W are set up with linear_coef() from channels 2 and 3 of vertex
+ * attribute 0 (the position).
+ * Only the channels selected by usage_mask are written.
+ *
+ * \param setup       passed through to linear_coef()
+ * \param tri         primitive whose input coefficients are written
+ * \param info        per-line setup data
+ * \param slot        destination input slot
+ * \param usage_mask  TGSI_WRITEMASK_x bits of the channels to set up
+ */
+static void
+setup_fragcoord_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ struct lp_line_info *info,
+ unsigned slot,
+ unsigned usage_mask)
+{
+ /*X*/
+ if (usage_mask & TGSI_WRITEMASK_X) {
+ tri->inputs.a0[slot][0] = 0.0;
+ tri->inputs.dadx[slot][0] = 1.0;
+ tri->inputs.dady[slot][0] = 0.0;
+ }
+
+ /*Y*/
+ if (usage_mask & TGSI_WRITEMASK_Y) {
+ tri->inputs.a0[slot][1] = 0.0;
+ tri->inputs.dadx[slot][1] = 0.0;
+ tri->inputs.dady[slot][1] = 1.0;
+ }
+
+ /*Z*/
+ if (usage_mask & TGSI_WRITEMASK_Z) {
+ linear_coef(setup, tri, info, slot, 0, 2);
+ }
+
+ /*W*/
+ if (usage_mask & TGSI_WRITEMASK_W) {
+ linear_coef(setup, tri, info, slot, 0, 3);
+ }
+}
+
+/**
+ * Compute the a0, dadx and dady values in tri->inputs for every
+ * fragment shader input of a line.
+ *
+ * Fragment shader input N is written to coefficient slot N+1; slot 0 is
+ * reserved for the position and is filled last, once the set of position
+ * channels that are needed is known.
+ *
+ * \param setup  setup context; supplies the fragment shader input layout
+ *               (setup->fs), flatshade_first and pixel_offset
+ * \param tri    primitive whose input coefficients are written
+ * \param info   per-line setup data
+ */
+static void setup_line_coefficients( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ struct lp_line_info *info)
+{
+ unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
+ unsigned slot;
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+ unsigned vert_attr = setup->fs.input[slot].src_index;
+ unsigned usage_mask = setup->fs.input[slot].usage_mask;
+ unsigned i;
+
+ switch (setup->fs.input[slot].interp) {
+ case LP_INTERP_CONSTANT:
+ if (setup->flatshade_first) {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i);
+ }
+ else {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i);
+ }
+ break;
+
+ case LP_INTERP_LINEAR:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ linear_coef(setup, tri, info, slot+1, vert_attr, i);
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ perspective_coef(setup, tri, info, slot+1, vert_attr, i);
+ /* perspective inputs also need W set up in slot 0 */
+ fragcoord_usage_mask |= TGSI_WRITEMASK_W;
+ break;
+
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0, so we need to ensure that its usage mask covers all
+ * usages.
+ */
+ fragcoord_usage_mask |= usage_mask;
+ break;
+
+ case LP_INTERP_FACING:
+ /*
+ * A fragment shader that reads the facing input can be bound while
+ * lines are drawn.  Lines have no winding, so report them as
+ * front-facing: X gets 1.0 (the value the triangle path stores for
+ * a front face) and the remaining channels get 0.0.
+ */
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(setup, tri, slot+1, i == 0 ? 1.0f : 0.0f, i);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ /* The internal position input is in slot zero:
+ */
+ setup_fragcoord_coef(setup, tri, info, 0,
+ fragcoord_usage_mask);
+}
+
+
+/**
+ * Convert a float coordinate to fixed point: scale by FIXED_ONE and
+ * round with util_iround().
+ */
+static INLINE int subpixel_snap( float a )
+{
+ return util_iround(FIXED_ONE * a);
+}
+
+
+/**
+ * Print line vertex attribs (for debug).
+ *
+ * Dumps the position plus every fragment shader input (1 +
+ * setup->fs.nr_inputs attributes) of both end points through
+ * debug_printf().
+ *
+ * \param setup  supplies the number of fragment shader inputs
+ * \param v1     attributes of the first vertex
+ * \param v2     attributes of the second vertex
+ */
+static void
+print_line(struct lp_setup_context *setup,
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ uint i;
+
+ debug_printf("llvmpipe line\n");
+ /* i is unsigned, so it is printed with %u rather than %d */
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+ debug_printf(" v1[%u]: %f %f %f %f\n", i,
+ v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
+ }
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+ debug_printf(" v2[%u]: %f %f %f %f\n", i,
+ v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
+ }
+}
+
+/**
+ * Return whether x is non-negative.  Note that zero counts as
+ * "positive" here: the result is the boolean (x >= 0), not -1/0/+1.
+ */
+static INLINE boolean sign(float x){
+ return x >= 0;
+}
+
+
+/* Return the fractional part of f, computed as f - floorf(f).
+ *
+ * Used on positive floats only:
+ */
+static INLINE float fracf(float f)
{
+ return f - floorf(f);
}
-void
-lp_setup_choose_line( struct lp_setup_context *setup )
+
+static void
+lp_setup_line( struct lp_setup_context *setup,
+ const float (*v1)[4],
+ const float (*v2)[4])
{
- setup->line = line_nop;
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_rast_triangle *line;
+ struct lp_line_info info;
+ float width = MAX2(1.0, setup->line_width);
+ struct u_rect bbox;
+ unsigned tri_bytes;
+ int x[4];
+ int y[4];
+ int i;
+ int nr_planes = 4;
+
+ /* linewidth should be interpreted as integer */
+ int fixed_width = util_iround(width) * FIXED_ONE;
+
+ float x_offset=0;
+ float y_offset=0;
+ float x_offset_end=0;
+ float y_offset_end=0;
+
+ float x1diff;
+ float y1diff;
+ float x2diff;
+ float y2diff;
+ float dx, dy;
+
+ boolean draw_start;
+ boolean draw_end;
+ boolean will_draw_start;
+ boolean will_draw_end;
+
+ if (0)
+ print_line(setup, v1, v2);
+
+ if (setup->scissor_test) {
+ nr_planes = 8;
+ }
+ else {
+ nr_planes = 4;
+ }
+
+
+ dx = v1[0][0] - v2[0][0];
+ dy = v1[0][1] - v2[0][1];
+
+ /* X-MAJOR LINE */
+ if (fabsf(dx) >= fabsf(dy)) {
+ float dydx = dy / dx;
+
+ x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
+ y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
+ x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
+ y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+
+ if (y2diff==-0.5 && dy<0){
+ y2diff = 0.5;
+ }
+
+ /*
+ * Diamond exit rule test for starting point
+ */
+ if (fabsf(x1diff) + fabsf(y1diff) < 0.5) {
+ draw_start = TRUE;
+ }
+ else if (sign(x1diff) == sign(-dx)) {
+ draw_start = FALSE;
+ }
+ else if (sign(-y1diff) != sign(dy)) {
+ draw_start = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float yintersect = fracf(v1[0][1]) + x1diff * dydx;
+ draw_start = (yintersect < 1.0 && yintersect > 0.0);
+ }
+
+
+ /*
+ * Diamond exit rule test for ending point
+ */
+ if (fabsf(x2diff) + fabsf(y2diff) < 0.5) {
+ draw_end = FALSE;
+ }
+ else if (sign(x2diff) != sign(-dx)) {
+ draw_end = FALSE;
+ }
+ else if (sign(-y2diff) == sign(dy)) {
+ draw_end = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float yintersect = fracf(v2[0][1]) + x2diff * dydx;
+ draw_end = (yintersect < 1.0 && yintersect > 0.0);
+ }
+
+ /* Are we already drawing start/end?
+ */
+ will_draw_start = sign(-x1diff) != sign(dx);
+ will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0;
+
+ if (dx < 0) {
+ /* if v2 is to the right of v1, swap pointers */
+ const float (*temp)[4] = v1;
+ v1 = v2;
+ v2 = temp;
+ dx = -dx;
+ dy = -dy;
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ x_offset_end = - x1diff - 0.5;
+ y_offset_end = x_offset_end * dydx;
+
+ }
+ if (will_draw_end != draw_end) {
+ x_offset = - x2diff - 0.5;
+ y_offset = x_offset * dydx;
+ }
+
+ }
+ else{
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ x_offset = - x1diff + 0.5;
+ y_offset = x_offset * dydx;
+ }
+ if (will_draw_end != draw_end) {
+ x_offset_end = - x2diff + 0.5;
+ y_offset_end = x_offset_end * dydx;
+ }
+ }
+
+ /* x/y positions in fixed point */
+ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset);
+ x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
+ x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
+ x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset);
+
+ y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) - fixed_width/2;
+ y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2;
+ y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2;
+ y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) + fixed_width/2;
+
+ }
+ else {
+ const float dxdy = dx / dy;
+
+ /* Y-MAJOR LINE */
+ x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
+ y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
+ x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
+ y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+
+ if (x2diff==-0.5 && dx<0) {
+ x2diff = 0.5;
+ }
+
+ /*
+ * Diamond exit rule test for starting point
+ */
+ if (fabsf(x1diff) + fabsf(y1diff) < 0.5) {
+ draw_start = TRUE;
+ }
+ else if (sign(-y1diff) == sign(dy)) {
+ draw_start = FALSE;
+ }
+ else if (sign(x1diff) != sign(-dx)) {
+ draw_start = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float xintersect = fracf(v1[0][0]) + y1diff * dxdy;
+ draw_start = (xintersect < 1.0 && xintersect > 0.0);
+ }
+
+ /*
+ * Diamond exit rule test for ending point
+ */
+ if (fabsf(x2diff) + fabsf(y2diff) < 0.5) {
+ draw_end = FALSE;
+ }
+ else if (sign(-y2diff) != sign(dy) ) {
+ draw_end = FALSE;
+ }
+ else if (sign(x2diff) == sign(-dx) ) {
+ draw_end = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float xintersect = fracf(v2[0][0]) + y2diff * dxdy;
+ draw_end = (xintersect < 1.0 && xintersect > 0.0);
+ }
+
+ /* Are we already drawing start/end?
+ */
+ will_draw_start = sign(y1diff) == sign(dy);
+ will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0;
+
+ if (dy > 0) {
+ /* if v2 is on top of v1, swap pointers */
+ const float (*temp)[4] = v1;
+ v1 = v2;
+ v2 = temp;
+ dx = -dx;
+ dy = -dy;
+
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ y_offset_end = - y1diff + 0.5;
+ x_offset_end = y_offset_end * dxdy;
+ }
+ if (will_draw_end != draw_end) {
+ y_offset = - y2diff + 0.5;
+ x_offset = y_offset * dxdy;
+ }
+ }
+ else {
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ y_offset = - y1diff - 0.5;
+ x_offset = y_offset * dxdy;
+
+ }
+ if (will_draw_end != draw_end) {
+ y_offset_end = - y2diff - 0.5;
+ x_offset_end = y_offset_end * dxdy;
+ }
+ }
+
+ /* x/y positions in fixed point */
+ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2;
+ x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2;
+ x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2;
+ x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) + fixed_width/2;
+
+ y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset);
+ y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
+ y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
+ y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset);
+ }
+
+
+
+ LP_COUNT(nr_tris);
+
+
+ /* Bounding rectangle (in pixels) */
+ {
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding.
+ */
+ int adj = (setup->pixel_offset != 0) ? 1 : 0;
+
+ bbox.x0 = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.x1 = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.y0 = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ bbox.y1 = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+
+ /* Inclusive coordinates:
+ */
+ bbox.x1--;
+ bbox.y1--;
+ }
+
+ if (bbox.x1 < bbox.x0 ||
+ bbox.y1 < bbox.y0) {
+ if (0) debug_printf("empty bounding box\n");
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
+ if (0) debug_printf("offscreen\n");
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ u_rect_find_intersection(&setup->draw_region, &bbox);
+
+ line = lp_setup_alloc_triangle(scene,
+ setup->fs.nr_inputs,
+ nr_planes,
+ &tri_bytes);
+ if (!line)
+ return;
+
+#ifdef DEBUG
+ line->v[0][0] = v1[0][0];
+ line->v[1][0] = v2[0][0];
+ line->v[0][1] = v1[0][1];
+ line->v[1][1] = v2[0][1];
+#endif
+
+ /* calculate the deltas */
+ line->plane[0].dcdy = x[0] - x[1];
+ line->plane[1].dcdy = x[1] - x[2];
+ line->plane[2].dcdy = x[2] - x[3];
+ line->plane[3].dcdy = x[3] - x[0];
+
+ line->plane[0].dcdx = y[0] - y[1];
+ line->plane[1].dcdx = y[1] - y[2];
+ line->plane[2].dcdx = y[2] - y[3];
+ line->plane[3].dcdx = y[3] - y[0];
+
+
+ info.oneoverarea = 1.0f / (dx * dx + dy * dy);
+ info.dx = dx;
+ info.dy = dy;
+ info.v1 = v1;
+ info.v2 = v2;
+
+ /* Setup parameter interpolants:
+ */
+ setup_line_coefficients( setup, line, &info);
+
+ line->inputs.facing = 1.0F;
+ line->inputs.state = setup->fs.stored;
+
+ for (i = 0; i < 4; i++) {
+ struct lp_rast_plane *plane = &line->plane[i];
+
+ /* half-edge constants, will be iterated over the whole render
+ * target.
+ */
+ plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
+
+
+ /* correct for top-left vs. bottom-left fill convention.
+ *
+ * note that we're overloading gl_rasterization_rules to mean
+ * both (0.5,0.5) pixel centers *and* bottom-left filling
+ * convention.
+ *
+ * GL actually has a top-left filling convention, but GL's
+ * notion of "top" differs from gallium's...
+ *
+ * Also, sometimes (in FBO cases) GL will render upside down
+ * to its usual method, in which case it will probably want
+ * to use the opposite, top-left convention.
+ */
+ if (plane->dcdx < 0) {
+ /* both fill conventions want this - adjust for left edges */
+ plane->c++;
+ }
+ else if (plane->dcdx == 0) {
+ if (setup->pixel_offset == 0) {
+ /* correct for top-left fill convention:
+ */
+ if (plane->dcdy > 0) plane->c++;
+ }
+ else {
+ /* correct for bottom-left fill convention:
+ */
+ if (plane->dcdy < 0) plane->c++;
+ }
+ }
+
+ plane->dcdx *= FIXED_ONE;
+ plane->dcdy *= FIXED_ONE;
+
+ /* find trivial reject offsets for each edge for a single-pixel
+ * sized block. These will be scaled up at each recursive level to
+ * match the active blocksize. Scaling in this way works best if
+ * the blocks are square.
+ */
+ plane->eo = 0;
+ if (plane->dcdx < 0) plane->eo -= plane->dcdx;
+ if (plane->dcdy > 0) plane->eo += plane->dcdy;
+
+ /* Calculate trivial accept offsets from the above.
+ */
+ plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+ }
+
+
+ /*
+ * When rasterizing scissored tris, use the intersection of the
+ * triangle bounding box and the scissor rect to generate the
+ * scissor planes.
+ *
+ * This permits us to cut off the triangle "tails" that are present
+ * in the intermediate recursive levels caused when two of the
+ * triangles edges don't diverge quickly enough to trivially reject
+ * exterior blocks from the triangle.
+ *
+ * It's not really clear if it's worth worrying about these tails,
+ * but since we generate the planes for each scissored tri, it's
+ * free to trim them in this case.
+ *
+ * Note that otherwise, the scissor planes only vary in 'C' value,
+ * and even then only on state-changes. Could alternatively store
+ * these planes elsewhere.
+ */
+ if (nr_planes == 8) {
+ line->plane[4].dcdx = -1;
+ line->plane[4].dcdy = 0;
+ line->plane[4].c = 1-bbox.x0;
+ line->plane[4].ei = 0;
+ line->plane[4].eo = 1;
+
+ line->plane[5].dcdx = 1;
+ line->plane[5].dcdy = 0;
+ line->plane[5].c = bbox.x1+1;
+ line->plane[5].ei = -1;
+ line->plane[5].eo = 0;
+
+ line->plane[6].dcdx = 0;
+ line->plane[6].dcdy = 1;
+ line->plane[6].c = 1-bbox.y0;
+ line->plane[6].ei = 0;
+ line->plane[6].eo = 1;
+
+ line->plane[7].dcdx = 0;
+ line->plane[7].dcdy = -1;
+ line->plane[7].c = bbox.y1+1;
+ line->plane[7].ei = -1;
+ line->plane[7].eo = 0;
+ }
+
+ lp_setup_bin_triangle(setup, line, &bbox, nr_planes);
+}
+
+
+void lp_setup_choose_line( struct lp_setup_context *setup )
+{
+ setup->line = lp_setup_line;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index 9f69e6c5ce..6ae318d328 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2010, VMware Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -30,17 +30,299 @@
*/
#include "lp_setup_context.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_perf.h"
+#include "lp_setup_context.h"
+#include "lp_rast.h"
+#include "lp_state_fs.h"
+#include "tgsi/tgsi_scan.h"
+
+#define NUM_CHANNELS 4
+
+struct point_info {
+ /* x,y deltas */
+ int dy01, dy12;
+ int dx01, dx12;
+
+ const float (*v0)[4];
+};
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ */
+static void constant_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *point,
+ unsigned slot,
+ const float value,
+ unsigned i )
+{
+ point->inputs.a0[slot][i] = value;
+ point->inputs.dadx[slot][i] = 0.0f;
+ point->inputs.dady[slot][i] = 0.0f;
+}
+
+static void perspective_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *point,
+ const struct point_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ if (i == 0) {
+ float dadx = FIXED_ONE / (float)info->dx12;
+ float dady = 0.0f;
+ point->inputs.dadx[slot][i] = dadx;
+ point->inputs.dady[slot][i] = dady;
+ point->inputs.a0[slot][i] = (0.5 -
+ (dadx * ((float)info->v0[0][0] - setup->pixel_offset) +
+ dady * ((float)info->v0[0][1] - setup->pixel_offset)));
+ }
+
+ else if (i == 1) {
+ float dadx = 0.0f;
+ float dady = FIXED_ONE / (float)info->dx12;
+
+ point->inputs.dadx[slot][i] = dadx;
+ point->inputs.dady[slot][i] = dady;
+ point->inputs.a0[slot][i] = (0.5 -
+ (dadx * ((float)info->v0[0][0] - setup->pixel_offset) +
+ dady * ((float)info->v0[0][1] - setup->pixel_offset)));
+ }
+
+ else if (i == 2) {
+ point->inputs.a0[slot][i] = 0.0f;
+ point->inputs.dadx[slot][i] = 0.0f;
+ point->inputs.dady[slot][i] = 0.0f;
+ }
+
+ else if (i == 3) {
+ point->inputs.a0[slot][i] = 1.0f;
+ point->inputs.dadx[slot][i] = 0.0f;
+ point->inputs.dady[slot][i] = 0.0f;
+ }
+
+}
+
+
+/**
+ * Special coefficient setup for gl_FragCoord.
+ * X and Y are trivial
+ * Z and W are copied from position_coef which should have already been computed.
+ * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
+ */
+static void
+setup_point_fragcoord_coef(struct lp_setup_context *setup,
+ struct lp_rast_triangle *point,
+ const struct point_info *info,
+ unsigned slot,
+ unsigned usage_mask)
+{
+ /*X*/
+ if (usage_mask & TGSI_WRITEMASK_X) {
+ point->inputs.a0[slot][0] = 0.0;
+ point->inputs.dadx[slot][0] = 1.0;
+ point->inputs.dady[slot][0] = 0.0;
+ }
+
+ /*Y*/
+ if (usage_mask & TGSI_WRITEMASK_Y) {
+ point->inputs.a0[slot][1] = 0.0;
+ point->inputs.dadx[slot][1] = 0.0;
+ point->inputs.dady[slot][1] = 1.0;
+ }
+
+ /*Z*/
+ if (usage_mask & TGSI_WRITEMASK_Z) {
+ constant_coef(setup, point, slot, info->v0[0][2], 2);
+ }
+
+ /*W*/
+ if (usage_mask & TGSI_WRITEMASK_W) {
+ constant_coef(setup, point, slot, info->v0[0][3], 3);
+ }
+}
+
+/**
+ * Compute the point->coef[] array dadx, dady, a0 values.
+ */
+static void
+setup_point_coefficients( struct lp_setup_context *setup,
+ struct lp_rast_triangle *point,
+ const struct point_info *info)
+{
+ unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
+ unsigned slot;
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+ unsigned vert_attr = setup->fs.input[slot].src_index;
+ unsigned usage_mask = setup->fs.input[slot].usage_mask;
+ unsigned i;
+
+ switch (setup->fs.input[slot].interp) {
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0, so we need to ensure that the usage mask covers all
+ * usages.
+ */
+ fragcoord_usage_mask |= usage_mask;
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ /* For point sprite textures */
+ if (setup->fs.current.variant->shader->info.input_semantic_name[slot]
+ == TGSI_SEMANTIC_GENERIC)
+ {
+ int index = setup->fs.current.variant->shader->info.input_semantic_index[slot];
+
+ if (setup->sprite & (1 << index)) {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ perspective_coef(setup, point, info, slot+1, vert_attr, i);
+ fragcoord_usage_mask |= TGSI_WRITEMASK_W;
+ break;
+ }
+ }
+
+ /* Otherwise fallthrough */
+ default:
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (usage_mask & (1 << i))
+ constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i);
+ }
+ }
+ }
-static void point_nop( struct lp_setup_context *setup,
- const float (*v0)[4] )
+ /* The internal position input is in slot zero:
+ */
+ setup_point_fragcoord_coef(setup, point, info, 0,
+ fragcoord_usage_mask);
+}
+
+static INLINE int
+subpixel_snap(float a)
{
+ return util_iround(FIXED_ONE * a);
+}
+
+
+static void lp_setup_point( struct lp_setup_context *setup,
+ const float (*v0)[4] )
+{
+ /* x/y positions in fixed point */
+ const int sizeAttr = setup->psize;
+ const float size
+ = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0]
+ : setup->point_size;
+
+ /* Point size as fixed point integer, remove rounding errors
+ * and gives minimum width for very small points
+ */
+ int fixed_width = MAX2(FIXED_ONE,
+ (subpixel_snap(size) + FIXED_ONE/2 - 1) & ~(FIXED_ONE-1));
+
+ const int x0 = subpixel_snap(v0[0][0] - setup->pixel_offset) - fixed_width/2;
+ const int y0 = subpixel_snap(v0[0][1] - setup->pixel_offset) - fixed_width/2;
+
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_rast_triangle *point;
+ unsigned bytes;
+ struct u_rect bbox;
+ unsigned nr_planes = 4;
+ struct point_info info;
+
+
+ /* Bounding rectangle (in pixels) */
+ {
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding (applied to Y, as for lines and tris).
+ */
+ int adj = (setup->pixel_offset != 0) ? 1 : 0;
+
+ bbox.x0 = (x0 + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.x1 = (x0 + fixed_width + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.y0 = (y0 + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ bbox.y1 = (y0 + fixed_width + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+
+ /* Inclusive coordinates:
+ */
+ bbox.x1--;
+ bbox.y1--;
+ }
+
+ if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
+ if (0) debug_printf("offscreen\n");
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ u_rect_find_intersection(&setup->draw_region, &bbox);
+
+ point = lp_setup_alloc_triangle(scene,
+ setup->fs.nr_inputs,
+ nr_planes,
+ &bytes);
+ if (!point)
+ return;
+
+#ifdef DEBUG
+ point->v[0][0] = v0[0][0];
+ point->v[0][1] = v0[0][1];
+#endif
+
+ info.v0 = v0;
+ info.dx01 = 0;
+ info.dx12 = fixed_width;
+ info.dy01 = fixed_width;
+ info.dy12 = 0;
+
+ /* Setup parameter interpolants:
+ */
+ setup_point_coefficients(setup, point, &info);
+
+ point->inputs.facing = 1.0F;
+ point->inputs.state = setup->fs.stored;
+
+ {
+ point->plane[0].dcdx = -1;
+ point->plane[0].dcdy = 0;
+ point->plane[0].c = 1-bbox.x0;
+ point->plane[0].ei = 0;
+ point->plane[0].eo = 1;
+
+ point->plane[1].dcdx = 1;
+ point->plane[1].dcdy = 0;
+ point->plane[1].c = bbox.x1+1;
+ point->plane[1].ei = -1;
+ point->plane[1].eo = 0;
+
+ point->plane[2].dcdx = 0;
+ point->plane[2].dcdy = 1;
+ point->plane[2].c = 1-bbox.y0;
+ point->plane[2].ei = 0;
+ point->plane[2].eo = 1;
+
+ point->plane[3].dcdx = 0;
+ point->plane[3].dcdy = -1;
+ point->plane[3].c = bbox.y1+1;
+ point->plane[3].ei = -1;
+ point->plane[3].eo = 0;
+ }
+
+ lp_setup_bin_triangle(setup, point, &bbox, nr_planes);
}
void
lp_setup_choose_point( struct lp_setup_context *setup )
{
- setup->point = point_nop;
+ setup->point = lp_setup_point;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 393533ebee..0180d95090 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -31,35 +31,15 @@
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_rect.h"
#include "lp_perf.h"
#include "lp_setup_context.h"
+#include "lp_setup_coef.h"
#include "lp_rast.h"
#include "lp_state_fs.h"
#define NUM_CHANNELS 4
-struct tri_info {
-
- float pixel_offset;
-
- /* fixed point vertex coordinates */
- int x[3];
- int y[3];
-
- /* float x,y deltas - all from the original coordinates
- */
- float dy01, dy20;
- float dx01, dx20;
- float oneoverarea;
-
- const float (*v0)[4];
- const float (*v1)[4];
- const float (*v2)[4];
-
- boolean frontfacing;
-};
-
-
static INLINE int
@@ -76,247 +56,6 @@ fixed_to_float(int a)
-/**
- * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
- */
-static void constant_coef( struct lp_rast_triangle *tri,
- unsigned slot,
- const float value,
- unsigned i )
-{
- tri->inputs.a0[slot][i] = value;
- tri->inputs.dadx[slot][i] = 0.0f;
- tri->inputs.dady[slot][i] = 0.0f;
-}
-
-
-
-static void linear_coef( struct lp_rast_triangle *tri,
- const struct tri_info *info,
- unsigned slot,
- unsigned vert_attr,
- unsigned i)
-{
- float a0 = info->v0[vert_attr][i];
- float a1 = info->v1[vert_attr][i];
- float a2 = info->v2[vert_attr][i];
-
- float da01 = a0 - a1;
- float da20 = a2 - a0;
- float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
- float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
-
- tri->inputs.dadx[slot][i] = dadx;
- tri->inputs.dady[slot][i] = dady;
-
- /* calculate a0 as the value which would be sampled for the
- * fragment at (0,0), taking into account that we want to sample at
- * pixel centers, in other words (0.5, 0.5).
- *
- * this is neat but unfortunately not a good way to do things for
- * triangles with very large values of dadx or dady as it will
- * result in the subtraction and re-addition from a0 of a very
- * large number, which means we'll end up loosing a lot of the
- * fractional bits and precision from a0. the way to fix this is
- * to define a0 as the sample at a pixel center somewhere near vmin
- * instead - i'll switch to this later.
- */
- tri->inputs.a0[slot][i] = (a0 -
- (dadx * (info->v0[0][0] - info->pixel_offset) +
- dady * (info->v0[0][1] - info->pixel_offset)));
-}
-
-
-/**
- * Compute a0, dadx and dady for a perspective-corrected interpolant,
- * for a triangle.
- * We basically multiply the vertex value by 1/w before computing
- * the plane coefficients (a0, dadx, dady).
- * Later, when we compute the value at a particular fragment position we'll
- * divide the interpolated value by the interpolated W at that fragment.
- */
-static void perspective_coef( struct lp_rast_triangle *tri,
- const struct tri_info *info,
- unsigned slot,
- unsigned vert_attr,
- unsigned i)
-{
- /* premultiply by 1/w (v[0][3] is always 1/w):
- */
- float a0 = info->v0[vert_attr][i] * info->v0[0][3];
- float a1 = info->v1[vert_attr][i] * info->v1[0][3];
- float a2 = info->v2[vert_attr][i] * info->v2[0][3];
- float da01 = a0 - a1;
- float da20 = a2 - a0;
- float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
- float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
-
- tri->inputs.dadx[slot][i] = dadx;
- tri->inputs.dady[slot][i] = dady;
- tri->inputs.a0[slot][i] = (a0 -
- (dadx * (info->v0[0][0] - info->pixel_offset) +
- dady * (info->v0[0][1] - info->pixel_offset)));
-}
-
-
-/**
- * Special coefficient setup for gl_FragCoord.
- * X and Y are trivial
- * Z and W are copied from position_coef which should have already been computed.
- * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
- */
-static void
-setup_fragcoord_coef(struct lp_rast_triangle *tri,
- const struct tri_info *info,
- unsigned slot,
- unsigned usage_mask)
-{
- /*X*/
- if (usage_mask & TGSI_WRITEMASK_X) {
- tri->inputs.a0[slot][0] = 0.0;
- tri->inputs.dadx[slot][0] = 1.0;
- tri->inputs.dady[slot][0] = 0.0;
- }
-
- /*Y*/
- if (usage_mask & TGSI_WRITEMASK_Y) {
- tri->inputs.a0[slot][1] = 0.0;
- tri->inputs.dadx[slot][1] = 0.0;
- tri->inputs.dady[slot][1] = 1.0;
- }
-
- /*Z*/
- if (usage_mask & TGSI_WRITEMASK_Z) {
- linear_coef(tri, info, slot, 0, 2);
- }
-
- /*W*/
- if (usage_mask & TGSI_WRITEMASK_W) {
- linear_coef(tri, info, slot, 0, 3);
- }
-}
-
-
-/**
- * Setup the fragment input attribute with the front-facing value.
- * \param frontface is the triangle front facing?
- */
-static void setup_facing_coef( struct lp_rast_triangle *tri,
- unsigned slot,
- boolean frontface,
- unsigned usage_mask)
-{
- /* convert TRUE to 1.0 and FALSE to -1.0 */
- if (usage_mask & TGSI_WRITEMASK_X)
- constant_coef( tri, slot, 2.0f * frontface - 1.0f, 0 );
-
- if (usage_mask & TGSI_WRITEMASK_Y)
- constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
-
- if (usage_mask & TGSI_WRITEMASK_Z)
- constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
-
- if (usage_mask & TGSI_WRITEMASK_W)
- constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
-}
-
-
-/**
- * Compute the tri->coef[] array dadx, dady, a0 values.
- */
-static void setup_tri_coefficients( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- const struct tri_info *info)
-{
- unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
- unsigned slot;
- unsigned i;
-
- /* setup interpolation for all the remaining attributes:
- */
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned vert_attr = setup->fs.input[slot].src_index;
- unsigned usage_mask = setup->fs.input[slot].usage_mask;
-
- switch (setup->fs.input[slot].interp) {
- case LP_INTERP_CONSTANT:
- if (setup->flatshade_first) {
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- constant_coef(tri, slot+1, info->v0[vert_attr][i], i);
- }
- else {
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- constant_coef(tri, slot+1, info->v2[vert_attr][i], i);
- }
- break;
-
- case LP_INTERP_LINEAR:
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- linear_coef(tri, info, slot+1, vert_attr, i);
- break;
-
- case LP_INTERP_PERSPECTIVE:
- for (i = 0; i < NUM_CHANNELS; i++)
- if (usage_mask & (1 << i))
- perspective_coef(tri, info, slot+1, vert_attr, i);
- fragcoord_usage_mask |= TGSI_WRITEMASK_W;
- break;
-
- case LP_INTERP_POSITION:
- /*
- * The generated pixel interpolators will pick up the coeffs from
- * slot 0, so all need to ensure that the usage mask is covers all
- * usages.
- */
- fragcoord_usage_mask |= usage_mask;
- break;
-
- case LP_INTERP_FACING:
- setup_facing_coef(tri, slot+1, info->frontfacing, usage_mask);
- break;
-
- default:
- assert(0);
- }
- }
-
- /* The internal position input is in slot zero:
- */
- setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask);
-
- if (0) {
- for (i = 0; i < NUM_CHANNELS; i++) {
- float a0 = tri->inputs.a0 [0][i];
- float dadx = tri->inputs.dadx[0][i];
- float dady = tri->inputs.dady[0][i];
-
- debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
- "xyzw"[i],
- a0, dadx, dady);
- }
-
- for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
- unsigned usage_mask = setup->fs.input[slot].usage_mask;
- for (i = 0; i < NUM_CHANNELS; i++) {
- if (usage_mask & (1 << i)) {
- float a0 = tri->inputs.a0 [1 + slot][i];
- float dadx = tri->inputs.dadx[1 + slot][i];
- float dady = tri->inputs.dady[1 + slot][i];
-
- debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
- slot,
- "xyzw"[i],
- a0, dadx, dady);
- }
- }
- }
- }
-}
-
-
@@ -329,11 +68,11 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
* \param nr_inputs number of fragment shader inputs
* \return pointer to triangle space
*/
-static INLINE struct lp_rast_triangle *
-alloc_triangle(struct lp_scene *scene,
- unsigned nr_inputs,
- unsigned nr_planes,
- unsigned *tri_size)
+struct lp_rast_triangle *
+lp_setup_alloc_triangle(struct lp_scene *scene,
+ unsigned nr_inputs,
+ unsigned nr_planes,
+ unsigned *tri_size)
{
unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
struct lp_rast_triangle *tri;
@@ -357,35 +96,71 @@ alloc_triangle(struct lp_scene *scene,
return tri;
}
+void
+lp_setup_print_vertex(struct lp_setup_context *setup,
+ const char *name,
+ const float (*v)[4])
+{
+ int i, j;
+
+ debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n",
+ name,
+ v[0][0], v[0][1], v[0][2], v[0][3]);
+
+ for (i = 0; i < setup->fs.nr_inputs; i++) {
+ const float *in = v[setup->fs.input[i].src_index];
+
+ debug_printf(" in[%d] (%s[%d]) %s%s%s%s ",
+ i,
+ name, setup->fs.input[i].src_index,
+ (setup->fs.input[i].usage_mask & 0x1) ? "x" : " ",
+ (setup->fs.input[i].usage_mask & 0x2) ? "y" : " ",
+ (setup->fs.input[i].usage_mask & 0x4) ? "z" : " ",
+ (setup->fs.input[i].usage_mask & 0x8) ? "w" : " ");
+
+ for (j = 0; j < 4; j++)
+ if (setup->fs.input[i].usage_mask & (1<<j))
+ debug_printf("%.5f ", in[j]);
+
+ debug_printf("\n");
+ }
+}
+
/**
* Print triangle vertex attribs (for debug).
*/
-static void
-print_triangle(struct lp_setup_context *setup,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4])
+void
+lp_setup_print_triangle(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
{
- uint i;
+ debug_printf("triangle\n");
- debug_printf("llvmpipe triangle\n");
- for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
- debug_printf(" v1[%d]: %f %f %f %f\n", i,
- v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
- }
- for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
- debug_printf(" v2[%d]: %f %f %f %f\n", i,
- v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
- }
- for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
- debug_printf(" v3[%d]: %f %f %f %f\n", i,
- v3[i][0], v3[i][1], v3[i][2], v3[i][3]);
+ {
+ const float ex = v0[0][0] - v2[0][0];
+ const float ey = v0[0][1] - v2[0][1];
+ const float fx = v1[0][0] - v2[0][0];
+ const float fy = v1[0][1] - v2[0][1];
+
+ /* det = cross(e,f).z */
+ const float det = ex * fy - ey * fx;
+ if (det < 0.0f)
+ debug_printf(" - ccw\n");
+ else if (det > 0.0f)
+ debug_printf(" - cw\n");
+ else
+ debug_printf(" - zero area\n");
}
+
+ lp_setup_print_vertex(setup, "v0", v0);
+ lp_setup_print_vertex(setup, "v1", v1);
+ lp_setup_print_vertex(setup, "v2", v2);
}
-lp_rast_cmd lp_rast_tri_tab[8] = {
+lp_rast_cmd lp_rast_tri_tab[9] = {
NULL, /* should be impossible */
lp_rast_triangle_1,
lp_rast_triangle_2,
@@ -393,7 +168,8 @@ lp_rast_cmd lp_rast_tri_tab[8] = {
lp_rast_triangle_4,
lp_rast_triangle_5,
lp_rast_triangle_6,
- lp_rast_triangle_7
+ lp_rast_triangle_7,
+ lp_rast_triangle_8
};
/**
@@ -403,25 +179,27 @@ lp_rast_cmd lp_rast_tri_tab[8] = {
*/
static void
do_triangle_ccw(struct lp_setup_context *setup,
+ const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4],
- const float (*v3)[4],
boolean frontfacing )
{
-
struct lp_scene *scene = lp_setup_get_current_scene(setup);
- struct lp_fragment_shader_variant *variant = setup->fs.current.variant;
struct lp_rast_triangle *tri;
- struct tri_info info;
+ int x[3];
+ int y[3];
+ float dy01, dy20;
+ float dx01, dx20;
+ float oneoverarea;
+ struct lp_tri_info info;
int area;
- int minx, maxx, miny, maxy;
- int ix0, ix1, iy0, iy1;
+ struct u_rect bbox;
unsigned tri_bytes;
int i;
int nr_planes = 3;
if (0)
- print_triangle(setup, v1, v2, v3);
+ lp_setup_print_triangle(setup, v0, v1, v2);
if (setup->scissor_test) {
nr_planes = 7;
@@ -430,38 +208,73 @@ do_triangle_ccw(struct lp_setup_context *setup,
nr_planes = 3;
}
+ /* x/y positions in fixed point */
+ x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
+ x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
+ x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
+ y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
+ y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
+ y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
+
+
+ /* Bounding rectangle (in pixels) */
+ {
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding.
+ */
+ int adj = (setup->pixel_offset != 0) ? 1 : 0;
+
+ bbox.x0 = (MIN3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.x1 = (MAX3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.y0 = (MIN3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ bbox.y1 = (MAX3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+
+ /* Inclusive coordinates:
+ */
+ bbox.x1--;
+ bbox.y1--;
+ }
+
+ if (bbox.x1 < bbox.x0 ||
+ bbox.y1 < bbox.y0) {
+ if (0) debug_printf("empty bounding box\n");
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
+ if (0) debug_printf("offscreen\n");
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ u_rect_find_intersection(&setup->draw_region, &bbox);
- tri = alloc_triangle(scene,
- setup->fs.nr_inputs,
- nr_planes,
- &tri_bytes);
+ tri = lp_setup_alloc_triangle(scene,
+ setup->fs.nr_inputs,
+ nr_planes,
+ &tri_bytes);
if (!tri)
return;
#ifdef DEBUG
- tri->v[0][0] = v1[0][0];
- tri->v[1][0] = v2[0][0];
- tri->v[2][0] = v3[0][0];
- tri->v[0][1] = v1[0][1];
- tri->v[1][1] = v2[0][1];
- tri->v[2][1] = v3[0][1];
+ tri->v[0][0] = v0[0][0];
+ tri->v[1][0] = v1[0][0];
+ tri->v[2][0] = v2[0][0];
+ tri->v[0][1] = v0[0][1];
+ tri->v[1][1] = v1[0][1];
+ tri->v[2][1] = v2[0][1];
#endif
- /* x/y positions in fixed point */
- info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset);
- info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset);
- info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset);
- info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset);
- info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset);
- info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset);
-
- tri->plane[0].dcdy = info.x[0] - info.x[1];
- tri->plane[1].dcdy = info.x[1] - info.x[2];
- tri->plane[2].dcdy = info.x[2] - info.x[0];
+ tri->plane[0].dcdy = x[0] - x[1];
+ tri->plane[1].dcdy = x[1] - x[2];
+ tri->plane[2].dcdy = x[2] - x[0];
- tri->plane[0].dcdx = info.y[0] - info.y[1];
- tri->plane[1].dcdx = info.y[1] - info.y[2];
- tri->plane[2].dcdx = info.y[2] - info.y[0];
+ tri->plane[0].dcdx = y[0] - y[1];
+ tri->plane[1].dcdx = y[1] - y[2];
+ tri->plane[2].dcdx = y[2] - y[0];
area = (tri->plane[0].dcdy * tri->plane[2].dcdx -
tri->plane[2].dcdy * tri->plane[0].dcdx);
@@ -478,57 +291,29 @@ do_triangle_ccw(struct lp_setup_context *setup,
return;
}
- /* Bounding rectangle (in pixels) */
- {
- /* Yes this is necessary to accurately calculate bounding boxes
- * with the two fill-conventions we support. GL (normally) ends
- * up needing a bottom-left fill convention, which requires
- * slightly different rounding.
- */
- int adj = (setup->pixel_offset != 0) ? 1 : 0;
-
- minx = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
- maxx = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
- miny = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
- maxy = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
- }
-
- if (setup->scissor_test) {
- minx = MAX2(minx, setup->scissor.current.minx);
- maxx = MIN2(maxx, setup->scissor.current.maxx);
- miny = MAX2(miny, setup->scissor.current.miny);
- maxy = MIN2(maxy, setup->scissor.current.maxy);
- }
- else {
- minx = MAX2(minx, 0);
- miny = MAX2(miny, 0);
- maxx = MIN2(maxx, scene->fb.width);
- maxy = MIN2(maxy, scene->fb.height);
- }
-
-
- if (miny >= maxy || minx >= maxx) {
- lp_scene_putback_data( scene, tri_bytes );
- LP_COUNT(nr_culled_tris);
- return;
- }
/*
*/
- info.pixel_offset = setup->pixel_offset;
- info.v0 = v1;
- info.v1 = v2;
- info.v2 = v3;
- info.dx01 = info.v0[0][0] - info.v1[0][0];
- info.dx20 = info.v2[0][0] - info.v0[0][0];
- info.dy01 = info.v0[0][1] - info.v1[0][1];
- info.dy20 = info.v2[0][1] - info.v0[0][1];
- info.oneoverarea = 1.0f / (info.dx01 * info.dy20 - info.dx20 * info.dy01);
+ dx01 = v0[0][0] - v1[0][0];
+ dy01 = v0[0][1] - v1[0][1];
+ dx20 = v2[0][0] - v0[0][0];
+ dy20 = v2[0][1] - v0[0][1];
+ oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
+
+ info.v0 = v0;
+ info.v1 = v1;
+ info.v2 = v2;
info.frontfacing = frontfacing;
+ info.x0_center = v0[0][0] - setup->pixel_offset;
+ info.y0_center = v0[0][1] - setup->pixel_offset;
+ info.dx01_ooa = dx01 * oneoverarea;
+ info.dx20_ooa = dx20 * oneoverarea;
+ info.dy01_ooa = dy01 * oneoverarea;
+ info.dy20_ooa = dy20 * oneoverarea;
/* Setup parameter interpolants:
*/
- setup_tri_coefficients( setup, tri, &info );
+ lp_setup_tri_coef( setup, &tri->inputs, &info );
tri->inputs.facing = frontfacing ? 1.0F : -1.0F;
tri->inputs.state = setup->fs.stored;
@@ -541,7 +326,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* half-edge constants, will be interated over the whole render
* target.
*/
- plane->c = plane->dcdx * info.x[i] - plane->dcdy * info.y[i];
+ plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
/* correct for top-left vs. bottom-left fill convention.
*
@@ -612,29 +397,43 @@ do_triangle_ccw(struct lp_setup_context *setup,
if (nr_planes == 7) {
tri->plane[3].dcdx = -1;
tri->plane[3].dcdy = 0;
- tri->plane[3].c = 1-minx;
+ tri->plane[3].c = 1-bbox.x0;
tri->plane[3].ei = 0;
tri->plane[3].eo = 1;
tri->plane[4].dcdx = 1;
tri->plane[4].dcdy = 0;
- tri->plane[4].c = maxx;
+ tri->plane[4].c = bbox.x1+1;
tri->plane[4].ei = -1;
tri->plane[4].eo = 0;
tri->plane[5].dcdx = 0;
tri->plane[5].dcdy = 1;
- tri->plane[5].c = 1-miny;
+ tri->plane[5].c = 1-bbox.y0;
tri->plane[5].ei = 0;
tri->plane[5].eo = 1;
tri->plane[6].dcdx = 0;
tri->plane[6].dcdy = -1;
- tri->plane[6].c = maxy;
+ tri->plane[6].c = bbox.y1+1;
tri->plane[6].ei = -1;
tri->plane[6].eo = 0;
}
+ lp_setup_bin_triangle( setup, tri, &bbox, nr_planes );
+}
+
+
+void
+lp_setup_bin_triangle( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ const struct u_rect *bbox,
+ int nr_planes )
+{
+ struct lp_scene *scene = setup->scene;
+ struct lp_fragment_shader_variant *variant = setup->fs.current.variant;
+ int ix0, ix1, iy0, iy1;
+ int i;
/*
* All fields of 'tri' are now set. The remaining code here is
@@ -643,10 +442,30 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* Convert to tile coordinates, and inclusive ranges:
*/
- ix0 = minx / TILE_SIZE;
- iy0 = miny / TILE_SIZE;
- ix1 = (maxx-1) / TILE_SIZE;
- iy1 = (maxy-1) / TILE_SIZE;
+ if (nr_planes == 3) {
+ int ix0 = bbox->x0 / 16;
+ int iy0 = bbox->y0 / 16;
+ int ix1 = bbox->x1 / 16;
+ int iy1 = bbox->y1 / 16;
+
+ if (iy0 == iy1 && ix0 == ix1)
+ {
+
+ /* Triangle is contained in a single 16x16 block:
+ */
+ int mask = (ix0 & 3) | ((iy0 & 3) << 4);
+
+ lp_scene_bin_command( scene, ix0/4, iy0/4,
+ lp_rast_triangle_3_16,
+ lp_rast_arg_triangle(tri, mask) );
+ return;
+ }
+ }
+
+ ix0 = bbox->x0 / TILE_SIZE;
+ iy0 = bbox->y0 / TILE_SIZE;
+ ix1 = bbox->x1 / TILE_SIZE;
+ iy1 = bbox->y1 / TILE_SIZE;
/*
* Clamp to framebuffer size
@@ -799,9 +618,10 @@ static void triangle_both( struct lp_setup_context *setup,
const float fy = v1[0][1] - v2[0][1];
/* det = cross(e,f).z */
- if (ex * fy - ey * fx < 0.0f)
+ const float det = ex * fy - ey * fx;
+ if (det < 0.0f)
triangle_ccw( setup, v0, v1, v2 );
- else
+ else if (det > 0.0f)
triangle_cw( setup, v0, v1, v2 );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 77bec4640b..edd723f65f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -74,6 +74,15 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
vs_index = draw_find_shader_output(llvmpipe->draw,
lpfs->info.input_semantic_name[i],
lpfs->info.input_semantic_index[i]);
+ if (vs_index < 0) {
+ /*
+ * This can happen with sprite coordinates - the vertex
+ * shader doesn't need to provide an output as we generate
+ * them internally. However, lets keep pretending that there
+ * is something there to not confuse other code.
+ */
+ vs_index = 0;
+ }
/* This can be pre-computed, except for flatshade:
*/
@@ -125,6 +134,17 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
inputs[i].src_index = vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
}
+
+ /* Figure out if we need pointsize as well.
+ */
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ TGSI_SEMANTIC_PSIZE, 0);
+
+ if (vs_index > 0) {
+ llvmpipe->psize_slot = vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ }
+
llvmpipe->num_inputs = lpfs->info.num_inputs;
draw_compute_vertex_size(vinfo);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index dbca49a2ef..33c1a49efe 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -808,7 +808,7 @@ generate_variant(struct llvmpipe_context *lp,
variant->list_item_local.base = variant;
variant->no = shader->variants_created++;
- memcpy(&variant->key, key, sizeof *key);
+ memcpy(&variant->key, key, shader->variant_key_size);
if (gallivm_debug & GALLIVM_DEBUG_IR) {
debug_printf("llvmpipe: Creating fragment shader #%u variant #%u:\n",
@@ -840,6 +840,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
{
struct lp_fragment_shader *shader;
+ int nr_samplers;
shader = CALLOC_STRUCT(lp_fragment_shader);
if (!shader)
@@ -854,6 +855,11 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
/* we need to keep a local copy of the tokens */
shader->base.tokens = tgsi_dup_tokens(templ->tokens);
+ nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+
+ shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
+ sampler[nr_samplers]);
+
if (LP_DEBUG & DEBUG_TGSI) {
unsigned attrib;
debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader);
@@ -921,7 +927,6 @@ static void
llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- struct pipe_fence_handle *fence = NULL;
struct lp_fragment_shader *shader = fs;
struct lp_fs_variant_list_item *li;
@@ -934,12 +939,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
* Flushing alone might not sufficient we need to wait on it too.
*/
- llvmpipe_flush(pipe, 0, &fence);
-
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
+ llvmpipe_finish(pipe, __FUNCTION__);
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
@@ -1027,7 +1027,7 @@ make_variant_key(struct llvmpipe_context *lp,
{
unsigned i;
- memset(key, 0, sizeof *key);
+ memset(key, 0, shader->variant_key_size);
if (lp->framebuffer.zsbuf) {
if (lp->depth_stencil->depth.enabled) {
@@ -1097,9 +1097,17 @@ make_variant_key(struct llvmpipe_context *lp,
}
}
- for(i = 0; i < PIPE_MAX_SAMPLERS; ++i)
- if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
- lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]);
+ /* This value will be the same for all the variants of a given shader:
+ */
+ key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+
+ for(i = 0; i < key->nr_samplers; ++i) {
+ if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
+ lp_sampler_static_state(&key->sampler[i],
+ lp->fragment_sampler_views[i],
+ lp->sampler[i]);
+ }
+ }
}
/**
@@ -1118,7 +1126,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
- if(memcmp(&li->base->key, &key, sizeof key) == 0) {
+ if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) {
variant = li->base;
break;
}
@@ -1134,19 +1142,14 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
unsigned i;
if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) {
struct pipe_context *pipe = &lp->pipe;
- struct pipe_fence_handle *fence = NULL;
/*
* XXX: we need to flush the context until we have some sort of reference
* counting in fragment shaders as they may still be binned
* Flushing alone might not be sufficient we need to wait on it too.
*/
- llvmpipe_flush(pipe, 0, &fence);
+ llvmpipe_finish(pipe, __FUNCTION__);
- if (fence) {
- pipe->screen->fence_finish(pipe->screen, fence, 0);
- pipe->screen->fence_reference(pipe->screen, &fence, NULL);
- }
for (i = 0; i < LP_MAX_SHADER_VARIANTS / 4; i++) {
struct lp_fs_variant_list_item *item = last_elem(&lp->fs_variants_list);
remove_shader_variant(lp, item->base);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 37900fc544..33c480010d 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -53,13 +53,10 @@ struct lp_fragment_shader_variant_key
struct pipe_blend_state blend;
enum pipe_format zsbuf_format;
unsigned nr_cbufs:8;
+ unsigned nr_samplers:8; /* actually derivable from just the shader */
unsigned flatshade:1;
unsigned occlusion_count:1;
- struct {
- ubyte colormask;
- } cbuf_blend[PIPE_MAX_COLOR_BUFS];
-
struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
};
@@ -97,6 +94,7 @@ struct lp_fragment_shader
struct lp_fs_variant_list_item variants;
/* For debugging/profiling purposes */
+ unsigned variant_key_size;
unsigned no;
unsigned variants_created;
unsigned variants_cached;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
index afd3e0b21c..0bad7320f3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
@@ -73,7 +73,13 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
llvmpipe->rasterizer->gl_rasterization_rules);
lp_setup_set_flatshade_first( llvmpipe->setup,
llvmpipe->rasterizer->flatshade_first);
- }
+ lp_setup_set_line_state( llvmpipe->setup,
+ llvmpipe->rasterizer->line_width);
+ lp_setup_set_point_state( llvmpipe->setup,
+ llvmpipe->rasterizer->point_size,
+ llvmpipe->rasterizer->point_size_per_vertex,
+ llvmpipe->rasterizer->sprite_coord_enable);
+ }
llvmpipe->dirty |= LP_NEW_RASTERIZER;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
index d86e66b4fb..fb29423dd3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -100,7 +100,7 @@ llvmpipe_set_index_buffer(struct pipe_context *pipe,
else
memset(&llvmpipe->index_buffer, 0, sizeof(llvmpipe->index_buffer));
- /* TODO make this more like a state */
+ draw_set_index_buffer(llvmpipe->draw, ib);
}
void
diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c
index f761e82850..63ddc669c2 100644
--- a/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -68,14 +68,16 @@ lp_resource_copy(struct pipe_context *pipe,
0, /* flush_flags */
FALSE, /* read_only */
TRUE, /* cpu_access */
- FALSE); /* do_not_block */
+ FALSE,
+ "blit dst"); /* do_not_block */
llvmpipe_flush_resource(pipe,
src, subsrc.face, subsrc.level,
0, /* flush_flags */
TRUE, /* read_only */
TRUE, /* cpu_access */
- FALSE); /* do_not_block */
+ FALSE,
+ "blit src"); /* do_not_block */
/*
printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n",
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 25112c10a6..5832ea2744 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -67,6 +67,7 @@ resource_is_texture(const struct pipe_resource *resource)
return FALSE;
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_CUBE:
return TRUE;
@@ -583,7 +584,8 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
0, /* flush_flags */
read_only,
TRUE, /* cpu_access */
- do_not_block)) {
+ do_not_block,
+ "transfer dest")) {
/*
* It would have blocked, but state tracker requested no to.
*/