summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/llvmpipe
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2010-07-26 17:47:59 -0700
committer Eric Anholt <eric@anholt.net> 2010-07-26 17:53:27 -0700
commit afe125e0a18ac3886c45c7e6b02b122fb2d327b5 (patch)
tree 78621707e71154c0b388b0baacffc26432b7e992 /src/gallium/drivers/llvmpipe
parent d64343f1ae84979bd154475badf11af8a9bfc2eb (diff)
parent 5403ca79b225605c79f49866a6497c97da53be3b (diff)
Merge remote branch 'origin/master' into glsl2
This pulls in multiple i965 driver fixes which will help ensure better testing coverage during development, and also gets past the conflicts of the src/mesa/shader -> src/mesa/program move. Conflicts: src/mesa/Makefile src/mesa/main/shaderapi.c src/mesa/main/shaderobj.h
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
-rw-r--r--src/gallium/drivers/llvmpipe/.gitignore2
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile5
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c41
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_draw_arrays.c132
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.h18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c14
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c16
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h26
-rw-r--r--src/gallium/drivers/llvmpipe/lp_memory.c45
-rw-r--r--src/gallium/drivers/llvmpipe/lp_memory.h40
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.c39
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_query.c32
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c208
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h85
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_priv.h87
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c179
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h238
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.c61
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c26
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c148
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c616
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_vbuf.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c286
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c84
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_so.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_surface.c46
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c240
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_round.c277
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_sincos.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c147
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.h11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_image.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_shuffle_mask.py32
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.py247
48 files changed, 2334 insertions, 1186 deletions
diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore
index a1b6f56e0d..6ebd2b8a63 100644
--- a/src/gallium/drivers/llvmpipe/.gitignore
+++ b/src/gallium/drivers/llvmpipe/.gitignore
@@ -3,3 +3,5 @@ lp_test_blend
lp_test_conv
lp_test_format
lp_test_printf
+lp_test_round
+lp_test_sincos
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index ee28179c30..2892b62920 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -18,6 +18,7 @@ C_SOURCES = \
lp_fence.c \
lp_flush.c \
lp_jit.c \
+ lp_memory.c \
lp_perf.c \
lp_query.c \
lp_rast.c \
@@ -53,8 +54,12 @@ PROGS := lp_test_format \
lp_test_blend \
lp_test_conv \
lp_test_printf \
+ lp_test_round \
lp_test_sincos
+# Need this for the lp_test_*.o files
+CLEAN_EXTRA = *.o
+
lp_test_sincos.o : sse_mathfun.h
PROGS_DEPS := ../../auxiliary/libgallium.a
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index a1ef71da89..fd6ba1561e 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -1,3 +1,5 @@
+import distutils.version
+
Import('*')
if not env['llvm']:
@@ -23,6 +25,16 @@ env.Depends('lp_tile_soa.c', [
'#src/gallium/auxiliary/util/u_format_pack.py',
])
+
+# Only enable SSSE3 for lp_tile_soa_sse3.c
+ssse3_env = env.Clone()
+if env['gcc'] \
+ and distutils.version.LooseVersion(env['CCVERSION']) >= distutils.version.LooseVersion('4.3') \
+ and env['machine'] in ('x86', 'x86_64') :
+ ssse3_env.Append(CCFLAGS = ['-mssse3'])
+lp_tile_soa_os = ssse3_env.SharedObject('lp_tile_soa.c')
+
+
llvmpipe = env.ConvenienceLibrary(
target = 'llvmpipe',
source = [
@@ -38,6 +50,7 @@ llvmpipe = env.ConvenienceLibrary(
'lp_fence.c',
'lp_flush.c',
'lp_jit.c',
+ 'lp_memory.c',
'lp_perf.c',
'lp_query.c',
'lp_rast.c',
@@ -65,7 +78,7 @@ llvmpipe = env.ConvenienceLibrary(
'lp_tex_sample.c',
'lp_texture.c',
'lp_tile_image.c',
- 'lp_tile_soa.c',
+ lp_tile_soa_os,
])
@@ -82,6 +95,9 @@ if env['platform'] != 'embedded':
'sincos',
]
+ if not msvc:
+ tests.append('round')
+
for test in tests:
target = env.Program(
target = 'lp_test_' + test,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index 70d08e71f6..09e9833057 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -190,30 +190,27 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
enum lp_build_blend_swizzle rgb_swizzle,
unsigned alpha_swizzle)
{
- if(rgb == alpha) {
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
- return rgb;
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
- return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
+ LLVMValueRef swizzled_rgb;
+
+ switch (rgb_swizzle) {
+ case LP_BUILD_BLEND_SWIZZLE_RGBA:
+ swizzled_rgb = rgb;
+ break;
+ case LP_BUILD_BLEND_SWIZZLE_AAAA:
+ swizzled_rgb = lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
+ break;
+ default:
+ assert(0);
+ swizzled_rgb = bld->base.undef;
}
- else {
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
- boolean cond[4] = {0, 0, 0, 0};
- cond[alpha_swizzle] = 1;
- return lp_build_select_aos(&bld->base, alpha, rgb, cond);
- }
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
- unsigned char swizzle[4];
- swizzle[0] = alpha_swizzle;
- swizzle[1] = alpha_swizzle;
- swizzle[2] = alpha_swizzle;
- swizzle[3] = alpha_swizzle;
- swizzle[alpha_swizzle] += 4;
- return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
- }
+
+ if (rgb != alpha) {
+ boolean cond[4] = {0, 0, 0, 0};
+ cond[alpha_swizzle] = 1;
+ swizzled_rgb = lp_build_select_aos(&bld->base, alpha, swizzled_rgb, cond);
}
- assert(0);
- return bld->base.undef;
+
+ return swizzled_rgb;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 90d2b26f9f..78744da500 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -261,7 +261,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
const unsigned interp = bld->interp[attrib];
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
if(mask & (1 << chan)) {
- LLVMValueRef a = coeff_bld->undef;
+ LLVMValueRef a;
if (interp == LP_INTERP_CONSTANT ||
interp == LP_INTERP_FACING) {
a = bld->a[attrib][chan];
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 986e604ce7..b2643ab33c 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -83,6 +83,7 @@ struct llvmpipe_context {
int so_count[PIPE_MAX_SO_BUFFERS];
int num_buffers;
} so_target;
+ struct pipe_resource *mapped_vs_tex[PIPE_MAX_VERTEX_SAMPLERS];
unsigned num_samplers;
unsigned num_fragment_sampler_views;
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 98780d7631..625d0c8a8c 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -43,18 +43,23 @@
/**
- * Draw vertex arrays, with optional indexing.
+ * Draw vertex arrays, with optional indexing, optional instancing.
+ * All the other drawing functions are implemented in terms of this function.
* Basically, map the vertex buffers (and drawing surfaces), then hand off
* the drawing to the 'draw' module.
*/
static void
-llvmpipe_draw_range_elements(struct pipe_context *pipe,
- struct pipe_resource *indexBuffer,
- unsigned indexSize,
- int indexBias,
- unsigned min_index,
- unsigned max_index,
- unsigned mode, unsigned start, unsigned count)
+llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe,
+ struct pipe_resource *indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
{
struct llvmpipe_context *lp = llvmpipe_context(pipe);
struct draw_context *draw = lp->draw;
@@ -74,9 +79,11 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
/* Map index buffer, if present */
if (indexBuffer) {
void *mapped_indexes = llvmpipe_resource_data(indexBuffer);
- draw_set_mapped_element_buffer_range(draw, indexSize, indexBias,
- min_index,
- max_index,
+ draw_set_mapped_element_buffer_range(draw,
+ indexSize,
+ indexBias,
+ minIndex,
+ maxIndex,
mapped_indexes);
}
else {
@@ -84,9 +91,13 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
draw_set_mapped_element_buffer_range(draw, 0, 0, start,
start + count - 1, NULL);
}
+ llvmpipe_prepare_vertex_sampling(lp,
+ lp->num_vertex_sampler_views,
+ lp->vertex_sampler_views);
/* draw! */
- draw_arrays(draw, mode, start, count);
+ draw_arrays_instanced(draw, mode, start, count,
+ startInstance, instanceCount);
/*
* unmap vertex/index buffers
@@ -97,6 +108,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
if (indexBuffer) {
draw_set_mapped_element_buffer(draw, 0, 0, NULL);
}
+ llvmpipe_cleanup_vertex_sampling(lp);
/*
* TODO: Flush only when a user vertex/index buffer is present
@@ -108,24 +120,102 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
static void
+llvmpipe_draw_arrays_instanced(struct pipe_context *pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{
+ llvmpipe_draw_range_elements_instanced(pipe,
+ NULL, /* no indexBuffer */
+ 0, 0, /* indexSize, indexBias */
+ 0, ~0, /* minIndex, maxIndex */
+ mode,
+ start,
+ count,
+ startInstance,
+ instanceCount);
+}
+
+
+static void
+llvmpipe_draw_elements_instanced(struct pipe_context *pipe,
+ struct pipe_resource *indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned mode,
+ unsigned start,
+ unsigned count,
+ unsigned startInstance,
+ unsigned instanceCount)
+{
+ llvmpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize, indexBias,
+ 0, ~0, /* minIndex, maxIndex */
+ mode,
+ start,
+ count,
+ startInstance,
+ instanceCount);
+}
+
+
+static void
llvmpipe_draw_elements(struct pipe_context *pipe,
struct pipe_resource *indexBuffer,
unsigned indexSize,
int indexBias,
- unsigned mode, unsigned start, unsigned count)
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ llvmpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize, indexBias,
+ 0, 0xffffffff, /* min, maxIndex */
+ mode, start, count,
+ 0, /* startInstance */
+ 1); /* instanceCount */
+}
+
+
+static void
+llvmpipe_draw_range_elements(struct pipe_context *pipe,
+ struct pipe_resource *indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
{
- llvmpipe_draw_range_elements( pipe, indexBuffer,
- indexSize, indexBias,
- 0, 0xffffffff,
- mode, start, count );
+ llvmpipe_draw_range_elements_instanced(pipe,
+ indexBuffer,
+ indexSize, indexBias,
+ min_index, max_index,
+ mode, start, count,
+ 0, /* startInstance */
+ 1); /* instanceCount */
}
static void
-llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
- unsigned start, unsigned count)
+llvmpipe_draw_arrays(struct pipe_context *pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
{
- llvmpipe_draw_elements(pipe, NULL, 0, 0, mode, start, count);
+ llvmpipe_draw_range_elements_instanced(pipe,
+ NULL, /* indexBuffer */
+ 0, /* indexSize */
+ 0, /* indexBias */
+ 0, ~0, /* min, maxIndex */
+ mode, start, count,
+ 0, /* startInstance */
+ 1); /* instanceCount */
}
@@ -135,4 +225,6 @@ llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe)
llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays;
llvmpipe->pipe.draw_elements = llvmpipe_draw_elements;
llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements;
+ llvmpipe->pipe.draw_arrays_instanced = llvmpipe_draw_arrays_instanced;
+ llvmpipe->pipe.draw_elements_instanced = llvmpipe_draw_elements_instanced;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c
index 75d8d2b825..f9805e5d68 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.c
+++ b/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -28,7 +28,6 @@
#include "pipe/p_screen.h"
#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "lp_debug.h"
#include "lp_fence.h"
@@ -59,7 +58,7 @@ lp_fence_create(unsigned rank)
/** Destroy a fence. Called when refcount hits zero. */
-static void
+void
lp_fence_destroy(struct lp_fence *fence)
{
pipe_mutex_destroy(fence->mutex);
@@ -77,12 +76,10 @@ llvmpipe_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
- struct lp_fence *old = (struct lp_fence *) *ptr;
+ struct lp_fence **old = (struct lp_fence **) ptr;
struct lp_fence *f = (struct lp_fence *) fence;
- if (pipe_reference(&old->reference, &f->reference)) {
- lp_fence_destroy(old);
- }
+ lp_fence_reference(old, f);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h
index d9270f5784..13358fb99f 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.h
+++ b/src/gallium/drivers/llvmpipe/lp_fence.h
@@ -32,6 +32,7 @@
#include "os/os_thread.h"
#include "pipe/p_state.h"
+#include "util/u_inlines.h"
struct pipe_screen;
@@ -61,4 +62,21 @@ void
llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
+void
+lp_fence_destroy(struct lp_fence *fence);
+
+static INLINE void
+lp_fence_reference(struct lp_fence **ptr,
+ struct lp_fence *f)
+{
+ struct lp_fence *old = *ptr;
+
+ if (pipe_reference(&old->reference, &f->reference)) {
+ lp_fence_destroy(old);
+ }
+
+ *ptr = f;
+}
+
+
#endif /* LP_FENCE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index 0cd288bb73..845292f4ab 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -40,27 +40,19 @@
/**
* \param flags bitmask of PIPE_FLUSH_x flags
- * \param fence if non-null, returns pointer to a fench which can be waited on
+ * \param fence if non-null, returns pointer to a fence which can be waited on
*/
void
llvmpipe_flush( struct pipe_context *pipe,
- unsigned flags,
+ unsigned flags,
struct pipe_fence_handle **fence )
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
draw_flush(llvmpipe->draw);
- if (fence) {
- /* if we're going to flush the setup/rasterization modules, emit
- * a fence.
- * XXX this (and the code below) may need fine tuning...
- */
- *fence = lp_setup_fence( llvmpipe->setup );
- }
-
/* ask the setup module to flush */
- lp_setup_flush(llvmpipe->setup, flags);
+ lp_setup_flush(llvmpipe->setup, flags, fence);
/* Enable to dump BMPs of the color/depth buffers each frame */
if (0) {
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 23aa34ddec..8e6dfb293d 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -103,10 +103,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatType();
elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = LLVMInt32Type();
elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32Type();
- elem_types[LP_JIT_CTX_SCISSOR_XMIN] = LLVMFloatType();
- elem_types[LP_JIT_CTX_SCISSOR_YMIN] = LLVMFloatType();
- elem_types[LP_JIT_CTX_SCISSOR_XMAX] = LLVMFloatType();
- elem_types[LP_JIT_CTX_SCISSOR_YMAX] = LLVMFloatType();
elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8Type(), 0);
elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
PIPE_MAX_SAMPLERS);
@@ -125,18 +121,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back,
screen->target, context_type,
LP_JIT_CTX_STENCIL_REF_BACK);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmin,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_XMIN);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymin,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_YMIN);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmax,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_XMAX);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymax,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_YMAX);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
screen->target, context_type,
LP_JIT_CTX_BLEND_COLOR);
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 8d06e65725..c94189413a 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -89,9 +89,6 @@ struct lp_jit_context
uint32_t stencil_ref_front, stencil_ref_back;
- /** floats, not ints */
- float scissor_xmin, scissor_ymin, scissor_xmax, scissor_ymax;
-
/* FIXME: store (also?) in floats */
uint8_t *blend_color;
@@ -108,10 +105,6 @@ enum {
LP_JIT_CTX_ALPHA_REF,
LP_JIT_CTX_STENCIL_REF_FRONT,
LP_JIT_CTX_STENCIL_REF_BACK,
- LP_JIT_CTX_SCISSOR_XMIN,
- LP_JIT_CTX_SCISSOR_YMIN,
- LP_JIT_CTX_SCISSOR_XMAX,
- LP_JIT_CTX_SCISSOR_YMAX,
LP_JIT_CTX_BLEND_COLOR,
LP_JIT_CTX_TEXTURES,
LP_JIT_CTX_COUNT
@@ -130,18 +123,6 @@ enum {
#define lp_jit_context_stencil_ref_back_value(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back")
-#define lp_jit_context_scissor_xmin_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMIN, "scissor_xmin")
-
-#define lp_jit_context_scissor_ymin_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMIN, "scissor_ymin")
-
-#define lp_jit_context_scissor_xmax_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMAX, "scissor_xmax")
-
-#define lp_jit_context_scissor_ymax_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMAX, "scissor_ymax")
-
#define lp_jit_context_blend_color(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color")
@@ -160,12 +141,7 @@ typedef void
const void *dady,
uint8_t **color,
void *depth,
- const int32_t c1,
- const int32_t c2,
- const int32_t c3,
- const int32_t *step1,
- const int32_t *step2,
- const int32_t *step3,
+ uint32_t mask,
uint32_t *counter);
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.c b/src/gallium/drivers/llvmpipe/lp_memory.c
new file mode 100644
index 0000000000..0f55d4a80a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_memory.c
@@ -0,0 +1,45 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_debug.h"
+#include "lp_limits.h"
+#include "lp_memory.h"
+
+/**
+ * 32bpp RGBA swizzled tiles. One for each thread and each
+ * possible colorbuf. Adds up to quite a bit 8*8*64*64*4 == 1MB.
+ * Several schemes exist to reduce this, such as scaling back the
+ * number of threads or using a smaller tilesize when multiple
+ * colorbuffers are bound.
+ */
+PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
+
+
+/* A single dummy tile used in a couple of out-of-memory situations.
+ */
+PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
+
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.h b/src/gallium/drivers/llvmpipe/lp_memory.h
new file mode 100644
index 0000000000..f7418f5e08
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_memory.h
@@ -0,0 +1,40 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_MEMORY_H
+#define LP_MEMORY_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "lp_limits.h"
+
+extern PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
+
+extern PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
+
+#endif /* LP_MEMORY_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c
index a316597675..083e7e30a5 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.c
+++ b/src/gallium/drivers/llvmpipe/lp_perf.c
@@ -46,10 +46,10 @@ lp_print_counters(void)
{
if (LP_DEBUG & DEBUG_COUNTERS) {
unsigned total_64, total_16, total_4;
- float p1, p2, p3;
+ float p1, p2, p3, p4;
- debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
- debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
+ debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
+ debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
total_64 = (lp_count.nr_empty_64 +
lp_count.nr_fully_covered_64 +
@@ -58,10 +58,13 @@ lp_print_counters(void)
p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64;
p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64;
p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64;
+ p4 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64;
- debug_printf("llvmpipe: nr_empty_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
- debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
- debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
+ debug_printf("llvmpipe: nr_64x64: %9u\n", total_64);
+ debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
+ debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p4, total_64);
+ debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
+ debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
total_16 = (lp_count.nr_empty_16 +
lp_count.nr_fully_covered_16 +
@@ -71,25 +74,27 @@ lp_print_counters(void)
p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16;
p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16;
- debug_printf("llvmpipe: nr_empty_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
- debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16);
- debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
+ debug_printf("llvmpipe: nr_16x16: %9u\n", total_16);
+ debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16);
+ debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
+ debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4);
p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4;
p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4;
- debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
- debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4);
+ debug_printf("llvmpipe: nr_4x4: %9u\n", total_4);
+ debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
+ debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4);
- debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear);
- debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load);
- debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store);
+ debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear);
+ debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load);
+ debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store);
- debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles);
- debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0);
- debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles);
+ debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles);
+ debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0);
+ debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles);
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h
index a9629dae3c..4774f64550 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.h
+++ b/src/gallium/drivers/llvmpipe/lp_perf.h
@@ -44,6 +44,7 @@ struct lp_counters
unsigned nr_empty_64;
unsigned nr_fully_covered_64;
unsigned nr_partially_covered_64;
+ unsigned nr_shade_opaque_64;
unsigned nr_empty_16;
unsigned nr_fully_covered_16;
unsigned nr_partially_covered_16;
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
index c902c04684..02eeaf6487 100644
--- a/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -48,7 +48,7 @@ static struct llvmpipe_query *llvmpipe_query( struct pipe_query *p )
static struct pipe_query *
llvmpipe_create_query(struct pipe_context *pipe,
- unsigned type)
+ unsigned type)
{
struct llvmpipe_query *pq;
@@ -67,6 +67,16 @@ static void
llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
{
struct llvmpipe_query *pq = llvmpipe_query(q);
+ /* query might still be in process if we never waited for the result */
+ if (!pq->done) {
+ struct pipe_fence_handle *fence = NULL;
+ llvmpipe_flush(pipe, 0, &fence);
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+ }
+
pipe_mutex_destroy(pq->mutex);
FREE(pq);
}
@@ -74,16 +84,26 @@ llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
static boolean
llvmpipe_get_query_result(struct pipe_context *pipe,
- struct pipe_query *q,
- boolean wait,
- void *vresult)
+ struct pipe_query *q,
+ boolean wait,
+ void *vresult)
{
- struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
struct llvmpipe_query *pq = llvmpipe_query(q);
uint64_t *result = (uint64_t *)vresult;
if (!pq->done) {
- lp_setup_flush(llvmpipe->setup, 0);
+ if (wait) {
+ struct pipe_fence_handle *fence = NULL;
+ llvmpipe_flush(pipe, 0, &fence);
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+ }
+ /* this is a bit inconsistent but should be ok */
+ else {
+ llvmpipe_flush(pipe, 0, NULL);
+ }
}
if (pq->done) {
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 50e44dcb2b..654f4ea48e 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -28,6 +28,7 @@
#include <limits.h>
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_rect.h"
#include "util/u_surface.h"
#include "lp_scene_queue.h"
@@ -66,7 +67,7 @@ lp_rast_begin( struct lp_rasterizer *rast,
cbuf->level,
cbuf->zslice,
LP_TEX_USAGE_READ_WRITE,
- LP_TEX_LAYOUT_NONE);
+ LP_TEX_LAYOUT_LINEAR);
}
if (fb->zsbuf) {
@@ -81,7 +82,6 @@ lp_rast_begin( struct lp_rasterizer *rast,
zsbuf->zslice,
LP_TEX_USAGE_READ_WRITE,
LP_TEX_LAYOUT_NONE);
- assert(rast->zsbuf.map);
}
lp_scene_bin_iter_begin( scene );
@@ -137,7 +137,6 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
struct lp_rasterizer *rast = task->rast;
struct lp_scene *scene = rast->curr_scene;
enum lp_texture_usage usage;
- unsigned buf;
LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
@@ -147,24 +146,8 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
task->x = x;
task->y = y;
- if (scene->has_color_clear)
- usage = LP_TEX_USAGE_WRITE_ALL;
- else
- usage = LP_TEX_USAGE_READ_WRITE;
-
- /* get pointers to color tile(s) */
- for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
- struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
- struct llvmpipe_resource *lpt;
- assert(cbuf);
- lpt = llvmpipe_resource(cbuf->texture);
- task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
- cbuf->face + cbuf->zslice,
- cbuf->level,
- usage,
- x, y);
- assert(task->color_tiles[buf]);
- }
+ /* reset pointers to color tile(s) */
+ memset(task->color_tiles, 0, sizeof(task->color_tiles));
/* get pointer to depth/stencil tile */
{
@@ -188,7 +171,7 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
/* Get actual pointer to the tile data. Note that depth/stencil
* data is tiled differently than color data.
*/
- task->depth_tile = lp_rast_get_depth_block_pointer(rast, x, y);
+ task->depth_tile = lp_rast_get_depth_block_pointer(task, x, y);
assert(task->depth_tile);
}
@@ -223,7 +206,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
clear_color[2] == clear_color[3]) {
/* clear to grayscale value {x, x, x, x} */
for (i = 0; i < rast->state.nr_cbufs; i++) {
- uint8_t *ptr = task->color_tiles[i];
+ uint8_t *ptr =
+ lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
}
}
@@ -235,7 +219,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
*/
const unsigned chunk = TILE_SIZE / 4;
for (i = 0; i < rast->state.nr_cbufs; i++) {
- uint8_t *c = task->color_tiles[i];
+ uint8_t *c =
+ lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
unsigned j;
for (j = 0; j < 4 * TILE_SIZE; j++) {
@@ -286,8 +271,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
dst = task->depth_tile;
- assert(dst == lp_rast_get_depth_block_pointer(rast, task->x, task->y));
-
switch (block_size) {
case 1:
memset(dst, (uint8_t) clear_value, height * width);
@@ -376,8 +359,8 @@ lp_rast_load_color(struct lp_rasterizer_task *task,
* This is a bin command which is stored in all bins.
*/
void
-lp_rast_store_color( struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+lp_rast_store_linear_color( struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
{
struct lp_rasterizer *rast = task->rast;
struct lp_scene *scene = rast->curr_scene;
@@ -387,30 +370,20 @@ lp_rast_store_color( struct lp_rasterizer_task *task,
struct pipe_surface *cbuf = scene->fb.cbufs[buf];
const unsigned face = cbuf->face, level = cbuf->level;
struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
- /* this will convert the tiled data to linear if needed */
- (void) llvmpipe_get_texture_tile_linear(lpt, face, level,
- LP_TEX_USAGE_READ,
- task->x, task->y);
- }
-}
-
-/**
- * This is a bin command called during bin processing.
- */
-void
-lp_rast_set_state(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- const struct lp_rast_state *state = arg.set_state;
+ if (!task->color_tiles[buf])
+ continue;
- LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
-
- /* just set the current state pointer for this rasterizer */
- task->current_state = state;
+ llvmpipe_unswizzle_cbuf_tile(lpt,
+ face,
+ level,
+ task->x, task->y,
+ task->color_tiles[buf]);
+ }
}
+
/**
* Run the shader on all blocks in a tile. This is used when a tile is
* completely contained inside a triangle.
@@ -421,8 +394,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
struct lp_rasterizer *rast = task->rast;
- const struct lp_rast_state *state = task->current_state;
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
+ const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
const unsigned tile_x = task->x, tile_y = task->y;
unsigned x, y;
@@ -442,36 +415,60 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
tile_x + x, tile_y + y);
/* depth buffer */
- depth = lp_rast_get_depth_block_pointer(rast, tile_x + x, tile_y + y);
+ depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y);
/* run shader on 4x4 block */
variant->jit_function[RAST_WHOLE]( &state->jit_context,
- tile_x + x, tile_y + y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
- color,
- depth,
- INT_MIN, INT_MIN, INT_MIN,
- NULL, NULL, NULL, &task->vis_counter);
+ tile_x + x, tile_y + y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ 0xffff,
+ &task->vis_counter);
}
}
}
/**
- * Compute shading for a 4x4 block of pixels.
+ * Run the shader on all blocks in a tile. This is used when a tile is
+ * completely contained inside a triangle, and the shader is opaque.
+ * This is a bin command called during bin processing.
+ */
+void
+lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ struct lp_rasterizer *rast = task->rast;
+ unsigned i;
+
+ LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
+
+ /* this will prevent converting the layout from linear to tiled */
+ for (i = 0; i < rast->state.nr_cbufs; i++) {
+ (void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
+ }
+
+ lp_rast_shade_tile(task, arg);
+}
+
+
+/**
+ * Compute shading for a 4x4 block of pixels inside a triangle.
* This is a bin command called during bin processing.
* \param x X position of quad in window coords
* \param y Y position of quad in window coords
*/
-void lp_rast_shade_quads( struct lp_rasterizer_task *task,
- const struct lp_rast_shader_inputs *inputs,
- unsigned x, unsigned y,
- int32_t c1, int32_t c2, int32_t c3)
+void
+lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y,
+ unsigned mask)
{
- const struct lp_rast_state *state = task->current_state;
+ const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
struct lp_rasterizer *rast = task->rast;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
@@ -494,32 +491,26 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task,
}
/* depth buffer */
- depth = lp_rast_get_depth_block_pointer(rast, x, y);
+ depth = lp_rast_get_depth_block_pointer(task, x, y);
assert(lp_check_alignment(state->jit_context.blend_color, 16));
- assert(lp_check_alignment(inputs->step[0], 16));
- assert(lp_check_alignment(inputs->step[1], 16));
- assert(lp_check_alignment(inputs->step[2], 16));
-
/* run shader on 4x4 block */
- variant->jit_function[RAST_EDGE_TEST]( &state->jit_context,
- x, y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
- color,
- depth,
- c1, c2, c3,
- inputs->step[0],
- inputs->step[1],
- inputs->step[2],
- &task->vis_counter);
+ variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
+ x, y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ mask,
+ &task->vis_counter);
}
+
/**
* Set top row and left column of the tile's pixels to white. For debugging.
*/
@@ -598,6 +589,11 @@ lp_rast_tile_end(struct lp_rasterizer_task *task)
(void) outline_subtiles;
#endif
+ {
+ union lp_rast_cmd_arg dummy = {0};
+ lp_rast_store_linear_color(task, dummy);
+ }
+
/* debug */
memset(task->color_tiles, 0, sizeof(task->color_tiles));
task->depth_tile = NULL;
@@ -627,7 +623,7 @@ void
lp_rast_begin_query(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
- /* Reset the the per-task counter */
+ /* Reset the per-task counter */
task->vis_counter = 0;
}
@@ -715,10 +711,16 @@ static struct {
{
RAST(clear_color),
RAST(clear_zstencil),
- RAST(triangle),
+ RAST(triangle_1),
+ RAST(triangle_2),
+ RAST(triangle_3),
+ RAST(triangle_4),
+ RAST(triangle_5),
+ RAST(triangle_6),
+ RAST(triangle_7),
RAST(shade_tile),
- RAST(set_state),
- RAST(store_color),
+ RAST(shade_tile_opaque),
+ RAST(store_linear_color),
RAST(fence),
RAST(begin_query),
RAST(end_query),
@@ -754,30 +756,8 @@ debug_bin( const struct cmd_bin *bin )
static boolean
is_empty_bin( const struct cmd_bin *bin )
{
- const struct cmd_block *head = bin->commands.head;
- int i;
-
- if (0)
- debug_bin(bin);
-
- /* We emit at most two load-tile commands at the start of the first
- * command block. In addition we seem to emit a couple of
- * set-state commands even in empty bins.
- *
- * As a heuristic, if a bin has more than 4 commands, consider it
- * non-empty.
- */
- if (head->next != NULL ||
- head->count > 4) {
- return FALSE;
- }
-
- for (i = 0; i < head->count; i++)
- if (head->cmd[i] != lp_rast_set_state) {
- return FALSE;
- }
-
- return TRUE;
+ if (0) debug_bin(bin);
+ return bin->commands.head->count == 0;
}
@@ -813,6 +793,10 @@ rasterize_scene(struct lp_rasterizer_task *task,
}
}
#endif
+
+ if (scene->fence) {
+ lp_rast_fence(task, lp_rast_arg_fence(scene->fence));
+ }
}
@@ -983,6 +967,10 @@ lp_rast_create( unsigned num_threads )
/* for synchronizing rasterization threads */
pipe_barrier_init( &rast->barrier, rast->num_threads );
+ memset(lp_swizzled_cbuf, 0, sizeof lp_swizzled_cbuf);
+
+ memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
+
return rast;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 80ca68f5a2..eaf2a6f334 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -84,8 +84,7 @@ struct lp_rast_shader_inputs {
float (*dadx)[4];
float (*dady)[4];
- /* edge/step info for 3 edges and 4x4 block of pixels */
- PIPE_ALIGN_VAR(16) int step[3][16];
+ const struct lp_rast_state *state;
};
struct lp_rast_clearzs {
@@ -93,6 +92,22 @@ struct lp_rast_clearzs {
unsigned clearzs_mask;
};
+struct lp_rast_plane {
+ /* one-pixel sized trivial accept offsets for each plane */
+ int ei;
+
+ /* one-pixel sized trivial reject offsets for each plane */
+ int eo;
+
+ /* edge function values at minx,miny ?? */
+ int c;
+
+ int dcdx;
+ int dcdy;
+
+ /* edge/step info for this plane over a 4x4 block (16 entries) */
+ const int *step;
+};
/**
* Rasterization information for a triangle known to be in this bin,
@@ -101,35 +116,16 @@ struct lp_rast_clearzs {
* Objects of this type are put into the lp_setup_context::data buffer.
*/
struct lp_rast_triangle {
+ /* inputs for the shader */
+ struct lp_rast_shader_inputs inputs;
+
+ int step[3][16];
+
#ifdef DEBUG
float v[3][2];
#endif
- /* one-pixel sized trivial accept offsets for each plane */
- int ei1;
- int ei2;
- int ei3;
-
- /* one-pixel sized trivial reject offsets for each plane */
- int eo1;
- int eo2;
- int eo3;
-
- /* y deltas for vertex pairs (in fixed pt) */
- int dy12;
- int dy23;
- int dy31;
-
- /* x deltas for vertex pairs (in fixed pt) */
- int dx12;
- int dx23;
- int dx31;
-
- /* edge function values at minx,miny ?? */
- int c1, c2, c3;
-
- /* inputs for the shader */
- PIPE_ALIGN_VAR(16) struct lp_rast_shader_inputs inputs;
+ struct lp_rast_plane plane[7]; /* NOTE: may allocate fewer planes */
};
@@ -153,7 +149,10 @@ lp_rast_finish( struct lp_rasterizer *rast );
union lp_rast_cmd_arg {
const struct lp_rast_shader_inputs *shade_tile;
- const struct lp_rast_triangle *triangle;
+ struct {
+ const struct lp_rast_triangle *tri;
+ unsigned plane_mask;
+ } triangle;
const struct lp_rast_state *set_state;
uint8_t clear_color[4];
const struct lp_rast_clearzs *clear_zstencil;
@@ -173,10 +172,12 @@ lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile )
}
static INLINE union lp_rast_cmd_arg
-lp_rast_arg_triangle( const struct lp_rast_triangle *triangle )
+lp_rast_arg_triangle( const struct lp_rast_triangle *triangle,
+ unsigned plane_mask)
{
union lp_rast_cmd_arg arg;
- arg.triangle = triangle;
+ arg.triangle.tri = triangle;
+ arg.triangle.plane_mask = plane_mask;
return arg;
}
@@ -226,19 +227,31 @@ void lp_rast_clear_color( struct lp_rasterizer_task *,
void lp_rast_clear_zstencil( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
-void lp_rast_set_state( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
-void lp_rast_triangle( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
+void lp_rast_triangle_1( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_2( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_3( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_4( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_5( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_6( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_7( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
void lp_rast_shade_tile( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+void lp_rast_shade_tile_opaque( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
void lp_rast_fence( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
-void lp_rast_store_color( struct lp_rasterizer_task *,
+void lp_rast_store_linear_color( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index d33dd49f3a..b4a48cfd02 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -31,6 +31,7 @@
#include "os/os_thread.h"
#include "util/u_format.h"
#include "gallivm/lp_bld_debug.h"
+#include "lp_memory.h"
#include "lp_rast.h"
#include "lp_scene.h"
#include "lp_state.h"
@@ -52,8 +53,6 @@ struct lp_rasterizer_task
uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS];
uint8_t *depth_tile;
- const struct lp_rast_state *current_state;
-
/** "back" pointer */
struct lp_rasterizer *rast;
@@ -118,10 +117,12 @@ struct lp_rasterizer
};
-void lp_rast_shade_quads( struct lp_rasterizer_task *task,
- const struct lp_rast_shader_inputs *inputs,
- unsigned x, unsigned y,
- int32_t c1, int32_t c2, int32_t c3);
+void
+lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y,
+ unsigned mask);
+
/**
@@ -132,18 +133,23 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task,
* \param x, y location of 4x4 block in window coords
*/
static INLINE void *
-lp_rast_get_depth_block_pointer(const struct lp_rasterizer *rast,
+lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
unsigned x, unsigned y)
{
+ const struct lp_rasterizer *rast = task->rast;
void *depth;
assert((x % TILE_VECTOR_WIDTH) == 0);
assert((y % TILE_VECTOR_HEIGHT) == 0);
- assert(rast->zsbuf.map || !rast->curr_scene->fb.zsbuf);
-
- if (!rast->zsbuf.map)
- return NULL;
+ if (!rast->zsbuf.map) {
+ /* Either out of memory or no zsbuf. Can't tell without access
+ * to the state. Just use dummy tile memory, but don't print
+ * the oom warning as this most likely because there is no
+ * zsbuf.
+ */
+ return lp_dummy_tile;
+ }
depth = (rast->zsbuf.map +
rast->zsbuf.stride * y +
@@ -155,6 +161,39 @@ lp_rast_get_depth_block_pointer(const struct lp_rasterizer *rast,
/**
+ * Get pointer to the swizzled color tile
+ */
+static INLINE uint8_t *
+lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
+ unsigned buf, enum lp_texture_usage usage)
+{
+ struct lp_rasterizer *rast = task->rast;
+
+ assert(task->x % TILE_SIZE == 0);
+ assert(task->y % TILE_SIZE == 0);
+ assert(buf < rast->state.nr_cbufs);
+
+ if (!task->color_tiles[buf]) {
+ struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
+ struct llvmpipe_resource *lpt;
+ assert(cbuf);
+ lpt = llvmpipe_resource(cbuf->texture);
+ task->color_tiles[buf] = lp_swizzled_cbuf[task->thread_index][buf];
+
+ if (usage != LP_TEX_USAGE_WRITE_ALL) {
+ llvmpipe_swizzle_cbuf_tile(lpt,
+ cbuf->face + cbuf->zslice,
+ cbuf->level,
+ task->x, task->y,
+ task->color_tiles[buf]);
+ }
+ }
+
+ return task->color_tiles[buf];
+}
+
+
+/**
* Get the pointer to a 4x4 color block (within a 64x64 tile).
* We'll map the color buffer on demand here.
* Note that this may be called even when there's no color buffers - return
@@ -171,7 +210,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
assert((x % TILE_VECTOR_WIDTH) == 0);
assert((y % TILE_VECTOR_HEIGHT) == 0);
- color = task->color_tiles[buf];
+ color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
assert(color);
px = x % TILE_SIZE;
@@ -196,8 +235,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y )
{
- struct lp_rasterizer *rast = task->rast;
- const struct lp_rast_state *state = task->current_state;
+ const struct lp_rasterizer *rast = task->rast;
+ const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
void *depth;
@@ -207,19 +246,19 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
for (i = 0; i < rast->state.nr_cbufs; i++)
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
- depth = lp_rast_get_depth_block_pointer(rast, x, y);
+ depth = lp_rast_get_depth_block_pointer(task, x, y);
/* run shader on 4x4 block */
variant->jit_function[RAST_WHOLE]( &state->jit_context,
- x, y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
- color,
- depth,
- INT_MIN, INT_MIN, INT_MIN,
- NULL, NULL, NULL, &task->vis_counter );
+ x, y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ 0xffff,
+ &task->vis_counter );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index a5f0d14c95..ebe9a8e92b 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -113,168 +113,31 @@ block_full_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, x + ix, y + iy);
}
+#define TAG(x) x##_1
+#define NR_PLANES 1
+#include "lp_rast_tri_tmp.h"
-/**
- * Pass the 4x4 pixel block to the shader function.
- * Determination of which of the 16 pixels lies inside the triangle
- * will be done as part of the fragment shader.
- */
-static void
-do_block_4(struct lp_rasterizer_task *task,
- const struct lp_rast_triangle *tri,
- int x, int y,
- int c1, int c2, int c3)
-{
- assert(x >= 0);
- assert(y >= 0);
-
- lp_rast_shade_quads(task, &tri->inputs, x, y, -c1, -c2, -c3);
-}
-
-
-/**
- * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
- * of the triangle's bounds.
- */
-static void
-do_block_16(struct lp_rasterizer_task *task,
- const struct lp_rast_triangle *tri,
- int x, int y,
- int c0, int c1, int c2)
-{
- unsigned mask = 0;
- int eo[3];
- int c[3];
- int i, j;
-
- assert(x >= 0);
- assert(y >= 0);
- assert(x % 16 == 0);
- assert(y % 16 == 0);
-
- eo[0] = tri->eo1 * 4;
- eo[1] = tri->eo2 * 4;
- eo[2] = tri->eo3 * 4;
-
- c[0] = c0;
- c[1] = c1;
- c[2] = c2;
-
- for (j = 0; j < 3; j++) {
- const int *step = tri->inputs.step[j];
- const int cx = c[j] + eo[j];
-
- /* Mask has bits set whenever we are outside any of the edges.
- */
- for (i = 0; i < 16; i++) {
- int out = cx + step[i] * 4;
- mask |= (out >> 31) & (1 << i);
- }
- }
+#define TAG(x) x##_2
+#define NR_PLANES 2
+#include "lp_rast_tri_tmp.h"
- mask = ~mask & 0xffff;
- while (mask) {
- int i = ffs(mask) - 1;
- int px = x + pos_table4[i][0];
- int py = y + pos_table4[i][1];
- int cx1 = c0 + tri->inputs.step[0][i] * 4;
- int cx2 = c1 + tri->inputs.step[1][i] * 4;
- int cx3 = c2 + tri->inputs.step[2][i] * 4;
+#define TAG(x) x##_3
+#define NR_PLANES 3
+#include "lp_rast_tri_tmp.h"
- mask &= ~(1 << i);
+#define TAG(x) x##_4
+#define NR_PLANES 4
+#include "lp_rast_tri_tmp.h"
- /* Don't bother testing if the 4x4 block is entirely in/out of
- * the triangle. It's a little faster to do it in the jit code.
- */
- LP_COUNT(nr_non_empty_4);
- do_block_4(task, tri, px, py, cx1, cx2, cx3);
- }
-}
-
-
-/**
- * Scan the tile in chunks and figure out which pixels to rasterize
- * for this triangle.
- */
-void
-lp_rast_triangle(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- const struct lp_rast_triangle *tri = arg.triangle;
- const int x = task->x, y = task->y;
- int ei[3], eo[3], c[3];
- unsigned outmask, inmask, partial_mask;
- unsigned i, j;
-
- c[0] = tri->c1 + tri->dx12 * y - tri->dy12 * x;
- c[1] = tri->c2 + tri->dx23 * y - tri->dy23 * x;
- c[2] = tri->c3 + tri->dx31 * y - tri->dy31 * x;
-
- eo[0] = tri->eo1 * 16;
- eo[1] = tri->eo2 * 16;
- eo[2] = tri->eo3 * 16;
-
- ei[0] = tri->ei1 * 16;
- ei[1] = tri->ei2 * 16;
- ei[2] = tri->ei3 * 16;
-
- outmask = 0;
- inmask = 0xffff;
+#define TAG(x) x##_5
+#define NR_PLANES 5
+#include "lp_rast_tri_tmp.h"
- for (j = 0; j < 3; j++) {
- const int *step = tri->inputs.step[j];
- const int cox = c[j] + eo[j];
- const int cio = ei[j]- eo[j];
+#define TAG(x) x##_6
+#define NR_PLANES 6
+#include "lp_rast_tri_tmp.h"
- /* Outmask has bits set whenever we are outside any of the
- * edges.
- */
- /* Inmask has bits set whenever we are inside all of the edges.
- */
- for (i = 0; i < 16; i++) {
- int out = cox + step[i] * 16;
- int in = out + cio;
- outmask |= (out >> 31) & (1 << i);
- inmask &= ~((in >> 31) & (1 << i));
- }
- }
+#define TAG(x) x##_7
+#define NR_PLANES 7
+#include "lp_rast_tri_tmp.h"
- assert((outmask & inmask) == 0);
-
- if (outmask == 0xffff)
- return;
-
- /* Invert mask, so that bits are set whenever we are at least
- * partially inside all of the edges:
- */
- partial_mask = ~inmask & ~outmask & 0xffff;
-
- /* Iterate over partials:
- */
- while (partial_mask) {
- int i = ffs(partial_mask) - 1;
- int px = x + pos_table16[i][0];
- int py = y + pos_table16[i][1];
- int cx1 = c[0] + tri->inputs.step[0][i] * 16;
- int cx2 = c[1] + tri->inputs.step[1][i] * 16;
- int cx3 = c[2] + tri->inputs.step[2][i] * 16;
-
- partial_mask &= ~(1 << i);
-
- LP_COUNT(nr_partially_covered_16);
- do_block_16(task, tri, px, py, cx1, cx2, cx3);
- }
-
- /* Iterate over fulls:
- */
- while (inmask) {
- int i = ffs(inmask) - 1;
- int px = x + pos_table16[i][0];
- int py = y + pos_table16[i][1];
-
- inmask &= ~(1 << i);
-
- LP_COUNT(nr_fully_covered_16);
- block_full_16(task, tri, px, py);
- }
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
new file mode 100644
index 0000000000..a410c611a3
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -0,0 +1,238 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Rasterization for binned triangles within a tile
+ */
+
+
+
+/**
+ * Prototype for a 7 plane rasterizer function. Will codegenerate
+ * several of these.
+ *
+ * XXX: Variants for more/fewer planes.
+ * XXX: Need ways of dropping planes as we descend.
+ * XXX: SIMD
+ */
+static void
+TAG(do_block_4)(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ const struct lp_rast_plane *plane,
+ int x, int y,
+ const int *c)
+{
+ unsigned mask = 0;
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ int any_negative = 0;
+ int j;
+
+ for (j = 0; j < NR_PLANES; j++)
+ any_negative |= (c[j] - 1 + plane[j].step[i]);
+
+ any_negative >>= 31;
+
+ mask |= (~any_negative) & (1 << i);
+ }
+
+ /* Now pass to the shader:
+ */
+ if (mask)
+ lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+}
+
+/**
+ * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
+ * of the triangle's bounds.
+ */
+static void
+TAG(do_block_16)(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ const struct lp_rast_plane *plane,
+ int x, int y,
+ const int *c)
+{
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned i, j;
+
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int *step = plane[j].step;
+ const int eo = plane[j].eo * 4;
+ const int ei = plane[j].ei * 4;
+ const int cox = c[j] + eo;
+ const int cio = ei - 1 - eo;
+
+ for (i = 0; i < 16; i++) {
+ int out = cox + step[i] * 4;
+ int part = out + cio;
+ outmask |= (out >> 31) & (1 << i);
+ partmask |= (part >> 31) & (1 << i);
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int px = x + pos_table4[i][0];
+ int py = y + pos_table4[i][1];
+ int cx[NR_PLANES];
+
+ for (j = 0; j < NR_PLANES; j++)
+ cx[j] = c[j] + plane[j].step[i] * 4;
+
+ partial_mask &= ~(1 << i);
+
+ TAG(do_block_4)(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int px = x + pos_table4[i][0];
+ int py = y + pos_table4[i][1];
+
+ inmask &= ~(1 << i);
+
+ block_full_4(task, tri, px, py);
+ }
+}
+
+
+/**
+ * Scan the tile in chunks and figure out which pixels to rasterize
+ * for this triangle.
+ */
+void
+TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ unsigned plane_mask = arg.triangle.plane_mask;
+ const int x = task->x, y = task->y;
+ struct lp_rast_plane plane[NR_PLANES];
+ int c[NR_PLANES];
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned i, j, nr_planes = 0;
+
+ while (plane_mask) {
+ int i = ffs(plane_mask) - 1;
+ plane[nr_planes] = tri->plane[i];
+ plane_mask &= ~(1 << i);
+ nr_planes++;
+ };
+
+ assert(nr_planes == NR_PLANES);
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int *step = plane[j].step;
+ const int eo = plane[j].eo * 16;
+ const int ei = plane[j].ei * 16;
+ int cox, cio;
+
+ c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
+ cox = c[j] + eo;
+ cio = ei - 1 - eo;
+
+ for (i = 0; i < 16; i++) {
+ int out = cox + step[i] * 16;
+ int part = out + cio;
+ outmask |= (out >> 31) & (1 << i);
+ partmask |= (part >> 31) & (1 << i);
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int px = x + pos_table16[i][0];
+ int py = y + pos_table16[i][1];
+ int cx[NR_PLANES];
+
+ for (j = 0; j < NR_PLANES; j++)
+ cx[j] = c[j] + plane[j].step[i] * 16;
+
+ partial_mask &= ~(1 << i);
+
+ LP_COUNT(nr_partially_covered_16);
+ TAG(do_block_16)(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int px = x + pos_table16[i][0];
+ int py = y + pos_table16[i][1];
+
+ inmask &= ~(1 << i);
+
+ LP_COUNT(nr_fully_covered_16);
+ block_full_16(task, tri, px, py);
+ }
+}
+
+#undef TAG
+#undef NR_PLANES
+
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index 845c175cf2..f88a759fe7 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -32,6 +32,7 @@
#include "util/u_simple_list.h"
#include "lp_scene.h"
#include "lp_scene_queue.h"
+#include "lp_fence.h"
/** List of texture references */
@@ -162,8 +163,8 @@ lp_scene_reset(struct lp_scene *scene )
/* Free all but last binner command lists:
*/
- for (i = 0; i < scene->tiles_x; i++) {
- for (j = 0; j < scene->tiles_y; j++) {
+ for (i = 0; i < TILES_X; i++) {
+ for (j = 0; j < TILES_Y; j++) {
lp_scene_bin_reset(scene, i, j);
}
}
@@ -198,6 +199,8 @@ lp_scene_reset(struct lp_scene *scene )
make_empty_list(ref_list);
}
+ lp_fence_reference(&scene->fence, NULL);
+
scene->scene_size = 0;
scene->has_color_clear = FALSE;
@@ -303,60 +306,6 @@ lp_scene_is_resource_referenced(const struct lp_scene *scene,
}
-/**
- * Return last command in the bin
- */
-static lp_rast_cmd
-lp_get_last_command( const struct cmd_bin *bin )
-{
- const struct cmd_block *tail = bin->commands.tail;
- const unsigned i = tail->count;
- if (i > 0)
- return tail->cmd[i - 1];
- else
- return NULL;
-}
-
-
-/**
- * Replace the arg of the last command in the bin.
- */
-static void
-lp_replace_last_command_arg( struct cmd_bin *bin,
- const union lp_rast_cmd_arg arg )
-{
- struct cmd_block *tail = bin->commands.tail;
- const unsigned i = tail->count;
- assert(i > 0);
- tail->arg[i - 1] = arg;
-}
-
-
-
-/**
- * Put a state-change command into all bins.
- * If we find that the last command in a bin was also a state-change
- * command, we can simply replace that one with the new one.
- */
-void
-lp_scene_bin_state_command( struct lp_scene *scene,
- lp_rast_cmd cmd,
- const union lp_rast_cmd_arg arg )
-{
- unsigned i, j;
- for (i = 0; i < scene->tiles_x; i++) {
- for (j = 0; j < scene->tiles_y; j++) {
- struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
- lp_rast_cmd last_cmd = lp_get_last_command(bin);
- if (last_cmd == cmd) {
- lp_replace_last_command_arg(bin, arg);
- }
- else {
- lp_scene_bin_command( scene, i, j, cmd, arg );
- }
- }
- }
-}
/** advance curr_x,y to the next bin */
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index 4e55d43174..fa1b311fa1 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -112,6 +112,7 @@ struct resource_ref {
*/
struct lp_scene {
struct pipe_context *pipe;
+ struct lp_fence *fence;
/** the framebuffer to render the scene into */
struct pipe_framebuffer_state fb;
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 6432cea862..167cb2ee2e 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -43,6 +43,7 @@
#include "lp_debug.h"
#include "lp_public.h"
#include "lp_limits.h"
+#include "lp_rast.h"
#include "state_tracker/sw_winsys.h"
@@ -86,7 +87,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return PIPE_MAX_SAMPLERS;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return 0;
+ return PIPE_MAX_VERTEX_SAMPLERS;
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS;
case PIPE_CAP_NPOT_TEXTURES:
@@ -166,6 +167,10 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return LP_MAX_TGSI_PREDS;
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
return 1;
+ case PIPE_CAP_GEOMETRY_SHADER4:
+ return 1;
+ case PIPE_CAP_DEPTH_CLAMP:
+ return 0;
default:
assert(0);
return 0;
@@ -294,11 +299,16 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen )
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct sw_winsys *winsys = screen->winsys;
+ if (screen->rast)
+ lp_rast_destroy(screen->rast);
+
lp_jit_screen_cleanup(screen);
if(winsys->destroy)
winsys->destroy(winsys);
+ pipe_mutex_destroy(screen->rast_mutex);
+
FREE(screen);
}
@@ -347,11 +357,6 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
lp_jit_screen_init(screen);
-#ifdef PIPE_OS_WINDOWS
- /* Multithreading not supported on windows until conditions and barriers are
- * properly implemented. */
- screen->num_threads = 0;
-#else
#ifdef PIPE_OS_EMBEDDED
screen->num_threads = 0;
#else
@@ -359,7 +364,14 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
#endif
screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads);
screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS);
-#endif
+
+ screen->rast = lp_rast_create(screen->num_threads);
+ if (!screen->rast) {
+ lp_jit_screen_cleanup(screen);
+ FREE(screen);
+ return NULL;
+ }
+ pipe_mutex_init(screen->rast_mutex);
util_format_s3tc_init();
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h
index eb40f6823f..731526dfab 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.h
+++ b/src/gallium/drivers/llvmpipe/lp_screen.h
@@ -37,6 +37,7 @@
#include "gallivm/lp_bld.h"
#include <llvm-c/ExecutionEngine.h>
+#include "os/os_thread.h"
#include "pipe/p_screen.h"
#include "pipe/p_defines.h"
@@ -63,6 +64,9 @@ struct llvmpipe_screen
/* Increments whenever textures are modified. Contexts can track this.
*/
unsigned timestamp;
+
+ struct lp_rasterizer *rast;
+ pipe_mutex rast_mutex;
};
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index e8aafee33f..556e571585 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -40,6 +40,7 @@
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "lp_context.h"
+#include "lp_memory.h"
#include "lp_scene.h"
#include "lp_scene_queue.h"
#include "lp_texture.h"
@@ -63,15 +64,7 @@ struct lp_scene *
lp_setup_get_current_scene(struct lp_setup_context *setup)
{
if (!setup->scene) {
-
- /* wait for a free/empty scene
- */
- setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE);
-
- assert(lp_scene_is_empty(setup->scene));
-
- lp_scene_begin_binning(setup->scene,
- &setup->fb );
+ set_scene_state( setup, SETUP_EMPTY );
}
return setup->scene;
}
@@ -159,8 +152,11 @@ static void
lp_setup_rasterize_scene( struct lp_setup_context *setup )
{
struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen);
- lp_scene_rasterize(scene, setup->rast);
+ pipe_mutex_lock(screen->rast_mutex);
+ lp_scene_rasterize(scene, screen->rast);
+ pipe_mutex_unlock(screen->rast_mutex);
reset_context( setup );
@@ -233,22 +229,36 @@ set_scene_state( struct lp_setup_context *setup,
LP_DBG(DEBUG_SETUP, "%s old %d new %d\n", __FUNCTION__, old_state, new_state);
switch (new_state) {
- case SETUP_ACTIVE:
- begin_binning( setup );
+ case SETUP_EMPTY:
+ assert(old_state == SETUP_FLUSHED);
+ assert(setup->scene == NULL);
+
+ /* wait for a free/empty scene
+ */
+ setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE);
+ assert(lp_scene_is_empty(setup->scene));
+ lp_scene_begin_binning(setup->scene,
+ &setup->fb );
break;
case SETUP_CLEARED:
- if (old_state == SETUP_ACTIVE) {
- assert(0);
- return;
- }
+ assert(old_state == SETUP_EMPTY);
+ assert(setup->scene != NULL);
break;
-
+
+ case SETUP_ACTIVE:
+ assert(old_state == SETUP_EMPTY ||
+ old_state == SETUP_CLEARED);
+ assert(setup->scene != NULL);
+ begin_binning( setup );
+ break;
+
case SETUP_FLUSHED:
if (old_state == SETUP_CLEARED)
execute_clears( setup );
else
lp_setup_rasterize_scene( setup );
+ assert(setup->scene == NULL);
break;
default:
@@ -264,23 +274,19 @@ set_scene_state( struct lp_setup_context *setup,
*/
void
lp_setup_flush( struct lp_setup_context *setup,
- unsigned flags )
+ unsigned flags,
+ struct pipe_fence_handle **fence)
{
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
if (setup->scene) {
- struct lp_scene *scene = lp_setup_get_current_scene(setup);
- union lp_rast_cmd_arg dummy = {0};
-
- if (flags & (PIPE_FLUSH_SWAPBUFFERS |
- PIPE_FLUSH_FRAME)) {
- /* Store colors in the linear color buffer(s).
- * If we don't do this here, we'll end up converting the tiled
- * data to linear in the texture_unmap() function, which will
- * not be a parallel/threaded operation as here.
+ if (fence) {
+ /* if we're going to flush the setup/rasterization modules, emit
+ * a fence.
*/
- lp_scene_bin_everywhere(scene, lp_rast_store_color, dummy);
+ *fence = lp_setup_fence( setup );
}
+
}
set_scene_state( setup, SETUP_FLUSHED );
@@ -297,6 +303,11 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup,
*/
set_scene_state( setup, SETUP_FLUSHED );
+ /*
+ * Ensure the old scene is not reused.
+ */
+ assert(!setup->scene);
+
/* Set new state. This will be picked up later when we next need a
* scene.
*/
@@ -421,24 +432,27 @@ lp_setup_clear( struct lp_setup_context *setup,
struct pipe_fence_handle *
lp_setup_fence( struct lp_setup_context *setup )
{
- if (setup->num_threads == 0) {
+ if (setup->scene == NULL)
return NULL;
- }
- else {
+ else if (setup->num_threads == 0)
+ return NULL;
+ else
+ {
struct lp_scene *scene = lp_setup_get_current_scene(setup);
- const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */
- struct lp_fence *fence = lp_fence_create(rank);
-
- LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank);
+ const unsigned rank = setup->num_threads;
set_scene_state( setup, SETUP_ACTIVE );
+
+ assert(scene->fence == NULL);
+
+ /* The caller gets a reference, we keep a copy too, so need to
+ * bump the refcount:
+ */
+ lp_fence_reference(&scene->fence, lp_fence_create(rank));
- /* insert the fence into all command bins */
- lp_scene_bin_everywhere( scene,
- lp_rast_fence,
- lp_rast_arg_fence(fence) );
+ LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank);
- return (struct pipe_fence_handle *) fence;
+ return (struct pipe_fence_handle *) scene->fence;
}
}
@@ -611,6 +625,17 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
LP_TEX_LAYOUT_LINEAR);
jit_tex->row_stride[j] = lp_tex->row_stride[j];
jit_tex->img_stride[j] = lp_tex->img_stride[j];
+
+ if (!jit_tex->data[j]) {
+ /* out of memory - use dummy tile memory */
+ jit_tex->data[j] = lp_dummy_tile;
+ jit_tex->width = TILE_SIZE;
+ jit_tex->height = TILE_SIZE;
+ jit_tex->depth = 1;
+ jit_tex->last_level = 0;
+ jit_tex->row_stride[j] = 0;
+ jit_tex->img_stride[j] = 0;
+ }
}
}
else {
@@ -618,7 +643,6 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
/*
* XXX: Where should this be unmapped?
*/
-
struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen);
struct sw_winsys *winsys = screen->winsys;
jit_tex->data[0] = winsys->displaytarget_map(winsys, lp_tex->dt,
@@ -717,28 +741,6 @@ lp_setup_update_state( struct lp_setup_context *setup )
setup->dirty |= LP_SETUP_NEW_FS;
}
- if (setup->dirty & LP_SETUP_NEW_SCISSOR) {
- float *stored;
-
- stored = lp_scene_alloc_aligned(scene, 4 * sizeof(int32_t), 16);
-
- if (stored) {
- stored[0] = (float) setup->scissor.current.minx;
- stored[1] = (float) setup->scissor.current.miny;
- stored[2] = (float) setup->scissor.current.maxx;
- stored[3] = (float) setup->scissor.current.maxy;
-
- setup->scissor.stored = stored;
-
- setup->fs.current.jit_context.scissor_xmin = stored[0];
- setup->fs.current.jit_context.scissor_ymin = stored[1];
- setup->fs.current.jit_context.scissor_xmax = stored[2];
- setup->fs.current.jit_context.scissor_ymax = stored[3];
- }
-
- setup->dirty |= LP_SETUP_NEW_FS;
- }
-
if(setup->dirty & LP_SETUP_NEW_CONSTANTS) {
struct pipe_resource *buffer = setup->constants.current;
@@ -792,11 +794,6 @@ lp_setup_update_state( struct lp_setup_context *setup )
&setup->fs.current,
sizeof setup->fs.current);
setup->fs.stored = stored;
-
- /* put the state-set command into all bins */
- lp_scene_bin_state_command( scene,
- lp_rast_set_state,
- lp_rast_arg_state(setup->fs.stored) );
}
/* The scene now references the textures in the rasterization
@@ -843,8 +840,6 @@ lp_setup_destroy( struct lp_setup_context *setup )
lp_scene_queue_destroy(setup->empty_scenes);
- lp_rast_destroy( setup->rast );
-
FREE( setup );
}
@@ -871,13 +866,7 @@ lp_setup_create( struct pipe_context *pipe,
if (!setup->empty_scenes)
goto fail;
- /* XXX: move this to the screen and share between contexts:
- */
setup->num_threads = screen->num_threads;
- setup->rast = lp_rast_create(screen->num_threads);
- if (!setup->rast)
- goto fail;
-
setup->vbuf = draw_vbuf_stage(draw, &setup->base);
if (!setup->vbuf)
goto fail;
@@ -901,9 +890,6 @@ lp_setup_create( struct pipe_context *pipe,
return setup;
fail:
- if (setup->rast)
- lp_rast_destroy( setup->rast );
-
if (setup->vbuf)
;
@@ -933,6 +919,8 @@ lp_setup_begin_query(struct lp_setup_context *setup,
memset(pq->count, 0, sizeof(pq->count)); /* reset all counters */
+ set_scene_state( setup, SETUP_ACTIVE );
+
cmd_arg.query_obj = pq;
lp_scene_bin_everywhere(scene, lp_rast_begin_query, cmd_arg);
pq->binned = TRUE;
@@ -948,6 +936,8 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
struct lp_scene * scene = lp_setup_get_current_scene(setup);
union lp_rast_cmd_arg cmd_arg;
+ set_scene_state( setup, SETUP_ACTIVE );
+
cmd_arg.query_obj = pq;
lp_scene_bin_everywhere(scene, lp_rast_end_query, cmd_arg);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index 6a0dc55129..73b1c85325 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -84,7 +84,8 @@ lp_setup_fence( struct lp_setup_context *setup );
void
lp_setup_flush( struct lp_setup_context *setup,
- unsigned flags );
+ unsigned flags,
+ struct pipe_fence_handle **fence);
void
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index c8b8a2480b..a0606f5034 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -81,7 +81,6 @@ struct lp_setup_context
*/
struct draw_stage *vbuf;
unsigned num_threads;
- struct lp_rasterizer *rast;
struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */
struct lp_scene *scene; /**< current scene being built */
struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */
@@ -101,9 +100,10 @@ struct lp_setup_context
} clear;
enum setup_state {
- SETUP_FLUSHED,
- SETUP_CLEARED,
- SETUP_ACTIVE
+ SETUP_FLUSHED, /**< scene is null */
+ SETUP_EMPTY, /**< scene exists but has only state changes */
+ SETUP_CLEARED, /**< scene exists but has only clears */
+ SETUP_ACTIVE /**< scene exists and has at least one draw/query */
} state;
struct {
@@ -129,7 +129,6 @@ struct lp_setup_context
struct {
struct pipe_scissor_state current;
- const void *stored;
} scissor;
unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 0557d35f8b..7e432503c1 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -38,12 +38,78 @@
#define NUM_CHANNELS 4
+struct tri_info {
+
+ float pixel_offset;
+
+ /* fixed point vertex coordinates */
+ int x[3];
+ int y[3];
+
+ /* float x,y deltas - all from the original coordinates
+ */
+ float dy01, dy20;
+ float dx01, dx20;
+ float oneoverarea;
+
+ const float (*v0)[4];
+ const float (*v1)[4];
+ const float (*v2)[4];
+
+ boolean frontfacing;
+};
+
+
+
+static const int step_scissor_minx[16] = {
+ 0, 1, 0, 1,
+ 2, 3, 2, 3,
+ 0, 1, 0, 1,
+ 2, 3, 2, 3
+};
+
+static const int step_scissor_maxx[16] = {
+ 0, -1, 0, -1,
+ -2, -3, -2, -3,
+ 0, -1, 0, -1,
+ -2, -3, -2, -3
+};
+
+static const int step_scissor_miny[16] = {
+ 0, 0, 1, 1,
+ 0, 0, 1, 1,
+ 2, 2, 3, 3,
+ 2, 2, 3, 3
+};
+
+static const int step_scissor_maxy[16] = {
+ 0, 0, -1, -1,
+ 0, 0, -1, -1,
+ -2, -2, -3, -3,
+ -2, -2, -3, -3
+};
+
+
+
+
+static INLINE int
+subpixel_snap(float a)
+{
+ return util_iround(FIXED_ONE * a);
+}
+
+static INLINE float
+fixed_to_float(int a)
+{
+ return a * (1.0 / FIXED_ONE);
+}
+
+
/**
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
*/
-static void constant_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
+static void constant_coef( struct lp_rast_triangle *tri,
unsigned slot,
const float value,
unsigned i )
@@ -54,28 +120,21 @@ static void constant_coef( struct lp_setup_context *setup,
}
-/**
- * Compute a0, dadx and dady for a linearly interpolated coefficient,
- * for a triangle.
- */
-static void linear_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- float oneoverarea,
+
+static void linear_coef( struct lp_rast_triangle *tri,
+ const struct tri_info *info,
unsigned slot,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
unsigned vert_attr,
unsigned i)
{
- float a1 = v1[vert_attr][i];
- float a2 = v2[vert_attr][i];
- float a3 = v3[vert_attr][i];
+ float a0 = info->v0[vert_attr][i];
+ float a1 = info->v1[vert_attr][i];
+ float a2 = info->v2[vert_attr][i];
- float da12 = a1 - a2;
- float da31 = a3 - a1;
- float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
- float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
+ float da01 = a0 - a1;
+ float da20 = a2 - a0;
+ float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
+ float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;
@@ -92,9 +151,9 @@ static void linear_coef( struct lp_setup_context *setup,
* to define a0 as the sample at a pixel center somewhere near vmin
* instead - i'll switch to this later.
*/
- tri->inputs.a0[slot][i] = (a1 -
- (dadx * (v1[0][0] - setup->pixel_offset) +
- dady * (v1[0][1] - setup->pixel_offset)));
+ tri->inputs.a0[slot][i] = (a0 -
+ (dadx * (info->v0[0][0] - info->pixel_offset) +
+ dady * (info->v0[0][1] - info->pixel_offset)));
}
@@ -106,31 +165,27 @@ static void linear_coef( struct lp_setup_context *setup,
* Later, when we compute the value at a particular fragment position we'll
* divide the interpolated value by the interpolated W at that fragment.
*/
-static void perspective_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- float oneoverarea,
+static void perspective_coef( struct lp_rast_triangle *tri,
+ const struct tri_info *info,
unsigned slot,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
unsigned vert_attr,
unsigned i)
{
/* premultiply by 1/w (v[0][3] is always 1/w):
*/
- float a1 = v1[vert_attr][i] * v1[0][3];
- float a2 = v2[vert_attr][i] * v2[0][3];
- float a3 = v3[vert_attr][i] * v3[0][3];
- float da12 = a1 - a2;
- float da31 = a3 - a1;
- float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
- float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
+ float a0 = info->v0[vert_attr][i] * info->v0[0][3];
+ float a1 = info->v1[vert_attr][i] * info->v1[0][3];
+ float a2 = info->v2[vert_attr][i] * info->v2[0][3];
+ float da01 = a0 - a1;
+ float da20 = a2 - a0;
+ float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
+ float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;
- tri->inputs.a0[slot][i] = (a1 -
- (dadx * (v1[0][0] - setup->pixel_offset) +
- dady * (v1[0][1] - setup->pixel_offset)));
+ tri->inputs.a0[slot][i] = (a0 -
+ (dadx * (info->v0[0][0] - info->pixel_offset) +
+ dady * (info->v0[0][1] - info->pixel_offset)));
}
@@ -141,13 +196,9 @@ static void perspective_coef( struct lp_setup_context *setup,
* We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
*/
static void
-setup_fragcoord_coef(struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- float oneoverarea,
+setup_fragcoord_coef(struct lp_rast_triangle *tri,
+ const struct tri_info *info,
unsigned slot,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
unsigned usage_mask)
{
/*X*/
@@ -166,12 +217,12 @@ setup_fragcoord_coef(struct lp_setup_context *setup,
/*Z*/
if (usage_mask & TGSI_WRITEMASK_Z) {
- linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2);
+ linear_coef(tri, info, slot, 0, 2);
}
/*W*/
if (usage_mask & TGSI_WRITEMASK_W) {
- linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3);
+ linear_coef(tri, info, slot, 0, 3);
}
}
@@ -180,24 +231,23 @@ setup_fragcoord_coef(struct lp_setup_context *setup,
* Setup the fragment input attribute with the front-facing value.
* \param frontface is the triangle front facing?
*/
-static void setup_facing_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
+static void setup_facing_coef( struct lp_rast_triangle *tri,
unsigned slot,
boolean frontface,
unsigned usage_mask)
{
/* convert TRUE to 1.0 and FALSE to -1.0 */
if (usage_mask & TGSI_WRITEMASK_X)
- constant_coef( setup, tri, slot, 2.0f * frontface - 1.0f, 0 );
+ constant_coef( tri, slot, 2.0f * frontface - 1.0f, 0 );
if (usage_mask & TGSI_WRITEMASK_Y)
- constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */
+ constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
if (usage_mask & TGSI_WRITEMASK_Z)
- constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */
+ constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
if (usage_mask & TGSI_WRITEMASK_W)
- constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */
+ constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
}
@@ -206,11 +256,7 @@ static void setup_facing_coef( struct lp_setup_context *setup,
*/
static void setup_tri_coefficients( struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
- float oneoverarea,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
- boolean frontface)
+ const struct tri_info *info)
{
unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
unsigned slot;
@@ -227,25 +273,25 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
if (setup->flatshade_first) {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, v1[vert_attr][i], i);
+ constant_coef(tri, slot+1, info->v0[vert_attr][i], i);
}
else {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, v3[vert_attr][i], i);
+ constant_coef(tri, slot+1, info->v2[vert_attr][i], i);
}
break;
case LP_INTERP_LINEAR:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
+ linear_coef(tri, info, slot+1, vert_attr, i);
break;
case LP_INTERP_PERSPECTIVE:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
+ perspective_coef(tri, info, slot+1, vert_attr, i);
fragcoord_usage_mask |= TGSI_WRITEMASK_W;
break;
@@ -259,7 +305,7 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
break;
case LP_INTERP_FACING:
- setup_facing_coef(setup, tri, slot+1, frontface, usage_mask);
+ setup_facing_coef(tri, slot+1, info->frontfacing, usage_mask);
break;
default:
@@ -269,16 +315,11 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
/* The internal position input is in slot zero:
*/
- setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3,
- fragcoord_usage_mask);
+ setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask);
}
-static INLINE int subpixel_snap( float a )
-{
- return util_iround(FIXED_ONE * a - (FIXED_ONE / 2));
-}
@@ -291,21 +332,23 @@ static INLINE int subpixel_snap( float a )
* \return pointer to triangle space
*/
static INLINE struct lp_rast_triangle *
-alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size)
+alloc_triangle(struct lp_scene *scene,
+ unsigned nr_inputs,
+ unsigned nr_planes,
+ unsigned *tri_size)
{
unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
struct lp_rast_triangle *tri;
- unsigned bytes;
+ unsigned tri_bytes, bytes;
char *inputs;
- assert(sizeof(*tri) % 16 == 0);
-
- bytes = sizeof(*tri) + (3 * input_array_sz);
+ tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16);
+ bytes = tri_bytes + (3 * input_array_sz);
tri = lp_scene_alloc_aligned( scene, bytes, 16 );
if (tri) {
- inputs = (char *) (tri + 1);
+ inputs = ((char *)tri) + tri_bytes;
tri->inputs.a0 = (float (*)[4]) inputs;
tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz);
tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz);
@@ -329,52 +372,71 @@ print_triangle(struct lp_setup_context *setup,
uint i;
debug_printf("llvmpipe triangle\n");
- for (i = 0; i < setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v1[%d]: %f %f %f %f\n", i,
v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
}
- for (i = 0; i < setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v2[%d]: %f %f %f %f\n", i,
v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
}
- for (i = 0; i < setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v3[%d]: %f %f %f %f\n", i,
v3[i][0], v3[i][1], v3[i][2], v3[i][3]);
}
}
+lp_rast_cmd lp_rast_tri_tab[8] = {
+ NULL, /* should be impossible */
+ lp_rast_triangle_1,
+ lp_rast_triangle_2,
+ lp_rast_triangle_3,
+ lp_rast_triangle_4,
+ lp_rast_triangle_5,
+ lp_rast_triangle_6,
+ lp_rast_triangle_7
+};
+
/**
* Do basic setup for triangle rasterization and determine which
* framebuffer tiles are touched. Put the triangle in the scene's
* bins for the tiles which we overlap.
*/
-static void
+static void
do_triangle_ccw(struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
boolean frontfacing )
{
- /* x/y positions in fixed point */
- const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset);
- const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset);
- const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset);
- const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset);
- const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset);
- const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset);
struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_fragment_shader_variant *variant = setup->fs.current.variant;
struct lp_rast_triangle *tri;
+ struct tri_info info;
int area;
- float oneoverarea;
int minx, maxx, miny, maxy;
+ int ix0, ix1, iy0, iy1;
unsigned tri_bytes;
-
+ int i;
+ int nr_planes = 3;
+
if (0)
print_triangle(setup, v1, v2, v3);
- tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes);
+ if (setup->scissor_test) {
+ nr_planes = 7;
+ }
+ else {
+ nr_planes = 3;
+ }
+
+
+ tri = alloc_triangle(scene,
+ setup->fs.nr_inputs,
+ nr_planes,
+ &tri_bytes);
if (!tri)
return;
@@ -387,15 +449,24 @@ do_triangle_ccw(struct lp_setup_context *setup,
tri->v[2][1] = v3[0][1];
#endif
- tri->dx12 = x1 - x2;
- tri->dx23 = x2 - x3;
- tri->dx31 = x3 - x1;
+ /* x/y positions in fixed point */
+ info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset);
+ info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset);
+ info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset);
+ info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset);
+ info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset);
+ info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset);
+
+ tri->plane[0].dcdy = info.x[0] - info.x[1];
+ tri->plane[1].dcdy = info.x[1] - info.x[2];
+ tri->plane[2].dcdy = info.x[2] - info.x[0];
- tri->dy12 = y1 - y2;
- tri->dy23 = y2 - y3;
- tri->dy31 = y3 - y1;
+ tri->plane[0].dcdx = info.y[0] - info.y[1];
+ tri->plane[1].dcdx = info.y[1] - info.y[2];
+ tri->plane[2].dcdx = info.y[2] - info.y[0];
- area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12);
+ area = (tri->plane[0].dcdy * tri->plane[2].dcdx -
+ tri->plane[2].dcdy * tri->plane[0].dcdx);
LP_COUNT(nr_tris);
@@ -410,20 +481,35 @@ do_triangle_ccw(struct lp_setup_context *setup,
}
/* Bounding rectangle (in pixels) */
- minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
- maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
- miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
- maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
-
+ {
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding.
+ */
+ int adj = (setup->pixel_offset != 0) ? 1 : 0;
+
+ minx = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ maxx = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ miny = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ maxy = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ }
+
if (setup->scissor_test) {
minx = MAX2(minx, setup->scissor.current.minx);
maxx = MIN2(maxx, setup->scissor.current.maxx);
miny = MAX2(miny, setup->scissor.current.miny);
maxy = MIN2(maxy, setup->scissor.current.maxy);
}
+ else {
+ minx = MAX2(minx, 0);
+ miny = MAX2(miny, 0);
+ maxx = MIN2(maxx, scene->fb.width);
+ maxy = MIN2(maxy, scene->fb.height);
+ }
+
- if (miny == maxy ||
- minx == maxx) {
+ if (miny >= maxy || minx >= maxx) {
lp_scene_putback_data( scene, tri_bytes );
LP_COUNT(nr_culled_tris);
return;
@@ -431,75 +517,88 @@ do_triangle_ccw(struct lp_setup_context *setup,
/*
*/
- oneoverarea = ((float)FIXED_ONE) / (float)area;
+ info.pixel_offset = setup->pixel_offset;
+ info.v0 = v1;
+ info.v1 = v2;
+ info.v2 = v3;
+ info.dx01 = info.v0[0][0] - info.v1[0][0];
+ info.dx20 = info.v2[0][0] - info.v0[0][0];
+ info.dy01 = info.v0[0][1] - info.v1[0][1];
+ info.dy20 = info.v2[0][1] - info.v0[0][1];
+ info.oneoverarea = 1.0 / (info.dx01 * info.dy20 - info.dx20 * info.dy01);
+ info.frontfacing = frontfacing;
/* Setup parameter interpolants:
*/
- setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing );
+ setup_tri_coefficients( setup, tri, &info );
tri->inputs.facing = frontfacing ? 1.0F : -1.0F;
+ tri->inputs.state = setup->fs.stored;
- /* half-edge constants, will be interated over the whole render target.
- */
- tri->c1 = tri->dy12 * x1 - tri->dx12 * y1;
- tri->c2 = tri->dy23 * x2 - tri->dx23 * y2;
- tri->c3 = tri->dy31 * x3 - tri->dx31 * y3;
- /* correct for top-left fill convention:
- */
- if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++;
- if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++;
- if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++;
-
- tri->dy12 *= FIXED_ONE;
- tri->dy23 *= FIXED_ONE;
- tri->dy31 *= FIXED_ONE;
-
- tri->dx12 *= FIXED_ONE;
- tri->dx23 *= FIXED_ONE;
- tri->dx31 *= FIXED_ONE;
-
- /* find trivial reject offsets for each edge for a single-pixel
- * sized block. These will be scaled up at each recursive level to
- * match the active blocksize. Scaling in this way works best if
- * the blocks are square.
- */
- tri->eo1 = 0;
- if (tri->dy12 < 0) tri->eo1 -= tri->dy12;
- if (tri->dx12 > 0) tri->eo1 += tri->dx12;
+
+ for (i = 0; i < 3; i++) {
+ struct lp_rast_plane *plane = &tri->plane[i];
- tri->eo2 = 0;
- if (tri->dy23 < 0) tri->eo2 -= tri->dy23;
- if (tri->dx23 > 0) tri->eo2 += tri->dx23;
+ /* half-edge constants, will be iterated over the whole render
+ * target.
+ */
+ plane->c = plane->dcdx * info.x[i] - plane->dcdy * info.y[i];
+
+ /* correct for top-left vs. bottom-left fill convention.
+ *
+ * note that we're overloading gl_rasterization_rules to mean
+ * both (0.5,0.5) pixel centers *and* bottom-left filling
+ * convention.
+ *
+ * GL actually has a top-left filling convention, but GL's
+ * notion of "top" differs from gallium's...
+ *
+ * Also, sometimes (in FBO cases) GL will render upside down
+ * to its usual method, in which case it will probably want
+ * to use the opposite, top-left convention.
+ */
+ if (plane->dcdx < 0) {
+ /* both fill conventions want this - adjust for left edges */
+ plane->c++;
+ }
+ else if (plane->dcdx == 0) {
+ if (setup->pixel_offset == 0) {
+ /* correct for top-left fill convention:
+ */
+ if (plane->dcdy > 0) plane->c++;
+ }
+ else {
+ /* correct for bottom-left fill convention:
+ */
+ if (plane->dcdy < 0) plane->c++;
+ }
+ }
- tri->eo3 = 0;
- if (tri->dy31 < 0) tri->eo3 -= tri->dy31;
- if (tri->dx31 > 0) tri->eo3 += tri->dx31;
+ plane->dcdx *= FIXED_ONE;
+ plane->dcdy *= FIXED_ONE;
- /* Calculate trivial accept offsets from the above.
- */
- tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1;
- tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
- tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
+ /* find trivial reject offsets for each edge for a single-pixel
+ * sized block. These will be scaled up at each recursive level to
+ * match the active blocksize. Scaling in this way works best if
+ * the blocks are square.
+ */
+ plane->eo = 0;
+ if (plane->dcdx < 0) plane->eo -= plane->dcdx;
+ if (plane->dcdy > 0) plane->eo += plane->dcdy;
- /* Fill in the inputs.step[][] arrays.
- * We've manually unrolled some loops here.
- */
- {
- const int xstep1 = -tri->dy12;
- const int xstep2 = -tri->dy23;
- const int xstep3 = -tri->dy31;
- const int ystep1 = tri->dx12;
- const int ystep2 = tri->dx23;
- const int ystep3 = tri->dx31;
-
-#define SETUP_STEP(i, x, y) \
- do { \
- tri->inputs.step[0][i] = x * xstep1 + y * ystep1; \
- tri->inputs.step[1][i] = x * xstep2 + y * ystep2; \
- tri->inputs.step[2][i] = x * xstep3 + y * ystep3; \
- } while (0)
+ /* Calculate trivial accept offsets from the above.
+ */
+ plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+ plane->step = tri->step[i];
+
+ /* Fill in the tri->step[][] arrays.
+ * We've manually unrolled some loops here.
+ */
+#define SETUP_STEP(j, x, y) \
+ tri->step[i][j] = y * plane->dcdy - x * plane->dcdx
+
SETUP_STEP(0, 0, 0);
SETUP_STEP(1, 1, 0);
SETUP_STEP(2, 0, 1);
@@ -522,63 +621,106 @@ do_triangle_ccw(struct lp_setup_context *setup,
#undef STEP
}
+
+ /*
+ * When rasterizing scissored tris, use the intersection of the
+ * triangle bounding box and the scissor rect to generate the
+ * scissor planes.
+ *
+ * This permits us to cut off the triangle "tails" that are present
+ * in the intermediate recursive levels caused when two of the
+ * triangle's edges don't diverge quickly enough to trivially reject
+ * exterior blocks from the triangle.
+ *
+ * It's not really clear if it's worth worrying about these tails,
+ * but since we generate the planes for each scissored tri, it's
+ * free to trim them in this case.
+ *
+ * Note that otherwise, the scissor planes only vary in 'C' value,
+ * and even then only on state-changes. Could alternatively store
+ * these planes elsewhere.
+ */
+ if (nr_planes == 7) {
+ tri->plane[3].step = step_scissor_minx;
+ tri->plane[3].dcdx = -1;
+ tri->plane[3].dcdy = 0;
+ tri->plane[3].c = 1-minx;
+ tri->plane[3].ei = 0;
+ tri->plane[3].eo = 1;
+
+ tri->plane[4].step = step_scissor_maxx;
+ tri->plane[4].dcdx = 1;
+ tri->plane[4].dcdy = 0;
+ tri->plane[4].c = maxx;
+ tri->plane[4].ei = -1;
+ tri->plane[4].eo = 0;
+
+ tri->plane[5].step = step_scissor_miny;
+ tri->plane[5].dcdx = 0;
+ tri->plane[5].dcdy = 1;
+ tri->plane[5].c = 1-miny;
+ tri->plane[5].ei = 0;
+ tri->plane[5].eo = 1;
+
+ tri->plane[6].step = step_scissor_maxy;
+ tri->plane[6].dcdx = 0;
+ tri->plane[6].dcdy = -1;
+ tri->plane[6].c = maxy;
+ tri->plane[6].ei = -1;
+ tri->plane[6].eo = 0;
+ }
+
+
/*
* All fields of 'tri' are now set. The remaining code here is
* concerned with binning.
*/
- /* Convert to tile coordinates:
+ /* Convert to tile coordinates, and inclusive ranges:
*/
- minx = minx / TILE_SIZE;
- miny = miny / TILE_SIZE;
- maxx = maxx / TILE_SIZE;
- maxy = maxy / TILE_SIZE;
+ ix0 = minx / TILE_SIZE;
+ iy0 = miny / TILE_SIZE;
+ ix1 = (maxx-1) / TILE_SIZE;
+ iy1 = (maxy-1) / TILE_SIZE;
/*
* Clamp to framebuffer size
*/
- minx = MAX2(minx, 0);
- miny = MAX2(miny, 0);
- maxx = MIN2(maxx, scene->tiles_x - 1);
- maxy = MIN2(maxy, scene->tiles_y - 1);
+ assert(ix0 == MAX2(ix0, 0));
+ assert(iy0 == MAX2(iy0, 0));
+ assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
+ assert(iy1 == MIN2(iy1, scene->tiles_y - 1));
/* Determine which tile(s) intersect the triangle's bounding box
*/
- if (miny == maxy && minx == maxx)
+ if (iy0 == iy1 && ix0 == ix1)
{
/* Triangle is contained in a single tile:
*/
- lp_scene_bin_command( scene, minx, miny, lp_rast_triangle,
- lp_rast_arg_triangle(tri) );
+ lp_scene_bin_command( scene, ix0, iy0,
+ lp_rast_tri_tab[nr_planes],
+ lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
}
- else
+ else
{
- int c1 = (tri->c1 +
- tri->dx12 * miny * TILE_SIZE -
- tri->dy12 * minx * TILE_SIZE);
- int c2 = (tri->c2 +
- tri->dx23 * miny * TILE_SIZE -
- tri->dy23 * minx * TILE_SIZE);
- int c3 = (tri->c3 +
- tri->dx31 * miny * TILE_SIZE -
- tri->dy31 * minx * TILE_SIZE);
-
- int ei1 = tri->ei1 << TILE_ORDER;
- int ei2 = tri->ei2 << TILE_ORDER;
- int ei3 = tri->ei3 << TILE_ORDER;
-
- int eo1 = tri->eo1 << TILE_ORDER;
- int eo2 = tri->eo2 << TILE_ORDER;
- int eo3 = tri->eo3 << TILE_ORDER;
-
- int xstep1 = -(tri->dy12 << TILE_ORDER);
- int xstep2 = -(tri->dy23 << TILE_ORDER);
- int xstep3 = -(tri->dy31 << TILE_ORDER);
-
- int ystep1 = tri->dx12 << TILE_ORDER;
- int ystep2 = tri->dx23 << TILE_ORDER;
- int ystep3 = tri->dx31 << TILE_ORDER;
+ int c[7];
+ int ei[7];
+ int eo[7];
+ int xstep[7];
+ int ystep[7];
int x, y;
+
+ for (i = 0; i < nr_planes; i++) {
+ c[i] = (tri->plane[i].c +
+ tri->plane[i].dcdy * iy0 * TILE_SIZE -
+ tri->plane[i].dcdx * ix0 * TILE_SIZE);
+
+ ei[i] = tri->plane[i].ei << TILE_ORDER;
+ eo[i] = tri->plane[i].eo << TILE_ORDER;
+ xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER);
+ ystep[i] = tri->plane[i].dcdy << TILE_ORDER;
+ }
+
/* Test tile-sized blocks against the triangle.
@@ -586,63 +728,67 @@ do_triangle_ccw(struct lp_setup_context *setup,
* contained inside the tri, bin an lp_rast_shade_tile command.
* Else, bin a lp_rast_triangle command.
*/
- for (y = miny; y <= maxy; y++)
+ for (y = iy0; y <= iy1; y++)
{
- int cx1 = c1;
- int cx2 = c2;
- int cx3 = c3;
boolean in = FALSE; /* are we inside the triangle? */
+ int cx[7];
+
+ for (i = 0; i < nr_planes; i++)
+ cx[i] = c[i];
- for (x = minx; x <= maxx; x++)
+ for (x = ix0; x <= ix1; x++)
{
- if (cx1 + eo1 < 0 ||
- cx2 + eo2 < 0 ||
- cx3 + eo3 < 0)
- {
- /* do nothing */
+ int out = 0;
+ int partial = 0;
+
+ for (i = 0; i < nr_planes; i++) {
+ int planeout = cx[i] + eo[i];
+ int planepartial = cx[i] + ei[i] - 1;
+ out |= (planeout >> 31);
+ partial |= (planepartial >> 31) & (1<<i);
+ }
+
+ if (out) {
+ /* do nothing */
+ if (in)
+ break; /* exiting triangle, all done with this row */
LP_COUNT(nr_empty_64);
- if (in)
- break; /* exiting triangle, all done with this row */
- }
- else if (cx1 + ei1 > 0 &&
- cx2 + ei2 > 0 &&
- cx3 + ei3 > 0)
- {
+ }
+ else if (partial) {
+ /* Not trivially accepted by at least one plane -
+ * rasterize/shade partial tile
+ */
+ int count = util_bitcount(partial);
+ in = TRUE;
+ lp_scene_bin_command( scene, x, y,
+ lp_rast_tri_tab[count],
+ lp_rast_arg_triangle(tri, partial) );
+
+ LP_COUNT(nr_partially_covered_64);
+ }
+ else {
/* triangle covers the whole tile- shade whole tile */
LP_COUNT(nr_fully_covered_64);
- in = TRUE;
- if (setup->fs.current.variant->opaque) {
+ in = TRUE;
+ if (variant->opaque &&
+ !setup->fb.zsbuf) {
lp_scene_bin_reset( scene, x, y );
- lp_scene_bin_command( scene, x, y,
- lp_rast_set_state,
- lp_rast_arg_state(setup->fs.stored) );
}
lp_scene_bin_command( scene, x, y,
lp_rast_shade_tile,
lp_rast_arg_inputs(&tri->inputs) );
- }
- else
- {
- /* rasterizer/shade partial tile */
- LP_COUNT(nr_partially_covered_64);
- in = TRUE;
- lp_scene_bin_command( scene, x, y,
- lp_rast_triangle,
- lp_rast_arg_triangle(tri) );
- }
+ }
/* Iterate cx values across the region:
*/
- cx1 += xstep1;
- cx2 += xstep2;
- cx3 += xstep3;
+ for (i = 0; i < nr_planes; i++)
+ cx[i] += xstep[i];
}
/* Iterate c values down the region:
*/
- c1 += ystep1;
- c2 += ystep2;
- c3 += ystep3;
+ for (i = 0; i < nr_planes; i++)
+ c[i] += ystep[i];
}
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
index f6a424f25a..51948f5bf2 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -61,7 +61,9 @@ lp_setup_get_vertex_info(struct vbuf_render *vbr)
{
struct lp_setup_context *setup = lp_setup_context(vbr);
- /* vertex size/info depends on the latest state */
+ /* Vertex size/info depends on the latest state.
+ * The draw module may have issued additional state-change commands.
+ */
lp_setup_update_state(setup);
return setup->vertex_info;
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 05d1b93794..86313e1c48 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -130,6 +130,12 @@ llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe);
void
llvmpipe_init_so_funcs(struct llvmpipe_context *llvmpipe);
+void
+llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *ctx,
+ unsigned num,
+ struct pipe_sampler_view **views);
+void
+llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx);
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index d20a5218d4..77bec4640b 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -189,7 +189,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
llvmpipe->constants[PIPE_SHADER_FRAGMENT][0]);
if (llvmpipe->dirty & LP_NEW_SAMPLER_VIEW)
- lp_setup_set_fragment_sampler_views(llvmpipe->setup,
+ lp_setup_set_fragment_sampler_views(llvmpipe->setup,
llvmpipe->num_fragment_sampler_views,
llvmpipe->fragment_sampler_views);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 65115052cd..5953d690a4 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -31,9 +31,6 @@
* Code generate the whole fragment pipeline.
*
* The fragment pipeline consists of the following stages:
- * - triangle edge in/out testing
- * - scissor test
- * - stipple (TBI)
* - early depth test
* - fragment shader
* - alpha test
@@ -97,6 +94,7 @@
#include "lp_state.h"
#include "lp_tex_sample.h"
#include "lp_flush.h"
+#include "lp_state_fs.h"
#include <llvm-c/Analysis.h>
@@ -170,177 +168,63 @@ generate_depth_stencil(LLVMBuilderRef builder,
/**
- * Generate the code to do inside/outside triangle testing for the
+ * Expand the relevant bits of mask_input to a 4-dword mask for the
* four pixels in a 2x2 quad. This will set the four elements of the
* quad mask vector to 0 or ~0.
- * \param i which quad of the quad group to test, in [0,3]
+ *
+ * \param quad which quad of the quad group to test, in [0,3]
+ * \param mask_input bitwise mask for the whole 4x4 stamp
*/
-static void
-generate_tri_edge_mask(LLVMBuilderRef builder,
- unsigned i,
- LLVMValueRef *mask, /* ivec4, out */
- LLVMValueRef c0, /* int32 */
- LLVMValueRef c1, /* int32 */
- LLVMValueRef c2, /* int32 */
- LLVMValueRef step0_ptr, /* ivec4 */
- LLVMValueRef step1_ptr, /* ivec4 */
- LLVMValueRef step2_ptr) /* ivec4 */
+static LLVMValueRef
+generate_quad_mask(LLVMBuilderRef builder,
+ struct lp_type fs_type,
+ unsigned quad,
+ LLVMValueRef mask_input) /* int32 */
{
-#define OPTIMIZE_IN_OUT_TEST 0
-#if OPTIMIZE_IN_OUT_TEST
- struct lp_build_if_state ifctx;
- LLVMValueRef not_draw_all;
-#endif
- struct lp_build_flow_context *flow;
- struct lp_type i32_type;
- LLVMTypeRef i32vec4_type;
- LLVMValueRef c0_vec, c1_vec, c2_vec;
- LLVMValueRef in_out_mask;
-
- assert(i < 4);
-
- /* int32 vector type */
- memset(&i32_type, 0, sizeof i32_type);
- i32_type.floating = FALSE; /* values are integers */
- i32_type.sign = TRUE; /* values are signed */
- i32_type.norm = FALSE; /* values are not normalized */
- i32_type.width = 32; /* 32-bit int values */
- i32_type.length = 4; /* 4 elements per vector */
-
- i32vec4_type = lp_build_int32_vec4_type();
+ struct lp_type mask_type;
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef bits[4];
+ LLVMValueRef mask;
/*
- * Use a conditional here to do detailed pixel in/out testing.
- * We only have to do this if c0 != INT_MIN.
+ * XXX: We'll need a different path for 16 x u8
*/
- flow = lp_build_flow_create(builder);
- lp_build_flow_scope_begin(flow);
-
- {
-#if OPTIMIZE_IN_OUT_TEST
- /* not_draw_all = (c0 != INT_MIN) */
- not_draw_all = LLVMBuildICmp(builder,
- LLVMIntNE,
- c0,
- LLVMConstInt(LLVMInt32Type(), INT_MIN, 0),
- "");
-
- in_out_mask = lp_build_const_int_vec(i32_type, ~0);
-
-
- lp_build_flow_scope_declare(flow, &in_out_mask);
-
- /* if (not_draw_all) {... */
- lp_build_if(&ifctx, flow, builder, not_draw_all);
-#endif
- {
- LLVMValueRef step0_vec, step1_vec, step2_vec;
- LLVMValueRef m0_vec, m1_vec, m2_vec;
- LLVMValueRef index, m;
-
- /* c0_vec = {c0, c0, c0, c0}
- * Note that we emit this code four times but LLVM optimizes away
- * three instances of it.
- */
- c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
- c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
- c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
- lp_build_name(c0_vec, "edgeconst0vec");
- lp_build_name(c1_vec, "edgeconst1vec");
- lp_build_name(c2_vec, "edgeconst2vec");
-
- /* load step0vec, step1, step2 vec from memory */
- index = LLVMConstInt(LLVMInt32Type(), i, 0);
- step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
- step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
- step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
- lp_build_name(step0_vec, "step0vec");
- lp_build_name(step1_vec, "step1vec");
- lp_build_name(step2_vec, "step2vec");
-
- /* m0_vec = step0_ptr[i] > c0_vec */
- m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
- m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
- m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);
-
- /* in_out_mask = m0_vec & m1_vec & m2_vec */
- m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
- in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
- lp_build_name(in_out_mask, "inoutmaskvec");
- }
-#if OPTIMIZE_IN_OUT_TEST
- lp_build_endif(&ifctx);
-#endif
-
- }
- lp_build_flow_scope_end(flow);
- lp_build_flow_destroy(flow);
+ assert(fs_type.width == 32);
+ assert(fs_type.length == 4);
+ mask_type = lp_int_type(fs_type);
- /* This is the initial alive/dead pixel mask for a quad of four pixels.
- * It's an int[4] vector with each word set to 0 or ~0.
- * Words will get cleared when pixels faile the Z test, etc.
+ /*
+ * mask_input >>= (quad * 4)
*/
- *mask = in_out_mask;
-}
-
-
-static LLVMValueRef
-generate_scissor_test(LLVMBuilderRef builder,
- LLVMValueRef context_ptr,
- const struct lp_build_interp_soa_context *interp,
- struct lp_type type)
-{
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1];
- LLVMValueRef xmin, ymin, xmax, ymax;
- LLVMValueRef m0, m1, m2, m3, m;
-
- /* xpos, ypos contain the window coords for the four pixels in the quad */
- assert(xpos);
- assert(ypos);
-
- /* get the current scissor bounds, convert to vectors */
- xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr);
- xmin = lp_build_broadcast(builder, vec_type, xmin);
-
- ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr);
- ymin = lp_build_broadcast(builder, vec_type, ymin);
- xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr);
- xmax = lp_build_broadcast(builder, vec_type, xmax);
+ mask_input = LLVMBuildLShr(builder,
+ mask_input,
+ LLVMConstInt(i32t, quad * 4, 0),
+ "");
- ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr);
- ymax = lp_build_broadcast(builder, vec_type, ymax);
+ /*
+ * mask = { mask_input & (1 << i), for i in [0,3] }
+ */
- /* compare the fragment's position coordinates against the scissor bounds */
- m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin);
- m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin);
- m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax);
- m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax);
+ mask = lp_build_broadcast(builder, lp_build_vec_type(mask_type), mask_input);
- /* AND all the masks together */
- m = LLVMBuildAnd(builder, m0, m1, "");
- m = LLVMBuildAnd(builder, m, m2, "");
- m = LLVMBuildAnd(builder, m, m3, "");
+ bits[0] = LLVMConstInt(i32t, 1 << 0, 0);
+ bits[1] = LLVMConstInt(i32t, 1 << 1, 0);
+ bits[2] = LLVMConstInt(i32t, 1 << 2, 0);
+ bits[3] = LLVMConstInt(i32t, 1 << 3, 0);
- lp_build_name(m, "scissormask");
+ mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), "");
- return m;
-}
+ /*
+ * mask = mask != 0 ? ~0 : 0
+ */
+ mask = lp_build_compare(builder,
+ mask_type, PIPE_FUNC_NOTEQUAL,
+ mask,
+ lp_build_const_int_vec(mask_type, 0));
-static LLVMValueRef
-build_int32_vec_const(int value)
-{
- struct lp_type i32_type;
-
- memset(&i32_type, 0, sizeof i32_type);
- i32_type.floating = FALSE; /* values are integers */
- i32_type.sign = TRUE; /* values are signed */
- i32_type.norm = FALSE; /* values are not normalized */
- i32_type.width = 32; /* 32-bit int values */
- i32_type.length = 4; /* 4 elements per vector */
- return lp_build_const_int_vec(i32_type, value);
+ return mask;
}
@@ -348,7 +232,7 @@ build_int32_vec_const(int value)
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
* \param i which quad in the tile, in range [0,3]
- * \param do_tri_test if 1, do triangle edge in/out testing
+ * \param partial_mask if 1, do mask_input testing
*/
static void
generate_fs(struct llvmpipe_context *lp,
@@ -364,13 +248,8 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef (*color)[4],
LLVMValueRef depth_ptr,
LLVMValueRef facing,
- unsigned do_tri_test,
- LLVMValueRef c0,
- LLVMValueRef c1,
- LLVMValueRef c2,
- LLVMValueRef step0_ptr,
- LLVMValueRef step1_ptr,
- LLVMValueRef step2_ptr,
+ unsigned partial_mask,
+ LLVMValueRef mask_input,
LLVMValueRef counter)
{
const struct tgsi_token *tokens = shader->base.tokens;
@@ -411,23 +290,17 @@ generate_fs(struct llvmpipe_context *lp,
lp_build_flow_scope_declare(flow, &z);
/* do triangle edge testing */
- if (do_tri_test) {
- generate_tri_edge_mask(builder, i, pmask,
- c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
+ if (partial_mask) {
+ *pmask = generate_quad_mask(builder, type,
+ i, mask_input);
}
else {
- *pmask = build_int32_vec_const(~0);
+ *pmask = lp_build_const_int_vec(type, ~0);
}
/* 'mask' will control execution based on quad's pixel alive/killed state */
lp_build_mask_begin(&mask, flow, type, *pmask);
- if (key->scissor) {
- LLVMValueRef smask =
- generate_scissor_test(builder, context_ptr, interp, type);
- lp_build_mask_update(&mask, smask);
- }
-
early_depth_stencil_test =
(key->depth.enabled || key->stencil[0].enabled) &&
!key->alpha.enabled &&
@@ -579,7 +452,7 @@ static void
generate_fragment(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
struct lp_fragment_shader_variant *variant,
- unsigned do_tri_test)
+ unsigned partial_mask)
{
struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
const struct lp_fragment_shader_variant_key *key = &variant->key;
@@ -589,9 +462,8 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMTypeRef fs_elem_type;
LLVMTypeRef fs_int_vec_type;
LLVMTypeRef blend_vec_type;
- LLVMTypeRef arg_types[16];
+ LLVMTypeRef arg_types[11];
LLVMTypeRef func_type;
- LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type();
LLVMValueRef context_ptr;
LLVMValueRef x;
LLVMValueRef y;
@@ -600,7 +472,8 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMValueRef dady_ptr;
LLVMValueRef color_ptr_ptr;
LLVMValueRef depth_ptr;
- LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr, counter = NULL;
+ LLVMValueRef mask_input;
+ LLVMValueRef counter = NULL;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
struct lp_build_sampler_soa *sampler;
@@ -645,7 +518,7 @@ generate_fragment(struct llvmpipe_context *lp,
blend_vec_type = lp_build_vec_type(blend_type);
util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
- shader->no, variant->no, do_tri_test ? "edge" : "whole");
+ shader->no, variant->no, partial_mask ? "partial" : "whole");
arg_types[0] = screen->context_ptr_type; /* context */
arg_types[1] = LLVMInt32Type(); /* x */
@@ -656,23 +529,15 @@ generate_fragment(struct llvmpipe_context *lp,
arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */
arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */
arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
- arg_types[9] = LLVMInt32Type(); /* c0 */
- arg_types[10] = LLVMInt32Type(); /* c1 */
- arg_types[11] = LLVMInt32Type(); /* c2 */
- /* Note: the step arrays are built as int32[16] but we interpret
- * them here as int32_vec4[4].
- */
- arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step0 */
- arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step1 */
- arg_types[14] = LLVMPointerType(int32_vec4_type, 0);/* step2 */
- arg_types[15] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
+ arg_types[9] = LLVMInt32Type(); /* mask_input */
+ arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
function = LLVMAddFunction(screen->module, func_name, func_type);
LLVMSetFunctionCallConv(function, LLVMCCallConv);
- variant->function[do_tri_test] = function;
+ variant->function[partial_mask] = function;
/* XXX: need to propagate noalias down into color param now we are
@@ -691,12 +556,7 @@ generate_fragment(struct llvmpipe_context *lp,
dady_ptr = LLVMGetParam(function, 6);
color_ptr_ptr = LLVMGetParam(function, 7);
depth_ptr = LLVMGetParam(function, 8);
- c0 = LLVMGetParam(function, 9);
- c1 = LLVMGetParam(function, 10);
- c2 = LLVMGetParam(function, 11);
- step0_ptr = LLVMGetParam(function, 12);
- step1_ptr = LLVMGetParam(function, 13);
- step2_ptr = LLVMGetParam(function, 14);
+ mask_input = LLVMGetParam(function, 9);
lp_build_name(context_ptr, "context");
lp_build_name(x, "x");
@@ -706,15 +566,10 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(dady_ptr, "dady");
lp_build_name(color_ptr_ptr, "color_ptr_ptr");
lp_build_name(depth_ptr, "depth");
- lp_build_name(c0, "c0");
- lp_build_name(c1, "c1");
- lp_build_name(c2, "c2");
- lp_build_name(step0_ptr, "step0");
- lp_build_name(step1_ptr, "step1");
- lp_build_name(step2_ptr, "step2");
+ lp_build_name(mask_input, "mask_input");
if (key->occlusion_count) {
- counter = LLVMGetParam(function, 15);
+ counter = LLVMGetParam(function, 10);
lp_build_name(counter, "counter");
}
@@ -763,9 +618,9 @@ generate_fragment(struct llvmpipe_context *lp,
out_color,
depth_ptr_i,
facing,
- do_tri_test,
- c0, c1, c2,
- step0_ptr, step1_ptr, step2_ptr, counter);
+ partial_mask,
+ mask_input,
+ counter);
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
for(chan = 0; chan < NUM_CHANNELS; ++chan)
@@ -792,9 +647,13 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
}
- lp_build_conv_mask(builder, fs_type, blend_type,
- fs_mask, num_fs,
- &blend_mask, 1);
+ if (partial_mask || !variant->opaque) {
+ lp_build_conv_mask(builder, fs_type, blend_type,
+ fs_mask, num_fs,
+ &blend_mask, 1);
+ } else {
+ blend_mask = lp_build_const_int_vec(blend_type, ~0);
+ }
color_ptr = LLVMBuildLoad(builder,
LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
@@ -832,8 +691,7 @@ generate_fragment(struct llvmpipe_context *lp,
#endif
/* Apply optimizations to LLVM IR */
- if (1)
- LLVMRunFunctionPassManager(screen->pass, function);
+ LLVMRunFunctionPassManager(screen->pass, function);
if (gallivm_debug & GALLIVM_DEBUG_IR) {
/* Print the LLVM IR to stderr */
@@ -847,7 +705,7 @@ generate_fragment(struct llvmpipe_context *lp,
{
void *f = LLVMGetPointerToGlobal(screen->engine, function);
- variant->jit_function[do_tri_test] = (lp_jit_frag_func)pointer_to_func(f);
+ variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f);
if (gallivm_debug & GALLIVM_DEBUG_ASM) {
lp_disassemble(f);
@@ -963,7 +821,6 @@ generate_variant(struct llvmpipe_context *lp,
!key->stencil[0].enabled &&
!key->alpha.enabled &&
!key->depth.enabled &&
- !key->scissor &&
!shader->info.uses_kill
? TRUE : FALSE;
@@ -1182,7 +1039,6 @@ make_variant_key(struct llvmpipe_context *lp,
/* alpha.ref_value is passed in jit_context */
key->flatshade = lp->rasterizer->flatshade;
- key->scissor = lp->rasterizer->scissor;
if (lp->active_query_count) {
key->occlusion_count = TRUE;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 593cd4de6b..37900fc544 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -54,7 +54,6 @@ struct lp_fragment_shader_variant_key
enum pipe_format zsbuf_format;
unsigned nr_cbufs:8;
unsigned flatshade:1;
- unsigned scissor:1;
unsigned occlusion_count:1;
struct {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index e94065fb6a..715ce2f02e 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -35,10 +35,9 @@
#include "draw/draw_context.h"
#include "lp_context.h"
-#include "lp_context.h"
+#include "lp_screen.h"
#include "lp_state.h"
-#include "draw/draw_context.h"
-
+#include "state_tracker/sw_winsys.h"
static void *
@@ -100,6 +99,10 @@ llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe,
llvmpipe->num_vertex_samplers = num_samplers;
+ draw_set_samplers(llvmpipe->draw,
+ llvmpipe->vertex_samplers,
+ llvmpipe->num_vertex_samplers);
+
llvmpipe->dirty |= LP_NEW_SAMPLER;
}
@@ -166,6 +169,10 @@ llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe,
llvmpipe->num_vertex_sampler_views = num;
+ draw_set_sampler_views(llvmpipe->draw,
+ llvmpipe->vertex_sampler_views,
+ llvmpipe->num_vertex_sampler_views);
+
llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
}
@@ -214,6 +221,77 @@ llvmpipe_delete_sampler_state(struct pipe_context *pipe,
}
+/**
+ * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ */
+void
+llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp,
+ unsigned num,
+ struct pipe_sampler_view **views)
+{
+ unsigned i;
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS];
+ const void *data[DRAW_MAX_TEXTURE_LEVELS];
+
+ assert(num <= PIPE_MAX_VERTEX_SAMPLERS);
+ if (!num)
+ return;
+
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ struct pipe_sampler_view *view = i < num ? views[i] : NULL;
+
+ if (view) {
+ struct pipe_resource *tex = view->texture;
+ struct llvmpipe_resource *lp_tex = llvmpipe_resource(tex);
+
+ /* We're referencing the texture's internal data, so save a
+ * reference to it.
+ */
+ pipe_resource_reference(&lp->mapped_vs_tex[i], tex);
+
+ if (!lp_tex->dt) {
+ /* regular texture - setup array of mipmap level pointers */
+ int j;
+ for (j = 0; j <= tex->last_level; j++) {
+ data[j] =
+ llvmpipe_get_texture_image_all(lp_tex, j, LP_TEX_USAGE_READ,
+ LP_TEX_LAYOUT_LINEAR);
+ row_stride[j] = lp_tex->row_stride[j];
+ img_stride[j] = lp_tex->img_stride[j];
+ }
+ }
+ else {
+ /* display target texture/surface */
+ /*
+ * XXX: Where should this be unmapped?
+ */
+ struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen);
+ struct sw_winsys *winsys = screen->winsys;
+ data[0] = winsys->displaytarget_map(winsys, lp_tex->dt,
+ PIPE_TRANSFER_READ);
+ row_stride[0] = lp_tex->row_stride[0];
+ img_stride[0] = lp_tex->img_stride[0];
+ assert(data[0]);
+ }
+ draw_set_mapped_texture(lp->draw,
+ i,
+ tex->width0, tex->height0, tex->depth0,
+ tex->last_level,
+ row_stride, img_stride, data);
+ }
+ }
+}
+
+void
+llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx)
+{
+ unsigned i;
+ for (i = 0; i < Elements(ctx->mapped_vs_tex); i++) {
+ pipe_resource_reference(&ctx->mapped_vs_tex[i], NULL);
+ }
+}
+
void
llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_state_so.c b/src/gallium/drivers/llvmpipe/lp_state_so.c
index 4c64a5b142..30b17c9881 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_so.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_so.c
@@ -29,7 +29,6 @@
#include "lp_state.h"
#include "lp_texture.h"
-#include "util/u_format.h"
#include "util/u_memory.h"
#include "draw/draw_context.h"
diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c
index 76b3fce1fa..f761e82850 100644
--- a/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -67,14 +67,14 @@ lp_resource_copy(struct pipe_context *pipe,
dst, subdst.face, subdst.level,
0, /* flush_flags */
FALSE, /* read_only */
- FALSE, /* cpu_access */
+ TRUE, /* cpu_access */
FALSE); /* do_not_block */
llvmpipe_flush_resource(pipe,
src, subsrc.face, subsrc.level,
0, /* flush_flags */
TRUE, /* read_only */
- FALSE, /* cpu_access */
+ TRUE, /* cpu_access */
FALSE); /* do_not_block */
/*
@@ -106,19 +106,27 @@ lp_resource_copy(struct pipe_context *pipe,
unsigned x, y;
enum lp_texture_usage usage;
- /* XXX for the tiles which are completely contained by the
- * dest rectangle, we could set the usage mode to WRITE_ALL.
- * Just test for the case of replacing the whole dest region for now.
- */
- if (width == dst_tex->base.width0 && height == dst_tex->base.height0)
- usage = LP_TEX_USAGE_WRITE_ALL;
- else
- usage = LP_TEX_USAGE_READ_WRITE;
-
adjust_to_tile_bounds(dstx, dsty, width, height, &tx, &ty, &tw, &th);
for (y = 0; y < th; y += TILE_SIZE) {
+ boolean contained_y = ty + y >= dsty &&
+ ty + y + TILE_SIZE <= dsty + height ?
+ TRUE : FALSE;
+
for (x = 0; x < tw; x += TILE_SIZE) {
+ boolean contained_x = tx + x >= dstx &&
+ tx + x + TILE_SIZE <= dstx + width ?
+ TRUE : FALSE;
+
+ /*
+ * Set the usage mode to WRITE_ALL for the tiles which are
+ * completely contained by the dest rectangle.
+ */
+ if (contained_y && contained_x)
+ usage = LP_TEX_USAGE_WRITE_ALL;
+ else
+ usage = LP_TEX_USAGE_READ_WRITE;
+
(void) llvmpipe_get_texture_tile_linear(dst_tex,
subdst.face, subdst.level,
usage,
@@ -138,13 +146,15 @@ lp_resource_copy(struct pipe_context *pipe,
subdst.level,
LP_TEX_LAYOUT_LINEAR);
- util_copy_rect(dst_linear_ptr, format,
- llvmpipe_resource_stride(&dst_tex->base, subdst.level),
- dstx, dsty,
- width, height,
- src_linear_ptr,
- llvmpipe_resource_stride(&src_tex->base, subsrc.level),
- srcx, srcy);
+ if (dst_linear_ptr && src_linear_ptr) {
+ util_copy_rect(dst_linear_ptr, format,
+ llvmpipe_resource_stride(&dst_tex->base, subdst.level),
+ dstx, dsty,
+ width, height,
+ src_linear_ptr,
+ llvmpipe_resource_stride(&src_tex->base, subsrc.level),
+ srcx, srcy);
+ }
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 9b02f436c5..cf41b40581 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -167,19 +167,26 @@ test_one(unsigned verbose,
unsigned i, j;
void *code;
+ if (src_type.width * src_type.length != dst_type.width * dst_type.length &&
+ src_type.length != dst_type.length) {
+ return TRUE;
+ }
+
if(verbose >= 1)
dump_conv_types(stdout, src_type, dst_type);
- if(src_type.length > dst_type.length) {
+ if (src_type.length > dst_type.length) {
num_srcs = 1;
num_dsts = src_type.length/dst_type.length;
}
- else {
+ else if (src_type.length < dst_type.length) {
num_dsts = 1;
num_srcs = dst_type.length/src_type.length;
}
-
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+ else {
+ num_dsts = 1;
+ num_srcs = 1;
+ }
/* We must not loose or gain channels. Only precision */
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
@@ -381,6 +388,11 @@ const struct lp_type conv_types[] = {
{ FALSE, FALSE, TRUE, FALSE, 8, 16 },
{ FALSE, FALSE, FALSE, TRUE, 8, 16 },
{ FALSE, FALSE, FALSE, FALSE, 8, 16 },
+
+ { FALSE, FALSE, TRUE, TRUE, 8, 4 },
+ { FALSE, FALSE, TRUE, FALSE, 8, 4 },
+ { FALSE, FALSE, FALSE, TRUE, 8, 4 },
+ { FALSE, FALSE, FALSE, FALSE, 8, 4 },
};
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 8b6dc1c7f5..2855d7cea4 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -31,6 +31,7 @@
#include <float.h>
#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_init.h"
#include <llvm-c/Analysis.h>
#include <llvm-c/Target.h>
@@ -38,6 +39,7 @@
#include "util/u_memory.h"
#include "util/u_pointer.h"
+#include "util/u_string.h"
#include "util/u_format.h"
#include "util/u_format_tests.h"
#include "util/u_format_s3tc.h"
@@ -71,17 +73,20 @@ write_tsv_row(FILE *fp,
typedef void
-(*fetch_ptr_t)(float *, const void *packed,
+(*fetch_ptr_t)(void *unpacked, const void *packed,
unsigned i, unsigned j);
static LLVMValueRef
-add_fetch_rgba_test(LLVMModuleRef lp_build_module,
- const struct util_format_description *desc)
+add_fetch_rgba_test(unsigned verbose,
+ const struct util_format_description *desc,
+ struct lp_type type)
{
+ char name[256];
LLVMTypeRef args[4];
LLVMValueRef func;
LLVMValueRef packed_ptr;
+ LLVMValueRef offset = LLVMConstNull(LLVMInt32Type());
LLVMValueRef rgba_ptr;
LLVMValueRef i;
LLVMValueRef j;
@@ -89,11 +94,15 @@ add_fetch_rgba_test(LLVMModuleRef lp_build_module,
LLVMBuilderRef builder;
LLVMValueRef rgba;
- args[0] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
+ util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name,
+ type.floating ? "float" : "unorm8");
+
+ args[0] = LLVMPointerType(lp_build_vec_type(type), 0);
args[1] = LLVMPointerType(LLVMInt8Type(), 0);
args[3] = args[2] = LLVMInt32Type();
- func = LLVMAddFunction(lp_build_module, "fetch", LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0));
+ func = LLVMAddFunction(lp_build_module, name,
+ LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
rgba_ptr = LLVMGetParam(func, 0);
packed_ptr = LLVMGetParam(func, 1);
@@ -104,91 +113,104 @@ add_fetch_rgba_test(LLVMModuleRef lp_build_module,
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- rgba = lp_build_fetch_rgba_aos(builder, desc, packed_ptr, i, j);
+ rgba = lp_build_fetch_rgba_aos(builder, desc, type,
+ packed_ptr, offset, i, j);
LLVMBuildStore(builder, rgba, rgba_ptr);
LLVMBuildRetVoid(builder);
LLVMDisposeBuilder(builder);
+
+ if (LLVMVerifyFunction(func, LLVMPrintMessageAction)) {
+ LLVMDumpValue(func);
+ abort();
+ }
+
+ LLVMRunFunctionPassManager(lp_build_pass, func);
+
+ if (verbose >= 1) {
+ LLVMDumpValue(func);
+ }
+
return func;
}
PIPE_ALIGN_STACK
static boolean
-test_format(unsigned verbose, FILE *fp,
- const struct util_format_description *desc,
- const struct util_format_test_case *test)
+test_format_float(unsigned verbose, FILE *fp,
+ const struct util_format_description *desc)
{
LLVMValueRef fetch = NULL;
- LLVMPassManagerRef pass = NULL;
fetch_ptr_t fetch_ptr;
PIPE_ALIGN_VAR(16) float unpacked[4];
- boolean success;
- unsigned i, j, k;
+ boolean first = TRUE;
+ boolean success = TRUE;
+ unsigned i, j, k, l;
+ void *f;
- fetch = add_fetch_rgba_test(lp_build_module, desc);
+ fetch = add_fetch_rgba_test(verbose, desc, lp_float32_vec4_type());
- if (LLVMVerifyFunction(fetch, LLVMPrintMessageAction)) {
- LLVMDumpValue(fetch);
- abort();
+ f = LLVMGetPointerToGlobal(lp_build_engine, fetch);
+ fetch_ptr = (fetch_ptr_t) pointer_to_func(f);
+
+ if (verbose >= 2) {
+ lp_disassemble(f);
}
-#if 0
- pass = LLVMCreatePassManager();
- LLVMAddTargetData(LLVMGetExecutionEngineTargetData(lp_build_engine), pass);
- /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
- * but there are more on SVN. */
- LLVMAddConstantPropagationPass(pass);
- LLVMAddInstructionCombiningPass(pass);
- LLVMAddPromoteMemoryToRegisterPass(pass);
- LLVMAddGVNPass(pass);
- LLVMAddCFGSimplificationPass(pass);
- LLVMRunPassManager(pass, lp_build_module);
-#else
- (void)pass;
-#endif
-
- fetch_ptr = (fetch_ptr_t)pointer_to_func(LLVMGetPointerToGlobal(lp_build_engine, fetch));
-
- for (i = 0; i < desc->block.height; ++i) {
- for (j = 0; j < desc->block.width; ++j) {
-
- memset(unpacked, 0, sizeof unpacked);
-
- fetch_ptr(unpacked, test->packed, j, i);
-
- success = TRUE;
- for(k = 0; k < 4; ++k)
- if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON)
- success = FALSE;
-
- if (!success) {
- printf("FAILED\n");
- printf(" Packed: %02x %02x %02x %02x\n",
- test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
- printf(" Unpacked (%u,%u): %f %f %f %f obtained\n",
- j, i,
- unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
- printf(" %f %f %f %f expected\n",
- test->unpacked[i][j][0],
- test->unpacked[i][j][1],
- test->unpacked[i][j][2],
- test->unpacked[i][j][3]);
+ for (l = 0; l < util_format_nr_test_cases; ++l) {
+ const struct util_format_test_case *test = &util_format_test_cases[l];
+
+ if (test->format == desc->format) {
+
+ if (first) {
+ printf("Testing %s (float) ...\n",
+ desc->name);
+ first = FALSE;
+ }
+
+ for (i = 0; i < desc->block.height; ++i) {
+ for (j = 0; j < desc->block.width; ++j) {
+ boolean match;
+
+ memset(unpacked, 0, sizeof unpacked);
+
+ fetch_ptr(unpacked, test->packed, j, i);
+
+ match = TRUE;
+ for(k = 0; k < 4; ++k)
+ if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON)
+ match = FALSE;
+
+ if (!match) {
+ printf("FAILED\n");
+ printf(" Packed: %02x %02x %02x %02x\n",
+ test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
+ printf(" Unpacked (%u,%u): %f %f %f %f obtained\n",
+ j, i,
+ unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
+ printf(" %f %f %f %f expected\n",
+ test->unpacked[i][j][0],
+ test->unpacked[i][j][1],
+ test->unpacked[i][j][2],
+ test->unpacked[i][j][3]);
+ success = FALSE;
+ }
+ }
}
}
}
- if (!success)
- LLVMDumpValue(fetch);
+ if (!success) {
+ if (verbose < 1) {
+ LLVMDumpValue(fetch);
+ }
+ }
LLVMFreeMachineCodeForFunction(lp_build_engine, fetch);
LLVMDeleteFunction(fetch);
- if(pass)
- LLVMDisposePassManager(pass);
-
if(fp)
write_tsv_row(fp, desc, success);
@@ -196,32 +218,104 @@ test_format(unsigned verbose, FILE *fp,
}
-
+PIPE_ALIGN_STACK
static boolean
-test_one(unsigned verbose, FILE *fp,
- const struct util_format_description *format_desc)
+test_format_unorm8(unsigned verbose, FILE *fp,
+ const struct util_format_description *desc)
{
- unsigned i;
+ LLVMValueRef fetch = NULL;
+ fetch_ptr_t fetch_ptr;
+ uint8_t unpacked[4];
boolean first = TRUE;
boolean success = TRUE;
+ unsigned i, j, k, l;
+ void *f;
- for (i = 0; i < util_format_nr_test_cases; ++i) {
- const struct util_format_test_case *test = &util_format_test_cases[i];
+ fetch = add_fetch_rgba_test(verbose, desc, lp_unorm8_vec4_type());
- if (test->format == format_desc->format) {
+ f = LLVMGetPointerToGlobal(lp_build_engine, fetch);
+ fetch_ptr = (fetch_ptr_t) pointer_to_func(f);
+
+ if (verbose >= 2) {
+ lp_disassemble(f);
+ }
+
+ for (l = 0; l < util_format_nr_test_cases; ++l) {
+ const struct util_format_test_case *test = &util_format_test_cases[l];
+
+ if (test->format == desc->format) {
if (first) {
- printf("Testing %s ...\n",
- format_desc->name);
+ printf("Testing %s (unorm8) ...\n",
+ desc->name);
first = FALSE;
}
- if (!test_format(verbose, fp, format_desc, test)) {
- success = FALSE;
+ for (i = 0; i < desc->block.height; ++i) {
+ for (j = 0; j < desc->block.width; ++j) {
+ boolean match;
+
+ memset(unpacked, 0, sizeof unpacked);
+
+ fetch_ptr(unpacked, test->packed, j, i);
+
+ match = TRUE;
+ for(k = 0; k < 4; ++k) {
+ int error = float_to_ubyte(test->unpacked[i][j][k]) - unpacked[k];
+ if (error < 0)
+ error = -error;
+ if (error > 1)
+ match = FALSE;
+ }
+
+ if (!match) {
+ printf("FAILED\n");
+ printf(" Packed: %02x %02x %02x %02x\n",
+ test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
+ printf(" Unpacked (%u,%u): %02x %02x %02x %02x obtained\n",
+ j, i,
+ unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
+ printf(" %02x %02x %02x %02x expected\n",
+ float_to_ubyte(test->unpacked[i][j][0]),
+ float_to_ubyte(test->unpacked[i][j][1]),
+ float_to_ubyte(test->unpacked[i][j][2]),
+ float_to_ubyte(test->unpacked[i][j][3]));
+ success = FALSE;
+ }
+ }
}
}
}
+ if (!success)
+ LLVMDumpValue(fetch);
+
+ LLVMFreeMachineCodeForFunction(lp_build_engine, fetch);
+ LLVMDeleteFunction(fetch);
+
+ if(fp)
+ write_tsv_row(fp, desc, success);
+
+ return success;
+}
+
+
+
+
+static boolean
+test_one(unsigned verbose, FILE *fp,
+ const struct util_format_description *format_desc)
+{
+ boolean success = TRUE;
+
+ if (!test_format_float(verbose, fp, format_desc)) {
+ success = FALSE;
+ }
+
+ if (!test_format_unorm8(verbose, fp, format_desc)) {
+ success = FALSE;
+ }
+
return success;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c
new file mode 100644
index 0000000000..f571a81a4a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_round.c
@@ -0,0 +1,277 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "util/u_pointer.h"
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_printf.h"
+#include "gallivm/lp_bld_arit.h"
+
+#include <llvm-c/Analysis.h>
+#include <llvm-c/ExecutionEngine.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "lp_test.h"
+
+
+void
+write_tsv_header(FILE *fp)
+{
+ fprintf(fp,
+ "result\t"
+ "format\n");
+
+ fflush(fp);
+}
+
+
+#ifdef PIPE_ARCH_SSE
+
+#define USE_SSE2
+#include "sse_mathfun.h"
+
+typedef __m128 (*test_round_t)(__m128);
+
+typedef LLVMValueRef (*lp_func_t)(struct lp_build_context *, LLVMValueRef);
+
+
+static LLVMValueRef
+add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func)
+{
+ LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4);
+ LLVMTypeRef args[1] = { v4sf };
+ LLVMValueRef func = LLVMAddFunction(module, name, LLVMFunctionType(v4sf, args, 1, 0));
+ LLVMValueRef arg1 = LLVMGetParam(func, 0);
+ LLVMBuilderRef builder = LLVMCreateBuilder();
+ LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry");
+ LLVMValueRef ret;
+ struct lp_build_context bld;
+
+ bld.builder = builder;
+ bld.type.floating = 1;
+ bld.type.width = 32;
+ bld.type.length = 4;
+
+ LLVMSetFunctionCallConv(func, LLVMCCallConv);
+
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ ret = lp_func(&bld, arg1);
+
+ LLVMBuildRet(builder, ret);
+ LLVMDisposeBuilder(builder);
+ return func;
+}
+
+static void
+printv(char* string, v4sf value)
+{
+ v4sf v = value;
+ float *f = (float *)&v;
+ printf("%s: %10f %10f %10f %10f\n", string,
+ f[0], f[1], f[2], f[3]);
+}
+
+static void
+compare(v4sf x, v4sf y)
+{
+ float *xp = (float *) &x;
+ float *yp = (float *) &y;
+ if (xp[0] != yp[0] ||
+ xp[1] != yp[1] ||
+ xp[2] != yp[2] ||
+ xp[3] != yp[3]) {
+ printf(" Incorrect result! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n");
+ }
+}
+
+
+
+PIPE_ALIGN_STACK
+static boolean
+test_round(unsigned verbose, FILE *fp)
+{
+ LLVMModuleRef module = NULL;
+ LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil;
+ LLVMExecutionEngineRef engine = NULL;
+ LLVMModuleProviderRef provider = NULL;
+ LLVMPassManagerRef pass = NULL;
+ char *error = NULL;
+ test_round_t round_func, trunc_func, floor_func, ceil_func;
+ float unpacked[4];
+ unsigned packed;
+ boolean success = TRUE;
+ int i;
+
+ module = LLVMModuleCreateWithName("test");
+
+ test_round = add_test(module, "round", lp_build_round);
+ test_trunc = add_test(module, "trunc", lp_build_trunc);
+ test_floor = add_test(module, "floor", lp_build_floor);
+ test_ceil = add_test(module, "ceil", lp_build_ceil);
+
+ if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+ printf("LLVMVerifyModule: %s\n", error);
+ LLVMDumpModule(module);
+ abort();
+ }
+ LLVMDisposeMessage(error);
+
+ provider = LLVMCreateModuleProviderForExistingModule(module);
+ if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
+ fprintf(stderr, "%s\n", error);
+ LLVMDisposeMessage(error);
+ abort();
+ }
+
+#if 0
+ pass = LLVMCreatePassManager();
+ LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ LLVMAddConstantPropagationPass(pass);
+ LLVMAddInstructionCombiningPass(pass);
+ LLVMAddPromoteMemoryToRegisterPass(pass);
+ LLVMAddGVNPass(pass);
+ LLVMAddCFGSimplificationPass(pass);
+ LLVMRunPassManager(pass, module);
+#else
+ (void)pass;
+#endif
+
+ round_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_round));
+ trunc_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_trunc));
+ floor_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_floor));
+ ceil_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_ceil));
+
+ memset(unpacked, 0, sizeof unpacked);
+ packed = 0;
+
+ if (0)
+ LLVMDumpModule(module);
+
+ for (i = 0; i < 3; i++) {
+ v4sf xvals[3] = {
+ {-10.0, -1, 0, 12.0},
+ {-1.5, -0.25, 1.25, 2.5},
+ {-0.99, -0.01, 0.01, 0.99}
+ };
+ v4sf x = xvals[i];
+ v4sf y, ref;
+ float *xp = (float *) &x;
+ float *refp = (float *) &ref;
+
+ printf("\n");
+ printv("x ", x);
+
+ refp[0] = round(xp[0]);
+ refp[1] = round(xp[1]);
+ refp[2] = round(xp[2]);
+ refp[3] = round(xp[3]);
+ y = round_func(x);
+ printv("C round(x) ", ref);
+ printv("LLVM round(x)", y);
+ compare(ref, y);
+
+ refp[0] = trunc(xp[0]);
+ refp[1] = trunc(xp[1]);
+ refp[2] = trunc(xp[2]);
+ refp[3] = trunc(xp[3]);
+ y = trunc_func(x);
+ printv("C trunc(x) ", ref);
+ printv("LLVM trunc(x)", y);
+ compare(ref, y);
+
+ refp[0] = floor(xp[0]);
+ refp[1] = floor(xp[1]);
+ refp[2] = floor(xp[2]);
+ refp[3] = floor(xp[3]);
+ y = floor_func(x);
+ printv("C floor(x) ", ref);
+ printv("LLVM floor(x)", y);
+ compare(ref, y);
+
+ refp[0] = ceil(xp[0]);
+ refp[1] = ceil(xp[1]);
+ refp[2] = ceil(xp[2]);
+ refp[3] = ceil(xp[3]);
+ y = ceil_func(x);
+ printv("C ceil(x) ", ref);
+ printv("LLVM ceil(x) ", y);
+ compare(ref, y);
+ }
+
+ LLVMFreeMachineCodeForFunction(engine, test_round);
+ LLVMFreeMachineCodeForFunction(engine, test_trunc);
+ LLVMFreeMachineCodeForFunction(engine, test_floor);
+ LLVMFreeMachineCodeForFunction(engine, test_ceil);
+
+ LLVMDisposeExecutionEngine(engine);
+ if(pass)
+ LLVMDisposePassManager(pass);
+
+ return success;
+}
+
+#else /* !PIPE_ARCH_SSE */
+
+static boolean
+test_round(unsigned verbose, FILE *fp)
+{
+ return TRUE;
+}
+
+#endif /* !PIPE_ARCH_SSE */
+
+
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+ boolean success = TRUE;
+
+ test_round(verbose, fp);
+
+ return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+ return test_all(verbose, fp);
+}
+
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+ printf("no test_single()");
+ return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
index c7a903a025..1366ecddcb 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
@@ -108,7 +108,6 @@ test_sincos(unsigned verbose, FILE *fp)
test_sincos_t sin_func;
test_sincos_t cos_func;
float unpacked[4];
- unsigned packed;
boolean success = TRUE;
module = LLVMModuleCreateWithName("test");
@@ -149,7 +148,6 @@ test_sincos(unsigned verbose, FILE *fp)
cos_func = (test_sincos_t)LLVMGetPointerToGlobal(engine, test_cos);
memset(unpacked, 0, sizeof unpacked);
- packed = 0;
// LLVMDumpModule(module);
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 0d526ead89..25112c10a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -36,6 +36,7 @@
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
+#include "util/u_cpu_detect.h"
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -55,6 +56,7 @@
#ifdef DEBUG
static struct llvmpipe_resource resource_list;
#endif
+static unsigned id_counter = 0;
static INLINE boolean
@@ -183,8 +185,8 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
*/
const unsigned width = align(lpr->base.width0, TILE_SIZE);
const unsigned height = align(lpr->base.height0, TILE_SIZE);
- const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE;
- const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE;
+ const unsigned width_t = width / TILE_SIZE;
+ const unsigned height_t = height / TILE_SIZE;
lpr->tiles_per_row[0] = width_t;
lpr->tiles_per_image[0] = width_t * height_t;
@@ -209,7 +211,6 @@ static struct pipe_resource *
llvmpipe_resource_create(struct pipe_screen *_screen,
const struct pipe_resource *templat)
{
- static unsigned id_counter = 0;
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource);
if (!lpr)
@@ -389,7 +390,6 @@ llvmpipe_resource_map(struct pipe_resource *resource,
map = llvmpipe_get_texture_image(lpr, face + zslice, level,
tex_usage, layout);
- assert(map);
return map;
}
else {
@@ -446,6 +446,10 @@ llvmpipe_resource_from_handle(struct pipe_screen *screen,
{
struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys;
struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource);
+ unsigned width, height, width_t, height_t;
+
+ /* XXX Seems like depth textures created via from_handle don't work that well */
+
if (!lpr)
return NULL;
@@ -453,6 +457,25 @@ llvmpipe_resource_from_handle(struct pipe_screen *screen,
pipe_reference_init(&lpr->base.reference, 1);
lpr->base.screen = screen;
+ width = align(lpr->base.width0, TILE_SIZE);
+ height = align(lpr->base.height0, TILE_SIZE);
+ width_t = width / TILE_SIZE;
+ height_t = height / TILE_SIZE;
+
+ /*
+ * Looks like unaligned displaytargets work just fine,
+ * at least sampler/render ones.
+ */
+#if 0
+ assert(lpr->base.width0 == width);
+ assert(lpr->base.height0 == height);
+#endif
+
+ lpr->tiles_per_row[0] = width_t;
+ lpr->tiles_per_image[0] = width_t * height_t;
+ lpr->num_slices_faces[0] = 1;
+ lpr->img_stride[0] = 0;
+
lpr->dt = winsys->displaytarget_from_handle(winsys,
template,
whandle,
@@ -460,6 +483,17 @@ llvmpipe_resource_from_handle(struct pipe_screen *screen,
if (!lpr->dt)
goto fail;
+ lpr->layout[0] = alloc_layout_array(1, lpr->base.width0, lpr->base.height0);
+
+ assert(lpr->layout[0]);
+ assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE);
+
+ lpr->id = id_counter++;
+
+#ifdef DEBUG
+ insert_at_tail(&resource_list, lpr);
+#endif
+
return &lpr->base;
fail:
@@ -899,13 +933,15 @@ static void
alloc_image_data(struct llvmpipe_resource *lpr, unsigned level,
enum lp_texture_layout layout)
{
+ uint alignment = MAX2(16, util_cpu_caps.cacheline);
+
if (lpr->dt)
assert(level == 0);
if (layout == LP_TEX_LAYOUT_TILED) {
/* tiled data is stored in regular memory */
uint buffer_size = tex_image_size(lpr, level, layout);
- lpr->tiled[level].data = align_malloc(buffer_size, 16);
+ lpr->tiled[level].data = align_malloc(buffer_size, alignment);
}
else {
assert(layout == LP_TEX_LAYOUT_LINEAR);
@@ -921,7 +957,7 @@ alloc_image_data(struct llvmpipe_resource *lpr, unsigned level,
else {
/* not a display target - allocate regular memory */
uint buffer_size = tex_image_size(lpr, level, LP_TEX_LAYOUT_LINEAR);
- lpr->linear[level].data = align_malloc(buffer_size, 16);
+ lpr->linear[level].data = align_malloc(buffer_size, alignment);
}
}
}
@@ -1035,7 +1071,7 @@ llvmpipe_get_texture_image(struct llvmpipe_resource *lpr,
layout_logic(cur_layout, layout, usage, &new_layout, &convert);
- if (convert) {
+ if (convert && other_data && target_data) {
if (layout == LP_TEX_LAYOUT_TILED) {
lp_linear_to_tiled(other_data, target_data,
x * TILE_SIZE, y * TILE_SIZE,
@@ -1067,8 +1103,6 @@ llvmpipe_get_texture_image(struct llvmpipe_resource *lpr,
width_t, height_t, layout);
}
- assert(target_data);
-
return target_data;
}
@@ -1138,7 +1172,7 @@ llvmpipe_get_texture_tile_linear(struct llvmpipe_resource *lpr,
layout_logic(cur_layout, LP_TEX_LAYOUT_LINEAR, usage,
&new_layout, &convert);
- if (convert) {
+ if (convert && tiled_image && linear_image) {
lp_tiled_to_linear(tiled_image, linear_image,
x, y, TILE_SIZE, TILE_SIZE, lpr->base.format,
lpr->row_stride[level],
@@ -1187,13 +1221,16 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
cur_layout = llvmpipe_get_texture_tile_layout(lpr, face_slice, level, tx, ty);
layout_logic(cur_layout, LP_TEX_LAYOUT_TILED, usage, &new_layout, &convert);
- if (convert) {
+ if (convert && linear_image && tiled_image) {
lp_linear_to_tiled(linear_image, tiled_image,
x, y, TILE_SIZE, TILE_SIZE, lpr->base.format,
lpr->row_stride[level],
lpr->tiles_per_row[level]);
}
+ if (!tiled_image)
+ return NULL;
+
if (new_layout != cur_layout)
llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty, new_layout);
@@ -1206,6 +1243,94 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
/**
+ * Convert one tile of color-buffer data from tiled to linear layout,
+ * storing it in the resource's linear image at pixel position (x, y).
+ */
+void
+llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ unsigned x, unsigned y,
+ uint8_t *tile)
+{
+ struct llvmpipe_texture_image *linear_img = &lpr->linear[level];
+ const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE;
+ uint8_t *linear_image;
+
+ assert(x % TILE_SIZE == 0);
+ assert(y % TILE_SIZE == 0);
+
+ if (!linear_img->data) {
+ /* allocate memory for the linear image now */
+ alloc_image_data(lpr, level, LP_TEX_LAYOUT_LINEAR);
+ }
+
+ /* compute address of the slice/face of the image that contains the tile */
+ linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+ LP_TEX_LAYOUT_LINEAR);
+
+ {
+ uint ii = x, jj = y;
+ uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE;
+ uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
+
+ /* Note that lp_tiled_to_linear expects the tile parameter to
+ * point at the first tile in a whole-image sized array. In
+ * this code, we have only a single tile and have to do some
+ * pointer arithmetic to figure out where the "image" would have
+ * started.
+ */
+ lp_tiled_to_linear(tile - byte_offset, linear_image,
+ x, y, TILE_SIZE, TILE_SIZE,
+ lpr->base.format,
+ lpr->row_stride[level],
+ 1); /* tiles per row */
+ }
+
+ llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty,
+ LP_TEX_LAYOUT_LINEAR);
+}
+
+
+/**
+ * Convert the linear image data at pixel position (x, y) into tiled
+ * layout, storing it in the given tile (no-op if no linear image exists).
+ */
+void
+llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ unsigned x, unsigned y,
+ uint8_t *tile)
+{
+ uint8_t *linear_image;
+
+ assert(x % TILE_SIZE == 0);
+ assert(y % TILE_SIZE == 0);
+
+ /* compute address of the slice/face of the image that contains the tile */
+ linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+ LP_TEX_LAYOUT_LINEAR);
+
+ if (linear_image) {
+ uint ii = x, jj = y;
+ uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE;
+ uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
+
+ /* Note that lp_linear_to_tiled expects the tile parameter to
+ * point at the first tile in a whole-image sized array. In
+ * this code, we have only a single tile and have to do some
+ * pointer arithmetic to figure out where the "image" would have
+ * started.
+ */
+ lp_linear_to_tiled(linear_image, tile - byte_offset,
+ x, y, TILE_SIZE, TILE_SIZE,
+ lpr->base.format,
+ lpr->row_stride[level],
+ 1); /* tiles per row */
+ }
+}
+
+
+/**
* Return size of resource in bytes
*/
unsigned
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
index 503b6a19a8..4e4a65dcb4 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.h
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -223,6 +223,17 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
unsigned x, unsigned y);
+void
+llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ unsigned x, unsigned y,
+ uint8_t *tile);
+
+void
+llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ unsigned x, unsigned y,
+ uint8_t *tile);
extern void
llvmpipe_print_resources(void);
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c
index 2b63992dd7..0938f7aea7 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_image.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c
@@ -204,7 +204,7 @@ lp_tiled_to_linear(const void *src, void *dst,
lp_tile_unswizzle_4ub(format,
src_tile,
dst, dst_stride,
- ii, jj, tile_w, tile_h);
+ ii, jj);
}
}
}
@@ -293,7 +293,7 @@ lp_linear_to_tiled(const void *src, void *dst,
lp_tile_swizzle_4ub(format,
dst_tile,
src, src_stride,
- ii, jj, tile_w, tile_h);
+ ii, jj);
}
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_shuffle_mask.py b/src/gallium/drivers/llvmpipe/lp_tile_shuffle_mask.py
new file mode 100644
index 0000000000..ea2fc0f375
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tile_shuffle_mask.py
@@ -0,0 +1,32 @@
+
+tile = [[0,1,4,5],
+ [2,3,6,7],
+ [8,9,12,13],
+ [10,11,14,15]]
+shift = 0
+align = 1
+value = 0L
+holder = []
+
+import sys
+
+basemask = [0x
+fd = sys.stdout
+indent = " "*9
+for c in range(4):
+ fd.write(indent + "*pdst++ = \n");
+ for l,line in enumerate(tile):
+ fd.write(indent + " %s_mm_shuffle_epi8(line%d, (__m128i){"%(l and '+' or ' ',l))
+ for i,pos in enumerate(line):
+ mask = 0x00ffffffff & (~(0xffL << shift))
+ value = mask | ((pos) << shift)
+ holder.append(value)
+ if holder and (i + 1) %2 == 0:
+ fd.write("0x%8.0x"%(holder[0] + (holder[1] << 32)))
+ holder = []
+ if (i) %4 == 1:
+ fd.write( ',')
+
+ fd.write("})%s\n"%((l == 3) and ';' or ''))
+ print
+ shift += 8
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
index 07f71b8411..12dac1da6c 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -79,14 +79,14 @@ void
lp_tile_swizzle_4ub(enum pipe_format format,
uint8_t *dst,
const void *src, unsigned src_stride,
- unsigned x, unsigned y, unsigned w, unsigned h);
+ unsigned x, unsigned y);
void
lp_tile_unswizzle_4ub(enum pipe_format format,
const uint8_t *src,
void *dst, unsigned dst_stride,
- unsigned x, unsigned y, unsigned w, unsigned h);
+ unsigned x, unsigned y);
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index 5ab63cbac6..c71ec8066c 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -75,13 +75,13 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix):
src_native_type = native_type(format)
print 'static void'
- print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type)
+ print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type)
print '{'
print ' unsigned x, y;'
print ' const uint8_t *src_row = src + y0*src_stride;'
- print ' for (y = 0; y < h; ++y) {'
+ print ' for (y = 0; y < TILE_SIZE; ++y) {'
print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride())
- print ' for (x = 0; x < w; ++x) {'
+ print ' for (x = 0; x < TILE_SIZE; ++x) {'
names = ['']*4
if format.colorspace in ('rgb', 'srgb'):
@@ -202,9 +202,9 @@ def emit_unrolled_unswizzle_code(format, src_channel):
print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type)
print ' unsigned int qx, qy, i;'
print
- print ' for (qy = 0; qy < h; qy += TILE_VECTOR_HEIGHT) {'
+ print ' for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {'
print ' const unsigned py = y0 + qy;'
- print ' for (qx = 0; qx < w; qx += TILE_VECTOR_WIDTH) {'
+ print ' for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {'
print ' const unsigned px = x0 + qx;'
print ' const uint8_t *r = src + 0 * TILE_C_STRIDE;'
print ' const uint8_t *g = src + 1 * TILE_C_STRIDE;'
@@ -231,9 +231,9 @@ def emit_tile_pixel_unswizzle_code(format, src_channel):
print ' unsigned x, y;'
print ' uint8_t *dst_row = dst + y0*dst_stride;'
- print ' for (y = 0; y < h; ++y) {'
+ print ' for (y = 0; y < TILE_SIZE; ++y) {'
print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride())
- print ' for (x = 0; x < w; ++x) {'
+ print ' for (x = 0; x < TILE_SIZE; ++x) {'
if format.layout == PLAIN:
if not format.is_array():
@@ -273,7 +273,7 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix):
name = format.short_name()
print 'static void'
- print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type)
+ print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type)
print '{'
if format.layout == PLAIN \
and format.colorspace == 'rgb' \
@@ -289,6 +289,202 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix):
print
+def generate_ssse3():
+ '''Print hand-written SSSE3 swizzle/unswizzle routines for B8G8R8A8_UNORM.
+
+ The emitted C is wrapped in #if defined(PIPE_ARCH_SSE). When PIPE_ARCH_SSSE3
+ is not defined, a gcc inline-assembly replacement for _mm_shuffle_epi8() is
+ emitted instead of including <tmmintrin.h>. The emitted code relies on
+ TILE_SIZE, TILE_VECTOR_WIDTH, TILE_VECTOR_HEIGHT and
+ PIPE_READ_WRITE_BARRIER() being provided by the generated file's includes.
+ '''
+ print '''
+#if defined(PIPE_ARCH_SSE)
+
+
+#if defined(PIPE_ARCH_SSSE3)
+
+#include <tmmintrin.h>
+
+#else
+
+#include <emmintrin.h>
+
+/**
+ * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases
+ * where -mssse3 is not supported/enabled.
+ *
+ * MSVC will never get in here as its intrinsics support do not rely on
+ * compiler command line options.
+ */
+static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_epi8(__m128i a, __m128i mask)
+{
+ __m128i result;
+ __asm__("pshufb %1, %0"
+ : "=x" (result)
+ : "xm" (mask), "0" (a));
+ return result;
+}
+
+#endif
+
+
+/**
+ * Read one TILE_SIZE x TILE_SIZE tile of B8G8R8A8 pixels starting at (x0, y0)
+ * in the linear surface 'src' and write it to 'dst' as consecutive r, g, b, a
+ * vectors, one group of four vectors per
+ * TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT block.
+ */
+static void
+lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst,
+ const uint8_t *src, unsigned src_stride,
+ unsigned x0, unsigned y0)
+{
+
+ unsigned x, y;
+ __m128i *pdst = (__m128i*) dst;
+ const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t);
+ unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - sizeof(uint32_t)*TILE_VECTOR_WIDTH;
+ unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT;
+
+ const __m128i shuffle00 = _mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle01 = _mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle02 = _mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle03 = _mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+
+ const __m128i shuffle10 = _mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle11 = _mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle12 = _mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+ const __m128i shuffle13 = _mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
+
+ const __m128i shuffle20 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff);
+ const __m128i shuffle21 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff);
+ const __m128i shuffle22 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff);
+ const __m128i shuffle23 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff);
+
+ const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e);
+ const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d);
+ const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c);
+ const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f);
+
+ for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) {
+ __m128i line0 = *(__m128i*)ysrc0;
+ const uint8_t *ysrc = ysrc0 + src_stride;
+ ysrc0 += tile_stridey;
+
+ for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) {
+ __m128i r, g, b, a, line1;
+ line1 = *(__m128i*)ysrc;
+ PIPE_READ_WRITE_BARRIER();
+ ysrc += src_stride;
+ r = _mm_shuffle_epi8(line0, shuffle00);
+ g = _mm_shuffle_epi8(line0, shuffle01);
+ b = _mm_shuffle_epi8(line0, shuffle02);
+ a = _mm_shuffle_epi8(line0, shuffle03);
+
+ line0 = *(__m128i*)ysrc;
+ PIPE_READ_WRITE_BARRIER();
+ ysrc += src_stride;
+ r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle10));
+ g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle11));
+ b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle12));
+ a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle13));
+
+ line1 = *(__m128i*)ysrc;
+ PIPE_READ_WRITE_BARRIER();
+ ysrc -= tile_stridex;
+ r = _mm_or_si128(r, _mm_shuffle_epi8(line0, shuffle20));
+ g = _mm_or_si128(g, _mm_shuffle_epi8(line0, shuffle21));
+ b = _mm_or_si128(b, _mm_shuffle_epi8(line0, shuffle22));
+ a = _mm_or_si128(a, _mm_shuffle_epi8(line0, shuffle23));
+
+ /* Prefetch the first row of the next block, but only while another
+ * block remains in this row: x advances by TILE_VECTOR_WIDTH, and
+ * the outer loop reloads line0 for the next row of blocks anyway. */
+ if (x + TILE_VECTOR_WIDTH < TILE_SIZE) {
+ line0 = *(__m128i*)ysrc;
+ ysrc += src_stride;
+ }
+
+ PIPE_READ_WRITE_BARRIER();
+ r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle30));
+ g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle31));
+ b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle32));
+ a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle33));
+
+ *pdst++ = r;
+ *pdst++ = g;
+ *pdst++ = b;
+ *pdst++ = a;
+ }
+ }
+
+}
+
+/**
+ * Inverse of the swizzle above: read consecutive r, g, b, a vectors from the
+ * tile 'src' and write B8G8R8A8 pixels into the linear surface 'dst' at
+ * (x0, y0), four rows per block.
+ */
+static void
+lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src,
+ uint8_t *dst, unsigned dst_stride,
+ unsigned x0, unsigned y0)
+{
+ unsigned int x, y;
+ const __m128i *psrc = (__m128i*) src;
+ /* One past the last source vector: each block consumes four vectors
+ * (r, g, b, a). The sentinel must describe the source tile, not the
+ * destination surface, or the look-ahead load below runs off the tile. */
+ const __m128i *end = psrc + 4*(TILE_SIZE/TILE_VECTOR_WIDTH)*(TILE_SIZE/TILE_VECTOR_HEIGHT);
+ uint8_t *pdst = dst + y0 * dst_stride + x0 * sizeof(uint32_t);
+ __m128i c0 = *psrc++;
+ __m128i c1;
+
+ const __m128i shuffle00 = _mm_setr_epi8(0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff);
+ const __m128i shuffle01 = _mm_setr_epi8(0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff);
+ const __m128i shuffle02 = _mm_setr_epi8(0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff);
+ const __m128i shuffle03 = _mm_setr_epi8(0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff);
+
+ const __m128i shuffle10 = _mm_setr_epi8(0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff);
+ const __m128i shuffle11 = _mm_setr_epi8(0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff);
+ const __m128i shuffle12 = _mm_setr_epi8(0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff);
+ const __m128i shuffle13 = _mm_setr_epi8(0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff);
+
+ const __m128i shuffle20 = _mm_setr_epi8(0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff,0xff);
+ const __m128i shuffle21 = _mm_setr_epi8(0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff,0xff);
+ const __m128i shuffle22 = _mm_setr_epi8(0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff,0xff);
+ const __m128i shuffle23 = _mm_setr_epi8(0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff,0xff);
+
+ const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05);
+ const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07);
+ const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d);
+ const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f);
+
+ for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) {
+ __m128i *tile = (__m128i*) pdst;
+ pdst += dst_stride * TILE_VECTOR_HEIGHT;
+ for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) {
+ uint8_t *linep = (uint8_t*) (tile++);
+ __m128i line0, line1, line2, line3;
+
+ c1 = *psrc++; /* r */
+ PIPE_READ_WRITE_BARRIER();
+ line0 = _mm_shuffle_epi8(c0, shuffle00);
+ line1 = _mm_shuffle_epi8(c0, shuffle01);
+ line2 = _mm_shuffle_epi8(c0, shuffle02);
+ line3 = _mm_shuffle_epi8(c0, shuffle03);
+
+ c0 = *psrc++; /* g */
+ PIPE_READ_WRITE_BARRIER();
+ line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle10));
+ line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle11));
+ line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle12));
+ line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle13));
+
+ c1 = *psrc++; /* b */
+ PIPE_READ_WRITE_BARRIER();
+ line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c0, shuffle20));
+ line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c0, shuffle21));
+ line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c0, shuffle22));
+ line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c0, shuffle23));
+
+ /* Look ahead to the next block's first vector unless this was the
+ * last block of the tile. */
+ if (psrc != end)
+ c0 = *psrc++; /* a */
+ PIPE_READ_WRITE_BARRIER();
+ line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle30));
+ line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle31));
+ line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle32));
+ line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle33));
+
+ *(__m128i*) (linep) = line0;
+ *(__m128i*) (((char*)linep) + dst_stride) = line1;
+ *(__m128i*) (((char*)linep) + 2 * dst_stride) = line2;
+ *(__m128i*) (((char*)linep) + 3 * dst_stride) = line3;
+ }
+ }
+}
+
+#endif /* PIPE_ARCH_SSE */
+'''
+
+
def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
'''Generate the dispatch function to read pixels from any format'''
@@ -297,9 +493,9 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
generate_format_read(format, dst_channel, dst_native_type, dst_suffix)
print 'void'
- print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type)
+ print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)' % (dst_suffix, dst_native_type)
print '{'
- print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type
+ print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type
print '#ifdef DEBUG'
print ' lp_tile_swizzle_count += 1;'
print '#endif'
@@ -307,13 +503,21 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
for format in formats:
if is_format_supported(format):
print ' case %s:' % format.name
- print ' func = &lp_tile_%s_swizzle_%s;' % (format.short_name(), dst_suffix)
+ func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix)
+ if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
+ print '#ifdef PIPE_ARCH_SSE'
+ print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name)
+ print '#else'
+ print ' func = %s;' % (func_name,)
+ print '#endif'
+ else:
+ print ' func = %s;' % (func_name,)
print ' break;'
print ' default:'
print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));'
print ' return;'
print ' }'
- print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);'
+ print ' func(dst, (const uint8_t *)src, src_stride, x, y);'
print '}'
print
@@ -326,10 +530,10 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
generate_format_write(format, src_channel, src_native_type, src_suffix)
print 'void'
- print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type)
+ print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)' % (src_suffix, src_native_type)
print '{'
- print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type
+ print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type
print '#ifdef DEBUG'
print ' lp_tile_unswizzle_count += 1;'
print '#endif'
@@ -337,13 +541,21 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
for format in formats:
if is_format_supported(format):
print ' case %s:' % format.name
- print ' func = &lp_tile_%s_unswizzle_%s;' % (format.short_name(), src_suffix)
+ func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix)
+ if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
+ print '#ifdef PIPE_ARCH_SSE'
+ print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name)
+ print '#else'
+ print ' func = %s;' % (func_name,)
+ print '#endif'
+ else:
+ print ' func = %s;' % (func_name,)
print ' break;'
print ' default:'
print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));'
print ' return;'
print ' }'
- print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);'
+ print ' func(src, (uint8_t *)dst, dst_stride, x, y);'
print '}'
print
@@ -362,6 +574,7 @@ def main():
print '#include "util/u_format.h"'
print '#include "util/u_math.h"'
print '#include "util/u_half.h"'
+ print '#include "util/u_cpu_detect.h"'
print '#include "lp_tile_soa.h"'
print
print '#ifdef DEBUG'
@@ -391,6 +604,8 @@ def main():
print '};'
print
+ generate_ssse3()
+
channel = Channel(UNSIGNED, True, 8)
native_type = 'uint8_t'
suffix = '4ub'