summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gallium/auxiliary/os/os_thread.h87
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c329
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h48
-rw-r--r--src/gallium/drivers/llvmpipe/.gitignore1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.h18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c14
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c16
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h26
-rw-r--r--src/gallium/drivers/llvmpipe/lp_memory.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_memory.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.c39
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c149
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h85
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_priv.h78
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c179
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h238
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.c57
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c84
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c613
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c286
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_image.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.py28
-rw-r--r--src/gallium/drivers/r300/r300_blit.c75
-rw-r--r--src/gallium/drivers/r300/r300_context.c11
-rw-r--r--src/gallium/drivers/r300/r300_context.h11
-rw-r--r--src/gallium/drivers/r300/r300_debug.c9
-rw-r--r--src/gallium/drivers/r300/r300_emit.c49
-rw-r--r--src/gallium/drivers/r300/r300_emit.h8
-rw-r--r--src/gallium/drivers/r300/r300_flush.c5
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c19
-rw-r--r--src/gallium/drivers/r300/r300_render.c96
-rw-r--r--src/gallium/drivers/r300/r300_screen.c4
-rw-r--r--src/gallium/drivers/r300/r300_screen.h6
-rw-r--r--src/gallium/drivers/r300/r300_state.c4
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c22
-rw-r--r--src/gallium/drivers/r300/r300_texture.c30
-rw-r--r--src/gallium/drivers/rbug/rbug_context.c8
-rw-r--r--src/gallium/drivers/rbug/rbug_core.c6
-rw-r--r--src/gallium/targets/Makefile.xorg15
-rw-r--r--src/gallium/targets/dri-i965/SConscript1
-rw-r--r--src/gallium/targets/libgl-xlib/Makefile2
-rw-r--r--src/gallium/targets/xorg-i915/Makefile8
-rw-r--r--src/gallium/targets/xorg-i965/Makefile7
-rw-r--r--src/gallium/targets/xorg-nouveau/Makefile9
-rw-r--r--src/gallium/targets/xorg-radeon/Makefile16
-rw-r--r--src/gallium/targets/xorg-vmwgfx/Makefile13
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_buffer.c4
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_r300.c9
-rw-r--r--src/mapi/glapi/gen/gl_enums.py3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c67
-rw-r--r--src/mesa/drivers/dri/r300/r300_draw.c3
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.c1
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.h1
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c1
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.c1
-rw-r--r--src/mesa/drivers/dri/r600/r700_oglprog.h2
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c10
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex_getimage.c9
-rw-r--r--src/mesa/main/enums.c3
70 files changed, 1767 insertions, 1221 deletions
diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h
index 0238308d20..a084310d4f 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -45,7 +45,6 @@
#include <pthread.h> /* POSIX threads headers */
#include <stdio.h> /* for perror() */
-#define PIPE_THREAD_HAVE_CONDVAR
/* pipe_thread
*/
@@ -168,19 +167,59 @@ typedef CRITICAL_SECTION pipe_mutex;
#define pipe_mutex_unlock(mutex) \
LeaveCriticalSection(&mutex)
+/* TODO: Need a macro to declare "I don't care about WinXP compatibility" */
+#if 0 && defined (_WIN32_WINNT) && (_WIN32_WINNT >= 0x0600)
+/* CONDITION_VARIABLE is only available on newer versions of Windows
+ * (Server 2008/Vista or later).
+ * http://msdn.microsoft.com/en-us/library/ms682052(VS.85).aspx
+ *
+ * pipe_condvar
+ */
+typedef CONDITION_VARIABLE pipe_condvar;
+
+#define pipe_static_condvar(cond) \
+ /*static*/ pipe_condvar cond = CONDITION_VARIABLE_INIT
+
+#define pipe_condvar_init(cond) \
+ InitializeConditionVariable(&(cond))
+
+#define pipe_condvar_destroy(cond) \
+ (void) cond /* nothing to do */
+
+#define pipe_condvar_wait(cond, mutex) \
+ SleepConditionVariableCS(&(cond), &(mutex), INFINITE)
+
+#define pipe_condvar_signal(cond) \
+ WakeConditionVariable(&(cond))
+
+#define pipe_condvar_broadcast(cond) \
+ WakeAllConditionVariable(&(cond))
+
+#else /* need compatibility with pre-Vista Win32 */
/* pipe_condvar (XXX FIX THIS)
+ * See http://www.cs.wustl.edu/~schmidt/win32-cv-1.html
+ * for potential pitfalls in implementation.
*/
-typedef unsigned pipe_condvar;
+typedef DWORD pipe_condvar;
+
+#define pipe_static_condvar(cond) \
+ /*static*/ pipe_condvar cond = 1
#define pipe_condvar_init(cond) \
- (void) cond
+ (void) (cond = 1)
#define pipe_condvar_destroy(cond) \
(void) cond
+/* Poor man's pthread_cond_wait():
+ Just release the mutex and sleep for one millisecond.
+ The caller's while() loop does all the work. */
#define pipe_condvar_wait(cond, mutex) \
- (void) cond; (void) mutex
+ do { pipe_mutex_unlock(mutex); \
+ Sleep(cond); \
+ pipe_mutex_lock(mutex); \
+ } while (0)
#define pipe_condvar_signal(cond) \
(void) cond
@@ -188,9 +227,12 @@ typedef unsigned pipe_condvar;
#define pipe_condvar_broadcast(cond) \
(void) cond
+#endif /* pre-Vista win32 */
#else
+#include "os/os_time.h"
+
/** Dummy definitions */
typedef unsigned pipe_thread;
@@ -214,7 +256,6 @@ static INLINE int pipe_thread_destroy( pipe_thread thread )
}
typedef unsigned pipe_mutex;
-typedef unsigned pipe_condvar;
#define pipe_static_mutex(mutex) \
static pipe_mutex mutex = 0
@@ -231,17 +272,25 @@ typedef unsigned pipe_condvar;
#define pipe_mutex_unlock(mutex) \
(void) mutex
+typedef int64_t pipe_condvar;
+
#define pipe_static_condvar(condvar) \
- static unsigned condvar = 0
+ static pipe_condvar condvar = 1000
#define pipe_condvar_init(condvar) \
- (void) condvar
+ (void) (condvar = 1000)
#define pipe_condvar_destroy(condvar) \
(void) condvar
+/* Poor man's pthread_cond_wait():
+ Just release the mutex and sleep for one millisecond.
+ The caller's while() loop does all the work. */
#define pipe_condvar_wait(condvar, mutex) \
- (void) condvar
+ do { pipe_mutex_unlock(mutex); \
+ os_time_sleep(condvar); \
+ pipe_mutex_lock(mutex); \
+ } while (0)
#define pipe_condvar_signal(condvar) \
(void) condvar
@@ -277,27 +326,7 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
}
-#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
-
-/* XXX FIX THIS */
-typedef unsigned pipe_barrier;
-
-static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
-{
- /* XXX we could implement barriers with a mutex and condition var */
-}
-
-static INLINE void pipe_barrier_destroy(pipe_barrier *barrier)
-{
-}
-
-static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
-{
- assert(0);
-}
-
-
-#else
+#else /* If the OS doesn't have its own, implement barriers using a mutex and a condvar */
typedef struct {
unsigned count;
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 85c5f36391..0d94aaae95 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -52,9 +52,8 @@
struct blitter_context_priv
{
- struct blitter_context blitter;
+ struct blitter_context base;
- struct pipe_context *pipe; /**< pipe context */
struct pipe_resource *vbuf; /**< quad */
float vertices[4][2][4]; /**< {pos, color} or {pos, texcoord} */
@@ -102,8 +101,20 @@ struct blitter_context_priv
/* Clip state. */
struct pipe_clip_state clip;
+
+ /* Destination surface dimensions. */
+ unsigned dst_width;
+ unsigned dst_height;
};
+static void blitter_draw_rectangle(struct blitter_context *blitter,
+ unsigned x, unsigned y,
+ unsigned width, unsigned height,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4]);
+
+
struct blitter_context *util_blitter_create(struct pipe_context *pipe)
{
struct blitter_context_priv *ctx;
@@ -118,19 +129,20 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
if (!ctx)
return NULL;
- ctx->pipe = pipe;
+ ctx->base.pipe = pipe;
+ ctx->base.draw_rectangle = blitter_draw_rectangle;
/* init state objects for them to be considered invalid */
- ctx->blitter.saved_blend_state = INVALID_PTR;
- ctx->blitter.saved_dsa_state = INVALID_PTR;
- ctx->blitter.saved_rs_state = INVALID_PTR;
- ctx->blitter.saved_fs = INVALID_PTR;
- ctx->blitter.saved_vs = INVALID_PTR;
- ctx->blitter.saved_velem_state = INVALID_PTR;
- ctx->blitter.saved_fb_state.nr_cbufs = ~0;
- ctx->blitter.saved_num_sampler_views = ~0;
- ctx->blitter.saved_num_sampler_states = ~0;
- ctx->blitter.saved_num_vertex_buffers = ~0;
+ ctx->base.saved_blend_state = INVALID_PTR;
+ ctx->base.saved_dsa_state = INVALID_PTR;
+ ctx->base.saved_rs_state = INVALID_PTR;
+ ctx->base.saved_fs = INVALID_PTR;
+ ctx->base.saved_vs = INVALID_PTR;
+ ctx->base.saved_velem_state = INVALID_PTR;
+ ctx->base.saved_fb_state.nr_cbufs = ~0;
+ ctx->base.saved_num_sampler_views = ~0;
+ ctx->base.saved_num_sampler_states = ~0;
+ ctx->base.saved_num_vertex_buffers = ~0;
/* blend state objects */
memset(&blend, 0, sizeof(blend));
@@ -217,17 +229,17 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
ctx->vertices[i][0][3] = 1; /*v.w*/
/* create the vertex buffer */
- ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
+ ctx->vbuf = pipe_buffer_create(ctx->base.pipe->screen,
PIPE_BIND_VERTEX_BUFFER,
sizeof(ctx->vertices));
- return &ctx->blitter;
+ return &ctx->base;
}
void util_blitter_destroy(struct blitter_context *blitter)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = blitter->pipe;
int i;
pipe->delete_blend_state(pipe, ctx->blend_write_color);
@@ -265,118 +277,117 @@ void util_blitter_destroy(struct blitter_context *blitter)
static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
{
/* make sure these CSOs have been saved */
- assert(ctx->blitter.saved_blend_state != INVALID_PTR &&
- ctx->blitter.saved_dsa_state != INVALID_PTR &&
- ctx->blitter.saved_rs_state != INVALID_PTR &&
- ctx->blitter.saved_fs != INVALID_PTR &&
- ctx->blitter.saved_vs != INVALID_PTR &&
- ctx->blitter.saved_velem_state != INVALID_PTR);
+ assert(ctx->base.saved_blend_state != INVALID_PTR &&
+ ctx->base.saved_dsa_state != INVALID_PTR &&
+ ctx->base.saved_rs_state != INVALID_PTR &&
+ ctx->base.saved_fs != INVALID_PTR &&
+ ctx->base.saved_vs != INVALID_PTR &&
+ ctx->base.saved_velem_state != INVALID_PTR);
}
static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
unsigned i;
/* restore the state objects which are always required to be saved */
- pipe->bind_blend_state(pipe, ctx->blitter.saved_blend_state);
- pipe->bind_depth_stencil_alpha_state(pipe, ctx->blitter.saved_dsa_state);
- pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state);
- pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
- pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
- pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state);
+ pipe->bind_blend_state(pipe, ctx->base.saved_blend_state);
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state);
+ pipe->bind_rasterizer_state(pipe, ctx->base.saved_rs_state);
+ pipe->bind_fs_state(pipe, ctx->base.saved_fs);
+ pipe->bind_vs_state(pipe, ctx->base.saved_vs);
+ pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state);
- ctx->blitter.saved_blend_state = INVALID_PTR;
- ctx->blitter.saved_dsa_state = INVALID_PTR;
- ctx->blitter.saved_rs_state = INVALID_PTR;
- ctx->blitter.saved_fs = INVALID_PTR;
- ctx->blitter.saved_vs = INVALID_PTR;
- ctx->blitter.saved_velem_state = INVALID_PTR;
+ ctx->base.saved_blend_state = INVALID_PTR;
+ ctx->base.saved_dsa_state = INVALID_PTR;
+ ctx->base.saved_rs_state = INVALID_PTR;
+ ctx->base.saved_fs = INVALID_PTR;
+ ctx->base.saved_vs = INVALID_PTR;
+ ctx->base.saved_velem_state = INVALID_PTR;
- pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref);
+ pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref);
- pipe->set_viewport_state(pipe, &ctx->blitter.saved_viewport);
- pipe->set_clip_state(pipe, &ctx->blitter.saved_clip);
+ pipe->set_viewport_state(pipe, &ctx->base.saved_viewport);
+ pipe->set_clip_state(pipe, &ctx->base.saved_clip);
/* restore the state objects which are required to be saved before copy/fill
*/
- if (ctx->blitter.saved_fb_state.nr_cbufs != ~0) {
- pipe->set_framebuffer_state(pipe, &ctx->blitter.saved_fb_state);
- util_assign_framebuffer_state(&ctx->blitter.saved_fb_state, NULL);
- ctx->blitter.saved_fb_state.nr_cbufs = ~0;
+ if (ctx->base.saved_fb_state.nr_cbufs != ~0) {
+ pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state);
+ util_assign_framebuffer_state(&ctx->base.saved_fb_state, NULL);
+ ctx->base.saved_fb_state.nr_cbufs = ~0;
}
- if (ctx->blitter.saved_num_sampler_states != ~0) {
+ if (ctx->base.saved_num_sampler_states != ~0) {
pipe->bind_fragment_sampler_states(pipe,
- ctx->blitter.saved_num_sampler_states,
- ctx->blitter.saved_sampler_states);
- ctx->blitter.saved_num_sampler_states = ~0;
+ ctx->base.saved_num_sampler_states,
+ ctx->base.saved_sampler_states);
+ ctx->base.saved_num_sampler_states = ~0;
}
- if (ctx->blitter.saved_num_sampler_views != ~0) {
+ if (ctx->base.saved_num_sampler_views != ~0) {
pipe->set_fragment_sampler_views(pipe,
- ctx->blitter.saved_num_sampler_views,
- ctx->blitter.saved_sampler_views);
+ ctx->base.saved_num_sampler_views,
+ ctx->base.saved_sampler_views);
- for (i = 0; i < ctx->blitter.saved_num_sampler_views; i++)
- pipe_sampler_view_reference(&ctx->blitter.saved_sampler_views[i],
+ for (i = 0; i < ctx->base.saved_num_sampler_views; i++)
+ pipe_sampler_view_reference(&ctx->base.saved_sampler_views[i],
NULL);
- ctx->blitter.saved_num_sampler_views = ~0;
+ ctx->base.saved_num_sampler_views = ~0;
}
- if (ctx->blitter.saved_num_vertex_buffers != ~0) {
+ if (ctx->base.saved_num_vertex_buffers != ~0) {
pipe->set_vertex_buffers(pipe,
- ctx->blitter.saved_num_vertex_buffers,
- ctx->blitter.saved_vertex_buffers);
+ ctx->base.saved_num_vertex_buffers,
+ ctx->base.saved_vertex_buffers);
- for (i = 0; i < ctx->blitter.saved_num_vertex_buffers; i++) {
- if (ctx->blitter.saved_vertex_buffers[i].buffer) {
- pipe_resource_reference(&ctx->blitter.saved_vertex_buffers[i].buffer,
+ for (i = 0; i < ctx->base.saved_num_vertex_buffers; i++) {
+ if (ctx->base.saved_vertex_buffers[i].buffer) {
+ pipe_resource_reference(&ctx->base.saved_vertex_buffers[i].buffer,
NULL);
}
}
- ctx->blitter.saved_num_vertex_buffers = ~0;
+ ctx->base.saved_num_vertex_buffers = ~0;
}
}
static void blitter_set_rectangle(struct blitter_context_priv *ctx,
unsigned x1, unsigned y1,
unsigned x2, unsigned y2,
- unsigned width, unsigned height,
float depth)
{
int i;
/* set vertex positions */
- ctx->vertices[0][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v0.x*/
- ctx->vertices[0][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v0.y*/
+ ctx->vertices[0][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v0.x*/
+ ctx->vertices[0][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v0.y*/
- ctx->vertices[1][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v1.x*/
- ctx->vertices[1][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v1.y*/
+ ctx->vertices[1][0][0] = (float)x2 / ctx->dst_width * 2.0f - 1.0f; /*v1.x*/
+ ctx->vertices[1][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v1.y*/
- ctx->vertices[2][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v2.x*/
- ctx->vertices[2][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v2.y*/
+ ctx->vertices[2][0][0] = (float)x2 / ctx->dst_width * 2.0f - 1.0f; /*v2.x*/
+ ctx->vertices[2][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v2.y*/
- ctx->vertices[3][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v3.x*/
- ctx->vertices[3][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v3.y*/
+ ctx->vertices[3][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v3.x*/
+ ctx->vertices[3][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v3.y*/
for (i = 0; i < 4; i++)
ctx->vertices[i][0][2] = depth; /*z*/
/* viewport */
- ctx->viewport.scale[0] = 0.5f * width;
- ctx->viewport.scale[1] = 0.5f * height;
+ ctx->viewport.scale[0] = 0.5f * ctx->dst_width;
+ ctx->viewport.scale[1] = 0.5f * ctx->dst_height;
ctx->viewport.scale[2] = 1.0f;
ctx->viewport.scale[3] = 1.0f;
- ctx->viewport.translate[0] = 0.5f * width;
- ctx->viewport.translate[1] = 0.5f * height;
+ ctx->viewport.translate[0] = 0.5f * ctx->dst_width;
+ ctx->viewport.translate[1] = 0.5f * ctx->dst_height;
ctx->viewport.translate[2] = 0.0f;
ctx->viewport.translate[3] = 0.0f;
- ctx->pipe->set_viewport_state(ctx->pipe, &ctx->viewport);
+ ctx->base.pipe->set_viewport_state(ctx->base.pipe, &ctx->viewport);
/* clip */
- ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip);
+ ctx->base.pipe->set_clip_state(ctx->base.pipe, &ctx->clip);
}
static void blitter_set_clear_color(struct blitter_context_priv *ctx,
@@ -401,29 +412,45 @@ static void blitter_set_clear_color(struct blitter_context_priv *ctx,
}
}
+static void get_normalized_texcoords(struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2,
+ float out[4])
+{
+ out[0] = x1 / (float)u_minify(src->width0, subsrc.level);
+ out[1] = y1 / (float)u_minify(src->height0, subsrc.level);
+ out[2] = x2 / (float)u_minify(src->width0, subsrc.level);
+ out[3] = y2 / (float)u_minify(src->height0, subsrc.level);
+}
+
+static void set_texcoords_in_vertices(const float coord[4],
+ float *out, unsigned stride)
+{
+ out[0] = coord[0]; /*t0.s*/
+ out[1] = coord[1]; /*t0.t*/
+ out += stride;
+ out[0] = coord[2]; /*t1.s*/
+ out[1] = coord[1]; /*t1.t*/
+ out += stride;
+ out[0] = coord[2]; /*t2.s*/
+ out[1] = coord[3]; /*t2.t*/
+ out += stride;
+ out[0] = coord[0]; /*t3.s*/
+ out[1] = coord[3]; /*t3.t*/
+}
+
static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx,
struct pipe_resource *src,
struct pipe_subresource subsrc,
unsigned x1, unsigned y1,
unsigned x2, unsigned y2)
{
- int i;
- float s1 = x1 / (float)u_minify(src->width0, subsrc.level);
- float t1 = y1 / (float)u_minify(src->height0, subsrc.level);
- float s2 = x2 / (float)u_minify(src->width0, subsrc.level);
- float t2 = y2 / (float)u_minify(src->height0, subsrc.level);
-
- ctx->vertices[0][1][0] = s1; /*t0.s*/
- ctx->vertices[0][1][1] = t1; /*t0.t*/
-
- ctx->vertices[1][1][0] = s2; /*t1.s*/
- ctx->vertices[1][1][1] = t1; /*t1.t*/
-
- ctx->vertices[2][1][0] = s2; /*t2.s*/
- ctx->vertices[2][1][1] = t2; /*t2.t*/
+ unsigned i;
+ float coord[4];
- ctx->vertices[3][1][0] = s1; /*t3.s*/
- ctx->vertices[3][1][1] = t2; /*t3.t*/
+ get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
+ set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8);
for (i = 0; i < 4; i++) {
ctx->vertices[i][1][2] = 0; /*r*/
@@ -454,20 +481,11 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
unsigned x2, unsigned y2)
{
int i;
- float s1 = x1 / (float)u_minify(src->width0, subsrc.level);
- float t1 = y1 / (float)u_minify(src->height0, subsrc.level);
- float s2 = x2 / (float)u_minify(src->width0, subsrc.level);
- float t2 = y2 / (float)u_minify(src->height0, subsrc.level);
+ float coord[4];
float st[4][2];
- st[0][0] = s1;
- st[0][1] = t1;
- st[1][0] = s2;
- st[1][1] = t1;
- st[2][0] = s2;
- st[2][1] = t2;
- st[3][0] = s1;
- st[3][1] = t2;
+ get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
+ set_texcoords_in_vertices(coord, &st[0][0], 2);
util_map_texcoords2d_onto_cubemap(subsrc.face,
/* pointer, stride in floats */
@@ -478,9 +496,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
ctx->vertices[i][1][3] = 1; /*q*/
}
+static void blitter_set_dst_dimensions(struct blitter_context_priv *ctx,
+ unsigned width, unsigned height)
+{
+ ctx->dst_width = width;
+ ctx->dst_height = height;
+}
+
static void blitter_draw_quad(struct blitter_context_priv *ctx)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
/* write vertices and draw them */
pipe_buffer_write(pipe, ctx->vbuf,
@@ -495,7 +520,7 @@ static INLINE
void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
int miplevel)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state;
assert(miplevel < PIPE_MAX_TEXTURE_LEVELS);
@@ -518,7 +543,7 @@ void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
static INLINE
void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
@@ -531,7 +556,7 @@ void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs)
/** Convert PIPE_TEXTURE_x to TGSI_TEXTURE_x */
static unsigned
-pipe_tex_to_tgsi_tex(unsigned pipe_tex_target)
+pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target)
{
switch (pipe_tex_target) {
case PIPE_TEXTURE_1D:
@@ -553,7 +578,7 @@ static INLINE
void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
unsigned tex_target)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
@@ -572,7 +597,7 @@ static INLINE
void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
unsigned tex_target)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
@@ -588,6 +613,31 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
return ctx->fs_texfetch_depth[tex_target];
}
+static void blitter_draw_rectangle(struct blitter_context *blitter,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4])
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+
+ switch (type) {
+ case UTIL_BLITTER_ATTRIB_COLOR:
+ blitter_set_clear_color(ctx, attrib);
+ break;
+
+ case UTIL_BLITTER_ATTRIB_TEXCOORD:
+ set_texcoords_in_vertices(attrib, &ctx->vertices[0][1][0], 8);
+ break;
+
+ default:;
+ }
+
+ blitter_set_rectangle(ctx, x1, y1, x2, y2, depth);
+ blitter_draw_quad(ctx);
+}
+
void util_blitter_clear(struct blitter_context *blitter,
unsigned width, unsigned height,
unsigned num_cbufs,
@@ -596,7 +646,7 @@ void util_blitter_clear(struct blitter_context *blitter,
double depth, unsigned stencil)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_stencil_ref sr = { { 0 } };
assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
@@ -630,9 +680,9 @@ void util_blitter_clear(struct blitter_context *blitter,
pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
pipe->bind_vs_state(pipe, ctx->vs_col);
- blitter_set_clear_color(ctx, rgba);
- blitter_set_rectangle(ctx, 0, 0, width, height, width, height, depth);
- blitter_draw_quad(ctx);
+ blitter_set_dst_dimensions(ctx, width, height);
+ blitter->draw_rectangle(blitter, 0, 0, width, height, depth,
+ UTIL_BLITTER_ATTRIB_COLOR, rgba);
blitter_restore_CSOs(ctx);
}
@@ -654,7 +704,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
boolean ignore_stencil)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_screen *screen = pipe->screen;
struct pipe_surface *dstsurf;
struct pipe_framebuffer_state fb_state;
@@ -745,29 +795,45 @@ void util_blitter_copy_region(struct blitter_context *blitter,
pipe->set_fragment_sampler_views(pipe, 1, &view);
pipe->set_framebuffer_state(pipe, &fb_state);
- /* Set texture coordinates. */
+ blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
+
switch (src->target) {
+ /* Draw the quad with the draw_rectangle callback. */
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
- blitter_set_texcoords_2d(ctx, src, subsrc,
- srcx, srcy, srcx+width, srcy+height);
+ {
+ /* Set texture coordinates. */
+ float coord[4];
+ get_normalized_texcoords(src, subsrc, srcx, srcy,
+ srcx+width, srcy+height, coord);
+
+ /* Draw. */
+ blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
+ UTIL_BLITTER_ATTRIB_TEXCOORD, coord);
+ }
break;
+
+ /* Draw the quad with the generic codepath. */
case PIPE_TEXTURE_3D:
- blitter_set_texcoords_3d(ctx, src, subsrc, srcz,
- srcx, srcy, srcx+width, srcy+height);
- break;
case PIPE_TEXTURE_CUBE:
- blitter_set_texcoords_cube(ctx, src, subsrc,
- srcx, srcy, srcx+width, srcy+height);
+ /* Set texture coordinates. */
+ if (src->target == PIPE_TEXTURE_3D)
+ blitter_set_texcoords_3d(ctx, src, subsrc, srcz,
+ srcx, srcy, srcx+width, srcy+height);
+ else
+ blitter_set_texcoords_cube(ctx, src, subsrc,
+ srcx, srcy, srcx+width, srcy+height);
+
+ /* Draw. */
+ blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0);
+ blitter_draw_quad(ctx);
break;
+
default:
assert(0);
return;
}
- blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height,
- dstsurf->width, dstsurf->height, 0);
- blitter_draw_quad(ctx);
blitter_restore_CSOs(ctx);
pipe_surface_reference(&dstsurf, NULL);
@@ -782,7 +848,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter,
unsigned width, unsigned height)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_framebuffer_state fb_state;
assert(dstsurf->texture);
@@ -809,9 +875,9 @@ void util_blitter_clear_render_target(struct blitter_context *blitter,
fb_state.zsbuf = 0;
pipe->set_framebuffer_state(pipe, &fb_state);
- blitter_set_clear_color(ctx, rgba);
- blitter_set_rectangle(ctx, 0, 0, width, height, dstsurf->width, dstsurf->height, 0);
- blitter_draw_quad(ctx);
+ blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
+ blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
+ UTIL_BLITTER_ATTRIB_COLOR, rgba);
blitter_restore_CSOs(ctx);
}
@@ -825,7 +891,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
unsigned width, unsigned height)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_framebuffer_state fb_state;
struct pipe_stencil_ref sr = { { 0 } };
@@ -869,7 +935,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
fb_state.zsbuf = dstsurf;
pipe->set_framebuffer_state(pipe, &fb_state);
- blitter_set_rectangle(ctx, 0, 0, width, height, dstsurf->width, dstsurf->height, depth);
- blitter_draw_quad(ctx);
+ blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
+ blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, depth,
+ UTIL_BLITTER_ATTRIB_NONE, NULL);
blitter_restore_CSOs(ctx);
}
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index f421ad5b93..ba3f92eca8 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -39,9 +39,48 @@ extern "C" {
struct pipe_context;
+enum blitter_attrib_type {
+ UTIL_BLITTER_ATTRIB_NONE,
+ UTIL_BLITTER_ATTRIB_COLOR,
+ UTIL_BLITTER_ATTRIB_TEXCOORD
+};
+
struct blitter_context
{
+ /**
+ * Draw a rectangle.
+ *
+ * \param x1 An X coordinate of the top-left corner.
+ * \param y1 A Y coordinate of the top-left corner.
+ * \param x2 An X coordinate of the bottom-right corner.
+ * \param y2 A Y coordinate of the bottom-right corner.
+ * \param depth A depth which the rectangle is rendered at.
+ *
+ * \param type Semantics of the attributes "attrib".
+ * If type is UTIL_BLITTER_ATTRIB_NONE, ignore them.
+ * If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes
+ * make up a constant RGBA color, and should go to the COLOR0
+ * varying slot of a fragment shader.
+ * If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {attrib[0], attrib[1]}
+ * and {attrib[2], attrib[3]} specify top-left and bottom-right
+ * texture coordinates of the rectangle, respectively, and should
+ * go to the GENERIC0 varying slot of a fragment shader.
+ *
+ * \param attrib See type.
+ *
+ * \note A driver may optionally override this callback to implement
+ * a specialized hardware path for drawing a rectangle, e.g. using
+ * a rectangular point sprite.
+ */
+ void (*draw_rectangle)(struct blitter_context *blitter,
+ unsigned x1, unsigned y1, unsigned x2, unsigned y2,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4]);
+
/* Private members, really. */
+ struct pipe_context *pipe; /**< pipe context */
+
void *saved_blend_state; /**< blend state */
void *saved_dsa_state; /**< depth stencil alpha state */
void *saved_velem_state; /**< vertex elements state */
@@ -73,6 +112,15 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe);
*/
void util_blitter_destroy(struct blitter_context *blitter);
+/**
+ * Return the pipe context associated with a blitter context.
+ */
+static INLINE
+struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
+{
+ return blitter->pipe;
+}
+
/*
* These CSOs must be saved before any of the following functions is called:
* - blend state
diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore
index 4e0d4c3fc0..6ebd2b8a63 100644
--- a/src/gallium/drivers/llvmpipe/.gitignore
+++ b/src/gallium/drivers/llvmpipe/.gitignore
@@ -3,4 +3,5 @@ lp_test_blend
lp_test_conv
lp_test_format
lp_test_printf
+lp_test_round
lp_test_sincos
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c
index 75d8d2b825..f9805e5d68 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.c
+++ b/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -28,7 +28,6 @@
#include "pipe/p_screen.h"
#include "util/u_memory.h"
-#include "util/u_inlines.h"
#include "lp_debug.h"
#include "lp_fence.h"
@@ -59,7 +58,7 @@ lp_fence_create(unsigned rank)
/** Destroy a fence. Called when refcount hits zero. */
-static void
+void
lp_fence_destroy(struct lp_fence *fence)
{
pipe_mutex_destroy(fence->mutex);
@@ -77,12 +76,10 @@ llvmpipe_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
- struct lp_fence *old = (struct lp_fence *) *ptr;
+ struct lp_fence **old = (struct lp_fence **) ptr;
struct lp_fence *f = (struct lp_fence *) fence;
- if (pipe_reference(&old->reference, &f->reference)) {
- lp_fence_destroy(old);
- }
+ lp_fence_reference(old, f);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h
index d9270f5784..13358fb99f 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.h
+++ b/src/gallium/drivers/llvmpipe/lp_fence.h
@@ -32,6 +32,7 @@
#include "os/os_thread.h"
#include "pipe/p_state.h"
+#include "util/u_inlines.h"
struct pipe_screen;
@@ -61,4 +62,21 @@ void
llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
+void
+lp_fence_destroy(struct lp_fence *fence);
+
+static INLINE void
+lp_fence_reference(struct lp_fence **ptr,
+ struct lp_fence *f)
+{
+ struct lp_fence *old = *ptr;
+
+ if (pipe_reference(&old->reference, &f->reference)) {
+ lp_fence_destroy(old);
+ }
+
+ *ptr = f;
+}
+
+
#endif /* LP_FENCE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index 0cd288bb73..845292f4ab 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -40,27 +40,19 @@
/**
* \param flags bitmask of PIPE_FLUSH_x flags
- * \param fence if non-null, returns pointer to a fench which can be waited on
+ * \param fence if non-null, returns pointer to a fence which can be waited on
*/
void
llvmpipe_flush( struct pipe_context *pipe,
- unsigned flags,
+ unsigned flags,
struct pipe_fence_handle **fence )
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
draw_flush(llvmpipe->draw);
- if (fence) {
- /* if we're going to flush the setup/rasterization modules, emit
- * a fence.
- * XXX this (and the code below) may need fine tuning...
- */
- *fence = lp_setup_fence( llvmpipe->setup );
- }
-
/* ask the setup module to flush */
- lp_setup_flush(llvmpipe->setup, flags);
+ lp_setup_flush(llvmpipe->setup, flags, fence);
/* Enable to dump BMPs of the color/depth buffers each frame */
if (0) {
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 23aa34ddec..8e6dfb293d 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -103,10 +103,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatType();
elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = LLVMInt32Type();
elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32Type();
- elem_types[LP_JIT_CTX_SCISSOR_XMIN] = LLVMFloatType();
- elem_types[LP_JIT_CTX_SCISSOR_YMIN] = LLVMFloatType();
- elem_types[LP_JIT_CTX_SCISSOR_XMAX] = LLVMFloatType();
- elem_types[LP_JIT_CTX_SCISSOR_YMAX] = LLVMFloatType();
elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8Type(), 0);
elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
PIPE_MAX_SAMPLERS);
@@ -125,18 +121,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back,
screen->target, context_type,
LP_JIT_CTX_STENCIL_REF_BACK);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmin,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_XMIN);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymin,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_YMIN);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmax,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_XMAX);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymax,
- screen->target, context_type,
- LP_JIT_CTX_SCISSOR_YMAX);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
screen->target, context_type,
LP_JIT_CTX_BLEND_COLOR);
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 8d06e65725..c94189413a 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -89,9 +89,6 @@ struct lp_jit_context
uint32_t stencil_ref_front, stencil_ref_back;
- /** floats, not ints */
- float scissor_xmin, scissor_ymin, scissor_xmax, scissor_ymax;
-
/* FIXME: store (also?) in floats */
uint8_t *blend_color;
@@ -108,10 +105,6 @@ enum {
LP_JIT_CTX_ALPHA_REF,
LP_JIT_CTX_STENCIL_REF_FRONT,
LP_JIT_CTX_STENCIL_REF_BACK,
- LP_JIT_CTX_SCISSOR_XMIN,
- LP_JIT_CTX_SCISSOR_YMIN,
- LP_JIT_CTX_SCISSOR_XMAX,
- LP_JIT_CTX_SCISSOR_YMAX,
LP_JIT_CTX_BLEND_COLOR,
LP_JIT_CTX_TEXTURES,
LP_JIT_CTX_COUNT
@@ -130,18 +123,6 @@ enum {
#define lp_jit_context_stencil_ref_back_value(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back")
-#define lp_jit_context_scissor_xmin_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMIN, "scissor_xmin")
-
-#define lp_jit_context_scissor_ymin_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMIN, "scissor_ymin")
-
-#define lp_jit_context_scissor_xmax_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMAX, "scissor_xmax")
-
-#define lp_jit_context_scissor_ymax_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMAX, "scissor_ymax")
-
#define lp_jit_context_blend_color(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color")
@@ -160,12 +141,7 @@ typedef void
const void *dady,
uint8_t **color,
void *depth,
- const int32_t c1,
- const int32_t c2,
- const int32_t c3,
- const int32_t *step1,
- const int32_t *step2,
- const int32_t *step3,
+ uint32_t mask,
uint32_t *counter);
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.c b/src/gallium/drivers/llvmpipe/lp_memory.c
index f2e41f3a71..61d16668eb 100644
--- a/src/gallium/drivers/llvmpipe/lp_memory.c
+++ b/src/gallium/drivers/llvmpipe/lp_memory.c
@@ -45,6 +45,12 @@ lp_get_dummy_tile(void)
return lp_dummy_tile;
}
+uint8_t *
+lp_get_dummy_tile_silent(void)
+{
+ return lp_dummy_tile;
+}
+
boolean
lp_is_dummy_tile(void *tile)
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.h b/src/gallium/drivers/llvmpipe/lp_memory.h
index aca7970b46..1d0e5ebdb6 100644
--- a/src/gallium/drivers/llvmpipe/lp_memory.h
+++ b/src/gallium/drivers/llvmpipe/lp_memory.h
@@ -35,6 +35,8 @@
extern uint8_t *
lp_get_dummy_tile(void);
+uint8_t *
+lp_get_dummy_tile_silent(void);
extern boolean
lp_is_dummy_tile(void *tile);
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c
index a316597675..083e7e30a5 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.c
+++ b/src/gallium/drivers/llvmpipe/lp_perf.c
@@ -46,10 +46,10 @@ lp_print_counters(void)
{
if (LP_DEBUG & DEBUG_COUNTERS) {
unsigned total_64, total_16, total_4;
- float p1, p2, p3;
+ float p1, p2, p3, p4;
- debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
- debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
+ debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
+ debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
total_64 = (lp_count.nr_empty_64 +
lp_count.nr_fully_covered_64 +
@@ -58,10 +58,13 @@ lp_print_counters(void)
p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64;
p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64;
p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64;
+ p4 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64;
- debug_printf("llvmpipe: nr_empty_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
- debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
- debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
+ debug_printf("llvmpipe: nr_64x64: %9u\n", total_64);
+ debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
+ debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p4, total_64);
+ debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
+ debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
total_16 = (lp_count.nr_empty_16 +
lp_count.nr_fully_covered_16 +
@@ -71,25 +74,27 @@ lp_print_counters(void)
p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16;
p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16;
- debug_printf("llvmpipe: nr_empty_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
- debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16);
- debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
+ debug_printf("llvmpipe: nr_16x16: %9u\n", total_16);
+ debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16);
+ debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
+ debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4);
p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4;
p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4;
- debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
- debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4);
+ debug_printf("llvmpipe: nr_4x4: %9u\n", total_4);
+ debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
+ debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4);
- debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear);
- debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load);
- debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store);
+ debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear);
+ debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load);
+ debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store);
- debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles);
- debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0);
- debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles);
+ debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles);
+ debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0);
+ debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles);
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h
index a9629dae3c..4774f64550 100644
--- a/src/gallium/drivers/llvmpipe/lp_perf.h
+++ b/src/gallium/drivers/llvmpipe/lp_perf.h
@@ -44,6 +44,7 @@ struct lp_counters
unsigned nr_empty_64;
unsigned nr_fully_covered_64;
unsigned nr_partially_covered_64;
+ unsigned nr_shade_opaque_64;
unsigned nr_empty_16;
unsigned nr_fully_covered_16;
unsigned nr_partially_covered_16;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 1a82dd5694..a023d2b668 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -28,6 +28,7 @@
#include <limits.h>
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_rect.h"
#include "util/u_surface.h"
#include "lp_scene_queue.h"
@@ -136,7 +137,6 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
struct lp_rasterizer *rast = task->rast;
struct lp_scene *scene = rast->curr_scene;
enum lp_texture_usage usage;
- unsigned buf;
LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
@@ -146,24 +146,8 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
task->x = x;
task->y = y;
- if (scene->has_color_clear)
- usage = LP_TEX_USAGE_WRITE_ALL;
- else
- usage = LP_TEX_USAGE_READ_WRITE;
-
- /* get pointers to color tile(s) */
- for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
- struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
- struct llvmpipe_resource *lpt;
- assert(cbuf);
- lpt = llvmpipe_resource(cbuf->texture);
- task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
- cbuf->face + cbuf->zslice,
- cbuf->level,
- usage,
- x, y);
- assert(task->color_tiles[buf]);
- }
+ /* reset pointers to color tile(s) */
+ memset(task->color_tiles, 0, sizeof(task->color_tiles));
/* get pointer to depth/stencil tile */
{
@@ -222,7 +206,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
clear_color[2] == clear_color[3]) {
/* clear to grayscale value {x, x, x, x} */
for (i = 0; i < rast->state.nr_cbufs; i++) {
- uint8_t *ptr = task->color_tiles[i];
+ uint8_t *ptr =
+ lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
}
}
@@ -234,7 +219,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
*/
const unsigned chunk = TILE_SIZE / 4;
for (i = 0; i < rast->state.nr_cbufs; i++) {
- uint8_t *c = task->color_tiles[i];
+ uint8_t *c =
+ lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
unsigned j;
for (j = 0; j < 4 * TILE_SIZE; j++) {
@@ -378,8 +364,8 @@ lp_rast_load_color(struct lp_rasterizer_task *task,
* This is a bin command which is stored in all bins.
*/
void
-lp_rast_store_color( struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+lp_rast_store_linear_color( struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
{
struct lp_rasterizer *rast = task->rast;
struct lp_scene *scene = rast->curr_scene;
@@ -397,21 +383,6 @@ lp_rast_store_color( struct lp_rasterizer_task *task,
}
-/**
- * This is a bin command called during bin processing.
- */
-void
-lp_rast_set_state(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- const struct lp_rast_state *state = arg.set_state;
-
- LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
-
- /* just set the current state pointer for this rasterizer */
- task->current_state = state;
-}
-
/**
* Run the shader on all blocks in a tile. This is used when a tile is
@@ -423,8 +394,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
struct lp_rasterizer *rast = task->rast;
- const struct lp_rast_state *state = task->current_state;
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
+ const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
const unsigned tile_x = task->x, tile_y = task->y;
unsigned x, y;
@@ -448,32 +419,56 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
/* run shader on 4x4 block */
variant->jit_function[RAST_WHOLE]( &state->jit_context,
- tile_x + x, tile_y + y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
- color,
- depth,
- INT_MIN, INT_MIN, INT_MIN,
- NULL, NULL, NULL, &task->vis_counter);
+ tile_x + x, tile_y + y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ 0xffff,
+ &task->vis_counter);
}
}
}
/**
- * Compute shading for a 4x4 block of pixels.
+ * Run the shader on all blocks in a tile. This is used when a tile is
+ * completely contained inside a triangle, and the shader is opaque.
+ * This is a bin command called during bin processing.
+ */
+void
+lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ struct lp_rasterizer *rast = task->rast;
+ unsigned i;
+
+ LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
+
+ /* this will prevent converting the layout from tiled to linear */
+ for (i = 0; i < rast->state.nr_cbufs; i++) {
+ (void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
+ }
+
+ lp_rast_shade_tile(task, arg);
+}
+
+
+/**
+ * Compute shading for a 4x4 block of pixels inside a triangle.
* This is a bin command called during bin processing.
* \param x X position of quad in window coords
* \param y Y position of quad in window coords
*/
-void lp_rast_shade_quads( struct lp_rasterizer_task *task,
- const struct lp_rast_shader_inputs *inputs,
- unsigned x, unsigned y,
- int32_t c1, int32_t c2, int32_t c3)
+void
+lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y,
+ unsigned mask)
{
- const struct lp_rast_state *state = task->current_state;
+ const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
struct lp_rasterizer *rast = task->rast;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
@@ -501,27 +496,21 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task,
assert(lp_check_alignment(state->jit_context.blend_color, 16));
- assert(lp_check_alignment(inputs->step[0], 16));
- assert(lp_check_alignment(inputs->step[1], 16));
- assert(lp_check_alignment(inputs->step[2], 16));
-
/* run shader on 4x4 block */
- variant->jit_function[RAST_EDGE_TEST]( &state->jit_context,
- x, y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
- color,
- depth,
- c1, c2, c3,
- inputs->step[0],
- inputs->step[1],
- inputs->step[2],
- &task->vis_counter);
+ variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
+ x, y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ mask,
+ &task->vis_counter);
}
+
/**
* Set top row and left column of the tile's pixels to white. For debugging.
*/
@@ -717,10 +706,16 @@ static struct {
{
RAST(clear_color),
RAST(clear_zstencil),
- RAST(triangle),
+ RAST(triangle_1),
+ RAST(triangle_2),
+ RAST(triangle_3),
+ RAST(triangle_4),
+ RAST(triangle_5),
+ RAST(triangle_6),
+ RAST(triangle_7),
RAST(shade_tile),
- RAST(set_state),
- RAST(store_color),
+ RAST(shade_tile_opaque),
+ RAST(store_linear_color),
RAST(fence),
RAST(begin_query),
RAST(end_query),
@@ -775,7 +770,7 @@ is_empty_bin( const struct cmd_bin *bin )
}
for (i = 0; i < head->count; i++)
- if (head->cmd[i] != lp_rast_set_state) {
+ if (head->cmd[i] != lp_rast_store_linear_color) {
return FALSE;
}
@@ -815,6 +810,10 @@ rasterize_scene(struct lp_rasterizer_task *task,
}
}
#endif
+
+ if (scene->fence) {
+ lp_rast_fence(task, lp_rast_arg_fence(scene->fence));
+ }
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 80ca68f5a2..0991344cce 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -84,8 +84,7 @@ struct lp_rast_shader_inputs {
float (*dadx)[4];
float (*dady)[4];
- /* edge/step info for 3 edges and 4x4 block of pixels */
- PIPE_ALIGN_VAR(16) int step[3][16];
+ const struct lp_rast_state *state;
};
struct lp_rast_clearzs {
@@ -93,6 +92,22 @@ struct lp_rast_clearzs {
unsigned clearzs_mask;
};
+struct lp_rast_plane {
+ /* one-pixel sized trivial accept offset for this plane */
+ int ei;
+
+ /* one-pixel sized trivial reject offset for this plane */
+ int eo;
+
+ /* edge function value at minx,miny ?? */
+ int c;
+
+ int dcdx;
+ int dcdy;
+
+ /* step table for this plane; do_block_4 reads entries 0..15 (4x4 block of pixels) */
+ const int *step;
+};
/**
* Rasterization information for a triangle known to be in this bin,
@@ -101,35 +116,16 @@ struct lp_rast_clearzs {
* Objects of this type are put into the lp_setup_context::data buffer.
*/
struct lp_rast_triangle {
+ /* inputs for the shader */
+ PIPE_ALIGN_VAR(16) struct lp_rast_shader_inputs inputs;
+
+ int step[3][16];
+
#ifdef DEBUG
float v[3][2];
#endif
- /* one-pixel sized trivial accept offsets for each plane */
- int ei1;
- int ei2;
- int ei3;
-
- /* one-pixel sized trivial reject offsets for each plane */
- int eo1;
- int eo2;
- int eo3;
-
- /* y deltas for vertex pairs (in fixed pt) */
- int dy12;
- int dy23;
- int dy31;
-
- /* x deltas for vertex pairs (in fixed pt) */
- int dx12;
- int dx23;
- int dx31;
-
- /* edge function values at minx,miny ?? */
- int c1, c2, c3;
-
- /* inputs for the shader */
- PIPE_ALIGN_VAR(16) struct lp_rast_shader_inputs inputs;
+ struct lp_rast_plane plane[7]; /* NOTE: may allocate fewer planes */
};
@@ -153,7 +149,10 @@ lp_rast_finish( struct lp_rasterizer *rast );
union lp_rast_cmd_arg {
const struct lp_rast_shader_inputs *shade_tile;
- const struct lp_rast_triangle *triangle;
+ struct {
+ const struct lp_rast_triangle *tri;
+ unsigned plane_mask;
+ } triangle;
const struct lp_rast_state *set_state;
uint8_t clear_color[4];
const struct lp_rast_clearzs *clear_zstencil;
@@ -173,10 +172,12 @@ lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile )
}
static INLINE union lp_rast_cmd_arg
-lp_rast_arg_triangle( const struct lp_rast_triangle *triangle )
+lp_rast_arg_triangle( const struct lp_rast_triangle *triangle,
+ unsigned plane_mask)
{
union lp_rast_cmd_arg arg;
- arg.triangle = triangle;
+ arg.triangle.tri = triangle;
+ arg.triangle.plane_mask = plane_mask;
return arg;
}
@@ -226,19 +227,31 @@ void lp_rast_clear_color( struct lp_rasterizer_task *,
void lp_rast_clear_zstencil( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
-void lp_rast_set_state( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
-
-void lp_rast_triangle( struct lp_rasterizer_task *,
- const union lp_rast_cmd_arg );
+void lp_rast_triangle_1( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_2( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_3( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_4( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_5( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_6( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_7( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
void lp_rast_shade_tile( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+void lp_rast_shade_tile_opaque( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
void lp_rast_fence( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
-void lp_rast_store_color( struct lp_rasterizer_task *,
+void lp_rast_store_linear_color( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index eb4175dfa6..8a884177c1 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -53,8 +53,6 @@ struct lp_rasterizer_task
uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS];
uint8_t *depth_tile;
- const struct lp_rast_state *current_state;
-
/** "back" pointer */
struct lp_rasterizer *rast;
@@ -119,10 +117,12 @@ struct lp_rasterizer
};
-void lp_rast_shade_quads( struct lp_rasterizer_task *task,
- const struct lp_rast_shader_inputs *inputs,
- unsigned x, unsigned y,
- int32_t c1, int32_t c2, int32_t c3);
+void
+lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y,
+ unsigned mask);
+
/**
@@ -142,10 +142,13 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
assert((x % TILE_VECTOR_WIDTH) == 0);
assert((y % TILE_VECTOR_HEIGHT) == 0);
- if (!rast->zsbuf.map && (task->current_state->variant->key.depth.enabled ||
- task->current_state->variant->key.stencil[0].enabled)) {
- /* out of memory - use dummy tile memory */
- return lp_get_dummy_tile();
+ if (!rast->zsbuf.map) {
+ /* Either out of memory or no zsbuf. Can't tell without access
+ * to the state. Just use dummy tile memory, but don't print
+ * the oom warning as this is most likely because there is no
+ * zsbuf.
+ */
+ return lp_get_dummy_tile_silent();
}
depth = (rast->zsbuf.map +
@@ -158,6 +161,40 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
/**
+ * Get pointer to the swizzled color tile
+ *
+ * The tile pointer is looked up lazily: if task->color_tiles[buf] is still
+ * NULL it is fetched with llvmpipe_get_texture_tile() for the tile at
+ * (task->x, task->y) and cached in the task.  Later calls return the cached
+ * pointer, so \p usage only takes effect on the call that performs the
+ * fetch.
+ *
+ * \param task   rasterizer task; task->x and task->y must be multiples of
+ *               TILE_SIZE
+ * \param buf    color buffer index, must be < rast->state.nr_cbufs
+ * \param usage  texture usage passed to llvmpipe_get_texture_tile()
+ * \return the tile pointer, or the dummy tile from lp_get_dummy_tile() if
+ *         the fetch returned NULL (in which case nothing is cached)
+ */
+static INLINE uint8_t *
+lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
+ unsigned buf, enum lp_texture_usage usage)
+{
+ struct lp_rasterizer *rast = task->rast;
+
+ assert(task->x % TILE_SIZE == 0);
+ assert(task->y % TILE_SIZE == 0);
+ assert(buf < rast->state.nr_cbufs);
+
+ if (!task->color_tiles[buf]) {
+ struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
+ struct llvmpipe_resource *lpt;
+ assert(cbuf);
+ lpt = llvmpipe_resource(cbuf->texture);
+ task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
+ cbuf->face + cbuf->zslice,
+ cbuf->level,
+ usage,
+ task->x,
+ task->y);
+ if (!task->color_tiles[buf]) {
+ /* out of memory - use dummy tile memory */
+ return lp_get_dummy_tile();
+ }
+ }
+
+ return task->color_tiles[buf];
+}
+
+
+/**
* Get the pointer to a 4x4 color block (within a 64x64 tile).
* We'll map the color buffer on demand here.
* Note that this may be called even when there's no color buffers - return
@@ -174,6 +211,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
assert((x % TILE_VECTOR_WIDTH) == 0);
assert((y % TILE_VECTOR_HEIGHT) == 0);
+ color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
color = task->color_tiles[buf];
if (!color) {
/* out of memory - use dummy tile memory */
@@ -203,7 +241,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
unsigned x, unsigned y )
{
const struct lp_rasterizer *rast = task->rast;
- const struct lp_rast_state *state = task->current_state;
+ const struct lp_rast_state *state = inputs->state;
struct lp_fragment_shader_variant *variant = state->variant;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
void *depth;
@@ -217,15 +255,15 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
/* run shader on 4x4 block */
variant->jit_function[RAST_WHOLE]( &state->jit_context,
- x, y,
- inputs->facing,
- inputs->a0,
- inputs->dadx,
- inputs->dady,
- color,
- depth,
- INT_MIN, INT_MIN, INT_MIN,
- NULL, NULL, NULL, &task->vis_counter );
+ x, y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ 0xffff,
+ &task->vis_counter );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index a5f0d14c95..ebe9a8e92b 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -113,168 +113,31 @@ block_full_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, x + ix, y + iy);
}
+#define TAG(x) x##_1
+#define NR_PLANES 1
+#include "lp_rast_tri_tmp.h"
-/**
- * Pass the 4x4 pixel block to the shader function.
- * Determination of which of the 16 pixels lies inside the triangle
- * will be done as part of the fragment shader.
- */
-static void
-do_block_4(struct lp_rasterizer_task *task,
- const struct lp_rast_triangle *tri,
- int x, int y,
- int c1, int c2, int c3)
-{
- assert(x >= 0);
- assert(y >= 0);
-
- lp_rast_shade_quads(task, &tri->inputs, x, y, -c1, -c2, -c3);
-}
-
-
-/**
- * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
- * of the triangle's bounds.
- */
-static void
-do_block_16(struct lp_rasterizer_task *task,
- const struct lp_rast_triangle *tri,
- int x, int y,
- int c0, int c1, int c2)
-{
- unsigned mask = 0;
- int eo[3];
- int c[3];
- int i, j;
-
- assert(x >= 0);
- assert(y >= 0);
- assert(x % 16 == 0);
- assert(y % 16 == 0);
-
- eo[0] = tri->eo1 * 4;
- eo[1] = tri->eo2 * 4;
- eo[2] = tri->eo3 * 4;
-
- c[0] = c0;
- c[1] = c1;
- c[2] = c2;
-
- for (j = 0; j < 3; j++) {
- const int *step = tri->inputs.step[j];
- const int cx = c[j] + eo[j];
-
- /* Mask has bits set whenever we are outside any of the edges.
- */
- for (i = 0; i < 16; i++) {
- int out = cx + step[i] * 4;
- mask |= (out >> 31) & (1 << i);
- }
- }
+#define TAG(x) x##_2
+#define NR_PLANES 2
+#include "lp_rast_tri_tmp.h"
- mask = ~mask & 0xffff;
- while (mask) {
- int i = ffs(mask) - 1;
- int px = x + pos_table4[i][0];
- int py = y + pos_table4[i][1];
- int cx1 = c0 + tri->inputs.step[0][i] * 4;
- int cx2 = c1 + tri->inputs.step[1][i] * 4;
- int cx3 = c2 + tri->inputs.step[2][i] * 4;
+#define TAG(x) x##_3
+#define NR_PLANES 3
+#include "lp_rast_tri_tmp.h"
- mask &= ~(1 << i);
+#define TAG(x) x##_4
+#define NR_PLANES 4
+#include "lp_rast_tri_tmp.h"
- /* Don't bother testing if the 4x4 block is entirely in/out of
- * the triangle. It's a little faster to do it in the jit code.
- */
- LP_COUNT(nr_non_empty_4);
- do_block_4(task, tri, px, py, cx1, cx2, cx3);
- }
-}
-
-
-/**
- * Scan the tile in chunks and figure out which pixels to rasterize
- * for this triangle.
- */
-void
-lp_rast_triangle(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
-{
- const struct lp_rast_triangle *tri = arg.triangle;
- const int x = task->x, y = task->y;
- int ei[3], eo[3], c[3];
- unsigned outmask, inmask, partial_mask;
- unsigned i, j;
-
- c[0] = tri->c1 + tri->dx12 * y - tri->dy12 * x;
- c[1] = tri->c2 + tri->dx23 * y - tri->dy23 * x;
- c[2] = tri->c3 + tri->dx31 * y - tri->dy31 * x;
-
- eo[0] = tri->eo1 * 16;
- eo[1] = tri->eo2 * 16;
- eo[2] = tri->eo3 * 16;
-
- ei[0] = tri->ei1 * 16;
- ei[1] = tri->ei2 * 16;
- ei[2] = tri->ei3 * 16;
-
- outmask = 0;
- inmask = 0xffff;
+#define TAG(x) x##_5
+#define NR_PLANES 5
+#include "lp_rast_tri_tmp.h"
- for (j = 0; j < 3; j++) {
- const int *step = tri->inputs.step[j];
- const int cox = c[j] + eo[j];
- const int cio = ei[j]- eo[j];
+#define TAG(x) x##_6
+#define NR_PLANES 6
+#include "lp_rast_tri_tmp.h"
- /* Outmask has bits set whenever we are outside any of the
- * edges.
- */
- /* Inmask has bits set whenever we are inside all of the edges.
- */
- for (i = 0; i < 16; i++) {
- int out = cox + step[i] * 16;
- int in = out + cio;
- outmask |= (out >> 31) & (1 << i);
- inmask &= ~((in >> 31) & (1 << i));
- }
- }
+#define TAG(x) x##_7
+#define NR_PLANES 7
+#include "lp_rast_tri_tmp.h"
- assert((outmask & inmask) == 0);
-
- if (outmask == 0xffff)
- return;
-
- /* Invert mask, so that bits are set whenever we are at least
- * partially inside all of the edges:
- */
- partial_mask = ~inmask & ~outmask & 0xffff;
-
- /* Iterate over partials:
- */
- while (partial_mask) {
- int i = ffs(partial_mask) - 1;
- int px = x + pos_table16[i][0];
- int py = y + pos_table16[i][1];
- int cx1 = c[0] + tri->inputs.step[0][i] * 16;
- int cx2 = c[1] + tri->inputs.step[1][i] * 16;
- int cx3 = c[2] + tri->inputs.step[2][i] * 16;
-
- partial_mask &= ~(1 << i);
-
- LP_COUNT(nr_partially_covered_16);
- do_block_16(task, tri, px, py, cx1, cx2, cx3);
- }
-
- /* Iterate over fulls:
- */
- while (inmask) {
- int i = ffs(inmask) - 1;
- int px = x + pos_table16[i][0];
- int py = y + pos_table16[i][1];
-
- inmask &= ~(1 << i);
-
- LP_COUNT(nr_fully_covered_16);
- block_full_16(task, tri, px, py);
- }
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
new file mode 100644
index 0000000000..a410c611a3
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -0,0 +1,238 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Rasterization for binned triangles within a tile
+ */
+
+
+
+/**
+ * Rasterize one block of 16 sample positions against NR_PLANES planes.
+ *
+ * This file is a template: the including file defines TAG() and
+ * NR_PLANES before each #include (both are #undef'd at the end of this
+ * file), so one copy of this function is generated per plane count.
+ *
+ * Position i is accepted only if c[j] - 1 + plane[j].step[i] is
+ * non-negative for every plane j.  Accepted positions are collected in
+ * a 16-bit mask which is handed to lp_rast_shade_quads_mask(); nothing
+ * is shaded when the mask is empty.
+ *
+ * NOTE(review): presumably the 16 positions are the pixels of a 4x4
+ * block, since the caller documents its sub-blocks as 4x4 - confirm.
+ *
+ * \param task   rasterizer task, forwarded to the shading call
+ * \param tri    triangle whose inputs are forwarded to the shading call
+ * \param plane  NR_PLANES planes; only their step[] tables are read here
+ * \param x,y    block position, forwarded to the shading call
+ * \param c      NR_PLANES plane values for this block
+ *
+ * XXX: Variants for more/fewer planes.
+ * XXX: Need ways of dropping planes as we descend.
+ * XXX: SIMD
+ */
+static void
+TAG(do_block_4)(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ const struct lp_rast_plane *plane,
+ int x, int y,
+ const int *c)
+{
+ unsigned mask = 0;
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ int any_negative = 0;
+ int j;
+
+ for (j = 0; j < NR_PLANES; j++)
+ any_negative |= (c[j] - 1 + plane[j].step[i]); /* OR keeps the sign bit of any negative term */
+
+ any_negative >>= 31; /* spread the sign bit; assumes 32-bit int and an arithmetic right shift */
+
+ mask |= (~any_negative) & (1 << i); /* set bit i only when no term was negative */
+ }
+
+ /* Now pass to the shader, but only if at least one position
+ * survived all of the plane tests:
+ */
+ if (mask)
+ lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+}
+
+/**
+ * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
+ * of the triangle's bounds.
+ *
+ * For every plane j and sub-block i two values are computed:
+ *   out  = c[j] + (plane[j].eo + plane[j].step[i]) * 4
+ *   part = c[j] + (plane[j].ei + plane[j].step[i]) * 4 - 1
+ * A negative 'out' sets bit i of outmask, a negative 'part' sets bit i
+ * of partmask.  The function returns early when all 16 outmask bits
+ * are set.  Otherwise sub-blocks in (partmask & ~outmask) are handed to
+ * TAG(do_block_4)() and sub-blocks not in partmask are handed to
+ * block_full_4().
+ *
+ * \param task   rasterizer task, forwarded to the per-sub-block calls
+ * \param tri    triangle being rasterized
+ * \param plane  NR_PLANES planes; step[], eo and ei are read here
+ * \param x,y    position of this block; sub-block positions are offset
+ *               from it through pos_table4[]
+ * \param c      NR_PLANES plane values for this block
+ */
+static void
+TAG(do_block_16)(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ const struct lp_rast_plane *plane,
+ int x, int y,
+ const int *c)
+{
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned i, j;
+
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int *step = plane[j].step;
+ const int eo = plane[j].eo * 4;
+ const int ei = plane[j].ei * 4;
+ const int cox = c[j] + eo;
+ const int cio = ei - 1 - eo; /* added to 'out' to obtain the trivial accept value */
+
+ for (i = 0; i < 16; i++) {
+ int out = cox + step[i] * 4;
+ int part = out + cio;
+ outmask |= (out >> 31) & (1 << i); /* sign bit selects bit i; assumes arithmetic shift */
+ partmask |= (part >> 31) & (1 << i);
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ /* Iterate over partials.  Each one is re-tested per position by
+ * TAG(do_block_4)(), using plane values moved to the sub-block:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1; /* lowest set bit; shadows the outer unsigned i */
+ int px = x + pos_table4[i][0];
+ int py = y + pos_table4[i][1];
+ int cx[NR_PLANES];
+
+ for (j = 0; j < NR_PLANES; j++)
+ cx[j] = c[j] + plane[j].step[i] * 4;
+
+ partial_mask &= ~(1 << i);
+
+ TAG(do_block_4)(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls.  No further plane tests are done for these:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int px = x + pos_table4[i][0];
+ int py = y + pos_table4[i][1];
+
+ inmask &= ~(1 << i);
+
+ block_full_4(task, tri, px, py);
+ }
+}
+
+
+/**
+ * Scan the tile in chunks and figure out which pixels to rasterize
+ * for this triangle.
+ *
+ * One copy of this function is generated per NR_PLANES value, named
+ * through TAG().  The planes selected by arg.triangle.plane_mask are
+ * copied out of tri->plane[] into a local array, each plane is
+ * evaluated at the task position (task->x, task->y), and the 16
+ * sub-blocks addressed through pos_table16[] are classified with the
+ * same outmask/partmask scheme used one level down, scaled by 16.
+ * Sub-blocks in (partmask & ~outmask) go to TAG(do_block_16)(), and
+ * sub-blocks not in partmask go to block_full_16().
+ *
+ * NOTE(review): the number of bits set in plane_mask must equal
+ * NR_PLANES.  This is only checked by an assert placed after the copy
+ * loop, so a mask with more bits set would write past plane[] before
+ * the check runs, and would go unnoticed when asserts are compiled out.
+ *
+ * \param task  rasterizer task; supplies the x,y position that is used
+ * \param arg   command argument; arg.triangle.tri and
+ *              arg.triangle.plane_mask are read
+ */
+void
+TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ unsigned plane_mask = arg.triangle.plane_mask;
+ const int x = task->x, y = task->y;
+ struct lp_rast_plane plane[NR_PLANES];
+ int c[NR_PLANES];
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned i, j, nr_planes = 0;
+
+ while (plane_mask) {
+ int i = ffs(plane_mask) - 1; /* lowest set bit; shadows the outer unsigned i */
+ plane[nr_planes] = tri->plane[i]; /* compact the selected planes into a dense array */
+ plane_mask &= ~(1 << i);
+ nr_planes++;
+ };
+
+ assert(nr_planes == NR_PLANES);
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int *step = plane[j].step;
+ const int eo = plane[j].eo * 16;
+ const int ei = plane[j].ei * 16;
+ int cox, cio;
+
+ c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; /* plane value at the task position */
+ cox = c[j] + eo;
+ cio = ei - 1 - eo; /* added to 'out' to obtain the trivial accept value */
+
+ for (i = 0; i < 16; i++) {
+ int out = cox + step[i] * 16;
+ int part = out + cio;
+ outmask |= (out >> 31) & (1 << i); /* sign bit selects bit i; assumes arithmetic shift */
+ partmask |= (part >> 31) & (1 << i);
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ /* Iterate over partials.  Each one is refined by
+ * TAG(do_block_16)(), using plane values moved to the sub-block:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int px = x + pos_table16[i][0];
+ int py = y + pos_table16[i][1];
+ int cx[NR_PLANES];
+
+ for (j = 0; j < NR_PLANES; j++)
+ cx[j] = c[j] + plane[j].step[i] * 16;
+
+ partial_mask &= ~(1 << i);
+
+ LP_COUNT(nr_partially_covered_16);
+ TAG(do_block_16)(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls.  No further plane tests are done for these:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int px = x + pos_table16[i][0];
+ int py = y + pos_table16[i][1];
+
+ inmask &= ~(1 << i);
+
+ LP_COUNT(nr_fully_covered_16);
+ block_full_16(task, tri, px, py);
+ }
+}
+
+#undef TAG
+#undef NR_PLANES
+
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index e8d36bbdc5..f88a759fe7 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -32,6 +32,7 @@
#include "util/u_simple_list.h"
#include "lp_scene.h"
#include "lp_scene_queue.h"
+#include "lp_fence.h"
/** List of texture references */
@@ -198,6 +199,8 @@ lp_scene_reset(struct lp_scene *scene )
make_empty_list(ref_list);
}
+ lp_fence_reference(&scene->fence, NULL);
+
scene->scene_size = 0;
scene->has_color_clear = FALSE;
@@ -303,60 +306,6 @@ lp_scene_is_resource_referenced(const struct lp_scene *scene,
}
-/**
- * Return last command in the bin
- */
-static lp_rast_cmd
-lp_get_last_command( const struct cmd_bin *bin )
-{
- const struct cmd_block *tail = bin->commands.tail;
- const unsigned i = tail->count;
- if (i > 0)
- return tail->cmd[i - 1];
- else
- return NULL;
-}
-
-
-/**
- * Replace the arg of the last command in the bin.
- */
-static void
-lp_replace_last_command_arg( struct cmd_bin *bin,
- const union lp_rast_cmd_arg arg )
-{
- struct cmd_block *tail = bin->commands.tail;
- const unsigned i = tail->count;
- assert(i > 0);
- tail->arg[i - 1] = arg;
-}
-
-
-
-/**
- * Put a state-change command into all bins.
- * If we find that the last command in a bin was also a state-change
- * command, we can simply replace that one with the new one.
- */
-void
-lp_scene_bin_state_command( struct lp_scene *scene,
- lp_rast_cmd cmd,
- const union lp_rast_cmd_arg arg )
-{
- unsigned i, j;
- for (i = 0; i < scene->tiles_x; i++) {
- for (j = 0; j < scene->tiles_y; j++) {
- struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
- lp_rast_cmd last_cmd = lp_get_last_command(bin);
- if (last_cmd == cmd) {
- lp_replace_last_command_arg(bin, arg);
- }
- else {
- lp_scene_bin_command( scene, i, j, cmd, arg );
- }
- }
- }
-}
/** advance curr_x,y to the next bin */
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index 4e55d43174..fa1b311fa1 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -112,6 +112,7 @@ struct resource_ref {
*/
struct lp_scene {
struct pipe_context *pipe;
+ struct lp_fence *fence;
/** the framebuffer to render the scene into */
struct pipe_framebuffer_state fb;
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index edcab0f8d9..f7f1635ef9 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -43,6 +43,7 @@
#include "lp_debug.h"
#include "lp_public.h"
#include "lp_limits.h"
+#include "lp_rast.h"
#include "state_tracker/sw_winsys.h"
@@ -296,11 +297,16 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen )
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct sw_winsys *winsys = screen->winsys;
+ if (screen->rast)
+ lp_rast_destroy(screen->rast);
+
lp_jit_screen_cleanup(screen);
if(winsys->destroy)
winsys->destroy(winsys);
+ pipe_mutex_destroy(screen->rast_mutex);
+
FREE(screen);
}
@@ -349,11 +355,6 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
lp_jit_screen_init(screen);
-#ifdef PIPE_OS_WINDOWS
- /* Multithreading not supported on windows until conditions and barriers are
- * properly implemented. */
- screen->num_threads = 0;
-#else
#ifdef PIPE_OS_EMBEDDED
screen->num_threads = 0;
#else
@@ -361,7 +362,14 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
#endif
screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads);
screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS);
-#endif
+
+ screen->rast = lp_rast_create(screen->num_threads);
+ if (!screen->rast) {
+ lp_jit_screen_cleanup(screen);
+ FREE(screen);
+ return NULL;
+ }
+ pipe_mutex_init(screen->rast_mutex);
util_format_s3tc_init();
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h
index eb40f6823f..731526dfab 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.h
+++ b/src/gallium/drivers/llvmpipe/lp_screen.h
@@ -37,6 +37,7 @@
#include "gallivm/lp_bld.h"
#include <llvm-c/ExecutionEngine.h>
+#include "os/os_thread.h"
#include "pipe/p_screen.h"
#include "pipe/p_defines.h"
@@ -63,6 +64,9 @@ struct llvmpipe_screen
/* Increments whenever textures are modified. Contexts can track this.
*/
unsigned timestamp;
+
+ struct lp_rasterizer *rast;
+ pipe_mutex rast_mutex;
};
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 2bd6fcebe7..7d48ad8e74 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -152,8 +152,11 @@ static void
lp_setup_rasterize_scene( struct lp_setup_context *setup )
{
struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen);
- lp_scene_rasterize(scene, setup->rast);
+ pipe_mutex_lock(screen->rast_mutex);
+ lp_scene_rasterize(scene, screen->rast);
+ pipe_mutex_unlock(screen->rast_mutex);
reset_context( setup );
@@ -271,7 +274,8 @@ set_scene_state( struct lp_setup_context *setup,
*/
void
lp_setup_flush( struct lp_setup_context *setup,
- unsigned flags )
+ unsigned flags,
+ struct pipe_fence_handle **fence)
{
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
@@ -286,8 +290,17 @@ lp_setup_flush( struct lp_setup_context *setup,
* data to linear in the texture_unmap() function, which will
* not be a parallel/threaded operation as here.
*/
- lp_scene_bin_everywhere(scene, lp_rast_store_color, dummy);
+ lp_scene_bin_everywhere(scene, lp_rast_store_linear_color, dummy);
+ }
+
+
+ if (fence) {
+ /* if we're going to flush the setup/rasterization modules, emit
+ * a fence.
+ */
+ *fence = lp_setup_fence( setup );
}
+
}
set_scene_state( setup, SETUP_FLUSHED );
@@ -433,24 +446,27 @@ lp_setup_clear( struct lp_setup_context *setup,
struct pipe_fence_handle *
lp_setup_fence( struct lp_setup_context *setup )
{
- if (setup->num_threads == 0) {
+ if (setup->scene == NULL)
return NULL;
- }
- else {
+ else if (setup->num_threads == 0)
+ return NULL;
+ else
+ {
struct lp_scene *scene = lp_setup_get_current_scene(setup);
- const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */
- struct lp_fence *fence = lp_fence_create(rank);
-
- LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank);
+ const unsigned rank = setup->num_threads;
set_scene_state( setup, SETUP_ACTIVE );
+
+ assert(scene->fence == NULL);
- /* insert the fence into all command bins */
- lp_scene_bin_everywhere( scene,
- lp_rast_fence,
- lp_rast_arg_fence(fence) );
+ /* The caller gets a reference, we keep a copy too, so need to
+ * bump the refcount:
+ */
+ lp_fence_reference(&scene->fence, lp_fence_create(rank));
+
+ LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank);
- return (struct pipe_fence_handle *) fence;
+ return (struct pipe_fence_handle *) scene->fence;
}
}
@@ -739,28 +755,6 @@ lp_setup_update_state( struct lp_setup_context *setup )
setup->dirty |= LP_SETUP_NEW_FS;
}
- if (setup->dirty & LP_SETUP_NEW_SCISSOR) {
- float *stored;
-
- stored = lp_scene_alloc_aligned(scene, 4 * sizeof(int32_t), 16);
-
- if (stored) {
- stored[0] = (float) setup->scissor.current.minx;
- stored[1] = (float) setup->scissor.current.miny;
- stored[2] = (float) setup->scissor.current.maxx;
- stored[3] = (float) setup->scissor.current.maxy;
-
- setup->scissor.stored = stored;
-
- setup->fs.current.jit_context.scissor_xmin = stored[0];
- setup->fs.current.jit_context.scissor_ymin = stored[1];
- setup->fs.current.jit_context.scissor_xmax = stored[2];
- setup->fs.current.jit_context.scissor_ymax = stored[3];
- }
-
- setup->dirty |= LP_SETUP_NEW_FS;
- }
-
if(setup->dirty & LP_SETUP_NEW_CONSTANTS) {
struct pipe_resource *buffer = setup->constants.current;
@@ -814,11 +808,6 @@ lp_setup_update_state( struct lp_setup_context *setup )
&setup->fs.current,
sizeof setup->fs.current);
setup->fs.stored = stored;
-
- /* put the state-set command into all bins */
- lp_scene_bin_state_command( scene,
- lp_rast_set_state,
- lp_rast_arg_state(setup->fs.stored) );
}
/* The scene now references the textures in the rasterization
@@ -865,8 +854,6 @@ lp_setup_destroy( struct lp_setup_context *setup )
lp_scene_queue_destroy(setup->empty_scenes);
- lp_rast_destroy( setup->rast );
-
FREE( setup );
}
@@ -893,13 +880,7 @@ lp_setup_create( struct pipe_context *pipe,
if (!setup->empty_scenes)
goto fail;
- /* XXX: move this to the screen and share between contexts:
- */
setup->num_threads = screen->num_threads;
- setup->rast = lp_rast_create(screen->num_threads);
- if (!setup->rast)
- goto fail;
-
setup->vbuf = draw_vbuf_stage(draw, &setup->base);
if (!setup->vbuf)
goto fail;
@@ -923,9 +904,6 @@ lp_setup_create( struct pipe_context *pipe,
return setup;
fail:
- if (setup->rast)
- lp_rast_destroy( setup->rast );
-
if (setup->vbuf)
;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index 6a0dc55129..73b1c85325 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -84,7 +84,8 @@ lp_setup_fence( struct lp_setup_context *setup );
void
lp_setup_flush( struct lp_setup_context *setup,
- unsigned flags );
+ unsigned flags,
+ struct pipe_fence_handle **fence);
void
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 8f4e00f073..a0606f5034 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -81,7 +81,6 @@ struct lp_setup_context
*/
struct draw_stage *vbuf;
unsigned num_threads;
- struct lp_rasterizer *rast;
struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */
struct lp_scene *scene; /**< current scene being built */
struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */
@@ -130,7 +129,6 @@ struct lp_setup_context
struct {
struct pipe_scissor_state current;
- const void *stored;
} scissor;
unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 4e2e17f77b..4ceb789b77 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -38,12 +38,78 @@
#define NUM_CHANNELS 4
+struct tri_info { /* per-triangle values filled in by do_triangle_ccw() and read by the *_coef() helpers */
+
+ float pixel_offset; /* copy of setup->pixel_offset */
+
+ /* fixed point vertex coordinates, as produced by subpixel_snap() */
+ int x[3];
+ int y[3];
+
+ /* float x,y deltas - all from the original coordinates
+ * (d?01 is vertex 0 minus vertex 1, d?20 is vertex 2 minus vertex 0)
+ */
+ float dy01, dy20;
+ float dx01, dx20;
+ float oneoverarea; /* 1 / (dx01 * dy20 - dx20 * dy01) */
+
+ const float (*v0)[4]; /* per-vertex attribute arrays; [0] holds the position and [0][3] is 1/w */
+ const float (*v1)[4];
+ const float (*v2)[4];
+
+ boolean frontfacing; /* facing flag as passed in by the caller */
+};
+
+
+
+static const int step_scissor_minx[16] = { /* NOTE(review): presumably per-position step values for a scissor edge plane, indexed like plane[].step[] - confirm against the users */
+ 0, 1, 0, 1,
+ 2, 3, 2, 3,
+ 0, 1, 0, 1,
+ 2, 3, 2, 3
+};
+
+static const int step_scissor_maxx[16] = { /* entry-for-entry negation of step_scissor_minx */
+ 0, -1, 0, -1,
+ -2, -3, -2, -3,
+ 0, -1, 0, -1,
+ -2, -3, -2, -3
+};
+
+static const int step_scissor_miny[16] = { /* same 0..3 values as step_scissor_minx, laid out along the other axis of the 16-entry ordering */
+ 0, 0, 1, 1,
+ 0, 0, 1, 1,
+ 2, 2, 3, 3,
+ 2, 2, 3, 3
+};
+
+static const int step_scissor_maxy[16] = { /* entry-for-entry negation of step_scissor_miny */
+ 0, 0, -1, -1,
+ 0, 0, -1, -1,
+ -2, -2, -3, -3,
+ -2, -2, -3, -3
+};
+
+
+
+
+static INLINE int
+subpixel_snap(float a)
+{ /* float -> fixed point: scale by FIXED_ONE, then round with util_iround() */
+ return util_iround(FIXED_ONE * a);
+}
+
+static INLINE float
+fixed_to_float(int a)
+{ /* fixed point -> float: divide by FIXED_ONE; 1.0 is a double literal, so the product is formed in double and narrowed on return */
+ return a * (1.0 / FIXED_ONE);
+}
+
+
/**
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
*/
-static void constant_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
+static void constant_coef( struct lp_rast_triangle *tri,
unsigned slot,
const float value,
unsigned i )
@@ -54,28 +120,21 @@ static void constant_coef( struct lp_setup_context *setup,
}
-/**
- * Compute a0, dadx and dady for a linearly interpolated coefficient,
- * for a triangle.
- */
-static void linear_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- float oneoverarea,
+
+static void linear_coef( struct lp_rast_triangle *tri,
+ const struct tri_info *info,
unsigned slot,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
unsigned vert_attr,
unsigned i)
{
- float a1 = v1[vert_attr][i];
- float a2 = v2[vert_attr][i];
- float a3 = v3[vert_attr][i];
+ float a0 = info->v0[vert_attr][i];
+ float a1 = info->v1[vert_attr][i];
+ float a2 = info->v2[vert_attr][i];
- float da12 = a1 - a2;
- float da31 = a3 - a1;
- float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
- float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
+ float da01 = a0 - a1;
+ float da20 = a2 - a0;
+ float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
+ float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;
@@ -92,9 +151,9 @@ static void linear_coef( struct lp_setup_context *setup,
* to define a0 as the sample at a pixel center somewhere near vmin
* instead - i'll switch to this later.
*/
- tri->inputs.a0[slot][i] = (a1 -
- (dadx * (v1[0][0] - setup->pixel_offset) +
- dady * (v1[0][1] - setup->pixel_offset)));
+ tri->inputs.a0[slot][i] = (a0 -
+ (dadx * (info->v0[0][0] - info->pixel_offset) +
+ dady * (info->v0[0][1] - info->pixel_offset)));
}
@@ -106,31 +165,27 @@ static void linear_coef( struct lp_setup_context *setup,
* Later, when we compute the value at a particular fragment position we'll
* divide the interpolated value by the interpolated W at that fragment.
*/
-static void perspective_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- float oneoverarea,
+static void perspective_coef( struct lp_rast_triangle *tri,
+ const struct tri_info *info,
unsigned slot,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
unsigned vert_attr,
unsigned i)
{
/* premultiply by 1/w (v[0][3] is always 1/w):
*/
- float a1 = v1[vert_attr][i] * v1[0][3];
- float a2 = v2[vert_attr][i] * v2[0][3];
- float a3 = v3[vert_attr][i] * v3[0][3];
- float da12 = a1 - a2;
- float da31 = a3 - a1;
- float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
- float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
+ float a0 = info->v0[vert_attr][i] * info->v0[0][3];
+ float a1 = info->v1[vert_attr][i] * info->v1[0][3];
+ float a2 = info->v2[vert_attr][i] * info->v2[0][3];
+ float da01 = a0 - a1;
+ float da20 = a2 - a0;
+ float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
+ float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;
- tri->inputs.a0[slot][i] = (a1 -
- (dadx * (v1[0][0] - setup->pixel_offset) +
- dady * (v1[0][1] - setup->pixel_offset)));
+ tri->inputs.a0[slot][i] = (a0 -
+ (dadx * (info->v0[0][0] - info->pixel_offset) +
+ dady * (info->v0[0][1] - info->pixel_offset)));
}
@@ -141,13 +196,9 @@ static void perspective_coef( struct lp_setup_context *setup,
* We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
*/
static void
-setup_fragcoord_coef(struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
- float oneoverarea,
+setup_fragcoord_coef(struct lp_rast_triangle *tri,
+ const struct tri_info *info,
unsigned slot,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
unsigned usage_mask)
{
/*X*/
@@ -166,12 +217,12 @@ setup_fragcoord_coef(struct lp_setup_context *setup,
/*Z*/
if (usage_mask & TGSI_WRITEMASK_Z) {
- linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2);
+ linear_coef(tri, info, slot, 0, 2);
}
/*W*/
if (usage_mask & TGSI_WRITEMASK_W) {
- linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3);
+ linear_coef(tri, info, slot, 0, 3);
}
}
@@ -180,24 +231,23 @@ setup_fragcoord_coef(struct lp_setup_context *setup,
* Setup the fragment input attribute with the front-facing value.
* \param frontface is the triangle front facing?
*/
-static void setup_facing_coef( struct lp_setup_context *setup,
- struct lp_rast_triangle *tri,
+static void setup_facing_coef( struct lp_rast_triangle *tri,
unsigned slot,
boolean frontface,
unsigned usage_mask)
{
/* convert TRUE to 1.0 and FALSE to -1.0 */
if (usage_mask & TGSI_WRITEMASK_X)
- constant_coef( setup, tri, slot, 2.0f * frontface - 1.0f, 0 );
+ constant_coef( tri, slot, 2.0f * frontface - 1.0f, 0 );
if (usage_mask & TGSI_WRITEMASK_Y)
- constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */
+ constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
if (usage_mask & TGSI_WRITEMASK_Z)
- constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */
+ constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
if (usage_mask & TGSI_WRITEMASK_W)
- constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */
+ constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
}
@@ -206,11 +256,7 @@ static void setup_facing_coef( struct lp_setup_context *setup,
*/
static void setup_tri_coefficients( struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
- float oneoverarea,
- const float (*v1)[4],
- const float (*v2)[4],
- const float (*v3)[4],
- boolean frontface)
+ const struct tri_info *info)
{
unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
unsigned slot;
@@ -227,25 +273,25 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
if (setup->flatshade_first) {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, v1[vert_attr][i], i);
+ constant_coef(tri, slot+1, info->v0[vert_attr][i], i);
}
else {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- constant_coef(setup, tri, slot+1, v3[vert_attr][i], i);
+ constant_coef(tri, slot+1, info->v2[vert_attr][i], i);
}
break;
case LP_INTERP_LINEAR:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
+ linear_coef(tri, info, slot+1, vert_attr, i);
break;
case LP_INTERP_PERSPECTIVE:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
- perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
+ perspective_coef(tri, info, slot+1, vert_attr, i);
fragcoord_usage_mask |= TGSI_WRITEMASK_W;
break;
@@ -259,7 +305,7 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
break;
case LP_INTERP_FACING:
- setup_facing_coef(setup, tri, slot+1, frontface, usage_mask);
+ setup_facing_coef(tri, slot+1, info->frontfacing, usage_mask);
break;
default:
@@ -269,16 +315,11 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
/* The internal position input is in slot zero:
*/
- setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3,
- fragcoord_usage_mask);
+ setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask);
}
-static INLINE int subpixel_snap( float a )
-{
- return util_iround(FIXED_ONE * a - (FIXED_ONE / 2));
-}
@@ -291,21 +332,25 @@ static INLINE int subpixel_snap( float a )
* \return pointer to triangle space
*/
static INLINE struct lp_rast_triangle *
-alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size)
+alloc_triangle(struct lp_scene *scene,
+ unsigned nr_inputs,
+ unsigned nr_planes,
+ unsigned *tri_size)
{
unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
struct lp_rast_triangle *tri;
- unsigned bytes;
+ unsigned tri_bytes, bytes;
char *inputs;
assert(sizeof(*tri) % 16 == 0);
- bytes = sizeof(*tri) + (3 * input_array_sz);
+ tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16);
+ bytes = tri_bytes + (3 * input_array_sz);
tri = lp_scene_alloc_aligned( scene, bytes, 16 );
if (tri) {
- inputs = (char *) (tri + 1);
+ inputs = ((char *)tri) + tri_bytes;
tri->inputs.a0 = (float (*)[4]) inputs;
tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz);
tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz);
@@ -329,52 +374,71 @@ print_triangle(struct lp_setup_context *setup,
uint i;
debug_printf("llvmpipe triangle\n");
- for (i = 0; i < setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v1[%d]: %f %f %f %f\n", i,
v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
}
- for (i = 0; i < setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v2[%d]: %f %f %f %f\n", i,
v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
}
- for (i = 0; i < setup->fs.nr_inputs; i++) {
+ for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v3[%d]: %f %f %f %f\n", i,
v3[i][0], v3[i][1], v3[i][2], v3[i][3]);
}
}
+lp_rast_cmd lp_rast_tri_tab[8] = { /* entry n is lp_rast_triangle_n; NOTE(review): presumably indexed by the triangle's plane count - confirm at the lookup site */
+ NULL, /* should be impossible */
+ lp_rast_triangle_1,
+ lp_rast_triangle_2,
+ lp_rast_triangle_3,
+ lp_rast_triangle_4,
+ lp_rast_triangle_5,
+ lp_rast_triangle_6,
+ lp_rast_triangle_7
+};
+
/**
* Do basic setup for triangle rasterization and determine which
* framebuffer tiles are touched. Put the triangle in the scene's
* bins for the tiles which we overlap.
*/
-static void
+static void
do_triangle_ccw(struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
boolean frontfacing )
{
- /* x/y positions in fixed point */
- const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset);
- const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset);
- const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset);
- const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset);
- const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset);
- const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset);
struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_fragment_shader_variant *variant = setup->fs.current.variant;
struct lp_rast_triangle *tri;
+ struct tri_info info;
int area;
- float oneoverarea;
int minx, maxx, miny, maxy;
+ int ix0, ix1, iy0, iy1;
unsigned tri_bytes;
-
+ int i;
+ int nr_planes = 3;
+
if (0)
print_triangle(setup, v1, v2, v3);
- tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes);
+ if (setup->scissor_test) {
+ nr_planes = 7;
+ }
+ else {
+ nr_planes = 3;
+ }
+
+
+ tri = alloc_triangle(scene,
+ setup->fs.nr_inputs,
+ nr_planes,
+ &tri_bytes);
if (!tri)
return;
@@ -387,15 +451,24 @@ do_triangle_ccw(struct lp_setup_context *setup,
tri->v[2][1] = v3[0][1];
#endif
- tri->dx12 = x1 - x2;
- tri->dx23 = x2 - x3;
- tri->dx31 = x3 - x1;
+ /* x/y positions in fixed point */
+ info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset);
+ info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset);
+ info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset);
+ info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset);
+ info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset);
+ info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset);
+
+ tri->plane[0].dcdy = info.x[0] - info.x[1];
+ tri->plane[1].dcdy = info.x[1] - info.x[2];
+ tri->plane[2].dcdy = info.x[2] - info.x[0];
- tri->dy12 = y1 - y2;
- tri->dy23 = y2 - y3;
- tri->dy31 = y3 - y1;
+ tri->plane[0].dcdx = info.y[0] - info.y[1];
+ tri->plane[1].dcdx = info.y[1] - info.y[2];
+ tri->plane[2].dcdx = info.y[2] - info.y[0];
- area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12);
+ area = (tri->plane[0].dcdy * tri->plane[2].dcdx -
+ tri->plane[2].dcdy * tri->plane[0].dcdx);
LP_COUNT(nr_tris);
@@ -410,20 +483,35 @@ do_triangle_ccw(struct lp_setup_context *setup,
}
/* Bounding rectangle (in pixels) */
- minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
- maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
- miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
- maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
-
+ {
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding.
+ */
+ int adj = (setup->pixel_offset != 0) ? 1 : 0;
+
+ minx = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ maxx = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ miny = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ maxy = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ }
+
if (setup->scissor_test) {
minx = MAX2(minx, setup->scissor.current.minx);
maxx = MIN2(maxx, setup->scissor.current.maxx);
miny = MAX2(miny, setup->scissor.current.miny);
maxy = MIN2(maxy, setup->scissor.current.maxy);
}
+ else {
+ minx = MAX2(minx, 0);
+ miny = MAX2(miny, 0);
+ maxx = MIN2(maxx, scene->fb.width);
+ maxy = MIN2(maxy, scene->fb.height);
+ }
+
- if (miny == maxy ||
- minx == maxx) {
+ if (miny >= maxy || minx >= maxx) {
lp_scene_putback_data( scene, tri_bytes );
LP_COUNT(nr_culled_tris);
return;
@@ -431,75 +519,88 @@ do_triangle_ccw(struct lp_setup_context *setup,
/*
*/
- oneoverarea = ((float)FIXED_ONE) / (float)area;
+ info.pixel_offset = setup->pixel_offset;
+ info.v0 = v1;
+ info.v1 = v2;
+ info.v2 = v3;
+ info.dx01 = info.v0[0][0] - info.v1[0][0];
+ info.dx20 = info.v2[0][0] - info.v0[0][0];
+ info.dy01 = info.v0[0][1] - info.v1[0][1];
+ info.dy20 = info.v2[0][1] - info.v0[0][1];
+ info.oneoverarea = 1.0 / (info.dx01 * info.dy20 - info.dx20 * info.dy01);
+ info.frontfacing = frontfacing;
/* Setup parameter interpolants:
*/
- setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing );
+ setup_tri_coefficients( setup, tri, &info );
tri->inputs.facing = frontfacing ? 1.0F : -1.0F;
+ tri->inputs.state = setup->fs.stored;
- /* half-edge constants, will be interated over the whole render target.
- */
- tri->c1 = tri->dy12 * x1 - tri->dx12 * y1;
- tri->c2 = tri->dy23 * x2 - tri->dx23 * y2;
- tri->c3 = tri->dy31 * x3 - tri->dx31 * y3;
- /* correct for top-left fill convention:
- */
- if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++;
- if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++;
- if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++;
-
- tri->dy12 *= FIXED_ONE;
- tri->dy23 *= FIXED_ONE;
- tri->dy31 *= FIXED_ONE;
-
- tri->dx12 *= FIXED_ONE;
- tri->dx23 *= FIXED_ONE;
- tri->dx31 *= FIXED_ONE;
-
- /* find trivial reject offsets for each edge for a single-pixel
- * sized block. These will be scaled up at each recursive level to
- * match the active blocksize. Scaling in this way works best if
- * the blocks are square.
- */
- tri->eo1 = 0;
- if (tri->dy12 < 0) tri->eo1 -= tri->dy12;
- if (tri->dx12 > 0) tri->eo1 += tri->dx12;
+
+ for (i = 0; i < 3; i++) {
+ struct lp_rast_plane *plane = &tri->plane[i];
- tri->eo2 = 0;
- if (tri->dy23 < 0) tri->eo2 -= tri->dy23;
- if (tri->dx23 > 0) tri->eo2 += tri->dx23;
+ /* half-edge constants, will be iterated over the whole render
+ * target.
+ */
+ plane->c = plane->dcdx * info.x[i] - plane->dcdy * info.y[i];
+
+ /* correct for top-left vs. bottom-left fill convention.
+ *
+ * note that we're overloading gl_rasterization_rules to mean
+ * both (0.5,0.5) pixel centers *and* bottom-left filling
+ * convention.
+ *
+ * GL actually has a top-left filling convention, but GL's
+ * notion of "top" differs from gallium's...
+ *
+ * Also, sometimes (in FBO cases) GL will render upside down
+ * to its usual method, in which case it will probably want
+ * to use the opposite, top-left convention.
+ */
+ if (plane->dcdx < 0) {
+ /* both fill conventions want this - adjust for left edges */
+ plane->c++;
+ }
+ else if (plane->dcdx == 0) {
+ if (setup->pixel_offset == 0) {
+ /* correct for top-left fill convention:
+ */
+ if (plane->dcdy > 0) plane->c++;
+ }
+ else {
+ /* correct for bottom-left fill convention:
+ */
+ if (plane->dcdy < 0) plane->c++;
+ }
+ }
- tri->eo3 = 0;
- if (tri->dy31 < 0) tri->eo3 -= tri->dy31;
- if (tri->dx31 > 0) tri->eo3 += tri->dx31;
+ plane->dcdx *= FIXED_ONE;
+ plane->dcdy *= FIXED_ONE;
- /* Calculate trivial accept offsets from the above.
- */
- tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1;
- tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
- tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
+ /* find trivial reject offsets for each edge for a single-pixel
+ * sized block. These will be scaled up at each recursive level to
+ * match the active blocksize. Scaling in this way works best if
+ * the blocks are square.
+ */
+ plane->eo = 0;
+ if (plane->dcdx < 0) plane->eo -= plane->dcdx;
+ if (plane->dcdy > 0) plane->eo += plane->dcdy;
- /* Fill in the inputs.step[][] arrays.
- * We've manually unrolled some loops here.
- */
- {
- const int xstep1 = -tri->dy12;
- const int xstep2 = -tri->dy23;
- const int xstep3 = -tri->dy31;
- const int ystep1 = tri->dx12;
- const int ystep2 = tri->dx23;
- const int ystep3 = tri->dx31;
-
-#define SETUP_STEP(i, x, y) \
- do { \
- tri->inputs.step[0][i] = x * xstep1 + y * ystep1; \
- tri->inputs.step[1][i] = x * xstep2 + y * ystep2; \
- tri->inputs.step[2][i] = x * xstep3 + y * ystep3; \
- } while (0)
+ /* Calculate trivial accept offsets from the above.
+ */
+ plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+ plane->step = tri->step[i];
+
+ /* Fill in the inputs.step[][] arrays.
+ * We've manually unrolled some loops here.
+ */
+#define SETUP_STEP(j, x, y) \
+ tri->step[i][j] = y * plane->dcdy - x * plane->dcdx
+
SETUP_STEP(0, 0, 0);
SETUP_STEP(1, 1, 0);
SETUP_STEP(2, 0, 1);
@@ -522,63 +623,106 @@ do_triangle_ccw(struct lp_setup_context *setup,
#undef STEP
}
+
+ /*
+ * When rasterizing scissored tris, use the intersection of the
+ * triangle bounding box and the scissor rect to generate the
+ * scissor planes.
+ *
+ * This permits us to cut off the triangle "tails" that are present
+ * in the intermediate recursive levels caused when two of the
+ * triangle's edges don't diverge quickly enough to trivially reject
+ * exterior blocks from the triangle.
+ *
+ * It's not really clear if it's worth worrying about these tails,
+ * but since we generate the planes for each scissored tri, it's
+ * free to trim them in this case.
+ *
+ * Note that otherwise, the scissor planes only vary in 'C' value,
+ * and even then only on state-changes. Could alternatively store
+ * these planes elsewhere.
+ */
+ if (nr_planes == 7) {
+ tri->plane[3].step = step_scissor_minx;
+ tri->plane[3].dcdx = -1;
+ tri->plane[3].dcdy = 0;
+ tri->plane[3].c = 1-minx;
+ tri->plane[3].ei = 0;
+ tri->plane[3].eo = 1;
+
+ tri->plane[4].step = step_scissor_maxx;
+ tri->plane[4].dcdx = 1;
+ tri->plane[4].dcdy = 0;
+ tri->plane[4].c = maxx;
+ tri->plane[4].ei = -1;
+ tri->plane[4].eo = 0;
+
+ tri->plane[5].step = step_scissor_miny;
+ tri->plane[5].dcdx = 0;
+ tri->plane[5].dcdy = 1;
+ tri->plane[5].c = 1-miny;
+ tri->plane[5].ei = 0;
+ tri->plane[5].eo = 1;
+
+ tri->plane[6].step = step_scissor_maxy;
+ tri->plane[6].dcdx = 0;
+ tri->plane[6].dcdy = -1;
+ tri->plane[6].c = maxy;
+ tri->plane[6].ei = -1;
+ tri->plane[6].eo = 0;
+ }
+
+
/*
* All fields of 'tri' are now set. The remaining code here is
* concerned with binning.
*/
- /* Convert to tile coordinates:
+ /* Convert to tile coordinates, and inclusive ranges:
*/
- minx = minx / TILE_SIZE;
- miny = miny / TILE_SIZE;
- maxx = maxx / TILE_SIZE;
- maxy = maxy / TILE_SIZE;
+ ix0 = minx / TILE_SIZE;
+ iy0 = miny / TILE_SIZE;
+ ix1 = (maxx-1) / TILE_SIZE;
+ iy1 = (maxy-1) / TILE_SIZE;
/*
* Clamp to framebuffer size
*/
- minx = MAX2(minx, 0);
- miny = MAX2(miny, 0);
- maxx = MIN2(maxx, scene->tiles_x - 1);
- maxy = MIN2(maxy, scene->tiles_y - 1);
+ assert(ix0 == MAX2(ix0, 0));
+ assert(iy0 == MAX2(iy0, 0));
+ assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
+ assert(iy1 == MIN2(iy1, scene->tiles_y - 1));
/* Determine which tile(s) intersect the triangle's bounding box
*/
- if (miny == maxy && minx == maxx)
+ if (iy0 == iy1 && ix0 == ix1)
{
/* Triangle is contained in a single tile:
*/
- lp_scene_bin_command( scene, minx, miny, lp_rast_triangle,
- lp_rast_arg_triangle(tri) );
+ lp_scene_bin_command( scene, ix0, iy0,
+ lp_rast_tri_tab[nr_planes],
+ lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
}
- else
+ else
{
- int c1 = (tri->c1 +
- tri->dx12 * miny * TILE_SIZE -
- tri->dy12 * minx * TILE_SIZE);
- int c2 = (tri->c2 +
- tri->dx23 * miny * TILE_SIZE -
- tri->dy23 * minx * TILE_SIZE);
- int c3 = (tri->c3 +
- tri->dx31 * miny * TILE_SIZE -
- tri->dy31 * minx * TILE_SIZE);
-
- int ei1 = tri->ei1 << TILE_ORDER;
- int ei2 = tri->ei2 << TILE_ORDER;
- int ei3 = tri->ei3 << TILE_ORDER;
-
- int eo1 = tri->eo1 << TILE_ORDER;
- int eo2 = tri->eo2 << TILE_ORDER;
- int eo3 = tri->eo3 << TILE_ORDER;
-
- int xstep1 = -(tri->dy12 << TILE_ORDER);
- int xstep2 = -(tri->dy23 << TILE_ORDER);
- int xstep3 = -(tri->dy31 << TILE_ORDER);
-
- int ystep1 = tri->dx12 << TILE_ORDER;
- int ystep2 = tri->dx23 << TILE_ORDER;
- int ystep3 = tri->dx31 << TILE_ORDER;
+ int c[7];
+ int ei[7];
+ int eo[7];
+ int xstep[7];
+ int ystep[7];
int x, y;
+
+ for (i = 0; i < nr_planes; i++) {
+ c[i] = (tri->plane[i].c +
+ tri->plane[i].dcdy * iy0 * TILE_SIZE -
+ tri->plane[i].dcdx * ix0 * TILE_SIZE);
+
+ ei[i] = tri->plane[i].ei << TILE_ORDER;
+ eo[i] = tri->plane[i].eo << TILE_ORDER;
+ xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER);
+ ystep[i] = tri->plane[i].dcdy << TILE_ORDER;
+ }
+
/* Test tile-sized blocks against the triangle.
@@ -586,64 +730,67 @@ do_triangle_ccw(struct lp_setup_context *setup,
* contained inside the tri, bin an lp_rast_shade_tile command.
* Else, bin a lp_rast_triangle command.
*/
- for (y = miny; y <= maxy; y++)
+ for (y = iy0; y <= iy1; y++)
{
- int cx1 = c1;
- int cx2 = c2;
- int cx3 = c3;
boolean in = FALSE; /* are we inside the triangle? */
+ int cx[7];
+
+ for (i = 0; i < nr_planes; i++)
+ cx[i] = c[i];
- for (x = minx; x <= maxx; x++)
+ for (x = ix0; x <= ix1; x++)
{
- if (cx1 + eo1 < 0 ||
- cx2 + eo2 < 0 ||
- cx3 + eo3 < 0)
- {
- /* do nothing */
+ int out = 0;
+ int partial = 0;
+
+ for (i = 0; i < nr_planes; i++) {
+ int planeout = cx[i] + eo[i];
+ int planepartial = cx[i] + ei[i] - 1;
+ out |= (planeout >> 31);
+ partial |= (planepartial >> 31) & (1<<i);
+ }
+
+ if (out) {
+ /* do nothing */
+ if (in)
+ break; /* exiting triangle, all done with this row */
LP_COUNT(nr_empty_64);
- if (in)
- break; /* exiting triangle, all done with this row */
- }
- else if (cx1 + ei1 > 0 &&
- cx2 + ei2 > 0 &&
- cx3 + ei3 > 0)
- {
+ }
+ else if (partial) {
+ /* Not trivially accepted by at least one plane -
+ * rasterize/shade partial tile
+ */
+ int count = util_bitcount(partial);
+ in = TRUE;
+ lp_scene_bin_command( scene, x, y,
+ lp_rast_tri_tab[count],
+ lp_rast_arg_triangle(tri, partial) );
+
+ LP_COUNT(nr_partially_covered_64);
+ }
+ else {
/* triangle covers the whole tile- shade whole tile */
LP_COUNT(nr_fully_covered_64);
- in = TRUE;
- if (setup->fs.current.variant->opaque &&
+ in = TRUE;
+ if (variant->opaque &&
!setup->fb.zsbuf) {
lp_scene_bin_reset( scene, x, y );
- lp_scene_bin_command( scene, x, y,
- lp_rast_set_state,
- lp_rast_arg_state(setup->fs.stored) );
}
lp_scene_bin_command( scene, x, y,
lp_rast_shade_tile,
lp_rast_arg_inputs(&tri->inputs) );
- }
- else
- {
- /* rasterizer/shade partial tile */
- LP_COUNT(nr_partially_covered_64);
- in = TRUE;
- lp_scene_bin_command( scene, x, y,
- lp_rast_triangle,
- lp_rast_arg_triangle(tri) );
- }
+ }
/* Iterate cx values across the region:
*/
- cx1 += xstep1;
- cx2 += xstep2;
- cx3 += xstep3;
+ for (i = 0; i < nr_planes; i++)
+ cx[i] += xstep[i];
}
/* Iterate c values down the region:
*/
- c1 += ystep1;
- c2 += ystep2;
- c3 += ystep3;
+ for (i = 0; i < nr_planes; i++)
+ c[i] += ystep[i];
}
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 65115052cd..5953d690a4 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -31,9 +31,6 @@
* Code generate the whole fragment pipeline.
*
* The fragment pipeline consists of the following stages:
- * - triangle edge in/out testing
- * - scissor test
- * - stipple (TBI)
* - early depth test
* - fragment shader
* - alpha test
@@ -97,6 +94,7 @@
#include "lp_state.h"
#include "lp_tex_sample.h"
#include "lp_flush.h"
+#include "lp_state_fs.h"
#include <llvm-c/Analysis.h>
@@ -170,177 +168,63 @@ generate_depth_stencil(LLVMBuilderRef builder,
/**
- * Generate the code to do inside/outside triangle testing for the
+ * Expand the relevant bits of mask_input to a 4-dword mask for the
* four pixels in a 2x2 quad. This will set the four elements of the
* quad mask vector to 0 or ~0.
- * \param i which quad of the quad group to test, in [0,3]
+ *
+ * \param quad which quad of the quad group to test, in [0,3]
+ * \param mask_input bitwise mask for the whole 4x4 stamp
*/
-static void
-generate_tri_edge_mask(LLVMBuilderRef builder,
- unsigned i,
- LLVMValueRef *mask, /* ivec4, out */
- LLVMValueRef c0, /* int32 */
- LLVMValueRef c1, /* int32 */
- LLVMValueRef c2, /* int32 */
- LLVMValueRef step0_ptr, /* ivec4 */
- LLVMValueRef step1_ptr, /* ivec4 */
- LLVMValueRef step2_ptr) /* ivec4 */
+static LLVMValueRef
+generate_quad_mask(LLVMBuilderRef builder,
+ struct lp_type fs_type,
+ unsigned quad,
+ LLVMValueRef mask_input) /* int32 */
{
-#define OPTIMIZE_IN_OUT_TEST 0
-#if OPTIMIZE_IN_OUT_TEST
- struct lp_build_if_state ifctx;
- LLVMValueRef not_draw_all;
-#endif
- struct lp_build_flow_context *flow;
- struct lp_type i32_type;
- LLVMTypeRef i32vec4_type;
- LLVMValueRef c0_vec, c1_vec, c2_vec;
- LLVMValueRef in_out_mask;
-
- assert(i < 4);
-
- /* int32 vector type */
- memset(&i32_type, 0, sizeof i32_type);
- i32_type.floating = FALSE; /* values are integers */
- i32_type.sign = TRUE; /* values are signed */
- i32_type.norm = FALSE; /* values are not normalized */
- i32_type.width = 32; /* 32-bit int values */
- i32_type.length = 4; /* 4 elements per vector */
-
- i32vec4_type = lp_build_int32_vec4_type();
+ struct lp_type mask_type;
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef bits[4];
+ LLVMValueRef mask;
/*
- * Use a conditional here to do detailed pixel in/out testing.
- * We only have to do this if c0 != INT_MIN.
+ * XXX: We'll need a different path for 16 x u8
*/
- flow = lp_build_flow_create(builder);
- lp_build_flow_scope_begin(flow);
-
- {
-#if OPTIMIZE_IN_OUT_TEST
- /* not_draw_all = (c0 != INT_MIN) */
- not_draw_all = LLVMBuildICmp(builder,
- LLVMIntNE,
- c0,
- LLVMConstInt(LLVMInt32Type(), INT_MIN, 0),
- "");
-
- in_out_mask = lp_build_const_int_vec(i32_type, ~0);
-
-
- lp_build_flow_scope_declare(flow, &in_out_mask);
-
- /* if (not_draw_all) {... */
- lp_build_if(&ifctx, flow, builder, not_draw_all);
-#endif
- {
- LLVMValueRef step0_vec, step1_vec, step2_vec;
- LLVMValueRef m0_vec, m1_vec, m2_vec;
- LLVMValueRef index, m;
-
- /* c0_vec = {c0, c0, c0, c0}
- * Note that we emit this code four times but LLVM optimizes away
- * three instances of it.
- */
- c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
- c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
- c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
- lp_build_name(c0_vec, "edgeconst0vec");
- lp_build_name(c1_vec, "edgeconst1vec");
- lp_build_name(c2_vec, "edgeconst2vec");
-
- /* load step0vec, step1, step2 vec from memory */
- index = LLVMConstInt(LLVMInt32Type(), i, 0);
- step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
- step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
- step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
- lp_build_name(step0_vec, "step0vec");
- lp_build_name(step1_vec, "step1vec");
- lp_build_name(step2_vec, "step2vec");
-
- /* m0_vec = step0_ptr[i] > c0_vec */
- m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
- m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
- m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);
-
- /* in_out_mask = m0_vec & m1_vec & m2_vec */
- m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
- in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
- lp_build_name(in_out_mask, "inoutmaskvec");
- }
-#if OPTIMIZE_IN_OUT_TEST
- lp_build_endif(&ifctx);
-#endif
-
- }
- lp_build_flow_scope_end(flow);
- lp_build_flow_destroy(flow);
+ assert(fs_type.width == 32);
+ assert(fs_type.length == 4);
+ mask_type = lp_int_type(fs_type);
- /* This is the initial alive/dead pixel mask for a quad of four pixels.
- * It's an int[4] vector with each word set to 0 or ~0.
- * Words will get cleared when pixels faile the Z test, etc.
+ /*
+ * mask_input >>= (quad * 4)
*/
- *mask = in_out_mask;
-}
-
-
-static LLVMValueRef
-generate_scissor_test(LLVMBuilderRef builder,
- LLVMValueRef context_ptr,
- const struct lp_build_interp_soa_context *interp,
- struct lp_type type)
-{
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1];
- LLVMValueRef xmin, ymin, xmax, ymax;
- LLVMValueRef m0, m1, m2, m3, m;
-
- /* xpos, ypos contain the window coords for the four pixels in the quad */
- assert(xpos);
- assert(ypos);
-
- /* get the current scissor bounds, convert to vectors */
- xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr);
- xmin = lp_build_broadcast(builder, vec_type, xmin);
-
- ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr);
- ymin = lp_build_broadcast(builder, vec_type, ymin);
- xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr);
- xmax = lp_build_broadcast(builder, vec_type, xmax);
+ mask_input = LLVMBuildLShr(builder,
+ mask_input,
+ LLVMConstInt(i32t, quad * 4, 0),
+ "");
- ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr);
- ymax = lp_build_broadcast(builder, vec_type, ymax);
+ /*
+ * mask = { mask_input & (1 << i), for i in [0,3] }
+ */
- /* compare the fragment's position coordinates against the scissor bounds */
- m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin);
- m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin);
- m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax);
- m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax);
+ mask = lp_build_broadcast(builder, lp_build_vec_type(mask_type), mask_input);
- /* AND all the masks together */
- m = LLVMBuildAnd(builder, m0, m1, "");
- m = LLVMBuildAnd(builder, m, m2, "");
- m = LLVMBuildAnd(builder, m, m3, "");
+ bits[0] = LLVMConstInt(i32t, 1 << 0, 0);
+ bits[1] = LLVMConstInt(i32t, 1 << 1, 0);
+ bits[2] = LLVMConstInt(i32t, 1 << 2, 0);
+ bits[3] = LLVMConstInt(i32t, 1 << 3, 0);
- lp_build_name(m, "scissormask");
+ mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), "");
- return m;
-}
+ /*
+ * mask = mask != 0 ? ~0 : 0
+ */
+ mask = lp_build_compare(builder,
+ mask_type, PIPE_FUNC_NOTEQUAL,
+ mask,
+ lp_build_const_int_vec(mask_type, 0));
-static LLVMValueRef
-build_int32_vec_const(int value)
-{
- struct lp_type i32_type;
-
- memset(&i32_type, 0, sizeof i32_type);
- i32_type.floating = FALSE; /* values are integers */
- i32_type.sign = TRUE; /* values are signed */
- i32_type.norm = FALSE; /* values are not normalized */
- i32_type.width = 32; /* 32-bit int values */
- i32_type.length = 4; /* 4 elements per vector */
- return lp_build_const_int_vec(i32_type, value);
+ return mask;
}
@@ -348,7 +232,7 @@ build_int32_vec_const(int value)
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
* \param i which quad in the tile, in range [0,3]
- * \param do_tri_test if 1, do triangle edge in/out testing
+ * \param partial_mask if 1, do mask_input testing
*/
static void
generate_fs(struct llvmpipe_context *lp,
@@ -364,13 +248,8 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef (*color)[4],
LLVMValueRef depth_ptr,
LLVMValueRef facing,
- unsigned do_tri_test,
- LLVMValueRef c0,
- LLVMValueRef c1,
- LLVMValueRef c2,
- LLVMValueRef step0_ptr,
- LLVMValueRef step1_ptr,
- LLVMValueRef step2_ptr,
+ unsigned partial_mask,
+ LLVMValueRef mask_input,
LLVMValueRef counter)
{
const struct tgsi_token *tokens = shader->base.tokens;
@@ -411,23 +290,17 @@ generate_fs(struct llvmpipe_context *lp,
lp_build_flow_scope_declare(flow, &z);
/* do triangle edge testing */
- if (do_tri_test) {
- generate_tri_edge_mask(builder, i, pmask,
- c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
+ if (partial_mask) {
+ *pmask = generate_quad_mask(builder, type,
+ i, mask_input);
}
else {
- *pmask = build_int32_vec_const(~0);
+ *pmask = lp_build_const_int_vec(type, ~0);
}
/* 'mask' will control execution based on quad's pixel alive/killed state */
lp_build_mask_begin(&mask, flow, type, *pmask);
- if (key->scissor) {
- LLVMValueRef smask =
- generate_scissor_test(builder, context_ptr, interp, type);
- lp_build_mask_update(&mask, smask);
- }
-
early_depth_stencil_test =
(key->depth.enabled || key->stencil[0].enabled) &&
!key->alpha.enabled &&
@@ -579,7 +452,7 @@ static void
generate_fragment(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
struct lp_fragment_shader_variant *variant,
- unsigned do_tri_test)
+ unsigned partial_mask)
{
struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
const struct lp_fragment_shader_variant_key *key = &variant->key;
@@ -589,9 +462,8 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMTypeRef fs_elem_type;
LLVMTypeRef fs_int_vec_type;
LLVMTypeRef blend_vec_type;
- LLVMTypeRef arg_types[16];
+ LLVMTypeRef arg_types[11];
LLVMTypeRef func_type;
- LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type();
LLVMValueRef context_ptr;
LLVMValueRef x;
LLVMValueRef y;
@@ -600,7 +472,8 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMValueRef dady_ptr;
LLVMValueRef color_ptr_ptr;
LLVMValueRef depth_ptr;
- LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr, counter = NULL;
+ LLVMValueRef mask_input;
+ LLVMValueRef counter = NULL;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
struct lp_build_sampler_soa *sampler;
@@ -645,7 +518,7 @@ generate_fragment(struct llvmpipe_context *lp,
blend_vec_type = lp_build_vec_type(blend_type);
util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
- shader->no, variant->no, do_tri_test ? "edge" : "whole");
+ shader->no, variant->no, partial_mask ? "partial" : "whole");
arg_types[0] = screen->context_ptr_type; /* context */
arg_types[1] = LLVMInt32Type(); /* x */
@@ -656,23 +529,15 @@ generate_fragment(struct llvmpipe_context *lp,
arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */
arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */
arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
- arg_types[9] = LLVMInt32Type(); /* c0 */
- arg_types[10] = LLVMInt32Type(); /* c1 */
- arg_types[11] = LLVMInt32Type(); /* c2 */
- /* Note: the step arrays are built as int32[16] but we interpret
- * them here as int32_vec4[4].
- */
- arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step0 */
- arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step1 */
- arg_types[14] = LLVMPointerType(int32_vec4_type, 0);/* step2 */
- arg_types[15] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
+ arg_types[9] = LLVMInt32Type(); /* mask_input */
+ arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
function = LLVMAddFunction(screen->module, func_name, func_type);
LLVMSetFunctionCallConv(function, LLVMCCallConv);
- variant->function[do_tri_test] = function;
+ variant->function[partial_mask] = function;
/* XXX: need to propagate noalias down into color param now we are
@@ -691,12 +556,7 @@ generate_fragment(struct llvmpipe_context *lp,
dady_ptr = LLVMGetParam(function, 6);
color_ptr_ptr = LLVMGetParam(function, 7);
depth_ptr = LLVMGetParam(function, 8);
- c0 = LLVMGetParam(function, 9);
- c1 = LLVMGetParam(function, 10);
- c2 = LLVMGetParam(function, 11);
- step0_ptr = LLVMGetParam(function, 12);
- step1_ptr = LLVMGetParam(function, 13);
- step2_ptr = LLVMGetParam(function, 14);
+ mask_input = LLVMGetParam(function, 9);
lp_build_name(context_ptr, "context");
lp_build_name(x, "x");
@@ -706,15 +566,10 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(dady_ptr, "dady");
lp_build_name(color_ptr_ptr, "color_ptr_ptr");
lp_build_name(depth_ptr, "depth");
- lp_build_name(c0, "c0");
- lp_build_name(c1, "c1");
- lp_build_name(c2, "c2");
- lp_build_name(step0_ptr, "step0");
- lp_build_name(step1_ptr, "step1");
- lp_build_name(step2_ptr, "step2");
+ lp_build_name(mask_input, "mask_input");
if (key->occlusion_count) {
- counter = LLVMGetParam(function, 15);
+ counter = LLVMGetParam(function, 10);
lp_build_name(counter, "counter");
}
@@ -763,9 +618,9 @@ generate_fragment(struct llvmpipe_context *lp,
out_color,
depth_ptr_i,
facing,
- do_tri_test,
- c0, c1, c2,
- step0_ptr, step1_ptr, step2_ptr, counter);
+ partial_mask,
+ mask_input,
+ counter);
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
for(chan = 0; chan < NUM_CHANNELS; ++chan)
@@ -792,9 +647,13 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
}
- lp_build_conv_mask(builder, fs_type, blend_type,
- fs_mask, num_fs,
- &blend_mask, 1);
+ if (partial_mask || !variant->opaque) {
+ lp_build_conv_mask(builder, fs_type, blend_type,
+ fs_mask, num_fs,
+ &blend_mask, 1);
+ } else {
+ blend_mask = lp_build_const_int_vec(blend_type, ~0);
+ }
color_ptr = LLVMBuildLoad(builder,
LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
@@ -832,8 +691,7 @@ generate_fragment(struct llvmpipe_context *lp,
#endif
/* Apply optimizations to LLVM IR */
- if (1)
- LLVMRunFunctionPassManager(screen->pass, function);
+ LLVMRunFunctionPassManager(screen->pass, function);
if (gallivm_debug & GALLIVM_DEBUG_IR) {
/* Print the LLVM IR to stderr */
@@ -847,7 +705,7 @@ generate_fragment(struct llvmpipe_context *lp,
{
void *f = LLVMGetPointerToGlobal(screen->engine, function);
- variant->jit_function[do_tri_test] = (lp_jit_frag_func)pointer_to_func(f);
+ variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f);
if (gallivm_debug & GALLIVM_DEBUG_ASM) {
lp_disassemble(f);
@@ -963,7 +821,6 @@ generate_variant(struct llvmpipe_context *lp,
!key->stencil[0].enabled &&
!key->alpha.enabled &&
!key->depth.enabled &&
- !key->scissor &&
!shader->info.uses_kill
? TRUE : FALSE;
@@ -1182,7 +1039,6 @@ make_variant_key(struct llvmpipe_context *lp,
/* alpha.ref_value is passed in jit_context */
key->flatshade = lp->rasterizer->flatshade;
- key->scissor = lp->rasterizer->scissor;
if (lp->active_query_count) {
key->occlusion_count = TRUE;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 593cd4de6b..37900fc544 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -54,7 +54,6 @@ struct lp_fragment_shader_variant_key
enum pipe_format zsbuf_format;
unsigned nr_cbufs:8;
unsigned flatshade:1;
- unsigned scissor:1;
unsigned occlusion_count:1;
struct {
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 1d42bdde4e..d236bad69d 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -36,6 +36,7 @@
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
+#include "util/u_cpu_detect.h"
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -898,13 +899,15 @@ static void
alloc_image_data(struct llvmpipe_resource *lpr, unsigned level,
enum lp_texture_layout layout)
{
+ uint alignment = MAX2(16, util_cpu_caps.cacheline);
+
if (lpr->dt)
assert(level == 0);
if (layout == LP_TEX_LAYOUT_TILED) {
/* tiled data is stored in regular memory */
uint buffer_size = tex_image_size(lpr, level, layout);
- lpr->tiled[level].data = align_malloc(buffer_size, 16);
+ lpr->tiled[level].data = align_malloc(buffer_size, alignment);
}
else {
assert(layout == LP_TEX_LAYOUT_LINEAR);
@@ -920,7 +923,7 @@ alloc_image_data(struct llvmpipe_resource *lpr, unsigned level,
else {
/* not a display target - allocate regular memory */
uint buffer_size = tex_image_size(lpr, level, LP_TEX_LAYOUT_LINEAR);
- lpr->linear[level].data = align_malloc(buffer_size, 16);
+ lpr->linear[level].data = align_malloc(buffer_size, alignment);
}
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c
index 2b63992dd7..0938f7aea7 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_image.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c
@@ -204,7 +204,7 @@ lp_tiled_to_linear(const void *src, void *dst,
lp_tile_unswizzle_4ub(format,
src_tile,
dst, dst_stride,
- ii, jj, tile_w, tile_h);
+ ii, jj);
}
}
}
@@ -293,7 +293,7 @@ lp_linear_to_tiled(const void *src, void *dst,
lp_tile_swizzle_4ub(format,
dst_tile,
src, src_stride,
- ii, jj, tile_w, tile_h);
+ ii, jj);
}
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
index 07f71b8411..12dac1da6c 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -79,14 +79,14 @@ void
lp_tile_swizzle_4ub(enum pipe_format format,
uint8_t *dst,
const void *src, unsigned src_stride,
- unsigned x, unsigned y, unsigned w, unsigned h);
+ unsigned x, unsigned y);
void
lp_tile_unswizzle_4ub(enum pipe_format format,
const uint8_t *src,
void *dst, unsigned dst_stride,
- unsigned x, unsigned y, unsigned w, unsigned h);
+ unsigned x, unsigned y);
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index 5ab63cbac6..bf70c936b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -75,13 +75,13 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix):
src_native_type = native_type(format)
print 'static void'
- print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type)
+ print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type)
print '{'
print ' unsigned x, y;'
print ' const uint8_t *src_row = src + y0*src_stride;'
- print ' for (y = 0; y < h; ++y) {'
+ print ' for (y = 0; y < TILE_SIZE; ++y) {'
print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride())
- print ' for (x = 0; x < w; ++x) {'
+ print ' for (x = 0; x < TILE_SIZE; ++x) {'
names = ['']*4
if format.colorspace in ('rgb', 'srgb'):
@@ -202,9 +202,9 @@ def emit_unrolled_unswizzle_code(format, src_channel):
print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type)
print ' unsigned int qx, qy, i;'
print
- print ' for (qy = 0; qy < h; qy += TILE_VECTOR_HEIGHT) {'
+ print ' for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {'
print ' const unsigned py = y0 + qy;'
- print ' for (qx = 0; qx < w; qx += TILE_VECTOR_WIDTH) {'
+ print ' for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {'
print ' const unsigned px = x0 + qx;'
print ' const uint8_t *r = src + 0 * TILE_C_STRIDE;'
print ' const uint8_t *g = src + 1 * TILE_C_STRIDE;'
@@ -231,9 +231,9 @@ def emit_tile_pixel_unswizzle_code(format, src_channel):
print ' unsigned x, y;'
print ' uint8_t *dst_row = dst + y0*dst_stride;'
- print ' for (y = 0; y < h; ++y) {'
+ print ' for (y = 0; y < TILE_SIZE; ++y) {'
print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride())
- print ' for (x = 0; x < w; ++x) {'
+ print ' for (x = 0; x < TILE_SIZE; ++x) {'
if format.layout == PLAIN:
if not format.is_array():
@@ -273,7 +273,7 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix):
name = format.short_name()
print 'static void'
- print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type)
+ print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type)
print '{'
if format.layout == PLAIN \
and format.colorspace == 'rgb' \
@@ -297,9 +297,9 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
generate_format_read(format, dst_channel, dst_native_type, dst_suffix)
print 'void'
- print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type)
+ print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)' % (dst_suffix, dst_native_type)
print '{'
- print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type
+ print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type
print '#ifdef DEBUG'
print ' lp_tile_swizzle_count += 1;'
print '#endif'
@@ -313,7 +313,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));'
print ' return;'
print ' }'
- print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);'
+ print ' func(dst, (const uint8_t *)src, src_stride, x, y);'
print '}'
print
@@ -326,10 +326,10 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
generate_format_write(format, src_channel, src_native_type, src_suffix)
print 'void'
- print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type)
+ print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)' % (src_suffix, src_native_type)
print '{'
- print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type
+ print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type
print '#ifdef DEBUG'
print ' lp_tile_unswizzle_count += 1;'
print '#endif'
@@ -343,7 +343,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));'
print ' return;'
print ' }'
- print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);'
+ print ' func(src, (uint8_t *)dst, dst_stride, x, y);'
print '}'
print
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 2408a95353..895efaa1c4 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -24,6 +24,7 @@
#include "r300_texture.h"
#include "util/u_format.h"
+#include "util/u_pack_color.h"
enum r300_blitter_op /* bitmask */
{
@@ -79,6 +80,48 @@ static void r300_blitter_end(struct r300_context *r300)
}
}
+static uint32_t r300_depth_clear_cb_value(enum pipe_format format,
+ const float* rgba)
+{
+ union util_color uc;
+ util_pack_color(rgba, format, &uc);
+
+ if (util_format_get_blocksizebits(format) == 32)
+ return uc.ui;
+ else
+ return uc.us | (uc.us << 16);
+}
+
+static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
+ unsigned clear_buffers)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+ unsigned bpp;
+
+ /* Only color clear allowed, and only one colorbuffer. */
+ if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1)
+ return FALSE;
+
+ /* The colorbuffer must be point-sampled. */
+ if (surf->base.texture->nr_samples > 1)
+ return FALSE;
+
+ bpp = util_format_get_blocksizebits(surf->base.format);
+
+ /* ZB can only work with the two pixel sizes. */
+ if (bpp != 16 && bpp != 32)
+ return FALSE;
+
+ /* If the midpoint ZB offset is not aligned to 2048, it returns garbage
+ * with certain texture sizes. Macrotiling ensures the alignment. */
+ if (!r300_texture(surf->base.texture)->mip_macrotile[surf->base.level])
+ return FALSE;
+
+ return TRUE;
+}
+
/* Clear currently bound buffers. */
static void r300_clear(struct pipe_context* pipe,
unsigned buffers,
@@ -124,15 +167,43 @@ static void r300_clear(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_hyperz_state *hyperz =
+ (struct r300_hyperz_state*)r300->hyperz_state.state;
+ uint32_t width = fb->width;
+ uint32_t height = fb->height;
+
+ /* Enable CBZB clear. */
+ if (r300_cbzb_clear_allowed(r300, buffers)) {
+ struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+
+ hyperz->zb_depthclearvalue =
+ r300_depth_clear_cb_value(surf->base.format, rgba);
+
+ width = surf->cbzb_width;
+ height = surf->cbzb_height;
+
+ r300->cbzb_clear = TRUE;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ }
/* Clear. */
r300_blitter_begin(r300, R300_CLEAR);
util_blitter_clear(r300->blitter,
- fb->width,
- fb->height,
+ width,
+ height,
fb->nr_cbufs,
buffers, rgba, depth, stencil);
r300_blitter_end(r300);
+
+ /* Disable CBZB clear. */
+ if (r300->cbzb_clear) {
+ r300->cbzb_clear = FALSE;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ }
+
+ /* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */
+ if (r300->flush_counter == 0)
+ pipe->flush(pipe, 0, NULL);
}
/* Clear a region of a color surface to a constant value. */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 7f43281af4..1beab7628a 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -330,7 +330,7 @@ static void r300_init_states(struct pipe_context *pipe)
BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size);
OUT_CB_REG(R300_ZB_BW_CNTL, 0);
OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0);
- OUT_CB_REG(R300_SC_HYPERZ, 0x1C);
+ OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2);
END_CB;
}
}
@@ -373,15 +373,16 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300_init_blit_functions(r300);
r300_init_flush_functions(r300);
r300_init_query_functions(r300);
- r300_init_render_functions(r300);
r300_init_state_functions(r300);
r300_init_resource_functions(r300);
- rws->set_flush_cb(r300->rws, r300_flush_cb, r300);
- r300->dirty_hw++;
-
r300->blitter = util_blitter_create(&r300->context);
+ /* Render functions must be initialized after blitter. */
+ r300_init_render_functions(r300);
+
+ rws->set_flush_cb(r300->rws, r300_flush_cb, r300);
+
r300->upload_ib = u_upload_create(&r300->context,
32 * 1024, 16,
PIPE_BIND_INDEX_BUFFER);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 2483af7fb5..df4299b7ea 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -311,6 +311,13 @@ struct r300_surface {
uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
+
+ /* Parameters dedicated to the CBZB clear. */
+ uint32_t cbzb_width; /* Aligned width. */
+ uint32_t cbzb_height; /* Half of the height. */
+ uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */
+ uint32_t cbzb_pitch; /* DEPTHPITCH. */
+ uint32_t cbzb_format; /* ZB_FORMAT. */
};
struct r300_texture {
@@ -525,6 +532,7 @@ struct r300_context {
/* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
boolean incompatible_vb_layout;
+ boolean cbzb_clear;
/* upload managers */
struct u_upload_mgr *upload_vb;
struct u_upload_mgr *upload_ib;
@@ -593,7 +601,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300);
/* r300_state.c */
enum r300_fb_state_change {
- R300_CHANGED_FB_STATE = 0
+ R300_CHANGED_FB_STATE = 0,
+ R300_CHANGED_CBZB_FLAG
};
void r300_mark_fb_state_dirty(struct r300_context *r300,
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index a6cd86e392..31d4e14681 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -29,17 +29,20 @@
static const struct debug_named_value debug_options[] = {
{ "fp", DBG_FP, "Fragment program handling (for debugging)" },
{ "vp", DBG_VP, "Vertex program handling (for debugging)" },
- { "draw", DBG_DRAW, "Draw and emit (for debugging)" },
+ { "draw", DBG_DRAW, "Draw calls (for debugging)" },
+ { "swtcl", DBG_SWTCL, "SWTCL-specific info (for debugging)" },
+ { "rsblock", DBG_RS_BLOCK, "Rasterizer registers (for debugging)" },
+ { "psc", DBG_PSC, "Vertex stream registers (for debugging)" },
{ "tex", DBG_TEX, "Textures (for debugging)" },
{ "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" },
{ "fall", DBG_FALL, "Fallbacks (for debugging)" },
{ "rs", DBG_RS, "Rasterizer (for debugging)" },
{ "fb", DBG_FB, "Framebuffer (for debugging)" },
+ { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for debugging)" },
{ "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" },
{ "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" },
{ "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" },
- { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for lulz)" },
- { "stats", DBG_STATS, "Gather statistics (for lulz)" },
+ { "stats", DBG_STATS, "Gather statistics" },
/* must be last */
DEBUG_NAMED_VALUE_END
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 5ce3eb63c5..daae6dd510 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -272,8 +272,17 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state;
struct pipe_framebuffer_state* fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
+ uint32_t height = fb->height;
+ uint32_t width = fb->width;
CS_LOCALS(r300);
+ if (r300->cbzb_clear) {
+ struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+
+ height = surf->cbzb_height;
+ width = surf->cbzb_width;
+ }
+
BEGIN_CS(size);
/* Set up scissors.
@@ -281,13 +290,13 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
if (r300->screen->caps.is_r500) {
OUT_CS(0);
- OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) |
- ((fb->height - 1) << R300_SCISSORS_Y_SHIFT));
+ OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) |
+ ((height - 1) << R300_SCISSORS_Y_SHIFT));
} else {
OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
(1440 << R300_SCISSORS_Y_SHIFT));
- OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) |
- ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT));
+ OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) |
+ ((height + 1440-1) << R300_SCISSORS_Y_SHIFT));
}
/* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */
@@ -344,8 +353,20 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0);
}
+ /* Set up the ZB part of the CBZB clear. */
+ if (r300->cbzb_clear) {
+ surf = r300_surface(fb->cbufs[0]);
+
+ OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);
+
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
+ OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain, 0);
+
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
+ OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain, 0);
+ }
/* Set up a zbuffer. */
- if (fb->zsbuf) {
+ else if (fb->zsbuf) {
surf = r300_surface(fb->zsbuf);
OUT_CS_REG(R300_ZB_FORMAT, surf->format);
@@ -377,6 +398,18 @@ void r300_emit_hyperz_state(struct r300_context *r300,
WRITE_CS_TABLE(state, size);
}
+void r300_emit_hyperz_end(struct r300_context *r300)
+{
+ struct r300_hyperz_state z =
+ *(struct r300_hyperz_state*)r300->hyperz_state.state;
+
+ z.zb_bw_cntl = 0;
+ z.zb_depthclearvalue = 0;
+ z.sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+
+ r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z);
+}
+
void r300_emit_fb_state_pipelined(struct r300_context *r300,
unsigned size, void *state)
{
@@ -605,7 +638,7 @@ void r300_emit_rs_block_state(struct r300_context* r300,
unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1;
CS_LOCALS(r300);
- if (SCREEN_DBG_ON(r300->screen, DBG_DRAW)) {
+ if (DBG_ON(r300, DBG_RS_BLOCK)) {
r500_dump_rs_block(rs);
fprintf(stderr, "r300: RS emit:\n");
@@ -750,7 +783,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed)
{
CS_LOCALS(r300);
- DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, "
+ DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, "
"vertex size %d\n", r300->vbo,
r300->vertex_info.size);
/* Set the pointer to our vertex buffer. The emitted values are this:
@@ -778,7 +811,7 @@ void r300_emit_vertex_stream_state(struct r300_context* r300,
unsigned i;
CS_LOCALS(r300);
- if (DBG_ON(r300, DBG_DRAW)) {
+ if (DBG_ON(r300, DBG_PSC)) {
fprintf(stderr, "r300: PSC emit:\n");
for (i = 0; i < streams->count; i++) {
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 586ccda620..5d05039669 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -45,6 +45,11 @@ void r300_emit_clip_state(struct r300_context* r300,
void r300_emit_dsa_state(struct r300_context* r300,
unsigned size, void* state);
+void r300_emit_hyperz_state(struct r300_context *r300,
+ unsigned size, void *state);
+
+void r300_emit_hyperz_end(struct r300_context *r300);
+
void r300_emit_fs(struct r300_context* r300, unsigned size, void *state);
void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);
@@ -64,9 +69,6 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state);
-void r300_emit_hyperz_state(struct r300_context *r300,
- unsigned size, void *state);
-
void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state);
void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state);
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index ba840bfff8..6f31ba159a 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -48,11 +48,10 @@ static void r300_flush(struct pipe_context* pipe,
}
if (r300->dirty_hw) {
+ r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
- if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
- r300->flush_counter++;
- }
+ r300->flush_counter++;
r300->rws->flush_cs(r300->rws);
r300->dirty_hw = 0;
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index 2c4e6c7211..e952895601 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -27,6 +27,22 @@
#include "r300_fs.h"
/*****************************************************************************/
+/* The HyperZ setup */
+/*****************************************************************************/
+
+static void r300_update_hyperz(struct r300_context* r300)
+{
+ struct r300_hyperz_state *z =
+ (struct r300_hyperz_state*)r300->hyperz_state.state;
+
+ z->zb_bw_cntl = 0;
+ z->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+
+ if (r300->cbzb_clear)
+ z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY;
+}
+
+/*****************************************************************************/
/* The ZTOP state */
/*****************************************************************************/
@@ -118,4 +134,7 @@ static void r300_update_ztop(struct r300_context* r300)
void r300_update_hyperz_state(struct r300_context* r300)
{
r300_update_ztop(r300);
+ if (r300->hyperz_state.dirty) {
+ r300_update_hyperz(r300);
+ }
}
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 53728431a6..970cb68837 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -224,6 +224,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300,
/* Emitted in flush. */
end_dwords += 26; /* emit_query_end */
+ end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */
cs_dwords += end_dwords;
@@ -278,7 +279,6 @@ static boolean immd_is_good_idea(struct r300_context *r300,
/* We shouldn't map buffers referenced by CS, busy buffers,
* and ones placed in VRAM. */
- /* XXX Check for VRAM buffers. */
for (i = 0; i < vertex_element_count; i++) {
velem = &r300->velems->velem[i];
vbi = velem->vertex_buffer_index;
@@ -286,6 +286,10 @@ static boolean immd_is_good_idea(struct r300_context *r300,
if (!checked[vbi]) {
vbuf = &r300->vertex_buffer[vbi];
+ if (!(r300_buffer(vbuf->buffer)->domain & R300_DOMAIN_GTT)) {
+ return FALSE;
+ }
+
if (r300_buffer_is_referenced(&r300->context,
vbuf->buffer,
R300_REF_CS | R300_REF_HW)) {
@@ -299,8 +303,7 @@ static boolean immd_is_good_idea(struct r300_context *r300,
}
/*****************************************************************************
- * The emission of draw packets for r500. Older GPUs may use these functions *
- * after resolving fallback issues (e.g. stencil ref two-sided). *
+ * The HWTCL draw functions. *
****************************************************************************/
static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
@@ -867,13 +870,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
unsigned dwords = 6;
CS_LOCALS(r300);
-
(void) i; (void) ptr;
r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL,
NULL, dwords, 0, 0, NULL);
- DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count);
+ DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count);
/* Uncomment to dump all VBOs rendered through this interface.
* Slow and noisy!
@@ -916,6 +918,7 @@ static void r300_render_draw_elements(struct vbuf_render* render,
unsigned free_dwords;
CS_LOCALS(r300);
+ DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count);
/* Reserve at least 256 dwords.
*
@@ -1017,6 +1020,88 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300)
* End of SW TCL functions *
***************************************************************************/
+/* If we used a quad to draw a rectangle, the pixels on the main diagonal
+ * would be computed and stored twice, which makes the clear/copy codepaths
+ * somewhat inefficient. Instead we use a rectangular point sprite. */
+static void r300_blitter_draw_rectangle(struct blitter_context *blitter,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4])
+{
+ struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter));
+ unsigned last_sprite_coord_enable = r300->sprite_coord_enable;
+ unsigned width = x2 - x1;
+ unsigned height = y2 - y1;
+ unsigned vertex_size =
+ type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4;
+ unsigned dwords = 13 + vertex_size +
+ (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0);
+ const float zeros[4] = {0, 0, 0, 0};
+ CB_LOCALS;
+
+ if (type == UTIL_BLITTER_ATTRIB_TEXCOORD)
+ r300->sprite_coord_enable = 1;
+
+ r300_update_derived_state(r300);
+
+ /* Mark some states we don't care about as non-dirty. */
+ r300->clip_state.dirty = FALSE;
+ r300->viewport_state.dirty = FALSE;
+
+ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL);
+
+ DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");
+
+ BEGIN_CS_AS_CB(r300, dwords);
+ /* Set up GA. */
+ OUT_CB_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16));
+
+ if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
+ /* Set up the GA to generate texcoords. */
+ OUT_CB_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE |
+ (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT));
+ OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4);
+ OUT_CB_32F(attrib[0]);
+ OUT_CB_32F(attrib[3]);
+ OUT_CB_32F(attrib[2]);
+ OUT_CB_32F(attrib[1]);
+ }
+
+ /* Set up VAP controls. */
+ OUT_CB_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+ OUT_CB_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
+ OUT_CB_REG(R300_VAP_VTX_SIZE, vertex_size);
+ OUT_CB_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CB(1);
+ OUT_CB(0);
+
+ /* Draw. */
+ OUT_CB_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size);
+ OUT_CB(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) |
+ R300_VAP_VF_CNTL__PRIM_POINTS);
+
+ OUT_CB_32F(x1 + width * 0.5f);
+ OUT_CB_32F(y1 + height * 0.5f);
+ OUT_CB_32F(depth);
+ OUT_CB_32F(1);
+
+ if (vertex_size == 8) {
+ if (!attrib)
+ attrib = zeros;
+ OUT_CB_TABLE(attrib, 4);
+ }
+ END_CB;
+
+ /* Restore the state. */
+ r300->clip_state.dirty = TRUE;
+ r300->rs_state.dirty = TRUE;
+ r300->viewport_state.dirty = TRUE;
+
+ r300->sprite_coord_enable = last_sprite_coord_enable;
+}
+
static void r300_resource_resolve(struct pipe_context* pipe,
struct pipe_resource* dest,
struct pipe_subresource subdest,
@@ -1070,6 +1155,7 @@ void r300_init_render_functions(struct r300_context *r300)
}
r300->context.resource_resolve = r300_resource_resolve;
+ r300->blitter->draw_rectangle = r300_blitter_draw_rectangle;
/* Plug in the two-sided stencil reference value fallback if needed. */
if (!r300->screen->caps.is_r500)
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index cad99ca845..a3b08555cd 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -256,7 +256,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
uint32_t retval = 0;
boolean is_r500 = r300_screen(screen)->caps.is_r500;
boolean is_r400 = r300_screen(screen)->caps.is_r400;
- boolean is_rv350 = r300_screen(screen)->caps.is_rv350;
boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM ||
format == PIPE_FORMAT_S8_USCALED_Z24_UNORM;
boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM ||
@@ -272,6 +271,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
format == PIPE_FORMAT_R16G16B16_FLOAT ||
format == PIPE_FORMAT_R16G16B16A16_FLOAT;
+ /* Check multisampling support. */
switch (sample_count) {
case 0:
case 1:
@@ -326,7 +326,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
/* Check vertex buffer format support. */
if (usage & PIPE_BIND_VERTEX_BUFFER &&
/* Half float is supported on >= RV350. */
- (is_rv350 || !is_half_float) &&
+ (is_r400 || is_r500 || !is_half_float) &&
r300_translate_vertex_data_type(format) != R300_INVALID_FORMAT) {
retval |= PIPE_BIND_VERTEX_BUFFER;
}
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 29cd5dbe26..c6b4b57c3b 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -61,17 +61,19 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
* those changes.
*/
/*@{*/
-#define DBG_HELP (1 << 0)
+
/* Logging. */
+#define DBG_PSC (1 << 0)
#define DBG_FP (1 << 1)
#define DBG_VP (1 << 2)
-/* The bit (1 << 3) is unused. */
+#define DBG_SWTCL (1 << 3)
#define DBG_DRAW (1 << 4)
#define DBG_TEX (1 << 5)
#define DBG_TEXALLOC (1 << 6)
#define DBG_RS (1 << 7)
#define DBG_FALL (1 << 8)
#define DBG_FB (1 << 9)
+#define DBG_RS_BLOCK (1 << 10)
/* Features. */
#define DBG_ANISOHQ (1 << 16)
#define DBG_NO_TILING (1 << 17)
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index b0722cb95f..f4c6a262d4 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -688,7 +688,9 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
/* Now compute the fb_state atom size. */
r300->fb_state.size = 2 + (8 * state->nr_cbufs);
- if (state->zsbuf)
+ if (r300->cbzb_clear)
+ r300->fb_state.size += 10;
+ else if (state->zsbuf)
r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14;
/* The size of the rest of atoms stays the same. */
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 3aa8deb63c..2ef9766578 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -102,7 +102,8 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
* they won't be rasterized. */
gen_count = 0;
for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) {
- if (vs_outputs->generic[i] != ATTR_UNUSED) {
+ if (vs_outputs->generic[i] != ATTR_UNUSED &&
+ !(r300->sprite_coord_enable & (1 << i))) {
r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
vs_outputs->generic[i]);
gen_count++;
@@ -118,7 +119,7 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
/* WPOS. */
if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED && gen_count < 8) {
- DBG(r300, DBG_DRAW, "draw_emit_attrib: WPOS, index: %i\n",
+ DBG(r300, DBG_SWTCL, "draw_emit_attrib: WPOS, index: %i\n",
vs_outputs->wpos);
r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
vs_outputs->wpos);
@@ -140,18 +141,19 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300)
/* For each Draw attribute, route it to the fragment shader according
* to the vs_output_tab. */
attrib_count = vinfo->num_attribs;
- DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
+ DBG(r300, DBG_SWTCL, "r300: attrib count: %d\n", attrib_count);
for (i = 0; i < attrib_count; i++) {
- DBG(r300, DBG_DRAW, "r300: attrib: index %d, interp %d, emit %d,"
- " vs_output_tab %d\n", vinfo->attrib[i].src_index,
- vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
- vs_output_tab[i]);
-
- /* Make sure we have a proper destination for our attribute. */
- assert(vs_output_tab[i] != -1);
+ if (vs_output_tab[i] == -1) {
+ assert(0);
+ abort();
+ }
format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+ DBG(r300, DBG_SWTCL,
+ "r300: swtcl_vertex_psc [%i] <- %s\n",
+ vs_output_tab[i], util_format_short_name(format));
+
/* Obtain the type of data in this attribute. */
type = r300_translate_vertex_data_type(format);
if (type == R300_INVALID_FORMAT) {
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index d378a7150d..e8b1d67007 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -1034,6 +1034,8 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
if (surface) {
+ uint32_t stride, offset, tile_height;
+
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
surface->base.format = texture->format;
@@ -1054,6 +1056,34 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
surface->offset = r300_texture_get_offset(tex, level, zslice, face);
surface->pitch = tex->fb_state.pitch[level];
surface->format = tex->fb_state.format;
+
+ /* Parameters for the CBZB clear. */
+ surface->cbzb_width = align(surface->base.width, 64);
+
+ /* Height must be aligned to the size of a tile. */
+ tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level],
+ DIM_HEIGHT);
+ surface->cbzb_height = align((surface->base.height + 1) / 2,
+ tile_height);
+
+ /* Offset must be aligned to 2K and must point at the beginning
+ * of a scanline. */
+ stride = r300_texture_get_stride(r300_screen(screen), tex, level);
+ offset = surface->offset + stride * surface->cbzb_height;
+ surface->cbzb_midpoint_offset = offset & ~2047;
+
+ surface->cbzb_pitch = surface->pitch & 0x1ffffc;
+
+ if (util_format_get_blocksizebits(surface->base.format) == 32)
+ surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+ else
+ surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z;
+
+ SCREEN_DBG(r300_screen(screen), DBG_TEX,
+ "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n",
+ surface->cbzb_width, surface->cbzb_height,
+ offset & 2047,
+ tex->mip_macrotile[level] ? "YES" : " NO");
}
return &surface->base;
diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c
index 00b167e256..e0dd5cf8c2 100644
--- a/src/gallium/drivers/rbug/rbug_context.c
+++ b/src/gallium/drivers/rbug/rbug_context.c
@@ -97,15 +97,7 @@ rbug_draw_block_locked(struct rbug_context *rb_pipe, int flag)
/* wait for rbug to clear the blocked flag */
while (rb_pipe->draw_blocked & flag) {
rb_pipe->draw_blocked |= flag;
-#ifdef PIPE_THREAD_HAVE_CONDVAR
pipe_condvar_wait(rb_pipe->draw_cond, rb_pipe->draw_mutex);
-#else
- pipe_mutex_unlock(rb_pipe->draw_mutex);
-#ifdef PIPE_SUBSYSTEM_WINDOWS_USER
- Sleep(1);
-#endif
- pipe_mutex_lock(rb_pipe->draw_mutex);
-#endif
}
}
diff --git a/src/gallium/drivers/rbug/rbug_core.c b/src/gallium/drivers/rbug/rbug_core.c
index f1aab3869b..9dc663b079 100644
--- a/src/gallium/drivers/rbug/rbug_core.c
+++ b/src/gallium/drivers/rbug/rbug_core.c
@@ -407,9 +407,7 @@ rbug_context_draw_step(struct rbug_rbug *tr_rbug, struct rbug_header *header, ui
}
pipe_mutex_unlock(rb_context->draw_mutex);
-#ifdef PIPE_THREAD_HAVE_CONDVAR
pipe_condvar_broadcast(rb_context->draw_cond);
-#endif
pipe_mutex_unlock(rb_screen->list_mutex);
@@ -442,9 +440,7 @@ rbug_context_draw_unblock(struct rbug_rbug *tr_rbug, struct rbug_header *header,
rb_context->draw_blocker &= ~unblock->unblock;
pipe_mutex_unlock(rb_context->draw_mutex);
-#ifdef PIPE_THREAD_HAVE_CONDVAR
pipe_condvar_broadcast(rb_context->draw_cond);
-#endif
pipe_mutex_unlock(rb_screen->list_mutex);
@@ -476,9 +472,7 @@ rbug_context_draw_rule(struct rbug_rbug *tr_rbug, struct rbug_header *header, ui
rb_context->draw_blocker |= RBUG_BLOCK_RULE;
pipe_mutex_unlock(rb_context->draw_mutex);
-#ifdef PIPE_THREAD_HAVE_CONDVAR
pipe_condvar_broadcast(rb_context->draw_cond);
-#endif
pipe_mutex_unlock(rb_screen->list_mutex);
diff --git a/src/gallium/targets/Makefile.xorg b/src/gallium/targets/Makefile.xorg
index 4237f944e0..c2d0064978 100644
--- a/src/gallium/targets/Makefile.xorg
+++ b/src/gallium/targets/Makefile.xorg
@@ -9,7 +9,8 @@
# Optional defines:
# DRIVER_INCLUDES are appended to the list of includes directories.
# DRIVER_DEFINES is not used for makedepend, but for compilation.
-# DRIVER_LINKS are flags given to the linker
+# DRIVER_PIPES are pipe drivers and modules that the driver depends on.
+# DRIVER_LINKS are flags given to the linker.
### Basic defines ###
@@ -27,13 +28,21 @@ INCLUDES = \
LIBNAME_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET)
+ifeq ($(MESA_LLVM),1)
+LD = g++
+LDFLAGS += $(LLVM_LDFLAGS)
+USE_CXX=1
+DRIVER_PIPES += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
+DRIVER_LINKS += $(LLVM_LIBS) -lm -ldl
+endif
+
##### TARGETS #####
default: depend $(TOP)/$(LIB_DIR)/gallium $(LIBNAME) $(LIBNAME_STAGING)
-$(LIBNAME): $(OBJECTS) Makefile $(LIBS)
- $(MKLIB) -noprefix -o $@ $(OBJECTS) $(DRIVER_LINKS)
+$(LIBNAME): $(OBJECTS) Makefile ../Makefile.xorg $(LIBS) $(DRIVER_PIPES)
+ $(MKLIB) -noprefix -o $@ $(OBJECTS) $(DRIVER_PIPES) $(GALLIUM_AUXILIARIES) $(DRIVER_LINKS)
depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURCES)
rm -f depend
diff --git a/src/gallium/targets/dri-i965/SConscript b/src/gallium/targets/dri-i965/SConscript
index 7eb3c436a3..684e3488f7 100644
--- a/src/gallium/targets/dri-i965/SConscript
+++ b/src/gallium/targets/dri-i965/SConscript
@@ -17,7 +17,6 @@ env.Append(CPPDEFINES = [
env.Prepend(LIBS = [
st_dri,
i965drm,
- ws_drm,
ws_wrapper,
i965,
trace,
diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile
index b173ceb994..e745023ba5 100644
--- a/src/gallium/targets/libgl-xlib/Makefile
+++ b/src/gallium/targets/libgl-xlib/Makefile
@@ -97,7 +97,7 @@ tags:
etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h
clean:
- -rm -f *.o
+ -rm -f *.o depend
include depend
diff --git a/src/gallium/targets/xorg-i915/Makefile b/src/gallium/targets/xorg-i915/Makefile
index 45b0622ca9..865240404c 100644
--- a/src/gallium/targets/xorg-i915/Makefile
+++ b/src/gallium/targets/xorg-i915/Makefile
@@ -10,15 +10,15 @@ C_SOURCES = \
DRIVER_DEFINES = \
-DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD
-DRIVER_LINKS = \
+DRIVER_PIPES = \
$(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
$(TOP)/src/gallium/winsys/i915/drm/libi915drm.a \
$(TOP)/src/gallium/drivers/i915/libi915.a \
$(TOP)/src/gallium/drivers/galahad/libgalahad.a \
$(TOP)/src/gallium/drivers/trace/libtrace.a \
- $(TOP)/src/gallium/drivers/rbug/librbug.a \
- $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
- $(GALLIUM_AUXILIARIES) \
+ $(TOP)/src/gallium/drivers/rbug/librbug.a
+
+DRIVER_LINKS = \
$(shell pkg-config --libs libdrm libdrm_intel)
include ../Makefile.xorg
diff --git a/src/gallium/targets/xorg-i965/Makefile b/src/gallium/targets/xorg-i965/Makefile
index 9bb8252be2..494dce41c8 100644
--- a/src/gallium/targets/xorg-i965/Makefile
+++ b/src/gallium/targets/xorg-i965/Makefile
@@ -11,15 +11,16 @@ DRIVER_DEFINES = \
-DHAVE_CONFIG_H -DGALLIUM_SOFTPIPE \
-DGALLIUM_RBUG -DGALLIUM_TRACE
-DRIVER_LINKS = \
+DRIVER_PIPES = \
$(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
$(TOP)/src/gallium/winsys/i965/drm/libi965drm.a \
$(TOP)/src/gallium/drivers/i965/libi965.a \
$(TOP)/src/gallium/drivers/trace/libtrace.a \
$(TOP)/src/gallium/drivers/rbug/librbug.a \
$(TOP)/src/gallium/winsys/sw/wrapper/libwsw.a \
- $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
- $(GALLIUM_AUXILIARIES) \
+ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a
+
+DRIVER_LINKS = \
$(shell pkg-config --libs libdrm libdrm_intel)
include ../Makefile.xorg
diff --git a/src/gallium/targets/xorg-nouveau/Makefile b/src/gallium/targets/xorg-nouveau/Makefile
index 93f53e63bf..2fcd9ffb7d 100644
--- a/src/gallium/targets/xorg-nouveau/Makefile
+++ b/src/gallium/targets/xorg-nouveau/Makefile
@@ -10,15 +10,16 @@ C_SOURCES = \
DRIVER_DEFINES = \
-DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE
-DRIVER_LINKS = \
+DRIVER_PIPES = \
$(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
$(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \
- $(TOP)/src/gallium/drivers/trace/libtrace.a \
- $(TOP)/src/gallium/drivers/rbug/librbug.a \
$(TOP)/src/gallium/drivers/nvfx/libnvfx.a \
$(TOP)/src/gallium/drivers/nv50/libnv50.a \
$(TOP)/src/gallium/drivers/nouveau/libnouveau.a \
- $(GALLIUM_AUXILIARIES) \
+ $(TOP)/src/gallium/drivers/trace/libtrace.a \
+ $(TOP)/src/gallium/drivers/rbug/librbug.a
+
+DRIVER_LINKS = \
$(shell pkg-config --libs libdrm libdrm_nouveau)
include ../Makefile.xorg
diff --git a/src/gallium/targets/xorg-radeon/Makefile b/src/gallium/targets/xorg-radeon/Makefile
index 7def3a2261..d3bc356992 100644
--- a/src/gallium/targets/xorg-radeon/Makefile
+++ b/src/gallium/targets/xorg-radeon/Makefile
@@ -10,15 +10,15 @@ C_SOURCES = \
DRIVER_DEFINES = \
-DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD
+DRIVER_PIPES = \
+ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
+ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
+ $(TOP)/src/gallium/drivers/r300/libr300.a \
+ $(TOP)/src/gallium/drivers/galahad/libgalahad.a \
+ $(TOP)/src/gallium/drivers/trace/libtrace.a \
+ $(TOP)/src/gallium/drivers/rbug/librbug.a
+
DRIVER_LINKS = \
- $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
- $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
- $(TOP)/src/gallium/drivers/r300/libr300.a \
- $(TOP)/src/gallium/drivers/galahad/libgalahad.a \
- $(TOP)/src/gallium/drivers/trace/libtrace.a \
- $(TOP)/src/gallium/drivers/rbug/librbug.a \
- $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
- $(GALLIUM_AUXILIARIES) \
$(shell pkg-config --libs libdrm libdrm_radeon)
include ../Makefile.xorg
diff --git a/src/gallium/targets/xorg-vmwgfx/Makefile b/src/gallium/targets/xorg-vmwgfx/Makefile
index 73a2cea232..04a444f5e9 100644
--- a/src/gallium/targets/xorg-vmwgfx/Makefile
+++ b/src/gallium/targets/xorg-vmwgfx/Makefile
@@ -20,15 +20,14 @@ DRIVER_DEFINES = \
-DGALLIUM_TRACE \
-DHAVE_CONFIG_H
-
-DRIVER_LINKS = \
+DRIVER_PIPES = \
$(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
$(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \
- $(TOP)/src/gallium/drivers/trace/libtrace.a \
- $(TOP)/src/gallium/drivers/rbug/librbug.a \
$(TOP)/src/gallium/drivers/svga/libsvga.a \
- $(GALLIUM_AUXILIARIES) \
- $(shell pkg-config --libs --silence-errors libkms) \
- $(shell pkg-config --libs libdrm)
+ $(TOP)/src/gallium/drivers/trace/libtrace.a \
+ $(TOP)/src/gallium/drivers/rbug/librbug.a
+
+DRIVER_LINKS = \
+ $(shell pkg-config --libs libdrm libkms)
include ../Makefile.xorg
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c
index cb4ec32fea..c5f133e7b2 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c
@@ -361,8 +361,8 @@ void radeon_drm_bufmgr_write_reloc(struct pb_buffer *_buf,
retval = radeon_cs_write_reloc(buf->mgr->rws->cs,
buf->bo, gem_rd, gem_wd, flags);
if (retval) {
- debug_printf("radeon: Relocation of %p (%d, %d, %d) failed!\n",
- buf, gem_rd, gem_wd, flags);
+ fprintf(stderr, "radeon: Relocation of %p (%d, %d, %d) failed!\n",
+ buf, gem_rd, gem_wd, flags);
}
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c
index 51cfc0fd40..af35497fd7 100644
--- a/src/gallium/winsys/radeon/drm/radeon_r300.c
+++ b/src/gallium/winsys/radeon/drm/radeon_r300.c
@@ -252,8 +252,13 @@ static void radeon_flush_cs(struct r300_winsys_screen *rws)
/* Emit the CS. */
retval = radeon_cs_emit(ws->cs);
if (retval) {
- debug_printf("radeon: Bad CS, dumping...\n");
- radeon_cs_print(ws->cs, stderr);
+ if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
+ fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
+ radeon_cs_print(ws->cs, stderr);
+ } else {
+ fprintf(stderr, "radeon: The kernel rejected CS, "
+ "see dmesg for more information.\n");
+ }
}
/* Reset CS.
diff --git a/src/mapi/glapi/gen/gl_enums.py b/src/mapi/glapi/gen/gl_enums.py
index 644e085522..0caa01030f 100644
--- a/src/mapi/glapi/gen/gl_enums.py
+++ b/src/mapi/glapi/gen/gl_enums.py
@@ -105,7 +105,8 @@ const char *_mesa_lookup_enum_by_nr( int nr )
}
else {
/* this is not re-entrant safe, no big deal here */
- sprintf(token_tmp, "0x%x", nr);
+ _mesa_snprintf(token_tmp, sizeof(token_tmp) - 1, "0x%x", nr);
+ token_tmp[sizeof(token_tmp) - 1] = '\\0';
return token_tmp;
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index bd8d63246a..d347b4df9c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -146,7 +146,8 @@ static unsigned long t_src(struct r300_vertex_program_code *vp,
t_swizzle(GET_SWZ(src->Swizzle, 2)),
t_swizzle(GET_SWZ(src->Swizzle, 3)),
t_src_class(src->File),
- src->Negate) | (src->RelAddr << 4);
+ src->Negate) |
+ (src->RelAddr << 4) | (src->Abs << 3);
}
static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
@@ -162,7 +163,7 @@ static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_src_class(src->File),
src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
- (src->RelAddr << 4);
+ (src->RelAddr << 4) | (src->Abs << 3);
}
static int valid_dst(struct r300_vertex_program_code *vp,
@@ -487,6 +488,44 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
}
}
+/**
+ * R3xx-R4xx vertex engine does not support the Absolute source operand modifier
+ * and the Saturate opcode modifier. Only Absolute is currently transformed.
+ *
+ * For every source operand of \p inst that has Abs set, this clears the flag,
+ * emits a MAX instruction that writes MAX(src, -src) into a temporary obtained
+ * from rc_find_free_temporary(), and rewrites the operand so that it reads
+ * that temporary instead of the original register.
+ *
+ * \param c       compiler the instruction belongs to; used to allocate the
+ *                temporary and to insert the new instruction
+ * \param inst    instruction whose source operands are inspected and rewritten
+ * \param unused  not referenced by this function
+ * \return always 1
+ *
+ * NOTE(review): the rewritten operand is zeroed with memset(), so its Negate
+ * field ends up 0. If Negate is meant to apply after Abs (i.e. -|a|), the
+ * negation appears to be lost here, because MAX(-a, a) is still |a| - confirm.
+ */
+static int transform_nonnative_modifiers(
+ struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ void* unused)
+{
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ /* Transform ABS(a) to MAX(a, -a).
+ * Only the first opcode->NumSrcRegs operands are examined. */
+ for (i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].Abs) {
+ struct rc_instruction *new_inst;
+ unsigned temp;
+
+ inst->U.I.SrcReg[i].Abs = 0; /* cleared first so the copies below do not carry Abs */
+
+ temp = rc_find_free_temporary(c);
+
+ new_inst = rc_insert_new_instruction(c, inst->Prev); /* presumably placed right before inst - confirm */
+ new_inst->U.I.Opcode = RC_OPCODE_MAX;
+ new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ new_inst->U.I.DstReg.Index = temp;
+ new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; /* a */
+ new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
+ new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; /* -a: flip the negate bits of the second copy */
+
+ memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); /* drops every modifier of the original operand */
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = temp;
+ inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; /* the original swizzle was already copied into the MAX operands */
+ }
+ }
+ return 1;
+}
/**
* Vertex engine cannot read two inputs or two constants at the same time.
@@ -619,15 +658,33 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
debug_program_log(compiler, "after emulate branches");
- {
+ if (compiler->Base.is_r500) {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_vertex_alu, 0 },
{ &r300_transform_trig_scale_vertex, 0 }
};
radeonLocalTransform(&compiler->Base, 2, transformations);
- }
- debug_program_log(compiler, "after native rewrite");
+ debug_program_log(compiler, "after native rewrite");
+ } else {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_vertex_alu, 0 },
+ { &radeonTransformTrigSimple, 0 }
+ };
+ radeonLocalTransform(&compiler->Base, 2, transformations);
+
+ debug_program_log(compiler, "after native rewrite");
+
+ /* Note: This pass has to be done separately from ALU rewrite,
+ * because it needs to check every instruction.
+ */
+ struct radeon_program_transformation transformations2[] = {
+ { &transform_nonnative_modifiers, 0 },
+ };
+ radeonLocalTransform(&compiler->Base, 1, transformations2);
+
+ debug_program_log(compiler, "after emulate modifiers");
+ }
{
/* Note: This pass has to be done seperately from ALU rewrite,
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index 282c0e18bc..5ae9f49840 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -523,8 +523,7 @@ static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *in
r300ConvertAttrib(ctx, count, input[i], &vbuf->attribs[index]);
} else {
if (input[i]->BufferObj->Name) {
- if (stride % 4 != 0) {
- assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
+ if (stride % 4 != 0 || (intptr_t)input[i]->Ptr % 4 != 0) {
r300AlignDataToDword(ctx, input[i], count, &vbuf->attribs[index]);
vbuf->attribs[index].is_named_bo = GL_FALSE;
} else {
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
index afe2d55dc7..8013553f67 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -46,7 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r600_context.h"
#include "radeon_reg.h"
#include "r600_cmdbuf.h"
-#include "r600_emit.h"
#include "radeon_bocs_wrapper.h"
#include "radeon_reg.h"
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
index dff0009699..78fccd0b60 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
@@ -37,7 +37,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define __R600_CMDBUF_H__
#include "r600_context.h"
-#include "r600_emit.h"
#define RADEON_CP_PACKET3_NOP 0xC0001000
#define RADEON_CP_PACKET3_NEXT_CHAR 0xC0001900
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index f4aed4e87f..84d9d42312 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -59,7 +59,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_buffer_objects.h"
#include "radeon_span.h"
#include "r600_cmdbuf.h"
-#include "r600_emit.h"
#include "radeon_bocs_wrapper.h"
#include "radeon_queryobj.h"
#include "r600_blit.h"
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index aab1a7947a..bf17a977ce 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -38,6 +38,7 @@
#include "r600_context.h"
#include "r600_cmdbuf.h"
+#include "r600_emit.h"
#include "r700_fragprog.h"
diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.h b/src/mesa/drivers/dri/r600/r700_oglprog.h
index fe2e9d1974..4d42133867 100644
--- a/src/mesa/drivers/dri/r600/r700_oglprog.h
+++ b/src/mesa/drivers/dri/r600/r700_oglprog.h
@@ -27,7 +27,7 @@
#ifndef _R700_OGLPROG_H_
#define _R700_OGLPROG_H_
-#include "r600_context.h"
+#include "main/dd.h"
extern void r700InitShaderFuncs(struct dd_function_table *functions);
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 32f538f1c3..137f3007ce 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -42,6 +42,7 @@
#include "radeon_debug.h"
#include "r600_context.h"
#include "r600_cmdbuf.h"
+#include "r600_emit.h"
#include "program/programopt.h"
#include "r700_debug.h"
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
index 6cd1d87de2..c877e6c176 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -602,17 +602,17 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t
__FUNCTION__, texObj ,t->minLod, t->maxLod);
radeon_mipmap_tree *dst_miptree;
- dst_miptree = get_biggest_matching_miptree(t, t->minLod, t->maxLod);
+ dst_miptree = get_biggest_matching_miptree(t, t->base.BaseLevel, t->base.MaxLevel);
+ radeon_miptree_unreference(&t->mt);
if (!dst_miptree) {
- radeon_miptree_unreference(&t->mt);
radeon_try_alloc_miptree(rmesa, t);
- dst_miptree = t->mt;
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s: No matching miptree found, allocated new one %p\n",
__FUNCTION__, t->mt);
} else {
+ radeon_miptree_reference(dst_miptree, &t->mt);
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s: Using miptree %p\n", __FUNCTION__, t->mt);
}
@@ -629,7 +629,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t
"Checking image level %d, face %d, mt %p ... ",
level, face, img->mt);
- if (img->mt != dst_miptree) {
+ if (img->mt != t->mt) {
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"MIGRATING\n");
@@ -637,7 +637,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t
if (src_bo && radeon_bo_is_referenced_by_cs(src_bo, rmesa->cmdbuf.cs)) {
radeon_firevertices(rmesa);
}
- migrate_image_to_miptree(dst_miptree, img, face, level);
+ migrate_image_to_miptree(t->mt, img, face, level);
} else
radeon_print(RADEON_TEXTURE, RADEON_TRACE, "OK\n");
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c b/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c
index 3ababb1ef5..f878b48e5f 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c
@@ -31,6 +31,7 @@
#include "radeon_common_context.h"
#include "radeon_texture.h"
+#include "radeon_mipmap_tree.h"
#include "main/texgetimage.h"
@@ -51,7 +52,15 @@ radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
__func__, ctx, texObj, image, compressed);
if (image->mt) {
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
/* Map the texture image read-only */
+ if (radeon_bo_is_referenced_by_cs(image->mt->bo, rmesa->cmdbuf.cs)) {
+ radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+ "%s: called for texture that is queued for GPU processing\n",
+ __func__);
+ radeon_firevertices(rmesa);
+ }
+
radeon_teximage_map(image, GL_FALSE);
} else {
/* Image hasn't been uploaded to a miptree yet */
diff --git a/src/mesa/main/enums.c b/src/mesa/main/enums.c
index 456d20603d..bc18e1b113 100644
--- a/src/mesa/main/enums.c
+++ b/src/mesa/main/enums.c
@@ -5648,7 +5648,8 @@ const char *_mesa_lookup_enum_by_nr( int nr )
}
else {
/* this is not re-entrant safe, no big deal here */
- sprintf(token_tmp, "0x%x", nr);
+ _mesa_snprintf(token_tmp, sizeof(token_tmp) - 1, "0x%x", nr);
+ token_tmp[sizeof(token_tmp) - 1] = '\0';
return token_tmp;
}
}