diff options
Diffstat (limited to 'src')
70 files changed, 1767 insertions, 1221 deletions
diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h index 0238308d20..a084310d4f 100644 --- a/src/gallium/auxiliary/os/os_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -45,7 +45,6 @@ #include <pthread.h> /* POSIX threads headers */ #include <stdio.h> /* for perror() */ -#define PIPE_THREAD_HAVE_CONDVAR /* pipe_thread */ @@ -168,19 +167,59 @@ typedef CRITICAL_SECTION pipe_mutex; #define pipe_mutex_unlock(mutex) \ LeaveCriticalSection(&mutex) +/* TODO: Need a macro to declare "I don't care about WinXP compatibilty" */ +#if 0 && defined (_WIN32_WINNT) && (_WIN32_WINNT >= 0x0600) +/* CONDITION_VARIABLE is only available on newer versions of Windows + * (Server 2008/Vista or later). + * http://msdn.microsoft.com/en-us/library/ms682052(VS.85).aspx + * + * pipe_condvar + */ +typedef CONDITION_VARIABLE pipe_condvar; + +#define pipe_static_condvar(cond) \ + /*static*/ pipe_condvar cond = CONDITION_VARIABLE_INIT + +#define pipe_condvar_init(cond) \ + InitializeConditionVariable(&(cond)) + +#define pipe_condvar_destroy(cond) \ + (void) cond /* nothing to do */ + +#define pipe_condvar_wait(cond, mutex) \ + SleepConditionVariableCS(&(cond), &(mutex), INFINITE) + +#define pipe_condvar_signal(cond) \ + WakeConditionVariable(&(cond)) + +#define pipe_condvar_broadcast(cond) \ + WakeAllConditionVariable(&(cond)) + +#else /* need compatibility with pre-Vista Win32 */ /* pipe_condvar (XXX FIX THIS) + * See http://www.cs.wustl.edu/~schmidt/win32-cv-1.html + * for potential pitfalls in implementation. */ -typedef unsigned pipe_condvar; +typedef DWORD pipe_condvar; + +#define pipe_static_condvar(cond) \ + /*static*/ pipe_condvar cond = 1 #define pipe_condvar_init(cond) \ - (void) cond + (void) (cond = 1) #define pipe_condvar_destroy(cond) \ (void) cond +/* Poor man's pthread_cond_wait(): + Just release the mutex and sleep for one millisecond. + The caller's while() loop does all the work. */ #define pipe_condvar_wait(cond, mutex) \ - (void) cond; (void) mutex + do { pipe_mutex_unlock(mutex); \ + Sleep(cond); \ + pipe_mutex_lock(mutex); \ + } while (0) #define pipe_condvar_signal(cond) \ (void) cond @@ -188,9 +227,12 @@ typedef unsigned pipe_condvar; #define pipe_condvar_broadcast(cond) \ (void) cond +#endif /* pre-Vista win32 */ #else +#include "os/os_time.h" + /** Dummy definitions */ typedef unsigned pipe_thread; @@ -214,7 +256,6 @@ static INLINE int pipe_thread_destroy( pipe_thread thread ) } typedef unsigned pipe_mutex; -typedef unsigned pipe_condvar; #define pipe_static_mutex(mutex) \ static pipe_mutex mutex = 0 @@ -231,17 +272,25 @@ typedef unsigned pipe_condvar; #define pipe_mutex_unlock(mutex) \ (void) mutex +typedef int64_t pipe_condvar; + #define pipe_static_condvar(condvar) \ - static unsigned condvar = 0 + static pipe_condvar condvar = 1000 #define pipe_condvar_init(condvar) \ - (void) condvar + (void) (condvar = 1000) #define pipe_condvar_destroy(condvar) \ (void) condvar +/* Poor man's pthread_cond_wait(): + Just release the mutex and sleep for one millisecond. + The caller's while() loop does all the work. */ #define pipe_condvar_wait(condvar, mutex) \ - (void) condvar + do { pipe_mutex_unlock(mutex); \ + os_time_sleep(condvar); \ + pipe_mutex_lock(mutex); \ + } while (0) #define pipe_condvar_signal(condvar) \ (void) condvar @@ -277,27 +326,7 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier) } -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) - -/* XXX FIX THIS */ -typedef unsigned pipe_barrier; - -static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) -{ - /* XXX we could implement barriers with a mutex and condition var */ -} - -static INLINE void pipe_barrier_destroy(pipe_barrier *barrier) -{ -} - -static INLINE void pipe_barrier_wait(pipe_barrier *barrier) -{ - assert(0); -} - - -#else +#else /* If the OS doesn't have its own, implement barriers using a mutex and a condvar */ typedef struct { unsigned count; diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 85c5f36391..0d94aaae95 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -52,9 +52,8 @@ struct blitter_context_priv { - struct blitter_context blitter; + struct blitter_context base; - struct pipe_context *pipe; /**< pipe context */ struct pipe_resource *vbuf; /**< quad */ float vertices[4][2][4]; /**< {pos, color} or {pos, texcoord} */ @@ -102,8 +101,20 @@ struct blitter_context_priv /* Clip state. */ struct pipe_clip_state clip; + + /* Destination surface dimensions. */ + unsigned dst_width; + unsigned dst_height; }; +static void blitter_draw_rectangle(struct blitter_context *blitter, + unsigned x, unsigned y, + unsigned width, unsigned height, + float depth, + enum blitter_attrib_type type, + const float attrib[4]); + + struct blitter_context *util_blitter_create(struct pipe_context *pipe) { struct blitter_context_priv *ctx; @@ -118,19 +129,20 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) if (!ctx) return NULL; - ctx->pipe = pipe; + ctx->base.pipe = pipe; + ctx->base.draw_rectangle = blitter_draw_rectangle; /* init state objects for them to be considered invalid */ - ctx->blitter.saved_blend_state = INVALID_PTR; - ctx->blitter.saved_dsa_state = INVALID_PTR; - ctx->blitter.saved_rs_state = INVALID_PTR; - ctx->blitter.saved_fs = INVALID_PTR; - ctx->blitter.saved_vs = INVALID_PTR; - ctx->blitter.saved_velem_state = INVALID_PTR; - ctx->blitter.saved_fb_state.nr_cbufs = ~0; - ctx->blitter.saved_num_sampler_views = ~0; - ctx->blitter.saved_num_sampler_states = ~0; - ctx->blitter.saved_num_vertex_buffers = ~0; + ctx->base.saved_blend_state = INVALID_PTR; + ctx->base.saved_dsa_state = INVALID_PTR; + ctx->base.saved_rs_state = INVALID_PTR; + ctx->base.saved_fs = INVALID_PTR; + ctx->base.saved_vs = INVALID_PTR; + ctx->base.saved_velem_state = INVALID_PTR; + ctx->base.saved_fb_state.nr_cbufs = ~0; + ctx->base.saved_num_sampler_views = ~0; + ctx->base.saved_num_sampler_states = ~0; + ctx->base.saved_num_vertex_buffers = ~0; /* blend state objects */ memset(&blend, 0, sizeof(blend)); @@ -217,17 +229,17 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->vertices[i][0][3] = 1; /*v.w*/ /* create the vertex buffer */ - ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, + ctx->vbuf = pipe_buffer_create(ctx->base.pipe->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(ctx->vertices)); - return &ctx->blitter; + return &ctx->base; } void util_blitter_destroy(struct blitter_context *blitter) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = blitter->pipe; int i; pipe->delete_blend_state(pipe, ctx->blend_write_color); @@ -265,118 +277,117 @@ void util_blitter_destroy(struct blitter_context *blitter) static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) { /* make sure these CSOs have been saved */ - assert(ctx->blitter.saved_blend_state != INVALID_PTR && - ctx->blitter.saved_dsa_state != INVALID_PTR && - ctx->blitter.saved_rs_state != INVALID_PTR && - ctx->blitter.saved_fs != INVALID_PTR && - ctx->blitter.saved_vs != INVALID_PTR && - ctx->blitter.saved_velem_state != INVALID_PTR); + assert(ctx->base.saved_blend_state != INVALID_PTR && + ctx->base.saved_dsa_state != INVALID_PTR && + ctx->base.saved_rs_state != INVALID_PTR && + ctx->base.saved_fs != INVALID_PTR && + ctx->base.saved_vs != INVALID_PTR && + ctx->base.saved_velem_state != INVALID_PTR); } static void blitter_restore_CSOs(struct blitter_context_priv *ctx) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; unsigned i; /* restore the state objects which are always required to be saved */ - pipe->bind_blend_state(pipe, ctx->blitter.saved_blend_state); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->blitter.saved_dsa_state); - pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state); - pipe->bind_fs_state(pipe, ctx->blitter.saved_fs); - pipe->bind_vs_state(pipe, ctx->blitter.saved_vs); - pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state); + pipe->bind_blend_state(pipe, ctx->base.saved_blend_state); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state); + pipe->bind_rasterizer_state(pipe, ctx->base.saved_rs_state); + pipe->bind_fs_state(pipe, ctx->base.saved_fs); + pipe->bind_vs_state(pipe, ctx->base.saved_vs); + pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state); - ctx->blitter.saved_blend_state = INVALID_PTR; - ctx->blitter.saved_dsa_state = INVALID_PTR; - ctx->blitter.saved_rs_state = INVALID_PTR; - ctx->blitter.saved_fs = INVALID_PTR; - ctx->blitter.saved_vs = INVALID_PTR; - ctx->blitter.saved_velem_state = INVALID_PTR; + ctx->base.saved_blend_state = INVALID_PTR; + ctx->base.saved_dsa_state = INVALID_PTR; + ctx->base.saved_rs_state = INVALID_PTR; + ctx->base.saved_fs = INVALID_PTR; + ctx->base.saved_vs = INVALID_PTR; + ctx->base.saved_velem_state = INVALID_PTR; - pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref); + pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref); - pipe->set_viewport_state(pipe, &ctx->blitter.saved_viewport); - pipe->set_clip_state(pipe, &ctx->blitter.saved_clip); + pipe->set_viewport_state(pipe, &ctx->base.saved_viewport); + pipe->set_clip_state(pipe, &ctx->base.saved_clip); /* restore the state objects which are required to be saved before copy/fill */ - if (ctx->blitter.saved_fb_state.nr_cbufs != ~0) { - pipe->set_framebuffer_state(pipe, &ctx->blitter.saved_fb_state); - util_assign_framebuffer_state(&ctx->blitter.saved_fb_state, NULL); - ctx->blitter.saved_fb_state.nr_cbufs = ~0; + if (ctx->base.saved_fb_state.nr_cbufs != ~0) { + pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state); + util_assign_framebuffer_state(&ctx->base.saved_fb_state, NULL); + ctx->base.saved_fb_state.nr_cbufs = ~0; } - if (ctx->blitter.saved_num_sampler_states != ~0) { + if (ctx->base.saved_num_sampler_states != ~0) { pipe->bind_fragment_sampler_states(pipe, - ctx->blitter.saved_num_sampler_states, - ctx->blitter.saved_sampler_states); - ctx->blitter.saved_num_sampler_states = ~0; + ctx->base.saved_num_sampler_states, + ctx->base.saved_sampler_states); + ctx->base.saved_num_sampler_states = ~0; } - if (ctx->blitter.saved_num_sampler_views != ~0) { + if (ctx->base.saved_num_sampler_views != ~0) { pipe->set_fragment_sampler_views(pipe, - ctx->blitter.saved_num_sampler_views, - ctx->blitter.saved_sampler_views); + ctx->base.saved_num_sampler_views, + ctx->base.saved_sampler_views); - for (i = 0; i < ctx->blitter.saved_num_sampler_views; i++) - pipe_sampler_view_reference(&ctx->blitter.saved_sampler_views[i], + for (i = 0; i < ctx->base.saved_num_sampler_views; i++) + pipe_sampler_view_reference(&ctx->base.saved_sampler_views[i], NULL); - ctx->blitter.saved_num_sampler_views = ~0; + ctx->base.saved_num_sampler_views = ~0; } - if (ctx->blitter.saved_num_vertex_buffers != ~0) { + if (ctx->base.saved_num_vertex_buffers != ~0) { pipe->set_vertex_buffers(pipe, - ctx->blitter.saved_num_vertex_buffers, - ctx->blitter.saved_vertex_buffers); + ctx->base.saved_num_vertex_buffers, + ctx->base.saved_vertex_buffers); - for (i = 0; i < ctx->blitter.saved_num_vertex_buffers; i++) { - if (ctx->blitter.saved_vertex_buffers[i].buffer) { - pipe_resource_reference(&ctx->blitter.saved_vertex_buffers[i].buffer, + for (i = 0; i < ctx->base.saved_num_vertex_buffers; i++) { + if (ctx->base.saved_vertex_buffers[i].buffer) { + pipe_resource_reference(&ctx->base.saved_vertex_buffers[i].buffer, NULL); } } - ctx->blitter.saved_num_vertex_buffers = ~0; + ctx->base.saved_num_vertex_buffers = ~0; } } static void blitter_set_rectangle(struct blitter_context_priv *ctx, unsigned x1, unsigned y1, unsigned x2, unsigned y2, - unsigned width, unsigned height, float depth) { int i; /* set vertex positions */ - ctx->vertices[0][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v0.x*/ - ctx->vertices[0][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v0.y*/ + ctx->vertices[0][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v0.x*/ + ctx->vertices[0][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v0.y*/ - ctx->vertices[1][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v1.x*/ - ctx->vertices[1][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v1.y*/ + ctx->vertices[1][0][0] = (float)x2 / ctx->dst_width * 2.0f - 1.0f; /*v1.x*/ + ctx->vertices[1][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v1.y*/ - ctx->vertices[2][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v2.x*/ - ctx->vertices[2][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v2.y*/ + ctx->vertices[2][0][0] = (float)x2 / ctx->dst_width * 2.0f - 1.0f; /*v2.x*/ + ctx->vertices[2][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v2.y*/ - ctx->vertices[3][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v3.x*/ - ctx->vertices[3][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v3.y*/ + ctx->vertices[3][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v3.x*/ + ctx->vertices[3][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v3.y*/ for (i = 0; i < 4; i++) ctx->vertices[i][0][2] = depth; /*z*/ /* viewport */ - ctx->viewport.scale[0] = 0.5f * width; - ctx->viewport.scale[1] = 0.5f * height; + ctx->viewport.scale[0] = 0.5f * ctx->dst_width; + ctx->viewport.scale[1] = 0.5f * ctx->dst_height; ctx->viewport.scale[2] = 1.0f; ctx->viewport.scale[3] = 1.0f; - ctx->viewport.translate[0] = 0.5f * width; - ctx->viewport.translate[1] = 0.5f * height; + ctx->viewport.translate[0] = 0.5f * ctx->dst_width; + ctx->viewport.translate[1] = 0.5f * ctx->dst_height; ctx->viewport.translate[2] = 0.0f; ctx->viewport.translate[3] = 0.0f; - ctx->pipe->set_viewport_state(ctx->pipe, &ctx->viewport); + ctx->base.pipe->set_viewport_state(ctx->base.pipe, &ctx->viewport); /* clip */ - ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip); + ctx->base.pipe->set_clip_state(ctx->base.pipe, &ctx->clip); } static void blitter_set_clear_color(struct blitter_context_priv *ctx, @@ -401,29 +412,45 @@ static void blitter_set_clear_color(struct blitter_context_priv *ctx, } } +static void get_normalized_texcoords(struct pipe_resource *src, + struct pipe_subresource subsrc, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2, + float out[4]) +{ + out[0] = x1 / (float)u_minify(src->width0, subsrc.level); + out[1] = y1 / (float)u_minify(src->height0, subsrc.level); + out[2] = x2 / (float)u_minify(src->width0, subsrc.level); + out[3] = y2 / (float)u_minify(src->height0, subsrc.level); +} + +static void set_texcoords_in_vertices(const float coord[4], + float *out, unsigned stride) +{ + out[0] = coord[0]; /*t0.s*/ + out[1] = coord[1]; /*t0.t*/ + out += stride; + out[0] = coord[2]; /*t1.s*/ + out[1] = coord[1]; /*t1.t*/ + out += stride; + out[0] = coord[2]; /*t2.s*/ + out[1] = coord[3]; /*t2.t*/ + out += stride; + out[0] = coord[0]; /*t3.s*/ + out[1] = coord[3]; /*t3.t*/ +} + static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx, struct pipe_resource *src, struct pipe_subresource subsrc, unsigned x1, unsigned y1, unsigned x2, unsigned y2) { - int i; - float s1 = x1 / (float)u_minify(src->width0, subsrc.level); - float t1 = y1 / (float)u_minify(src->height0, subsrc.level); - float s2 = x2 / (float)u_minify(src->width0, subsrc.level); - float t2 = y2 / (float)u_minify(src->height0, subsrc.level); - - ctx->vertices[0][1][0] = s1; /*t0.s*/ - ctx->vertices[0][1][1] = t1; /*t0.t*/ - - ctx->vertices[1][1][0] = s2; /*t1.s*/ - ctx->vertices[1][1][1] = t1; /*t1.t*/ - - ctx->vertices[2][1][0] = s2; /*t2.s*/ - ctx->vertices[2][1][1] = t2; /*t2.t*/ + unsigned i; + float coord[4]; - ctx->vertices[3][1][0] = s1; /*t3.s*/ - ctx->vertices[3][1][1] = t2; /*t3.t*/ + get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord); + set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8); for (i = 0; i < 4; i++) { ctx->vertices[i][1][2] = 0; /*r*/ @@ -454,20 +481,11 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, unsigned x2, unsigned y2) { int i; - float s1 = x1 / (float)u_minify(src->width0, subsrc.level); - float t1 = y1 / (float)u_minify(src->height0, subsrc.level); - float s2 = x2 / (float)u_minify(src->width0, subsrc.level); - float t2 = y2 / (float)u_minify(src->height0, subsrc.level); + float coord[4]; float st[4][2]; - st[0][0] = s1; - st[0][1] = t1; - st[1][0] = s2; - st[1][1] = t1; - st[2][0] = s2; - st[2][1] = t2; - st[3][0] = s1; - st[3][1] = t2; + get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord); + set_texcoords_in_vertices(coord, &st[0][0], 2); util_map_texcoords2d_onto_cubemap(subsrc.face, /* pointer, stride in floats */ @@ -478,9 +496,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, ctx->vertices[i][1][3] = 1; /*q*/ } +static void blitter_set_dst_dimensions(struct blitter_context_priv *ctx, + unsigned width, unsigned height) +{ + ctx->dst_width = width; + ctx->dst_height = height; +} + static void blitter_draw_quad(struct blitter_context_priv *ctx) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; /* write vertices and draw them */ pipe_buffer_write(pipe, ctx->vbuf, @@ -495,7 +520,7 @@ static INLINE void **blitter_get_sampler_state(struct blitter_context_priv *ctx, int miplevel) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state; assert(miplevel < PIPE_MAX_TEXTURE_LEVELS); @@ -518,7 +543,7 @@ void **blitter_get_sampler_state(struct blitter_context_priv *ctx, static INLINE void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); @@ -531,7 +556,7 @@ void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs) /** Convert PIPE_TEXTURE_x to TGSI_TEXTURE_x */ static unsigned -pipe_tex_to_tgsi_tex(unsigned pipe_tex_target) +pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target) { switch (pipe_tex_target) { case PIPE_TEXTURE_1D: @@ -553,7 +578,7 @@ static INLINE void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, unsigned tex_target) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; assert(tex_target < PIPE_MAX_TEXTURE_TYPES); @@ -572,7 +597,7 @@ static INLINE void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx, unsigned tex_target) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; assert(tex_target < PIPE_MAX_TEXTURE_TYPES); @@ -588,6 +613,31 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx, return ctx->fs_texfetch_depth[tex_target]; } +static void blitter_draw_rectangle(struct blitter_context *blitter, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2, + float depth, + enum blitter_attrib_type type, + const float attrib[4]) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + + switch (type) { + case UTIL_BLITTER_ATTRIB_COLOR: + blitter_set_clear_color(ctx, attrib); + break; + + case UTIL_BLITTER_ATTRIB_TEXCOORD: + set_texcoords_in_vertices(attrib, &ctx->vertices[0][1][0], 8); + break; + + default:; + } + + blitter_set_rectangle(ctx, x1, y1, x2, y2, depth); + blitter_draw_quad(ctx); +} + void util_blitter_clear(struct blitter_context *blitter, unsigned width, unsigned height, unsigned num_cbufs, @@ -596,7 +646,7 @@ void util_blitter_clear(struct blitter_context *blitter, double depth, unsigned stencil) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_stencil_ref sr = { { 0 } }; assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); @@ -630,9 +680,9 @@ void util_blitter_clear(struct blitter_context *blitter, pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs)); pipe->bind_vs_state(pipe, ctx->vs_col); - blitter_set_clear_color(ctx, rgba); - blitter_set_rectangle(ctx, 0, 0, width, height, width, height, depth); - blitter_draw_quad(ctx); + blitter_set_dst_dimensions(ctx, width, height); + blitter->draw_rectangle(blitter, 0, 0, width, height, depth, + UTIL_BLITTER_ATTRIB_COLOR, rgba); blitter_restore_CSOs(ctx); } @@ -654,7 +704,7 @@ void util_blitter_copy_region(struct blitter_context *blitter, boolean ignore_stencil) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_screen *screen = pipe->screen; struct pipe_surface *dstsurf; struct pipe_framebuffer_state fb_state; @@ -745,29 +795,45 @@ void util_blitter_copy_region(struct blitter_context *blitter, pipe->set_fragment_sampler_views(pipe, 1, &view); pipe->set_framebuffer_state(pipe, &fb_state); - /* Set texture coordinates. */ + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + switch (src->target) { + /* Draw the quad with the draw_rectangle callback. */ case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: - blitter_set_texcoords_2d(ctx, src, subsrc, - srcx, srcy, srcx+width, srcy+height); + { + /* Set texture coordinates. */ + float coord[4]; + get_normalized_texcoords(src, subsrc, srcx, srcy, + srcx+width, srcy+height, coord); + + /* Draw. */ + blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0, + UTIL_BLITTER_ATTRIB_TEXCOORD, coord); + } break; + + /* Draw the quad with the generic codepath. */ case PIPE_TEXTURE_3D: - blitter_set_texcoords_3d(ctx, src, subsrc, srcz, - srcx, srcy, srcx+width, srcy+height); - break; case PIPE_TEXTURE_CUBE: - blitter_set_texcoords_cube(ctx, src, subsrc, - srcx, srcy, srcx+width, srcy+height); + /* Set texture coordinates. */ + if (src->target == PIPE_TEXTURE_3D) + blitter_set_texcoords_3d(ctx, src, subsrc, srcz, + srcx, srcy, srcx+width, srcy+height); + else + blitter_set_texcoords_cube(ctx, src, subsrc, + srcx, srcy, srcx+width, srcy+height); + + /* Draw. */ + blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); + blitter_draw_quad(ctx); break; + default: assert(0); return; } - blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, - dstsurf->width, dstsurf->height, 0); - blitter_draw_quad(ctx); blitter_restore_CSOs(ctx); pipe_surface_reference(&dstsurf, NULL); @@ -782,7 +848,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, unsigned width, unsigned height) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_framebuffer_state fb_state; assert(dstsurf->texture); @@ -809,9 +875,9 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, fb_state.zsbuf = 0; pipe->set_framebuffer_state(pipe, &fb_state); - blitter_set_clear_color(ctx, rgba); - blitter_set_rectangle(ctx, 0, 0, width, height, dstsurf->width, dstsurf->height, 0); - blitter_draw_quad(ctx); + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0, + UTIL_BLITTER_ATTRIB_COLOR, rgba); blitter_restore_CSOs(ctx); } @@ -825,7 +891,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned width, unsigned height) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; struct pipe_framebuffer_state fb_state; struct pipe_stencil_ref sr = { { 0 } }; @@ -869,7 +935,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, fb_state.zsbuf = dstsurf; pipe->set_framebuffer_state(pipe, &fb_state); - blitter_set_rectangle(ctx, 0, 0, width, height, dstsurf->width, dstsurf->height, depth); - blitter_draw_quad(ctx); + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, depth, + UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index f421ad5b93..ba3f92eca8 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -39,9 +39,48 @@ extern "C" { struct pipe_context; +enum blitter_attrib_type { + UTIL_BLITTER_ATTRIB_NONE, + UTIL_BLITTER_ATTRIB_COLOR, + UTIL_BLITTER_ATTRIB_TEXCOORD +}; + struct blitter_context { + /** + * Draw a rectangle. + * + * \param x1 An X coordinate of the top-left corner. + * \param y1 A Y coordinate of the top-left corner. + * \param x2 An X coordinate of the bottom-right corner. + * \param y2 A Y coordinate of the bottom-right corner. + * \param depth A depth which the rectangle is rendered at. + * + * \param type Semantics of the attributes "attrib". + * If type is UTIL_BLITTER_ATTRIB_NONE, ignore them. + * If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes + * make up a constant RGBA color, and should go to the COLOR0 + * varying slot of a fragment shader. + * If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {a1, a2} and + * {a3, a4} specify top-left and bottom-right texture + * coordinates of the rectangle, respectively, and should go + * to the GENERIC0 varying slot of a fragment shader. + * + * \param attrib See type. + * + * \note A driver may optionally override this callback to implement + * a specialized hardware path for drawing a rectangle, e.g. using + * a rectangular point sprite. + */ + void (*draw_rectangle)(struct blitter_context *blitter, + unsigned x1, unsigned y1, unsigned x2, unsigned y2, + float depth, + enum blitter_attrib_type type, + const float attrib[4]); + /* Private members, really. */ + struct pipe_context *pipe; /**< pipe context */ + void *saved_blend_state; /**< blend state */ void *saved_dsa_state; /**< depth stencil alpha state */ void *saved_velem_state; /**< vertex elements state */ @@ -73,6 +112,15 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe); */ void util_blitter_destroy(struct blitter_context *blitter); +/** + * Return the pipe context associated with a blitter context. + */ +static INLINE +struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter) +{ + return blitter->pipe; +} + /* * These CSOs must be saved before any of the following functions is called: * - blend state diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore index 4e0d4c3fc0..6ebd2b8a63 100644 --- a/src/gallium/drivers/llvmpipe/.gitignore +++ b/src/gallium/drivers/llvmpipe/.gitignore @@ -3,4 +3,5 @@ lp_test_blend lp_test_conv lp_test_format lp_test_printf +lp_test_round lp_test_sincos diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c index 75d8d2b825..f9805e5d68 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -28,7 +28,6 @@ #include "pipe/p_screen.h" #include "util/u_memory.h" -#include "util/u_inlines.h" #include "lp_debug.h" #include "lp_fence.h" @@ -59,7 +58,7 @@ lp_fence_create(unsigned rank) /** Destroy a fence. Called when refcount hits zero. */ -static void +void lp_fence_destroy(struct lp_fence *fence) { pipe_mutex_destroy(fence->mutex); @@ -77,12 +76,10 @@ llvmpipe_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - struct lp_fence *old = (struct lp_fence *) *ptr; + struct lp_fence **old = (struct lp_fence **) ptr; struct lp_fence *f = (struct lp_fence *) fence; - if (pipe_reference(&old->reference, &f->reference)) { - lp_fence_destroy(old); - } + lp_fence_reference(old, f); } diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h index d9270f5784..13358fb99f 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.h +++ b/src/gallium/drivers/llvmpipe/lp_fence.h @@ -32,6 +32,7 @@ #include "os/os_thread.h" #include "pipe/p_state.h" +#include "util/u_inlines.h" struct pipe_screen; @@ -61,4 +62,21 @@ void llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen); +void +lp_fence_destroy(struct lp_fence *fence); + +static INLINE void +lp_fence_reference(struct lp_fence **ptr, + struct lp_fence *f) +{ + struct lp_fence *old = *ptr; + + if (pipe_reference(&old->reference, &f->reference)) { + lp_fence_destroy(old); + } + + *ptr = f; +} + + #endif /* LP_FENCE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 0cd288bb73..845292f4ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -40,27 +40,19 @@ /** * \param flags bitmask of PIPE_FLUSH_x flags - * \param fence if non-null, returns pointer to a fench which can be waited on + * \param fence if non-null, returns pointer to a fence which can be waited on */ void llvmpipe_flush( struct pipe_context *pipe, - unsigned flags, + unsigned flags, struct pipe_fence_handle **fence ) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); draw_flush(llvmpipe->draw); - if (fence) { - /* if we're going to flush the setup/rasterization modules, emit - * a fence. - * XXX this (and the code below) may need fine tuning... - */ - *fence = lp_setup_fence( llvmpipe->setup ); - } - /* ask the setup module to flush */ - lp_setup_flush(llvmpipe->setup, flags); + lp_setup_flush(llvmpipe->setup, flags, fence); /* Enable to dump BMPs of the color/depth buffers each frame */ if (0) { diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 23aa34ddec..8e6dfb293d 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -103,10 +103,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatType(); elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = LLVMInt32Type(); elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32Type(); - elem_types[LP_JIT_CTX_SCISSOR_XMIN] = LLVMFloatType(); - elem_types[LP_JIT_CTX_SCISSOR_YMIN] = LLVMFloatType(); - elem_types[LP_JIT_CTX_SCISSOR_XMAX] = LLVMFloatType(); - elem_types[LP_JIT_CTX_SCISSOR_YMAX] = LLVMFloatType(); elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8Type(), 0); elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); @@ -125,18 +121,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back, screen->target, context_type, LP_JIT_CTX_STENCIL_REF_BACK); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmin, - screen->target, context_type, - LP_JIT_CTX_SCISSOR_XMIN); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymin, - screen->target, context_type, - LP_JIT_CTX_SCISSOR_YMIN); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmax, - screen->target, context_type, - LP_JIT_CTX_SCISSOR_XMAX); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymax, - screen->target, context_type, - LP_JIT_CTX_SCISSOR_YMAX); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, screen->target, context_type, LP_JIT_CTX_BLEND_COLOR); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 8d06e65725..c94189413a 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -89,9 +89,6 @@ struct lp_jit_context uint32_t stencil_ref_front, stencil_ref_back; - /** floats, not ints */ - float scissor_xmin, scissor_ymin, scissor_xmax, scissor_ymax; - /* FIXME: store (also?) in floats */ uint8_t *blend_color; @@ -108,10 +105,6 @@ enum { LP_JIT_CTX_ALPHA_REF, LP_JIT_CTX_STENCIL_REF_FRONT, LP_JIT_CTX_STENCIL_REF_BACK, - LP_JIT_CTX_SCISSOR_XMIN, - LP_JIT_CTX_SCISSOR_YMIN, - LP_JIT_CTX_SCISSOR_XMAX, - LP_JIT_CTX_SCISSOR_YMAX, LP_JIT_CTX_BLEND_COLOR, LP_JIT_CTX_TEXTURES, LP_JIT_CTX_COUNT @@ -130,18 +123,6 @@ enum { #define lp_jit_context_stencil_ref_back_value(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back") -#define lp_jit_context_scissor_xmin_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMIN, "scissor_xmin") - -#define lp_jit_context_scissor_ymin_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMIN, "scissor_ymin") - -#define lp_jit_context_scissor_xmax_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMAX, "scissor_xmax") - -#define lp_jit_context_scissor_ymax_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMAX, "scissor_ymax") - #define lp_jit_context_blend_color(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color") @@ -160,12 +141,7 @@ typedef void const void *dady, uint8_t **color, void *depth, - const int32_t c1, - const int32_t c2, - const int32_t c3, - const int32_t *step1, - const int32_t *step2, - const int32_t *step3, + uint32_t mask, uint32_t *counter); diff --git a/src/gallium/drivers/llvmpipe/lp_memory.c b/src/gallium/drivers/llvmpipe/lp_memory.c index f2e41f3a71..61d16668eb 100644 --- a/src/gallium/drivers/llvmpipe/lp_memory.c +++ b/src/gallium/drivers/llvmpipe/lp_memory.c @@ -45,6 +45,12 @@ lp_get_dummy_tile(void) return lp_dummy_tile; } +uint8_t * +lp_get_dummy_tile_silent(void) +{ + return lp_dummy_tile; +} + boolean lp_is_dummy_tile(void *tile) diff --git a/src/gallium/drivers/llvmpipe/lp_memory.h b/src/gallium/drivers/llvmpipe/lp_memory.h index aca7970b46..1d0e5ebdb6 100644 --- a/src/gallium/drivers/llvmpipe/lp_memory.h +++ b/src/gallium/drivers/llvmpipe/lp_memory.h @@ -35,6 +35,8 @@ extern uint8_t * lp_get_dummy_tile(void); +uint8_t * +lp_get_dummy_tile_silent(void); extern boolean lp_is_dummy_tile(void *tile); diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c index a316597675..083e7e30a5 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.c +++ b/src/gallium/drivers/llvmpipe/lp_perf.c @@ -46,10 +46,10 @@ lp_print_counters(void) { if (LP_DEBUG & DEBUG_COUNTERS) { unsigned total_64, total_16, total_4; - float p1, p2, p3; + float p1, p2, p3, p4; - debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris); - debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris); + debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris); + debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris); total_64 = (lp_count.nr_empty_64 + lp_count.nr_fully_covered_64 + @@ -58,10 +58,13 @@ lp_print_counters(void) p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64; p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64; p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64; + p4 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64; - debug_printf("llvmpipe: nr_empty_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64); - debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64); - debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64); + debug_printf("llvmpipe: nr_64x64: %9u\n", total_64); + debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64); + debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p4, total_64); + debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64); + debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64); total_16 = (lp_count.nr_empty_16 + lp_count.nr_fully_covered_16 + @@ -71,25 +74,27 @@ lp_print_counters(void) p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16; p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16; - debug_printf("llvmpipe: nr_empty_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16); - debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16); - debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16); + debug_printf("llvmpipe: nr_16x16: %9u\n", total_16); + debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16); + debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16); + debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16); total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4); p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4; p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4; - debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); - debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4); + debug_printf("llvmpipe: nr_4x4: %9u\n", total_4); + debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); + debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4); - debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear); - debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load); - debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store); + debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear); + debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load); + debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store); - debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles); - debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0); - debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles); + debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles); + debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0); + debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles); } } diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h index a9629dae3c..4774f64550 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.h +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -44,6 +44,7 @@ struct lp_counters unsigned nr_empty_64; unsigned nr_fully_covered_64; unsigned nr_partially_covered_64; + unsigned nr_shade_opaque_64; unsigned nr_empty_16; unsigned nr_fully_covered_16; unsigned nr_partially_covered_16; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 1a82dd5694..a023d2b668 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -28,6 +28,7 @@ #include <limits.h> #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_rect.h" #include "util/u_surface.h" #include "lp_scene_queue.h" @@ -136,7 +137,6 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, struct lp_rasterizer *rast = task->rast; struct lp_scene *scene = rast->curr_scene; enum lp_texture_usage usage; - unsigned buf; LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); @@ -146,24 +146,8 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, task->x = x; task->y = y; - if (scene->has_color_clear) - usage = LP_TEX_USAGE_WRITE_ALL; - else - usage = LP_TEX_USAGE_READ_WRITE; - - /* get pointers to color tile(s) */ - for (buf = 0; buf < rast->state.nr_cbufs; buf++) { - struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; - struct llvmpipe_resource *lpt; - assert(cbuf); - lpt = llvmpipe_resource(cbuf->texture); - task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, - cbuf->face + cbuf->zslice, - cbuf->level, - usage, - x, y); - assert(task->color_tiles[buf]); - } + /* reset pointers to color tile(s) */ + memset(task->color_tiles, 0, sizeof(task->color_tiles)); /* get pointer to depth/stencil tile */ { @@ -222,7 +206,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, clear_color[2] == clear_color[3]) { /* clear to grayscale value {x, x, x, x} */ for (i = 0; i < rast->state.nr_cbufs; i++) { - uint8_t *ptr = task->color_tiles[i]; + uint8_t *ptr = + lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL); memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4); } } @@ -234,7 +219,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, */ const unsigned chunk = TILE_SIZE / 4; for (i = 0; i < rast->state.nr_cbufs; i++) { - uint8_t *c = task->color_tiles[i]; + uint8_t *c = + lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL); unsigned j; for (j = 0; j < 4 * TILE_SIZE; j++) { @@ -378,8 +364,8 @@ lp_rast_load_color(struct lp_rasterizer_task *task, * This is a bin command which is stored in all bins. */ void -lp_rast_store_color( struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) +lp_rast_store_linear_color( struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { struct lp_rasterizer *rast = task->rast; struct lp_scene *scene = rast->curr_scene; @@ -397,21 +383,6 @@ lp_rast_store_color( struct lp_rasterizer_task *task, } -/** - * This is a bin command called during bin processing. - */ -void -lp_rast_set_state(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) -{ - const struct lp_rast_state *state = arg.set_state; - - LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); - - /* just set the current state pointer for this rasterizer */ - task->current_state = state; -} - /** * Run the shader on all blocks in a tile. This is used when a tile is @@ -423,8 +394,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { struct lp_rasterizer *rast = task->rast; - const struct lp_rast_state *state = task->current_state; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + const struct lp_rast_state *state = inputs->state; struct lp_fragment_shader_variant *variant = state->variant; const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; @@ -448,32 +419,56 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, /* run shader on 4x4 block */ variant->jit_function[RAST_WHOLE]( &state->jit_context, - tile_x + x, tile_y + y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - INT_MIN, INT_MIN, INT_MIN, - NULL, NULL, NULL, &task->vis_counter); + tile_x + x, tile_y + y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + 0xffff, + &task->vis_counter); } } } /** - * Compute shading for a 4x4 block of pixels. + * Run the shader on all blocks in a tile. This is used when a tile is + * completely contained inside a triangle, and the shader is opaque. + * This is a bin command called during bin processing. + */ +void +lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + struct lp_rasterizer *rast = task->rast; + unsigned i; + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + /* this will prevent converting the layout from tiled to linear */ + for (i = 0; i < rast->state.nr_cbufs; i++) { + (void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL); + } + + lp_rast_shade_tile(task, arg); +} + + +/** + * Compute shading for a 4x4 block of pixels inside a triangle. * This is a bin command called during bin processing. * \param x X position of quad in window coords * \param y Y position of quad in window coords */ -void lp_rast_shade_quads( struct lp_rasterizer_task *task, - const struct lp_rast_shader_inputs *inputs, - unsigned x, unsigned y, - int32_t c1, int32_t c2, int32_t c3) +void +lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + unsigned mask) { - const struct lp_rast_state *state = task->current_state; + const struct lp_rast_state *state = inputs->state; struct lp_fragment_shader_variant *variant = state->variant; struct lp_rasterizer *rast = task->rast; uint8_t *color[PIPE_MAX_COLOR_BUFS]; @@ -501,27 +496,21 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, assert(lp_check_alignment(state->jit_context.blend_color, 16)); - assert(lp_check_alignment(inputs->step[0], 16)); - assert(lp_check_alignment(inputs->step[1], 16)); - assert(lp_check_alignment(inputs->step[2], 16)); - /* run shader on 4x4 block */ - variant->jit_function[RAST_EDGE_TEST]( &state->jit_context, - x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - c1, c2, c3, - inputs->step[0], - inputs->step[1], - inputs->step[2], - &task->vis_counter); + variant->jit_function[RAST_EDGE_TEST](&state->jit_context, + x, y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + mask, + &task->vis_counter); } + /** * Set top row and left column of the tile's pixels to white. For debugging. */ @@ -717,10 +706,16 @@ static struct { { RAST(clear_color), RAST(clear_zstencil), - RAST(triangle), + RAST(triangle_1), + RAST(triangle_2), + RAST(triangle_3), + RAST(triangle_4), + RAST(triangle_5), + RAST(triangle_6), + RAST(triangle_7), RAST(shade_tile), - RAST(set_state), - RAST(store_color), + RAST(shade_tile_opaque), + RAST(store_linear_color), RAST(fence), RAST(begin_query), RAST(end_query), @@ -775,7 +770,7 @@ is_empty_bin( const struct cmd_bin *bin ) } for (i = 0; i < head->count; i++) - if (head->cmd[i] != lp_rast_set_state) { + if (head->cmd[i] != lp_rast_store_linear_color) { return FALSE; } @@ -815,6 +810,10 @@ rasterize_scene(struct lp_rasterizer_task *task, } } #endif + + if (scene->fence) { + lp_rast_fence(task, lp_rast_arg_fence(scene->fence)); + } } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 80ca68f5a2..0991344cce 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -84,8 +84,7 @@ struct lp_rast_shader_inputs { float (*dadx)[4]; float (*dady)[4]; - /* edge/step info for 3 edges and 4x4 block of pixels */ - PIPE_ALIGN_VAR(16) int step[3][16]; + const struct lp_rast_state *state; }; struct lp_rast_clearzs { @@ -93,6 +92,22 @@ struct lp_rast_clearzs { unsigned clearzs_mask; }; +struct lp_rast_plane { + /* one-pixel sized trivial accept offsets for each plane */ + int ei; + + /* one-pixel sized trivial reject offsets for each plane */ + int eo; + + /* edge function values at minx,miny ?? */ + int c; + + int dcdx; + int dcdy; + + /* edge/step info for 3 edges and 4x4 block of pixels */ + const int *step; +}; /** * Rasterization information for a triangle known to be in this bin, @@ -101,35 +116,16 @@ struct lp_rast_clearzs { * Objects of this type are put into the lp_setup_context::data buffer. */ struct lp_rast_triangle { + /* inputs for the shader */ + PIPE_ALIGN_VAR(16) struct lp_rast_shader_inputs inputs; + + int step[3][16]; + #ifdef DEBUG float v[3][2]; #endif - /* one-pixel sized trivial accept offsets for each plane */ - int ei1; - int ei2; - int ei3; - - /* one-pixel sized trivial reject offsets for each plane */ - int eo1; - int eo2; - int eo3; - - /* y deltas for vertex pairs (in fixed pt) */ - int dy12; - int dy23; - int dy31; - - /* x deltas for vertex pairs (in fixed pt) */ - int dx12; - int dx23; - int dx31; - - /* edge function values at minx,miny ?? */ - int c1, c2, c3; - - /* inputs for the shader */ - PIPE_ALIGN_VAR(16) struct lp_rast_shader_inputs inputs; + struct lp_rast_plane plane[7]; /* NOTE: may allocate fewer planes */ }; @@ -153,7 +149,10 @@ lp_rast_finish( struct lp_rasterizer *rast ); union lp_rast_cmd_arg { const struct lp_rast_shader_inputs *shade_tile; - const struct lp_rast_triangle *triangle; + struct { + const struct lp_rast_triangle *tri; + unsigned plane_mask; + } triangle; const struct lp_rast_state *set_state; uint8_t clear_color[4]; const struct lp_rast_clearzs *clear_zstencil; @@ -173,10 +172,12 @@ lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile ) } static INLINE union lp_rast_cmd_arg -lp_rast_arg_triangle( const struct lp_rast_triangle *triangle ) +lp_rast_arg_triangle( const struct lp_rast_triangle *triangle, + unsigned plane_mask) { union lp_rast_cmd_arg arg; - arg.triangle = triangle; + arg.triangle.tri = triangle; + arg.triangle.plane_mask = plane_mask; return arg; } @@ -226,19 +227,31 @@ void lp_rast_clear_color( struct lp_rasterizer_task *, void lp_rast_clear_zstencil( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_set_state( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - -void lp_rast_triangle( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); +void lp_rast_triangle_1( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_2( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_3( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_4( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_5( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_6( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_7( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); void lp_rast_shade_tile( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void lp_rast_shade_tile_opaque( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + void lp_rast_fence( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_store_color( struct lp_rasterizer_task *, +void lp_rast_store_linear_color( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index eb4175dfa6..8a884177c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -53,8 +53,6 @@ struct lp_rasterizer_task uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS]; uint8_t *depth_tile; - const struct lp_rast_state *current_state; - /** "back" pointer */ struct lp_rasterizer *rast; @@ -119,10 +117,12 @@ struct lp_rasterizer }; -void lp_rast_shade_quads( struct lp_rasterizer_task *task, - const struct lp_rast_shader_inputs *inputs, - unsigned x, unsigned y, - int32_t c1, int32_t c2, int32_t c3); +void +lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + unsigned mask); + /** @@ -142,10 +142,13 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task, assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); - if (!rast->zsbuf.map && (task->current_state->variant->key.depth.enabled || - task->current_state->variant->key.stencil[0].enabled)) { - /* out of memory - use dummy tile memory */ - return lp_get_dummy_tile(); + if (!rast->zsbuf.map) { + /* Either out of memory or no zsbuf. Can't tell without access + * to the state. Just use dummy tile memory, but don't print + * the oom warning as this most likely because there is no + * zsbuf. + */ + return lp_get_dummy_tile_silent(); } depth = (rast->zsbuf.map + @@ -158,6 +161,40 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task, /** + * Get pointer to the swizzled color tile + */ +static INLINE uint8_t * +lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task, + unsigned buf, enum lp_texture_usage usage) +{ + struct lp_rasterizer *rast = task->rast; + + assert(task->x % TILE_SIZE == 0); + assert(task->y % TILE_SIZE == 0); + assert(buf < rast->state.nr_cbufs); + + if (!task->color_tiles[buf]) { + struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; + struct llvmpipe_resource *lpt; + assert(cbuf); + lpt = llvmpipe_resource(cbuf->texture); + task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, + cbuf->face + cbuf->zslice, + cbuf->level, + usage, + task->x, + task->y); + if (!task->color_tiles[buf]) { + /* out of memory - use dummy tile memory */ + return lp_get_dummy_tile(); + } + } + + return task->color_tiles[buf]; +} + + +/** * Get the pointer to a 4x4 color block (within a 64x64 tile). * We'll map the color buffer on demand here. * Note that this may be called even when there's no color buffers - return @@ -174,6 +211,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task, assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); + color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE); color = task->color_tiles[buf]; if (!color) { /* out of memory - use dummy tile memory */ @@ -203,7 +241,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, unsigned x, unsigned y ) { const struct lp_rasterizer *rast = task->rast; - const struct lp_rast_state *state = task->current_state; + const struct lp_rast_state *state = inputs->state; struct lp_fragment_shader_variant *variant = state->variant; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -217,15 +255,15 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, /* run shader on 4x4 block */ variant->jit_function[RAST_WHOLE]( &state->jit_context, - x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - INT_MIN, INT_MIN, INT_MIN, - NULL, NULL, NULL, &task->vis_counter ); + x, y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + 0xffff, + &task->vis_counter ); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index a5f0d14c95..ebe9a8e92b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -113,168 +113,31 @@ block_full_16(struct lp_rasterizer_task *task, block_full_4(task, tri, x + ix, y + iy); } +#define TAG(x) x##_1 +#define NR_PLANES 1 +#include "lp_rast_tri_tmp.h" -/** - * Pass the 4x4 pixel block to the shader function. - * Determination of which of the 16 pixels lies inside the triangle - * will be done as part of the fragment shader. - */ -static void -do_block_4(struct lp_rasterizer_task *task, - const struct lp_rast_triangle *tri, - int x, int y, - int c1, int c2, int c3) -{ - assert(x >= 0); - assert(y >= 0); - - lp_rast_shade_quads(task, &tri->inputs, x, y, -c1, -c2, -c3); -} - - -/** - * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out - * of the triangle's bounds. - */ -static void -do_block_16(struct lp_rasterizer_task *task, - const struct lp_rast_triangle *tri, - int x, int y, - int c0, int c1, int c2) -{ - unsigned mask = 0; - int eo[3]; - int c[3]; - int i, j; - - assert(x >= 0); - assert(y >= 0); - assert(x % 16 == 0); - assert(y % 16 == 0); - - eo[0] = tri->eo1 * 4; - eo[1] = tri->eo2 * 4; - eo[2] = tri->eo3 * 4; - - c[0] = c0; - c[1] = c1; - c[2] = c2; - - for (j = 0; j < 3; j++) { - const int *step = tri->inputs.step[j]; - const int cx = c[j] + eo[j]; - - /* Mask has bits set whenever we are outside any of the edges. - */ - for (i = 0; i < 16; i++) { - int out = cx + step[i] * 4; - mask |= (out >> 31) & (1 << i); - } - } +#define TAG(x) x##_2 +#define NR_PLANES 2 +#include "lp_rast_tri_tmp.h" - mask = ~mask & 0xffff; - while (mask) { - int i = ffs(mask) - 1; - int px = x + pos_table4[i][0]; - int py = y + pos_table4[i][1]; - int cx1 = c0 + tri->inputs.step[0][i] * 4; - int cx2 = c1 + tri->inputs.step[1][i] * 4; - int cx3 = c2 + tri->inputs.step[2][i] * 4; +#define TAG(x) x##_3 +#define NR_PLANES 3 +#include "lp_rast_tri_tmp.h" - mask &= ~(1 << i); +#define TAG(x) x##_4 +#define NR_PLANES 4 +#include "lp_rast_tri_tmp.h" - /* Don't bother testing if the 4x4 block is entirely in/out of - * the triangle. It's a little faster to do it in the jit code. - */ - LP_COUNT(nr_non_empty_4); - do_block_4(task, tri, px, py, cx1, cx2, cx3); - } -} - - -/** - * Scan the tile in chunks and figure out which pixels to rasterize - * for this triangle. - */ -void -lp_rast_triangle(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) -{ - const struct lp_rast_triangle *tri = arg.triangle; - const int x = task->x, y = task->y; - int ei[3], eo[3], c[3]; - unsigned outmask, inmask, partial_mask; - unsigned i, j; - - c[0] = tri->c1 + tri->dx12 * y - tri->dy12 * x; - c[1] = tri->c2 + tri->dx23 * y - tri->dy23 * x; - c[2] = tri->c3 + tri->dx31 * y - tri->dy31 * x; - - eo[0] = tri->eo1 * 16; - eo[1] = tri->eo2 * 16; - eo[2] = tri->eo3 * 16; - - ei[0] = tri->ei1 * 16; - ei[1] = tri->ei2 * 16; - ei[2] = tri->ei3 * 16; - - outmask = 0; - inmask = 0xffff; +#define TAG(x) x##_5 +#define NR_PLANES 5 +#include "lp_rast_tri_tmp.h" - for (j = 0; j < 3; j++) { - const int *step = tri->inputs.step[j]; - const int cox = c[j] + eo[j]; - const int cio = ei[j]- eo[j]; +#define TAG(x) x##_6 +#define NR_PLANES 6 +#include "lp_rast_tri_tmp.h" - /* Outmask has bits set whenever we are outside any of the - * edges. - */ - /* Inmask has bits set whenever we are inside all of the edges. - */ - for (i = 0; i < 16; i++) { - int out = cox + step[i] * 16; - int in = out + cio; - outmask |= (out >> 31) & (1 << i); - inmask &= ~((in >> 31) & (1 << i)); - } - } +#define TAG(x) x##_7 +#define NR_PLANES 7 +#include "lp_rast_tri_tmp.h" - assert((outmask & inmask) == 0); - - if (outmask == 0xffff) - return; - - /* Invert mask, so that bits are set whenever we are at least - * partially inside all of the edges: - */ - partial_mask = ~inmask & ~outmask & 0xffff; - - /* Iterate over partials: - */ - while (partial_mask) { - int i = ffs(partial_mask) - 1; - int px = x + pos_table16[i][0]; - int py = y + pos_table16[i][1]; - int cx1 = c[0] + tri->inputs.step[0][i] * 16; - int cx2 = c[1] + tri->inputs.step[1][i] * 16; - int cx3 = c[2] + tri->inputs.step[2][i] * 16; - - partial_mask &= ~(1 << i); - - LP_COUNT(nr_partially_covered_16); - do_block_16(task, tri, px, py, cx1, cx2, cx3); - } - - /* Iterate over fulls: - */ - while (inmask) { - int i = ffs(inmask) - 1; - int px = x + pos_table16[i][0]; - int py = y + pos_table16[i][1]; - - inmask &= ~(1 << i); - - LP_COUNT(nr_fully_covered_16); - block_full_16(task, tri, px, py); - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h new file mode 100644 index 0000000000..a410c611a3 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -0,0 +1,238 @@ +/************************************************************************** + * + * Copyright 2007-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Rasterization for binned triangles within a tile + */ + + + +/** + * Prototype for a 7 plane rasterizer function. Will codegenerate + * several of these. + * + * XXX: Varients for more/fewer planes. + * XXX: Need ways of dropping planes as we descend. + * XXX: SIMD + */ +static void +TAG(do_block_4)(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + const struct lp_rast_plane *plane, + int x, int y, + const int *c) +{ + unsigned mask = 0; + int i; + + for (i = 0; i < 16; i++) { + int any_negative = 0; + int j; + + for (j = 0; j < NR_PLANES; j++) + any_negative |= (c[j] - 1 + plane[j].step[i]); + + any_negative >>= 31; + + mask |= (~any_negative) & (1 << i); + } + + /* Now pass to the shader: + */ + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); +} + +/** + * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out + * of the triangle's bounds. + */ +static void +TAG(do_block_16)(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + const struct lp_rast_plane *plane, + int x, int y, + const int *c) +{ + unsigned outmask, inmask, partmask, partial_mask; + unsigned i, j; + + outmask = 0; /* outside one or more trivial reject planes */ + partmask = 0; /* outside one or more trivial accept planes */ + + for (j = 0; j < NR_PLANES; j++) { + const int *step = plane[j].step; + const int eo = plane[j].eo * 4; + const int ei = plane[j].ei * 4; + const int cox = c[j] + eo; + const int cio = ei - 1 - eo; + + for (i = 0; i < 16; i++) { + int out = cox + step[i] * 4; + int part = out + cio; + outmask |= (out >> 31) & (1 << i); + partmask |= (part >> 31) & (1 << i); + } + } + + if (outmask == 0xffff) + return; + + /* Mask of sub-blocks which are inside all trivial accept planes: + */ + inmask = ~partmask & 0xffff; + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = partmask & ~outmask; + + assert((partial_mask & inmask) == 0); + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int px = x + pos_table4[i][0]; + int py = y + pos_table4[i][1]; + int cx[NR_PLANES]; + + for (j = 0; j < NR_PLANES; j++) + cx[j] = c[j] + plane[j].step[i] * 4; + + partial_mask &= ~(1 << i); + + TAG(do_block_4)(task, tri, plane, px, py, cx); + } + + /* Iterate over fulls: + */ + while (inmask) { + int i = ffs(inmask) - 1; + int px = x + pos_table4[i][0]; + int py = y + pos_table4[i][1]; + + inmask &= ~(1 << i); + + block_full_4(task, tri, px, py); + } +} + + +/** + * Scan the tile in chunks and figure out which pixels to rasterize + * for this triangle. + */ +void +TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + unsigned plane_mask = arg.triangle.plane_mask; + const int x = task->x, y = task->y; + struct lp_rast_plane plane[NR_PLANES]; + int c[NR_PLANES]; + unsigned outmask, inmask, partmask, partial_mask; + unsigned i, j, nr_planes = 0; + + while (plane_mask) { + int i = ffs(plane_mask) - 1; + plane[nr_planes] = tri->plane[i]; + plane_mask &= ~(1 << i); + nr_planes++; + }; + + assert(nr_planes == NR_PLANES); + outmask = 0; /* outside one or more trivial reject planes */ + partmask = 0; /* outside one or more trivial accept planes */ + + for (j = 0; j < NR_PLANES; j++) { + const int *step = plane[j].step; + const int eo = plane[j].eo * 16; + const int ei = plane[j].ei * 16; + int cox, cio; + + c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; + cox = c[j] + eo; + cio = ei - 1 - eo; + + for (i = 0; i < 16; i++) { + int out = cox + step[i] * 16; + int part = out + cio; + outmask |= (out >> 31) & (1 << i); + partmask |= (part >> 31) & (1 << i); + } + } + + if (outmask == 0xffff) + return; + + /* Mask of sub-blocks which are inside all trivial accept planes: + */ + inmask = ~partmask & 0xffff; + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = partmask & ~outmask; + + assert((partial_mask & inmask) == 0); + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int px = x + pos_table16[i][0]; + int py = y + pos_table16[i][1]; + int cx[NR_PLANES]; + + for (j = 0; j < NR_PLANES; j++) + cx[j] = c[j] + plane[j].step[i] * 16; + + partial_mask &= ~(1 << i); + + LP_COUNT(nr_partially_covered_16); + TAG(do_block_16)(task, tri, plane, px, py, cx); + } + + /* Iterate over fulls: + */ + while (inmask) { + int i = ffs(inmask) - 1; + int px = x + pos_table16[i][0]; + int py = y + pos_table16[i][1]; + + inmask &= ~(1 << i); + + LP_COUNT(nr_fully_covered_16); + block_full_16(task, tri, px, py); + } +} + +#undef TAG +#undef NR_PLANES + diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index e8d36bbdc5..f88a759fe7 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -32,6 +32,7 @@ #include "util/u_simple_list.h" #include "lp_scene.h" #include "lp_scene_queue.h" +#include "lp_fence.h" /** List of texture references */ @@ -198,6 +199,8 @@ lp_scene_reset(struct lp_scene *scene ) make_empty_list(ref_list); } + lp_fence_reference(&scene->fence, NULL); + scene->scene_size = 0; scene->has_color_clear = FALSE; @@ -303,60 +306,6 @@ lp_scene_is_resource_referenced(const struct lp_scene *scene, } -/** - * Return last command in the bin - */ -static lp_rast_cmd -lp_get_last_command( const struct cmd_bin *bin ) -{ - const struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - if (i > 0) - return tail->cmd[i - 1]; - else - return NULL; -} - - -/** - * Replace the arg of the last command in the bin. - */ -static void -lp_replace_last_command_arg( struct cmd_bin *bin, - const union lp_rast_cmd_arg arg ) -{ - struct cmd_block *tail = bin->commands.tail; - const unsigned i = tail->count; - assert(i > 0); - tail->arg[i - 1] = arg; -} - - - -/** - * Put a state-change command into all bins. - * If we find that the last command in a bin was also a state-change - * command, we can simply replace that one with the new one. - */ -void -lp_scene_bin_state_command( struct lp_scene *scene, - lp_rast_cmd cmd, - const union lp_rast_cmd_arg arg ) -{ - unsigned i, j; - for (i = 0; i < scene->tiles_x; i++) { - for (j = 0; j < scene->tiles_y; j++) { - struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - lp_rast_cmd last_cmd = lp_get_last_command(bin); - if (last_cmd == cmd) { - lp_replace_last_command_arg(bin, arg); - } - else { - lp_scene_bin_command( scene, i, j, cmd, arg ); - } - } - } -} /** advance curr_x,y to the next bin */ diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 4e55d43174..fa1b311fa1 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -112,6 +112,7 @@ struct resource_ref { */ struct lp_scene { struct pipe_context *pipe; + struct lp_fence *fence; /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index edcab0f8d9..f7f1635ef9 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -43,6 +43,7 @@ #include "lp_debug.h" #include "lp_public.h" #include "lp_limits.h" +#include "lp_rast.h" #include "state_tracker/sw_winsys.h" @@ -296,11 +297,16 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen ) struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct sw_winsys *winsys = screen->winsys; + if (screen->rast) + lp_rast_destroy(screen->rast); + lp_jit_screen_cleanup(screen); if(winsys->destroy) winsys->destroy(winsys); + pipe_mutex_destroy(screen->rast_mutex); + FREE(screen); } @@ -349,11 +355,6 @@ llvmpipe_create_screen(struct sw_winsys *winsys) lp_jit_screen_init(screen); -#ifdef PIPE_OS_WINDOWS - /* Multithreading not supported on windows until conditions and barriers are - * properly implemented. */ - screen->num_threads = 0; -#else #ifdef PIPE_OS_EMBEDDED screen->num_threads = 0; #else @@ -361,7 +362,14 @@ llvmpipe_create_screen(struct sw_winsys *winsys) #endif screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads); screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS); -#endif + + screen->rast = lp_rast_create(screen->num_threads); + if (!screen->rast) { + lp_jit_screen_cleanup(screen); + FREE(screen); + return NULL; + } + pipe_mutex_init(screen->rast_mutex); util_format_s3tc_init(); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h index eb40f6823f..731526dfab 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/src/gallium/drivers/llvmpipe/lp_screen.h @@ -37,6 +37,7 @@ #include "gallivm/lp_bld.h" #include <llvm-c/ExecutionEngine.h> +#include "os/os_thread.h" #include "pipe/p_screen.h" #include "pipe/p_defines.h" @@ -63,6 +64,9 @@ struct llvmpipe_screen /* Increments whenever textures are modified. Contexts can track this. */ unsigned timestamp; + + struct lp_rasterizer *rast; + pipe_mutex rast_mutex; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 2bd6fcebe7..7d48ad8e74 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -152,8 +152,11 @@ static void lp_setup_rasterize_scene( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen); - lp_scene_rasterize(scene, setup->rast); + pipe_mutex_lock(screen->rast_mutex); + lp_scene_rasterize(scene, screen->rast); + pipe_mutex_unlock(screen->rast_mutex); reset_context( setup ); @@ -271,7 +274,8 @@ set_scene_state( struct lp_setup_context *setup, */ void lp_setup_flush( struct lp_setup_context *setup, - unsigned flags ) + unsigned flags, + struct pipe_fence_handle **fence) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -286,8 +290,17 @@ lp_setup_flush( struct lp_setup_context *setup, * data to linear in the texture_unmap() function, which will * not be a parallel/threaded operation as here. */ - lp_scene_bin_everywhere(scene, lp_rast_store_color, dummy); + lp_scene_bin_everywhere(scene, lp_rast_store_linear_color, dummy); + } + + + if (fence) { + /* if we're going to flush the setup/rasterization modules, emit + * a fence. + */ + *fence = lp_setup_fence( setup ); } + } set_scene_state( setup, SETUP_FLUSHED ); @@ -433,24 +446,27 @@ lp_setup_clear( struct lp_setup_context *setup, struct pipe_fence_handle * lp_setup_fence( struct lp_setup_context *setup ) { - if (setup->num_threads == 0) { + if (setup->scene == NULL) return NULL; - } - else { + else if (setup->num_threads == 0) + return NULL; + else + { struct lp_scene *scene = lp_setup_get_current_scene(setup); - const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ - struct lp_fence *fence = lp_fence_create(rank); - - LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); + const unsigned rank = setup->num_threads; set_scene_state( setup, SETUP_ACTIVE ); + + assert(scene->fence == NULL); - /* insert the fence into all command bins */ - lp_scene_bin_everywhere( scene, - lp_rast_fence, - lp_rast_arg_fence(fence) ); + /* The caller gets a reference, we keep a copy too, so need to + * bump the refcount: + */ + lp_fence_reference(&scene->fence, lp_fence_create(rank)); + + LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank); - return (struct pipe_fence_handle *) fence; + return (struct pipe_fence_handle *) scene->fence; } } @@ -739,28 +755,6 @@ lp_setup_update_state( struct lp_setup_context *setup ) setup->dirty |= LP_SETUP_NEW_FS; } - if (setup->dirty & LP_SETUP_NEW_SCISSOR) { - float *stored; - - stored = lp_scene_alloc_aligned(scene, 4 * sizeof(int32_t), 16); - - if (stored) { - stored[0] = (float) setup->scissor.current.minx; - stored[1] = (float) setup->scissor.current.miny; - stored[2] = (float) setup->scissor.current.maxx; - stored[3] = (float) setup->scissor.current.maxy; - - setup->scissor.stored = stored; - - setup->fs.current.jit_context.scissor_xmin = stored[0]; - setup->fs.current.jit_context.scissor_ymin = stored[1]; - setup->fs.current.jit_context.scissor_xmax = stored[2]; - setup->fs.current.jit_context.scissor_ymax = stored[3]; - } - - setup->dirty |= LP_SETUP_NEW_FS; - } - if(setup->dirty & LP_SETUP_NEW_CONSTANTS) { struct pipe_resource *buffer = setup->constants.current; @@ -814,11 +808,6 @@ lp_setup_update_state( struct lp_setup_context *setup ) &setup->fs.current, sizeof setup->fs.current); setup->fs.stored = stored; - - /* put the state-set command into all bins */ - lp_scene_bin_state_command( scene, - lp_rast_set_state, - lp_rast_arg_state(setup->fs.stored) ); } /* The scene now references the textures in the rasterization @@ -865,8 +854,6 @@ lp_setup_destroy( struct lp_setup_context *setup ) lp_scene_queue_destroy(setup->empty_scenes); - lp_rast_destroy( setup->rast ); - FREE( setup ); } @@ -893,13 +880,7 @@ lp_setup_create( struct pipe_context *pipe, if (!setup->empty_scenes) goto fail; - /* XXX: move this to the screen and share between contexts: - */ setup->num_threads = screen->num_threads; - setup->rast = lp_rast_create(screen->num_threads); - if (!setup->rast) - goto fail; - setup->vbuf = draw_vbuf_stage(draw, &setup->base); if (!setup->vbuf) goto fail; @@ -923,9 +904,6 @@ lp_setup_create( struct pipe_context *pipe, return setup; fail: - if (setup->rast) - lp_rast_destroy( setup->rast ); - if (setup->vbuf) ; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 6a0dc55129..73b1c85325 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -84,7 +84,8 @@ lp_setup_fence( struct lp_setup_context *setup ); void lp_setup_flush( struct lp_setup_context *setup, - unsigned flags ); + unsigned flags, + struct pipe_fence_handle **fence); void diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 8f4e00f073..a0606f5034 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -81,7 +81,6 @@ struct lp_setup_context */ struct draw_stage *vbuf; unsigned num_threads; - struct lp_rasterizer *rast; struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */ struct lp_scene *scene; /**< current scene being built */ struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */ @@ -130,7 +129,6 @@ struct lp_setup_context struct { struct pipe_scissor_state current; - const void *stored; } scissor; unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 4e2e17f77b..4ceb789b77 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -38,12 +38,78 @@ #define NUM_CHANNELS 4 +struct tri_info { + + float pixel_offset; + + /* fixed point vertex coordinates */ + int x[3]; + int y[3]; + + /* float x,y deltas - all from the original coordinates + */ + float dy01, dy20; + float dx01, dx20; + float oneoverarea; + + const float (*v0)[4]; + const float (*v1)[4]; + const float (*v2)[4]; + + boolean frontfacing; +}; + + + +static const int step_scissor_minx[16] = { + 0, 1, 0, 1, + 2, 3, 2, 3, + 0, 1, 0, 1, + 2, 3, 2, 3 +}; + +static const int step_scissor_maxx[16] = { + 0, -1, 0, -1, + -2, -3, -2, -3, + 0, -1, 0, -1, + -2, -3, -2, -3 +}; + +static const int step_scissor_miny[16] = { + 0, 0, 1, 1, + 0, 0, 1, 1, + 2, 2, 3, 3, + 2, 2, 3, 3 +}; + +static const int step_scissor_maxy[16] = { + 0, 0, -1, -1, + 0, 0, -1, -1, + -2, -2, -3, -3, + -2, -2, -3, -3 +}; + + + + +static INLINE int +subpixel_snap(float a) +{ + return util_iround(FIXED_ONE * a); +} + +static INLINE float +fixed_to_float(int a) +{ + return a * (1.0 / FIXED_ONE); +} + + /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ -static void constant_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, +static void constant_coef( struct lp_rast_triangle *tri, unsigned slot, const float value, unsigned i ) @@ -54,28 +120,21 @@ static void constant_coef( struct lp_setup_context *setup, } -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static void linear_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, + +static void linear_coef( struct lp_rast_triangle *tri, + const struct tri_info *info, unsigned slot, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], unsigned vert_attr, unsigned i) { - float a1 = v1[vert_attr][i]; - float a2 = v2[vert_attr][i]; - float a3 = v3[vert_attr][i]; + float a0 = info->v0[vert_attr][i]; + float a1 = info->v1[vert_attr][i]; + float a2 = info->v2[vert_attr][i]; - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; + float da01 = a0 - a1; + float da20 = a2 - a0; + float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea; + float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea; tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; @@ -92,9 +151,9 @@ static void linear_coef( struct lp_setup_context *setup, * to define a0 as the sample at a pixel center somewhere near vmin * instead - i'll switch to this later. */ - tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - setup->pixel_offset) + - dady * (v1[0][1] - setup->pixel_offset))); + tri->inputs.a0[slot][i] = (a0 - + (dadx * (info->v0[0][0] - info->pixel_offset) + + dady * (info->v0[0][1] - info->pixel_offset))); } @@ -106,31 +165,27 @@ static void linear_coef( struct lp_setup_context *setup, * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void perspective_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, +static void perspective_coef( struct lp_rast_triangle *tri, + const struct tri_info *info, unsigned slot, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], unsigned vert_attr, unsigned i) { /* premultiply by 1/w (v[0][3] is always 1/w): */ - float a1 = v1[vert_attr][i] * v1[0][3]; - float a2 = v2[vert_attr][i] * v2[0][3]; - float a3 = v3[vert_attr][i] * v3[0][3]; - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; + float a0 = info->v0[vert_attr][i] * info->v0[0][3]; + float a1 = info->v1[vert_attr][i] * info->v1[0][3]; + float a2 = info->v2[vert_attr][i] * info->v2[0][3]; + float da01 = a0 - a1; + float da20 = a2 - a0; + float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea; + float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea; tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - setup->pixel_offset) + - dady * (v1[0][1] - setup->pixel_offset))); + tri->inputs.a0[slot][i] = (a0 - + (dadx * (info->v0[0][0] - info->pixel_offset) + + dady * (info->v0[0][1] - info->pixel_offset))); } @@ -141,13 +196,9 @@ static void perspective_coef( struct lp_setup_context *setup, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void -setup_fragcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, +setup_fragcoord_coef(struct lp_rast_triangle *tri, + const struct tri_info *info, unsigned slot, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], unsigned usage_mask) { /*X*/ @@ -166,12 +217,12 @@ setup_fragcoord_coef(struct lp_setup_context *setup, /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2); + linear_coef(tri, info, slot, 0, 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3); + linear_coef(tri, info, slot, 0, 3); } } @@ -180,24 +231,23 @@ setup_fragcoord_coef(struct lp_setup_context *setup, * Setup the fragment input attribute with the front-facing value. * \param frontface is the triangle front facing? */ -static void setup_facing_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, +static void setup_facing_coef( struct lp_rast_triangle *tri, unsigned slot, boolean frontface, unsigned usage_mask) { /* convert TRUE to 1.0 and FALSE to -1.0 */ if (usage_mask & TGSI_WRITEMASK_X) - constant_coef( setup, tri, slot, 2.0f * frontface - 1.0f, 0 ); + constant_coef( tri, slot, 2.0f * frontface - 1.0f, 0 ); if (usage_mask & TGSI_WRITEMASK_Y) - constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ if (usage_mask & TGSI_WRITEMASK_Z) - constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ if (usage_mask & TGSI_WRITEMASK_W) - constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ } @@ -206,11 +256,7 @@ static void setup_facing_coef( struct lp_setup_context *setup, */ static void setup_tri_coefficients( struct lp_setup_context *setup, struct lp_rast_triangle *tri, - float oneoverarea, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], - boolean frontface) + const struct tri_info *info) { unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; @@ -227,25 +273,25 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, if (setup->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, v1[vert_attr][i], i); + constant_coef(tri, slot+1, info->v0[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, v3[vert_attr][i], i); + constant_coef(tri, slot+1, info->v2[vert_attr][i], i); } break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + linear_coef(tri, info, slot+1, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + perspective_coef(tri, info, slot+1, vert_attr, i); fragcoord_usage_mask |= TGSI_WRITEMASK_W; break; @@ -259,7 +305,7 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, break; case LP_INTERP_FACING: - setup_facing_coef(setup, tri, slot+1, frontface, usage_mask); + setup_facing_coef(tri, slot+1, info->frontfacing, usage_mask); break; default: @@ -269,16 +315,11 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3, - fragcoord_usage_mask); + setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask); } -static INLINE int subpixel_snap( float a ) -{ - return util_iround(FIXED_ONE * a - (FIXED_ONE / 2)); -} @@ -291,21 +332,25 @@ static INLINE int subpixel_snap( float a ) * \return pointer to triangle space */ static INLINE struct lp_rast_triangle * -alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) +alloc_triangle(struct lp_scene *scene, + unsigned nr_inputs, + unsigned nr_planes, + unsigned *tri_size) { unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); struct lp_rast_triangle *tri; - unsigned bytes; + unsigned tri_bytes, bytes; char *inputs; assert(sizeof(*tri) % 16 == 0); - bytes = sizeof(*tri) + (3 * input_array_sz); + tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16); + bytes = tri_bytes + (3 * input_array_sz); tri = lp_scene_alloc_aligned( scene, bytes, 16 ); if (tri) { - inputs = (char *) (tri + 1); + inputs = ((char *)tri) + tri_bytes; tri->inputs.a0 = (float (*)[4]) inputs; tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); @@ -329,52 +374,71 @@ print_triangle(struct lp_setup_context *setup, uint i; debug_printf("llvmpipe triangle\n"); - for (i = 0; i < setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { debug_printf(" v1[%d]: %f %f %f %f\n", i, v1[i][0], v1[i][1], v1[i][2], v1[i][3]); } - for (i = 0; i < setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { debug_printf(" v2[%d]: %f %f %f %f\n", i, v2[i][0], v2[i][1], v2[i][2], v2[i][3]); } - for (i = 0; i < setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { debug_printf(" v3[%d]: %f %f %f %f\n", i, v3[i][0], v3[i][1], v3[i][2], v3[i][3]); } } +lp_rast_cmd lp_rast_tri_tab[8] = { + NULL, /* should be impossible */ + lp_rast_triangle_1, + lp_rast_triangle_2, + lp_rast_triangle_3, + lp_rast_triangle_4, + lp_rast_triangle_5, + lp_rast_triangle_6, + lp_rast_triangle_7 +}; + /** * Do basic setup for triangle rasterization and determine which * framebuffer tiles are touched. Put the triangle in the scene's * bins for the tiles which we overlap. */ -static void +static void do_triangle_ccw(struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontfacing ) { - /* x/y positions in fixed point */ - const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset); - const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset); - const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset); - const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset); - const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset); - const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset); struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_fragment_shader_variant *variant = setup->fs.current.variant; struct lp_rast_triangle *tri; + struct tri_info info; int area; - float oneoverarea; int minx, maxx, miny, maxy; + int ix0, ix1, iy0, iy1; unsigned tri_bytes; - + int i; + int nr_planes = 3; + if (0) print_triangle(setup, v1, v2, v3); - tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes); + if (setup->scissor_test) { + nr_planes = 7; + } + else { + nr_planes = 3; + } + + + tri = alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &tri_bytes); if (!tri) return; @@ -387,15 +451,24 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->v[2][1] = v3[0][1]; #endif - tri->dx12 = x1 - x2; - tri->dx23 = x2 - x3; - tri->dx31 = x3 - x1; + /* x/y positions in fixed point */ + info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset); + info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset); + info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset); + info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset); + info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset); + info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset); + + tri->plane[0].dcdy = info.x[0] - info.x[1]; + tri->plane[1].dcdy = info.x[1] - info.x[2]; + tri->plane[2].dcdy = info.x[2] - info.x[0]; - tri->dy12 = y1 - y2; - tri->dy23 = y2 - y3; - tri->dy31 = y3 - y1; + tri->plane[0].dcdx = info.y[0] - info.y[1]; + tri->plane[1].dcdx = info.y[1] - info.y[2]; + tri->plane[2].dcdx = info.y[2] - info.y[0]; - area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12); + area = (tri->plane[0].dcdy * tri->plane[2].dcdx - + tri->plane[2].dcdy * tri->plane[0].dcdx); LP_COUNT(nr_tris); @@ -410,20 +483,35 @@ do_triangle_ccw(struct lp_setup_context *setup, } /* Bounding rectangle (in pixels) */ - minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; - maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; - miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; - maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; - + { + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->pixel_offset != 0) ? 1 : 0; + + minx = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + maxx = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + miny = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + maxy = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + } + if (setup->scissor_test) { minx = MAX2(minx, setup->scissor.current.minx); maxx = MIN2(maxx, setup->scissor.current.maxx); miny = MAX2(miny, setup->scissor.current.miny); maxy = MIN2(maxy, setup->scissor.current.maxy); } + else { + minx = MAX2(minx, 0); + miny = MAX2(miny, 0); + maxx = MIN2(maxx, scene->fb.width); + maxy = MIN2(maxy, scene->fb.height); + } + - if (miny == maxy || - minx == maxx) { + if (miny >= maxy || minx >= maxx) { lp_scene_putback_data( scene, tri_bytes ); LP_COUNT(nr_culled_tris); return; @@ -431,75 +519,88 @@ do_triangle_ccw(struct lp_setup_context *setup, /* */ - oneoverarea = ((float)FIXED_ONE) / (float)area; + info.pixel_offset = setup->pixel_offset; + info.v0 = v1; + info.v1 = v2; + info.v2 = v3; + info.dx01 = info.v0[0][0] - info.v1[0][0]; + info.dx20 = info.v2[0][0] - info.v0[0][0]; + info.dy01 = info.v0[0][1] - info.v1[0][1]; + info.dy20 = info.v2[0][1] - info.v0[0][1]; + info.oneoverarea = 1.0 / (info.dx01 * info.dy20 - info.dx20 * info.dy01); + info.frontfacing = frontfacing; /* Setup parameter interpolants: */ - setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing ); + setup_tri_coefficients( setup, tri, &info ); tri->inputs.facing = frontfacing ? 1.0F : -1.0F; + tri->inputs.state = setup->fs.stored; - /* half-edge constants, will be interated over the whole render target. - */ - tri->c1 = tri->dy12 * x1 - tri->dx12 * y1; - tri->c2 = tri->dy23 * x2 - tri->dx23 * y2; - tri->c3 = tri->dy31 * x3 - tri->dx31 * y3; - /* correct for top-left fill convention: - */ - if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++; - if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; - if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; - - tri->dy12 *= FIXED_ONE; - tri->dy23 *= FIXED_ONE; - tri->dy31 *= FIXED_ONE; - - tri->dx12 *= FIXED_ONE; - tri->dx23 *= FIXED_ONE; - tri->dx31 *= FIXED_ONE; - - /* find trivial reject offsets for each edge for a single-pixel - * sized block. These will be scaled up at each recursive level to - * match the active blocksize. Scaling in this way works best if - * the blocks are square. - */ - tri->eo1 = 0; - if (tri->dy12 < 0) tri->eo1 -= tri->dy12; - if (tri->dx12 > 0) tri->eo1 += tri->dx12; + + for (i = 0; i < 3; i++) { + struct lp_rast_plane *plane = &tri->plane[i]; - tri->eo2 = 0; - if (tri->dy23 < 0) tri->eo2 -= tri->dy23; - if (tri->dx23 > 0) tri->eo2 += tri->dx23; + /* half-edge constants, will be interated over the whole render + * target. + */ + plane->c = plane->dcdx * info.x[i] - plane->dcdy * info.y[i]; + + /* correct for top-left vs. bottom-left fill convention. + * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. + */ + if (plane->dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane->c++; + } + else if (plane->dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane->dcdy > 0) plane->c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane->dcdy < 0) plane->c++; + } + } - tri->eo3 = 0; - if (tri->dy31 < 0) tri->eo3 -= tri->dy31; - if (tri->dx31 > 0) tri->eo3 += tri->dx31; + plane->dcdx *= FIXED_ONE; + plane->dcdy *= FIXED_ONE; - /* Calculate trivial accept offsets from the above. - */ - tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1; - tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; - tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane->eo = 0; + if (plane->dcdx < 0) plane->eo -= plane->dcdx; + if (plane->dcdy > 0) plane->eo += plane->dcdy; - /* Fill in the inputs.step[][] arrays. - * We've manually unrolled some loops here. - */ - { - const int xstep1 = -tri->dy12; - const int xstep2 = -tri->dy23; - const int xstep3 = -tri->dy31; - const int ystep1 = tri->dx12; - const int ystep2 = tri->dx23; - const int ystep3 = tri->dx31; - -#define SETUP_STEP(i, x, y) \ - do { \ - tri->inputs.step[0][i] = x * xstep1 + y * ystep1; \ - tri->inputs.step[1][i] = x * xstep2 + y * ystep2; \ - tri->inputs.step[2][i] = x * xstep3 + y * ystep3; \ - } while (0) + /* Calculate trivial accept offsets from the above. + */ + plane->ei = plane->dcdy - plane->dcdx - plane->eo; + plane->step = tri->step[i]; + + /* Fill in the inputs.step[][] arrays. + * We've manually unrolled some loops here. + */ +#define SETUP_STEP(j, x, y) \ + tri->step[i][j] = y * plane->dcdy - x * plane->dcdx + SETUP_STEP(0, 0, 0); SETUP_STEP(1, 1, 0); SETUP_STEP(2, 0, 1); @@ -522,63 +623,106 @@ do_triangle_ccw(struct lp_setup_context *setup, #undef STEP } + + /* + * When rasterizing scissored tris, use the intersection of the + * triangle bounding box and the scissor rect to generate the + * scissor planes. + * + * This permits us to cut off the triangle "tails" that are present + * in the intermediate recursive levels caused when two of the + * triangles edges don't diverge quickly enough to trivially reject + * exterior blocks from the triangle. + * + * It's not really clear if it's worth worrying about these tails, + * but since we generate the planes for each scissored tri, it's + * free to trim them in this case. + * + * Note that otherwise, the scissor planes only vary in 'C' value, + * and even then only on state-changes. Could alternatively store + * these planes elsewhere. + */ + if (nr_planes == 7) { + tri->plane[3].step = step_scissor_minx; + tri->plane[3].dcdx = -1; + tri->plane[3].dcdy = 0; + tri->plane[3].c = 1-minx; + tri->plane[3].ei = 0; + tri->plane[3].eo = 1; + + tri->plane[4].step = step_scissor_maxx; + tri->plane[4].dcdx = 1; + tri->plane[4].dcdy = 0; + tri->plane[4].c = maxx; + tri->plane[4].ei = -1; + tri->plane[4].eo = 0; + + tri->plane[5].step = step_scissor_miny; + tri->plane[5].dcdx = 0; + tri->plane[5].dcdy = 1; + tri->plane[5].c = 1-miny; + tri->plane[5].ei = 0; + tri->plane[5].eo = 1; + + tri->plane[6].step = step_scissor_maxy; + tri->plane[6].dcdx = 0; + tri->plane[6].dcdy = -1; + tri->plane[6].c = maxy; + tri->plane[6].ei = -1; + tri->plane[6].eo = 0; + } + + /* * All fields of 'tri' are now set. The remaining code here is * concerned with binning. */ - /* Convert to tile coordinates: + /* Convert to tile coordinates, and inclusive ranges: */ - minx = minx / TILE_SIZE; - miny = miny / TILE_SIZE; - maxx = maxx / TILE_SIZE; - maxy = maxy / TILE_SIZE; + ix0 = minx / TILE_SIZE; + iy0 = miny / TILE_SIZE; + ix1 = (maxx-1) / TILE_SIZE; + iy1 = (maxy-1) / TILE_SIZE; /* * Clamp to framebuffer size */ - minx = MAX2(minx, 0); - miny = MAX2(miny, 0); - maxx = MIN2(maxx, scene->tiles_x - 1); - maxy = MIN2(maxy, scene->tiles_y - 1); + assert(ix0 == MAX2(ix0, 0)); + assert(iy0 == MAX2(iy0, 0)); + assert(ix1 == MIN2(ix1, scene->tiles_x - 1)); + assert(iy1 == MIN2(iy1, scene->tiles_y - 1)); /* Determine which tile(s) intersect the triangle's bounding box */ - if (miny == maxy && minx == maxx) + if (iy0 == iy1 && ix0 == ix1) { /* Triangle is contained in a single tile: */ - lp_scene_bin_command( scene, minx, miny, lp_rast_triangle, - lp_rast_arg_triangle(tri) ); + lp_scene_bin_command( scene, ix0, iy0, + lp_rast_tri_tab[nr_planes], + lp_rast_arg_triangle(tri, (1<<nr_planes)-1) ); } - else + else { - int c1 = (tri->c1 + - tri->dx12 * miny * TILE_SIZE - - tri->dy12 * minx * TILE_SIZE); - int c2 = (tri->c2 + - tri->dx23 * miny * TILE_SIZE - - tri->dy23 * minx * TILE_SIZE); - int c3 = (tri->c3 + - tri->dx31 * miny * TILE_SIZE - - tri->dy31 * minx * TILE_SIZE); - - int ei1 = tri->ei1 << TILE_ORDER; - int ei2 = tri->ei2 << TILE_ORDER; - int ei3 = tri->ei3 << TILE_ORDER; - - int eo1 = tri->eo1 << TILE_ORDER; - int eo2 = tri->eo2 << TILE_ORDER; - int eo3 = tri->eo3 << TILE_ORDER; - - int xstep1 = -(tri->dy12 << TILE_ORDER); - int xstep2 = -(tri->dy23 << TILE_ORDER); - int xstep3 = -(tri->dy31 << TILE_ORDER); - - int ystep1 = tri->dx12 << TILE_ORDER; - int ystep2 = tri->dx23 << TILE_ORDER; - int ystep3 = tri->dx31 << TILE_ORDER; + int c[7]; + int ei[7]; + int eo[7]; + int xstep[7]; + int ystep[7]; int x, y; + + for (i = 0; i < nr_planes; i++) { + c[i] = (tri->plane[i].c + + tri->plane[i].dcdy * iy0 * TILE_SIZE - + tri->plane[i].dcdx * ix0 * TILE_SIZE); + + ei[i] = tri->plane[i].ei << TILE_ORDER; + eo[i] = tri->plane[i].eo << TILE_ORDER; + xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER); + ystep[i] = tri->plane[i].dcdy << TILE_ORDER; + } + /* Test tile-sized blocks against the triangle. @@ -586,64 +730,67 @@ do_triangle_ccw(struct lp_setup_context *setup, * contained inside the tri, bin an lp_rast_shade_tile command. * Else, bin a lp_rast_triangle command. */ - for (y = miny; y <= maxy; y++) + for (y = iy0; y <= iy1; y++) { - int cx1 = c1; - int cx2 = c2; - int cx3 = c3; boolean in = FALSE; /* are we inside the triangle? */ + int cx[7]; + + for (i = 0; i < nr_planes; i++) + cx[i] = c[i]; - for (x = minx; x <= maxx; x++) + for (x = ix0; x <= ix1; x++) { - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { - /* do nothing */ + int out = 0; + int partial = 0; + + for (i = 0; i < nr_planes; i++) { + int planeout = cx[i] + eo[i]; + int planepartial = cx[i] + ei[i] - 1; + out |= (planeout >> 31); + partial |= (planepartial >> 31) & (1<<i); + } + + if (out) { + /* do nothing */ + if (in) + break; /* exiting triangle, all done with this row */ LP_COUNT(nr_empty_64); - if (in) - break; /* exiting triangle, all done with this row */ - } - else if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) - { + } + else if (partial) { + /* Not trivially accepted by at least one plane - + * rasterize/shade partial tile + */ + int count = util_bitcount(partial); + in = TRUE; + lp_scene_bin_command( scene, x, y, + lp_rast_tri_tab[count], + lp_rast_arg_triangle(tri, partial) ); + + LP_COUNT(nr_partially_covered_64); + } + else { /* triangle covers the whole tile- shade whole tile */ LP_COUNT(nr_fully_covered_64); - in = TRUE; - if (setup->fs.current.variant->opaque && + in = TRUE; + if (variant->opaque && !setup->fb.zsbuf) { lp_scene_bin_reset( scene, x, y ); - lp_scene_bin_command( scene, x, y, - lp_rast_set_state, - lp_rast_arg_state(setup->fs.stored) ); } lp_scene_bin_command( scene, x, y, lp_rast_shade_tile, lp_rast_arg_inputs(&tri->inputs) ); - } - else - { - /* rasterizer/shade partial tile */ - LP_COUNT(nr_partially_covered_64); - in = TRUE; - lp_scene_bin_command( scene, x, y, - lp_rast_triangle, - lp_rast_arg_triangle(tri) ); - } + } /* Iterate cx values across the region: */ - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; + for (i = 0; i < nr_planes; i++) + cx[i] += xstep[i]; } /* Iterate c values down the region: */ - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; + for (i = 0; i < nr_planes; i++) + c[i] += ystep[i]; } } } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 65115052cd..5953d690a4 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -31,9 +31,6 @@ * Code generate the whole fragment pipeline. * * The fragment pipeline consists of the following stages: - * - triangle edge in/out testing - * - scissor test - * - stipple (TBI) * - early depth test * - fragment shader * - alpha test @@ -97,6 +94,7 @@ #include "lp_state.h" #include "lp_tex_sample.h" #include "lp_flush.h" +#include "lp_state_fs.h" #include <llvm-c/Analysis.h> @@ -170,177 +168,63 @@ generate_depth_stencil(LLVMBuilderRef builder, /** - * Generate the code to do inside/outside triangle testing for the + * Expand the relevent bits of mask_input to a 4-dword mask for the * four pixels in a 2x2 quad. This will set the four elements of the * quad mask vector to 0 or ~0. - * \param i which quad of the quad group to test, in [0,3] + * + * \param quad which quad of the quad group to test, in [0,3] + * \param mask_input bitwise mask for the whole 4x4 stamp */ -static void -generate_tri_edge_mask(LLVMBuilderRef builder, - unsigned i, - LLVMValueRef *mask, /* ivec4, out */ - LLVMValueRef c0, /* int32 */ - LLVMValueRef c1, /* int32 */ - LLVMValueRef c2, /* int32 */ - LLVMValueRef step0_ptr, /* ivec4 */ - LLVMValueRef step1_ptr, /* ivec4 */ - LLVMValueRef step2_ptr) /* ivec4 */ +static LLVMValueRef +generate_quad_mask(LLVMBuilderRef builder, + struct lp_type fs_type, + unsigned quad, + LLVMValueRef mask_input) /* int32 */ { -#define OPTIMIZE_IN_OUT_TEST 0 -#if OPTIMIZE_IN_OUT_TEST - struct lp_build_if_state ifctx; - LLVMValueRef not_draw_all; -#endif - struct lp_build_flow_context *flow; - struct lp_type i32_type; - LLVMTypeRef i32vec4_type; - LLVMValueRef c0_vec, c1_vec, c2_vec; - LLVMValueRef in_out_mask; - - assert(i < 4); - - /* int32 vector type */ - memset(&i32_type, 0, sizeof i32_type); - i32_type.floating = FALSE; /* values are integers */ - i32_type.sign = TRUE; /* values are signed */ - i32_type.norm = FALSE; /* values are not normalized */ - i32_type.width = 32; /* 32-bit int values */ - i32_type.length = 4; /* 4 elements per vector */ - - i32vec4_type = lp_build_int32_vec4_type(); + struct lp_type mask_type; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef bits[4]; + LLVMValueRef mask; /* - * Use a conditional here to do detailed pixel in/out testing. - * We only have to do this if c0 != INT_MIN. + * XXX: We'll need a different path for 16 x u8 */ - flow = lp_build_flow_create(builder); - lp_build_flow_scope_begin(flow); - - { -#if OPTIMIZE_IN_OUT_TEST - /* not_draw_all = (c0 != INT_MIN) */ - not_draw_all = LLVMBuildICmp(builder, - LLVMIntNE, - c0, - LLVMConstInt(LLVMInt32Type(), INT_MIN, 0), - ""); - - in_out_mask = lp_build_const_int_vec(i32_type, ~0); - - - lp_build_flow_scope_declare(flow, &in_out_mask); - - /* if (not_draw_all) {... */ - lp_build_if(&ifctx, flow, builder, not_draw_all); -#endif - { - LLVMValueRef step0_vec, step1_vec, step2_vec; - LLVMValueRef m0_vec, m1_vec, m2_vec; - LLVMValueRef index, m; - - /* c0_vec = {c0, c0, c0, c0} - * Note that we emit this code four times but LLVM optimizes away - * three instances of it. - */ - c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); - c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); - c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); - lp_build_name(c0_vec, "edgeconst0vec"); - lp_build_name(c1_vec, "edgeconst1vec"); - lp_build_name(c2_vec, "edgeconst2vec"); - - /* load step0vec, step1, step2 vec from memory */ - index = LLVMConstInt(LLVMInt32Type(), i, 0); - step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); - step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); - step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), ""); - lp_build_name(step0_vec, "step0vec"); - lp_build_name(step1_vec, "step1vec"); - lp_build_name(step2_vec, "step2vec"); - - /* m0_vec = step0_ptr[i] > c0_vec */ - m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec); - m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec); - m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec); - - /* in_out_mask = m0_vec & m1_vec & m2_vec */ - m = LLVMBuildAnd(builder, m0_vec, m1_vec, ""); - in_out_mask = LLVMBuildAnd(builder, m, m2_vec, ""); - lp_build_name(in_out_mask, "inoutmaskvec"); - } -#if OPTIMIZE_IN_OUT_TEST - lp_build_endif(&ifctx); -#endif - - } - lp_build_flow_scope_end(flow); - lp_build_flow_destroy(flow); + assert(fs_type.width == 32); + assert(fs_type.length == 4); + mask_type = lp_int_type(fs_type); - /* This is the initial alive/dead pixel mask for a quad of four pixels. - * It's an int[4] vector with each word set to 0 or ~0. - * Words will get cleared when pixels faile the Z test, etc. + /* + * mask_input >>= (quad * 4) */ - *mask = in_out_mask; -} - - -static LLVMValueRef -generate_scissor_test(LLVMBuilderRef builder, - LLVMValueRef context_ptr, - const struct lp_build_interp_soa_context *interp, - struct lp_type type) -{ - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1]; - LLVMValueRef xmin, ymin, xmax, ymax; - LLVMValueRef m0, m1, m2, m3, m; - - /* xpos, ypos contain the window coords for the four pixels in the quad */ - assert(xpos); - assert(ypos); - - /* get the current scissor bounds, convert to vectors */ - xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr); - xmin = lp_build_broadcast(builder, vec_type, xmin); - - ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr); - ymin = lp_build_broadcast(builder, vec_type, ymin); - xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr); - xmax = lp_build_broadcast(builder, vec_type, xmax); + mask_input = LLVMBuildLShr(builder, + mask_input, + LLVMConstInt(i32t, quad * 4, 0), + ""); - ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr); - ymax = lp_build_broadcast(builder, vec_type, ymax); + /* + * mask = { mask_input & (1 << i), for i in [0,3] } + */ - /* compare the fragment's position coordinates against the scissor bounds */ - m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin); - m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin); - m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax); - m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax); + mask = lp_build_broadcast(builder, lp_build_vec_type(mask_type), mask_input); - /* AND all the masks together */ - m = LLVMBuildAnd(builder, m0, m1, ""); - m = LLVMBuildAnd(builder, m, m2, ""); - m = LLVMBuildAnd(builder, m, m3, ""); + bits[0] = LLVMConstInt(i32t, 1 << 0, 0); + bits[1] = LLVMConstInt(i32t, 1 << 1, 0); + bits[2] = LLVMConstInt(i32t, 1 << 2, 0); + bits[3] = LLVMConstInt(i32t, 1 << 3, 0); - lp_build_name(m, "scissormask"); + mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), ""); - return m; -} + /* + * mask = mask != 0 ? ~0 : 0 + */ + mask = lp_build_compare(builder, + mask_type, PIPE_FUNC_NOTEQUAL, + mask, + lp_build_const_int_vec(mask_type, 0)); -static LLVMValueRef -build_int32_vec_const(int value) -{ - struct lp_type i32_type; - - memset(&i32_type, 0, sizeof i32_type); - i32_type.floating = FALSE; /* values are integers */ - i32_type.sign = TRUE; /* values are signed */ - i32_type.norm = FALSE; /* values are not normalized */ - i32_type.width = 32; /* 32-bit int values */ - i32_type.length = 4; /* 4 elements per vector */ - return lp_build_const_int_vec(i32_type, value); + return mask; } @@ -348,7 +232,7 @@ build_int32_vec_const(int value) /** * Generate the fragment shader, depth/stencil test, and alpha tests. * \param i which quad in the tile, in range [0,3] - * \param do_tri_test if 1, do triangle edge in/out testing + * \param partial_mask if 1, do mask_input testing */ static void generate_fs(struct llvmpipe_context *lp, @@ -364,13 +248,8 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef (*color)[4], LLVMValueRef depth_ptr, LLVMValueRef facing, - unsigned do_tri_test, - LLVMValueRef c0, - LLVMValueRef c1, - LLVMValueRef c2, - LLVMValueRef step0_ptr, - LLVMValueRef step1_ptr, - LLVMValueRef step2_ptr, + unsigned partial_mask, + LLVMValueRef mask_input, LLVMValueRef counter) { const struct tgsi_token *tokens = shader->base.tokens; @@ -411,23 +290,17 @@ generate_fs(struct llvmpipe_context *lp, lp_build_flow_scope_declare(flow, &z); /* do triangle edge testing */ - if (do_tri_test) { - generate_tri_edge_mask(builder, i, pmask, - c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); + if (partial_mask) { + *pmask = generate_quad_mask(builder, type, + i, mask_input); } else { - *pmask = build_int32_vec_const(~0); + *pmask = lp_build_const_int_vec(type, ~0); } /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); - if (key->scissor) { - LLVMValueRef smask = - generate_scissor_test(builder, context_ptr, interp, type); - lp_build_mask_update(&mask, smask); - } - early_depth_stencil_test = (key->depth.enabled || key->stencil[0].enabled) && !key->alpha.enabled && @@ -579,7 +452,7 @@ static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, struct lp_fragment_shader_variant *variant, - unsigned do_tri_test) + unsigned partial_mask) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; @@ -589,9 +462,8 @@ generate_fragment(struct llvmpipe_context *lp, LLVMTypeRef fs_elem_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; - LLVMTypeRef arg_types[16]; + LLVMTypeRef arg_types[11]; LLVMTypeRef func_type; - LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; @@ -600,7 +472,8 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef dady_ptr; LLVMValueRef color_ptr_ptr; LLVMValueRef depth_ptr; - LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr, counter = NULL; + LLVMValueRef mask_input; + LLVMValueRef counter = NULL; LLVMBasicBlockRef block; LLVMBuilderRef builder; struct lp_build_sampler_soa *sampler; @@ -645,7 +518,7 @@ generate_fragment(struct llvmpipe_context *lp, blend_vec_type = lp_build_vec_type(blend_type); util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", - shader->no, variant->no, do_tri_test ? "edge" : "whole"); + shader->no, variant->no, partial_mask ? "partial" : "whole"); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ @@ -656,23 +529,15 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ - arg_types[9] = LLVMInt32Type(); /* c0 */ - arg_types[10] = LLVMInt32Type(); /* c1 */ - arg_types[11] = LLVMInt32Type(); /* c2 */ - /* Note: the step arrays are built as int32[16] but we interpret - * them here as int32_vec4[4]. - */ - arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ - arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ - arg_types[14] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ - arg_types[15] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ + arg_types[9] = LLVMInt32Type(); /* mask_input */ + arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); function = LLVMAddFunction(screen->module, func_name, func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); - variant->function[do_tri_test] = function; + variant->function[partial_mask] = function; /* XXX: need to propagate noalias down into color param now we are @@ -691,12 +556,7 @@ generate_fragment(struct llvmpipe_context *lp, dady_ptr = LLVMGetParam(function, 6); color_ptr_ptr = LLVMGetParam(function, 7); depth_ptr = LLVMGetParam(function, 8); - c0 = LLVMGetParam(function, 9); - c1 = LLVMGetParam(function, 10); - c2 = LLVMGetParam(function, 11); - step0_ptr = LLVMGetParam(function, 12); - step1_ptr = LLVMGetParam(function, 13); - step2_ptr = LLVMGetParam(function, 14); + mask_input = LLVMGetParam(function, 9); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -706,15 +566,10 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(dady_ptr, "dady"); lp_build_name(color_ptr_ptr, "color_ptr_ptr"); lp_build_name(depth_ptr, "depth"); - lp_build_name(c0, "c0"); - lp_build_name(c1, "c1"); - lp_build_name(c2, "c2"); - lp_build_name(step0_ptr, "step0"); - lp_build_name(step1_ptr, "step1"); - lp_build_name(step2_ptr, "step2"); + lp_build_name(mask_input, "mask_input"); if (key->occlusion_count) { - counter = LLVMGetParam(function, 15); + counter = LLVMGetParam(function, 10); lp_build_name(counter, "counter"); } @@ -763,9 +618,9 @@ generate_fragment(struct llvmpipe_context *lp, out_color, depth_ptr_i, facing, - do_tri_test, - c0, c1, c2, - step0_ptr, step1_ptr, step2_ptr, counter); + partial_mask, + mask_input, + counter); for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) for(chan = 0; chan < NUM_CHANNELS; ++chan) @@ -792,9 +647,13 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } - lp_build_conv_mask(builder, fs_type, blend_type, - fs_mask, num_fs, - &blend_mask, 1); + if (partial_mask || !variant->opaque) { + lp_build_conv_mask(builder, fs_type, blend_type, + fs_mask, num_fs, + &blend_mask, 1); + } else { + blend_mask = lp_build_const_int_vec(blend_type, ~0); + } color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), @@ -832,8 +691,7 @@ generate_fragment(struct llvmpipe_context *lp, #endif /* Apply optimizations to LLVM IR */ - if (1) - LLVMRunFunctionPassManager(screen->pass, function); + LLVMRunFunctionPassManager(screen->pass, function); if (gallivm_debug & GALLIVM_DEBUG_IR) { /* Print the LLVM IR to stderr */ @@ -847,7 +705,7 @@ generate_fragment(struct llvmpipe_context *lp, { void *f = LLVMGetPointerToGlobal(screen->engine, function); - variant->jit_function[do_tri_test] = (lp_jit_frag_func)pointer_to_func(f); + variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f); if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(f); @@ -963,7 +821,6 @@ generate_variant(struct llvmpipe_context *lp, !key->stencil[0].enabled && !key->alpha.enabled && !key->depth.enabled && - !key->scissor && !shader->info.uses_kill ? TRUE : FALSE; @@ -1182,7 +1039,6 @@ make_variant_key(struct llvmpipe_context *lp, /* alpha.ref_value is passed in jit_context */ key->flatshade = lp->rasterizer->flatshade; - key->scissor = lp->rasterizer->scissor; if (lp->active_query_count) { key->occlusion_count = TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 593cd4de6b..37900fc544 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -54,7 +54,6 @@ struct lp_fragment_shader_variant_key enum pipe_format zsbuf_format; unsigned nr_cbufs:8; unsigned flatshade:1; - unsigned scissor:1; unsigned occlusion_count:1; struct { diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 1d42bdde4e..d236bad69d 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -36,6 +36,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_cpu_detect.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -898,13 +899,15 @@ static void alloc_image_data(struct llvmpipe_resource *lpr, unsigned level, enum lp_texture_layout layout) { + uint alignment = MAX2(16, util_cpu_caps.cacheline); + if (lpr->dt) assert(level == 0); if (layout == LP_TEX_LAYOUT_TILED) { /* tiled data is stored in regular memory */ uint buffer_size = tex_image_size(lpr, level, layout); - lpr->tiled[level].data = align_malloc(buffer_size, 16); + lpr->tiled[level].data = align_malloc(buffer_size, alignment); } else { assert(layout == LP_TEX_LAYOUT_LINEAR); @@ -920,7 +923,7 @@ alloc_image_data(struct llvmpipe_resource *lpr, unsigned level, else { /* not a display target - allocate regular memory */ uint buffer_size = tex_image_size(lpr, level, LP_TEX_LAYOUT_LINEAR); - lpr->linear[level].data = align_malloc(buffer_size, 16); + lpr->linear[level].data = align_malloc(buffer_size, alignment); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c index 2b63992dd7..0938f7aea7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c @@ -204,7 +204,7 @@ lp_tiled_to_linear(const void *src, void *dst, lp_tile_unswizzle_4ub(format, src_tile, dst, dst_stride, - ii, jj, tile_w, tile_h); + ii, jj); } } } @@ -293,7 +293,7 @@ lp_linear_to_tiled(const void *src, void *dst, lp_tile_swizzle_4ub(format, dst_tile, src, src_stride, - ii, jj, tile_w, tile_h); + ii, jj); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 07f71b8411..12dac1da6c 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -79,14 +79,14 @@ void lp_tile_swizzle_4ub(enum pipe_format format, uint8_t *dst, const void *src, unsigned src_stride, - unsigned x, unsigned y, unsigned w, unsigned h); + unsigned x, unsigned y); void lp_tile_unswizzle_4ub(enum pipe_format format, const uint8_t *src, void *dst, unsigned dst_stride, - unsigned x, unsigned y, unsigned w, unsigned h); + unsigned x, unsigned y); diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 5ab63cbac6..bf70c936b4 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -75,13 +75,13 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): src_native_type = native_type(format) print 'static void' - print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type) + print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type) print '{' print ' unsigned x, y;' print ' const uint8_t *src_row = src + y0*src_stride;' - print ' for (y = 0; y < h; ++y) {' + print ' for (y = 0; y < TILE_SIZE; ++y) {' print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride()) - print ' for (x = 0; x < w; ++x) {' + print ' for (x = 0; x < TILE_SIZE; ++x) {' names = ['']*4 if format.colorspace in ('rgb', 'srgb'): @@ -202,9 +202,9 @@ def emit_unrolled_unswizzle_code(format, src_channel): print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type) print ' unsigned int qx, qy, i;' print - print ' for (qy = 0; qy < h; qy += TILE_VECTOR_HEIGHT) {' + print ' for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {' print ' const unsigned py = y0 + qy;' - print ' for (qx = 0; qx < w; qx += TILE_VECTOR_WIDTH) {' + print ' for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {' print ' const unsigned px = x0 + qx;' print ' const uint8_t *r = src + 0 * TILE_C_STRIDE;' print ' const uint8_t *g = src + 1 * TILE_C_STRIDE;' @@ -231,9 +231,9 @@ def emit_tile_pixel_unswizzle_code(format, src_channel): print ' unsigned x, y;' print ' uint8_t *dst_row = dst + y0*dst_stride;' - print ' for (y = 0; y < h; ++y) {' + print ' for (y = 0; y < TILE_SIZE; ++y) {' print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) - print ' for (x = 0; x < w; ++x) {' + print ' for (x = 0; x < TILE_SIZE; ++x) {' if format.layout == PLAIN: if not format.is_array(): @@ -273,7 +273,7 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): name = format.short_name() print 'static void' - print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) + print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type) print '{' if format.layout == PLAIN \ and format.colorspace == 'rgb' \ @@ -297,9 +297,9 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): generate_format_read(format, dst_channel, dst_native_type, dst_suffix) print 'void' - print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) + print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)' % (dst_suffix, dst_native_type) print '{' - print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type + print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type print '#ifdef DEBUG' print ' lp_tile_swizzle_count += 1;' print '#endif' @@ -313,7 +313,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' print ' return;' print ' }' - print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);' + print ' func(dst, (const uint8_t *)src, src_stride, x, y);' print '}' print @@ -326,10 +326,10 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): generate_format_write(format, src_channel, src_native_type, src_suffix) print 'void' - print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) + print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)' % (src_suffix, src_native_type) print '{' - print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type + print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type print '#ifdef DEBUG' print ' lp_tile_unswizzle_count += 1;' print '#endif' @@ -343,7 +343,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));' print ' return;' print ' }' - print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);' + print ' func(src, (uint8_t *)dst, dst_stride, x, y);' print '}' print diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 2408a95353..895efaa1c4 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -24,6 +24,7 @@ #include "r300_texture.h" #include "util/u_format.h" +#include "util/u_pack_color.h" enum r300_blitter_op /* bitmask */ { @@ -79,6 +80,48 @@ static void r300_blitter_end(struct r300_context *r300) } } +static uint32_t r300_depth_clear_cb_value(enum pipe_format format, + const float* rgba) +{ + union util_color uc; + util_pack_color(rgba, format, &uc); + + if (util_format_get_blocksizebits(format) == 32) + return uc.ui; + else + return uc.us | (uc.us << 16); +} + +static boolean r300_cbzb_clear_allowed(struct r300_context *r300, + unsigned clear_buffers) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_surface *surf = r300_surface(fb->cbufs[0]); + unsigned bpp; + + /* Only color clear allowed, and only one colorbuffer. */ + if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) + return FALSE; + + /* The colorbuffer must be point-sampled. */ + if (surf->base.texture->nr_samples > 1) + return FALSE; + + bpp = util_format_get_blocksizebits(surf->base.format); + + /* ZB can only work with the two pixel sizes. */ + if (bpp != 16 && bpp != 32) + return FALSE; + + /* If the midpoint ZB offset is not aligned to 2048, it returns garbage + * with certain texture sizes. Macrotiling ensures the alignment. */ + if (!r300_texture(surf->base.texture)->mip_macrotile[surf->base.level]) + return FALSE; + + return TRUE; +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -124,15 +167,43 @@ static void r300_clear(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; + uint32_t width = fb->width; + uint32_t height = fb->height; + + /* Enable CBZB clear. */ + if (r300_cbzb_clear_allowed(r300, buffers)) { + struct r300_surface *surf = r300_surface(fb->cbufs[0]); + + hyperz->zb_depthclearvalue = + r300_depth_clear_cb_value(surf->base.format, rgba); + + width = surf->cbzb_width; + height = surf->cbzb_height; + + r300->cbzb_clear = TRUE; + r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + } /* Clear. */ r300_blitter_begin(r300, R300_CLEAR); util_blitter_clear(r300->blitter, - fb->width, - fb->height, + width, + height, fb->nr_cbufs, buffers, rgba, depth, stencil); r300_blitter_end(r300); + + /* Disable CBZB clear. */ + if (r300->cbzb_clear) { + r300->cbzb_clear = FALSE; + r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + } + + /* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */ + if (r300->flush_counter == 0) + pipe->flush(pipe, 0, NULL); } /* Clear a region of a color surface to a constant value. */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 7f43281af4..1beab7628a 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -330,7 +330,7 @@ static void r300_init_states(struct pipe_context *pipe) BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size); OUT_CB_REG(R300_ZB_BW_CNTL, 0); OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); - OUT_CB_REG(R300_SC_HYPERZ, 0x1C); + OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); END_CB; } } @@ -373,15 +373,16 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_blit_functions(r300); r300_init_flush_functions(r300); r300_init_query_functions(r300); - r300_init_render_functions(r300); r300_init_state_functions(r300); r300_init_resource_functions(r300); - rws->set_flush_cb(r300->rws, r300_flush_cb, r300); - r300->dirty_hw++; - r300->blitter = util_blitter_create(&r300->context); + /* Render functions must be initialized after blitter. */ + r300_init_render_functions(r300); + + rws->set_flush_cb(r300->rws, r300_flush_cb, r300); + r300->upload_ib = u_upload_create(&r300->context, 32 * 1024, 16, PIPE_BIND_INDEX_BUFFER); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 2483af7fb5..df4299b7ea 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -311,6 +311,13 @@ struct r300_surface { uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ + + /* Parameters dedicated to the CBZB clear. */ + uint32_t cbzb_width; /* Aligned width. */ + uint32_t cbzb_height; /* Half of the height. */ + uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */ + uint32_t cbzb_pitch; /* DEPTHPITCH. */ + uint32_t cbzb_format; /* ZB_FORMAT. */ }; struct r300_texture { @@ -525,6 +532,7 @@ struct r300_context { /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; + boolean cbzb_clear; /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; @@ -593,7 +601,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); /* r300_state.c */ enum r300_fb_state_change { - R300_CHANGED_FB_STATE = 0 + R300_CHANGED_FB_STATE = 0, + R300_CHANGED_CBZB_FLAG }; void r300_mark_fb_state_dirty(struct r300_context *r300, diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index a6cd86e392..31d4e14681 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -29,17 +29,20 @@ static const struct debug_named_value debug_options[] = { { "fp", DBG_FP, "Fragment program handling (for debugging)" }, { "vp", DBG_VP, "Vertex program handling (for debugging)" }, - { "draw", DBG_DRAW, "Draw and emit (for debugging)" }, + { "draw", DBG_DRAW, "Draw calls (for debugging)" }, + { "swtcl", DBG_SWTCL, "SWTCL-specific info (for debugging)" }, + { "rsblock", DBG_RS_BLOCK, "Rasterizer registers (for debugging)" }, + { "psc", DBG_PSC, "Vertex stream registers (for debugging)" }, { "tex", DBG_TEX, "Textures (for debugging)" }, { "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" }, { "fall", DBG_FALL, "Fallbacks (for debugging)" }, { "rs", DBG_RS, "Rasterizer (for debugging)" }, { "fb", DBG_FB, "Framebuffer (for debugging)" }, + { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, - { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for lulz)" }, - { "stats", DBG_STATS, "Gather statistics (for lulz)" }, + { "stats", DBG_STATS, "Gather statistics" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 5ce3eb63c5..daae6dd510 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -272,8 +272,17 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height = fb->height; + uint32_t width = fb->width; CS_LOCALS(r300); + if (r300->cbzb_clear) { + struct r300_surface *surf = r300_surface(fb->cbufs[0]); + + height = surf->cbzb_height; + width = surf->cbzb_width; + } + BEGIN_CS(size); /* Set up scissors. @@ -281,13 +290,13 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); if (r300->screen->caps.is_r500) { OUT_CS(0); - OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) | - ((fb->height - 1) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) | + ((height - 1) << R300_SCISSORS_Y_SHIFT)); } else { OUT_CS((1440 << R300_SCISSORS_X_SHIFT) | (1440 << R300_SCISSORS_Y_SHIFT)); - OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) | - ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) | + ((height + 1440-1) << R300_SCISSORS_Y_SHIFT)); } /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */ @@ -344,8 +353,20 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); } + /* Set up the ZB part of the CBZB clear. */ + if (r300->cbzb_clear) { + surf = r300_surface(fb->cbufs[0]); + + OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); + + OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain, 0); + + OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); + OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain, 0); + } /* Set up a zbuffer. */ - if (fb->zsbuf) { + else if (fb->zsbuf) { surf = r300_surface(fb->zsbuf); OUT_CS_REG(R300_ZB_FORMAT, surf->format); @@ -377,6 +398,18 @@ void r300_emit_hyperz_state(struct r300_context *r300, WRITE_CS_TABLE(state, size); } +void r300_emit_hyperz_end(struct r300_context *r300) +{ + struct r300_hyperz_state z = + *(struct r300_hyperz_state*)r300->hyperz_state.state; + + z.zb_bw_cntl = 0; + z.zb_depthclearvalue = 0; + z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; + + r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); +} + void r300_emit_fb_state_pipelined(struct r300_context *r300, unsigned size, void *state) { @@ -605,7 +638,7 @@ void r300_emit_rs_block_state(struct r300_context* r300, unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1; CS_LOCALS(r300); - if (SCREEN_DBG_ON(r300->screen, DBG_DRAW)) { + if (DBG_ON(r300, DBG_RS_BLOCK)) { r500_dump_rs_block(rs); fprintf(stderr, "r300: RS emit:\n"); @@ -750,7 +783,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) { CS_LOCALS(r300); - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " + DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, " "vertex size %d\n", r300->vbo, r300->vertex_info.size); /* Set the pointer to our vertex buffer. The emitted values are this: @@ -778,7 +811,7 @@ void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned i; CS_LOCALS(r300); - if (DBG_ON(r300, DBG_DRAW)) { + if (DBG_ON(r300, DBG_PSC)) { fprintf(stderr, "r300: PSC emit:\n"); for (i = 0; i < streams->count; i++) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 586ccda620..5d05039669 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -45,6 +45,11 @@ void r300_emit_clip_state(struct r300_context* r300, void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state); +void r300_emit_hyperz_state(struct r300_context *r300, + unsigned size, void *state); + +void r300_emit_hyperz_end(struct r300_context *r300); + void r300_emit_fs(struct r300_context* r300, unsigned size, void *state); void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state); @@ -64,9 +69,6 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state); -void r300_emit_hyperz_state(struct r300_context *r300, - unsigned size, void *state); - void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state); void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index ba840bfff8..6f31ba159a 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -48,11 +48,10 @@ static void r300_flush(struct pipe_context* pipe, } if (r300->dirty_hw) { + r300_emit_hyperz_end(r300); r300_emit_query_end(r300); - if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { - r300->flush_counter++; - } + r300->flush_counter++; r300->rws->flush_cs(r300->rws); r300->dirty_hw = 0; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 2c4e6c7211..e952895601 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -27,6 +27,22 @@ #include "r300_fs.h" /*****************************************************************************/ +/* The HyperZ setup */ +/*****************************************************************************/ + +static void r300_update_hyperz(struct r300_context* r300) +{ + struct r300_hyperz_state *z = + (struct r300_hyperz_state*)r300->hyperz_state.state; + + z->zb_bw_cntl = 0; + z->sc_hyperz = R300_SC_HYPERZ_ADJ_2; + + if (r300->cbzb_clear) + z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY; +} + +/*****************************************************************************/ /* The ZTOP state */ /*****************************************************************************/ @@ -118,4 +134,7 @@ static void r300_update_ztop(struct r300_context* r300) void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); + if (r300->hyperz_state.dirty) { + r300_update_hyperz(r300); + } } diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 53728431a6..970cb68837 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -224,6 +224,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300, /* Emitted in flush. */ end_dwords += 26; /* emit_query_end */ + end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */ cs_dwords += end_dwords; @@ -278,7 +279,6 @@ static boolean immd_is_good_idea(struct r300_context *r300, /* We shouldn't map buffers referenced by CS, busy buffers, * and ones placed in VRAM. */ - /* XXX Check for VRAM buffers. */ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; vbi = velem->vertex_buffer_index; @@ -286,6 +286,10 @@ static boolean immd_is_good_idea(struct r300_context *r300, if (!checked[vbi]) { vbuf = &r300->vertex_buffer[vbi]; + if (!(r300_buffer(vbuf->buffer)->domain & R300_DOMAIN_GTT)) { + return FALSE; + } + if (r300_buffer_is_referenced(&r300->context, vbuf->buffer, R300_REF_CS | R300_REF_HW)) { @@ -299,8 +303,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, } /***************************************************************************** - * The emission of draw packets for r500. Older GPUs may use these functions * - * after resolving fallback issues (e.g. stencil ref two-sided). * + * The HWTCL draw functions. * ****************************************************************************/ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, @@ -867,13 +870,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render, unsigned dwords = 6; CS_LOCALS(r300); - (void) i; (void) ptr; r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, NULL, dwords, 0, 0, NULL); - DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); + DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count); /* Uncomment to dump all VBOs rendered through this interface. * Slow and noisy! @@ -916,6 +918,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, unsigned free_dwords; CS_LOCALS(r300); + DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count); /* Reserve at least 256 dwords. * @@ -1017,6 +1020,88 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300) * End of SW TCL functions * ***************************************************************************/ +/* If we used a quad to draw a rectangle, the pixels on the main diagonal + * would be computed and stored twice, which makes the clear/copy codepaths + * somewhat inefficient. Instead we use a rectangular point sprite. */ +static void r300_blitter_draw_rectangle(struct blitter_context *blitter, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2, + float depth, + enum blitter_attrib_type type, + const float attrib[4]) +{ + struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter)); + unsigned last_sprite_coord_enable = r300->sprite_coord_enable; + unsigned width = x2 - x1; + unsigned height = y2 - y1; + unsigned vertex_size = + type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4; + unsigned dwords = 13 + vertex_size + + (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0); + const float zeros[4] = {0, 0, 0, 0}; + CB_LOCALS; + + if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) + r300->sprite_coord_enable = 1; + + r300_update_derived_state(r300); + + /* Mark some states we don't care about as non-dirty. */ + r300->clip_state.dirty = FALSE; + r300->viewport_state.dirty = FALSE; + + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); + + DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); + + BEGIN_CS_AS_CB(r300, dwords); + /* Set up GA. */ + OUT_CB_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16)); + + if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) { + /* Set up the GA to generate texcoords. */ + OUT_CB_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | + (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT)); + OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4); + OUT_CB_32F(attrib[0]); + OUT_CB_32F(attrib[3]); + OUT_CB_32F(attrib[2]); + OUT_CB_32F(attrib[1]); + } + + /* Set up VAP controls. */ + OUT_CB_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); + OUT_CB_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT); + OUT_CB_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CB_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CB(1); + OUT_CB(0); + + /* Draw. */ + OUT_CB_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size); + OUT_CB(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) | + R300_VAP_VF_CNTL__PRIM_POINTS); + + OUT_CB_32F(x1 + width * 0.5f); + OUT_CB_32F(y1 + height * 0.5f); + OUT_CB_32F(depth); + OUT_CB_32F(1); + + if (vertex_size == 8) { + if (!attrib) + attrib = zeros; + OUT_CB_TABLE(attrib, 4); + } + END_CB; + + /* Restore the state. */ + r300->clip_state.dirty = TRUE; + r300->rs_state.dirty = TRUE; + r300->viewport_state.dirty = TRUE; + + r300->sprite_coord_enable = last_sprite_coord_enable; +} + static void r300_resource_resolve(struct pipe_context* pipe, struct pipe_resource* dest, struct pipe_subresource subdest, @@ -1070,6 +1155,7 @@ void r300_init_render_functions(struct r300_context *r300) } r300->context.resource_resolve = r300_resource_resolve; + r300->blitter->draw_rectangle = r300_blitter_draw_rectangle; /* Plug in the two-sided stencil reference value fallback if needed. */ if (!r300->screen->caps.is_r500) diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index cad99ca845..a3b08555cd 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -256,7 +256,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, uint32_t retval = 0; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; - boolean is_rv350 = r300_screen(screen)->caps.is_rv350; boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM || format == PIPE_FORMAT_S8_USCALED_Z24_UNORM; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || @@ -272,6 +271,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, format == PIPE_FORMAT_R16G16B16_FLOAT || format == PIPE_FORMAT_R16G16B16A16_FLOAT; + /* Check multisampling support. */ switch (sample_count) { case 0: case 1: @@ -326,7 +326,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, /* Check vertex buffer format support. */ if (usage & PIPE_BIND_VERTEX_BUFFER && /* Half float is supported on >= RV350. */ - (is_rv350 || !is_half_float) && + (is_r400 || is_r500 || !is_half_float) && r300_translate_vertex_data_type(format) != R300_INVALID_FORMAT) { retval |= PIPE_BIND_VERTEX_BUFFER; } diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 29cd5dbe26..c6b4b57c3b 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -61,17 +61,19 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { * those changes. */ /*@{*/ -#define DBG_HELP (1 << 0) + /* Logging. */ +#define DBG_PSC (1 << 0) #define DBG_FP (1 << 1) #define DBG_VP (1 << 2) -/* The bit (1 << 3) is unused. */ +#define DBG_SWTCL (1 << 3) #define DBG_DRAW (1 << 4) #define DBG_TEX (1 << 5) #define DBG_TEXALLOC (1 << 6) #define DBG_RS (1 << 7) #define DBG_FALL (1 << 8) #define DBG_FB (1 << 9) +#define DBG_RS_BLOCK (1 << 10) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index b0722cb95f..f4c6a262d4 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -688,7 +688,9 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, /* Now compute the fb_state atom size. */ r300->fb_state.size = 2 + (8 * state->nr_cbufs); - if (state->zsbuf) + if (r300->cbzb_clear) + r300->fb_state.size += 10; + else if (state->zsbuf) r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14; /* The size of the rest of atoms stays the same. */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 3aa8deb63c..2ef9766578 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -102,7 +102,8 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) * they won't be rasterized. */ gen_count = 0; for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) { - if (vs_outputs->generic[i] != ATTR_UNUSED) { + if (vs_outputs->generic[i] != ATTR_UNUSED && + !(r300->sprite_coord_enable & (1 << i))) { r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, vs_outputs->generic[i]); gen_count++; @@ -118,7 +119,7 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) /* WPOS. */ if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED && gen_count < 8) { - DBG(r300, DBG_DRAW, "draw_emit_attrib: WPOS, index: %i\n", + DBG(r300, DBG_SWTCL, "draw_emit_attrib: WPOS, index: %i\n", vs_outputs->wpos); r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, vs_outputs->wpos); @@ -140,18 +141,19 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300) /* For each Draw attribute, route it to the fragment shader according * to the vs_output_tab. */ attrib_count = vinfo->num_attribs; - DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); + DBG(r300, DBG_SWTCL, "r300: attrib count: %d\n", attrib_count); for (i = 0; i < attrib_count; i++) { - DBG(r300, DBG_DRAW, "r300: attrib: index %d, interp %d, emit %d," - " vs_output_tab %d\n", vinfo->attrib[i].src_index, - vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - vs_output_tab[i]); - - /* Make sure we have a proper destination for our attribute. */ - assert(vs_output_tab[i] != -1); + if (vs_output_tab[i] == -1) { + assert(0); + abort(); + } format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + DBG(r300, DBG_SWTCL, + "r300: swtcl_vertex_psc [%i] <- %s\n", + vs_output_tab[i], util_format_short_name(format)); + /* Obtain the type of data in this attribute. */ type = r300_translate_vertex_data_type(format); if (type == R300_INVALID_FORMAT) { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index d378a7150d..e8b1d67007 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -1034,6 +1034,8 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, struct r300_surface* surface = CALLOC_STRUCT(r300_surface); if (surface) { + uint32_t stride, offset, tile_height; + pipe_reference_init(&surface->base.reference, 1); pipe_resource_reference(&surface->base.texture, texture); surface->base.format = texture->format; @@ -1054,6 +1056,34 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, surface->offset = r300_texture_get_offset(tex, level, zslice, face); surface->pitch = tex->fb_state.pitch[level]; surface->format = tex->fb_state.format; + + /* Parameters for the CBZB clear. */ + surface->cbzb_width = align(surface->base.width, 64); + + /* Height must be aligned to the size of a tile. */ + tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], + DIM_HEIGHT); + surface->cbzb_height = align((surface->base.height + 1) / 2, + tile_height); + + /* Offset must be aligned to 2K and must point at the beginning + * of a scanline. */ + stride = r300_texture_get_stride(r300_screen(screen), tex, level); + offset = surface->offset + stride * surface->cbzb_height; + surface->cbzb_midpoint_offset = offset & ~2047; + + surface->cbzb_pitch = surface->pitch & 0x1ffffc; + + if (util_format_get_blocksizebits(surface->base.format) == 32) + surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; + else + surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z; + + SCREEN_DBG(r300_screen(screen), DBG_TEX, + "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n", + surface->cbzb_width, surface->cbzb_height, + offset & 2047, + tex->mip_macrotile[level] ? "YES" : " NO"); } return &surface->base; diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 00b167e256..e0dd5cf8c2 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -97,15 +97,7 @@ rbug_draw_block_locked(struct rbug_context *rb_pipe, int flag) /* wait for rbug to clear the blocked flag */ while (rb_pipe->draw_blocked & flag) { rb_pipe->draw_blocked |= flag; -#ifdef PIPE_THREAD_HAVE_CONDVAR pipe_condvar_wait(rb_pipe->draw_cond, rb_pipe->draw_mutex); -#else - pipe_mutex_unlock(rb_pipe->draw_mutex); -#ifdef PIPE_SUBSYSTEM_WINDOWS_USER - Sleep(1); -#endif - pipe_mutex_lock(rb_pipe->draw_mutex); -#endif } } diff --git a/src/gallium/drivers/rbug/rbug_core.c b/src/gallium/drivers/rbug/rbug_core.c index f1aab3869b..9dc663b079 100644 --- a/src/gallium/drivers/rbug/rbug_core.c +++ b/src/gallium/drivers/rbug/rbug_core.c @@ -407,9 +407,7 @@ rbug_context_draw_step(struct rbug_rbug *tr_rbug, struct rbug_header *header, ui } pipe_mutex_unlock(rb_context->draw_mutex); -#ifdef PIPE_THREAD_HAVE_CONDVAR pipe_condvar_broadcast(rb_context->draw_cond); -#endif pipe_mutex_unlock(rb_screen->list_mutex); @@ -442,9 +440,7 @@ rbug_context_draw_unblock(struct rbug_rbug *tr_rbug, struct rbug_header *header, rb_context->draw_blocker &= ~unblock->unblock; pipe_mutex_unlock(rb_context->draw_mutex); -#ifdef PIPE_THREAD_HAVE_CONDVAR pipe_condvar_broadcast(rb_context->draw_cond); -#endif pipe_mutex_unlock(rb_screen->list_mutex); @@ -476,9 +472,7 @@ rbug_context_draw_rule(struct rbug_rbug *tr_rbug, struct rbug_header *header, ui rb_context->draw_blocker |= RBUG_BLOCK_RULE; pipe_mutex_unlock(rb_context->draw_mutex); -#ifdef PIPE_THREAD_HAVE_CONDVAR pipe_condvar_broadcast(rb_context->draw_cond); -#endif pipe_mutex_unlock(rb_screen->list_mutex); diff --git a/src/gallium/targets/Makefile.xorg b/src/gallium/targets/Makefile.xorg index 4237f944e0..c2d0064978 100644 --- a/src/gallium/targets/Makefile.xorg +++ b/src/gallium/targets/Makefile.xorg @@ -9,7 +9,8 @@ # Optional defines: # DRIVER_INCLUDES are appended to the list of includes directories. # DRIVER_DEFINES is not used for makedepend, but for compilation. -# DRIVER_LINKS are flags given to the linker +# DRIVER_PIPES are pipe drivers and modules that the driver depends on. +# DRIVER_LINKS are flags given to the linker. ### Basic defines ### @@ -27,13 +28,21 @@ INCLUDES = \ LIBNAME_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET) +ifeq ($(MESA_LLVM),1) +LD = g++ +LDFLAGS += $(LLVM_LDFLAGS) +USE_CXX=1 +DRIVER_PIPES += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +DRIVER_LINKS += $(LLVM_LIBS) -lm -ldl +endif + ##### TARGETS ##### default: depend $(TOP)/$(LIB_DIR)/gallium $(LIBNAME) $(LIBNAME_STAGING) -$(LIBNAME): $(OBJECTS) Makefile $(LIBS) - $(MKLIB) -noprefix -o $@ $(OBJECTS) $(DRIVER_LINKS) +$(LIBNAME): $(OBJECTS) Makefile ../Makefile.xorg $(LIBS) $(DRIVER_PIPES) + $(MKLIB) -noprefix -o $@ $(OBJECTS) $(DRIVER_PIPES) $(GALLIUM_AUXILIARIES) $(DRIVER_LINKS) depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURCES) rm -f depend diff --git a/src/gallium/targets/dri-i965/SConscript b/src/gallium/targets/dri-i965/SConscript index 7eb3c436a3..684e3488f7 100644 --- a/src/gallium/targets/dri-i965/SConscript +++ b/src/gallium/targets/dri-i965/SConscript @@ -17,7 +17,6 @@ env.Append(CPPDEFINES = [ env.Prepend(LIBS = [ st_dri, i965drm, - ws_drm, ws_wrapper, i965, trace, diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile index b173ceb994..e745023ba5 100644 --- a/src/gallium/targets/libgl-xlib/Makefile +++ b/src/gallium/targets/libgl-xlib/Makefile @@ -97,7 +97,7 @@ tags: etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h clean: - -rm -f *.o + -rm -f *.o depend include depend diff --git a/src/gallium/targets/xorg-i915/Makefile b/src/gallium/targets/xorg-i915/Makefile index 45b0622ca9..865240404c 100644 --- a/src/gallium/targets/xorg-i915/Makefile +++ b/src/gallium/targets/xorg-i915/Makefile @@ -10,15 +10,15 @@ C_SOURCES = \ DRIVER_DEFINES = \ -DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD -DRIVER_LINKS = \ +DRIVER_PIPES = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/i915/drm/libi915drm.a \ $(TOP)/src/gallium/drivers/i915/libi915.a \ $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(GALLIUM_AUXILIARIES) \ + $(TOP)/src/gallium/drivers/rbug/librbug.a + +DRIVER_LINKS = \ $(shell pkg-config --libs libdrm libdrm_intel) include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-i965/Makefile b/src/gallium/targets/xorg-i965/Makefile index 9bb8252be2..494dce41c8 100644 --- a/src/gallium/targets/xorg-i965/Makefile +++ b/src/gallium/targets/xorg-i965/Makefile @@ -11,15 +11,16 @@ DRIVER_DEFINES = \ -DHAVE_CONFIG_H -DGALLIUM_SOFTPIPE \ -DGALLIUM_RBUG -DGALLIUM_TRACE -DRIVER_LINKS = \ +DRIVER_PIPES = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/i965/drm/libi965drm.a \ $(TOP)/src/gallium/drivers/i965/libi965.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/winsys/sw/wrapper/libwsw.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(GALLIUM_AUXILIARIES) \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a + +DRIVER_LINKS = \ $(shell pkg-config --libs libdrm libdrm_intel) include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-nouveau/Makefile b/src/gallium/targets/xorg-nouveau/Makefile index 93f53e63bf..2fcd9ffb7d 100644 --- a/src/gallium/targets/xorg-nouveau/Makefile +++ b/src/gallium/targets/xorg-nouveau/Makefile @@ -10,15 +10,16 @@ C_SOURCES = \ DRIVER_DEFINES = \ -DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DRIVER_LINKS = \ +DRIVER_PIPES = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \ - $(GALLIUM_AUXILIARIES) \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a + +DRIVER_LINKS = \ $(shell pkg-config --libs libdrm libdrm_nouveau) include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-radeon/Makefile b/src/gallium/targets/xorg-radeon/Makefile index 7def3a2261..d3bc356992 100644 --- a/src/gallium/targets/xorg-radeon/Makefile +++ b/src/gallium/targets/xorg-radeon/Makefile @@ -10,15 +10,15 @@ C_SOURCES = \ DRIVER_DEFINES = \ -DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD +DRIVER_PIPES = \ + $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ + $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ + $(TOP)/src/gallium/drivers/r300/libr300.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a + DRIVER_LINKS = \ - $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ - $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ - $(TOP)/src/gallium/drivers/r300/libr300.a \ - $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(GALLIUM_AUXILIARIES) \ $(shell pkg-config --libs libdrm libdrm_radeon) include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-vmwgfx/Makefile b/src/gallium/targets/xorg-vmwgfx/Makefile index 73a2cea232..04a444f5e9 100644 --- a/src/gallium/targets/xorg-vmwgfx/Makefile +++ b/src/gallium/targets/xorg-vmwgfx/Makefile @@ -20,15 +20,14 @@ DRIVER_DEFINES = \ -DGALLIUM_TRACE \ -DHAVE_CONFIG_H - -DRIVER_LINKS = \ +DRIVER_PIPES = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/svga/libsvga.a \ - $(GALLIUM_AUXILIARIES) \ - $(shell pkg-config --libs --silence-errors libkms) \ - $(shell pkg-config --libs libdrm) + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a + +DRIVER_LINKS = \ + $(shell pkg-config --libs libdrm libkms) include ../Makefile.xorg diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index cb4ec32fea..c5f133e7b2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -361,8 +361,8 @@ void radeon_drm_bufmgr_write_reloc(struct pb_buffer *_buf, retval = radeon_cs_write_reloc(buf->mgr->rws->cs, buf->bo, gem_rd, gem_wd, flags); if (retval) { - debug_printf("radeon: Relocation of %p (%d, %d, %d) failed!\n", - buf, gem_rd, gem_wd, flags); + fprintf(stderr, "radeon: Relocation of %p (%d, %d, %d) failed!\n", + buf, gem_rd, gem_wd, flags); } } diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 51cfc0fd40..af35497fd7 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -252,8 +252,13 @@ static void radeon_flush_cs(struct r300_winsys_screen *rws) /* Emit the CS. */ retval = radeon_cs_emit(ws->cs); if (retval) { - debug_printf("radeon: Bad CS, dumping...\n"); - radeon_cs_print(ws->cs, stderr); + if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { + fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); + radeon_cs_print(ws->cs, stderr); + } else { + fprintf(stderr, "radeon: The kernel rejected CS, " + "see dmesg for more information.\n"); + } } /* Reset CS. diff --git a/src/mapi/glapi/gen/gl_enums.py b/src/mapi/glapi/gen/gl_enums.py index 644e085522..0caa01030f 100644 --- a/src/mapi/glapi/gen/gl_enums.py +++ b/src/mapi/glapi/gen/gl_enums.py @@ -105,7 +105,8 @@ const char *_mesa_lookup_enum_by_nr( int nr ) } else { /* this is not re-entrant safe, no big deal here */ - sprintf(token_tmp, "0x%x", nr); + _mesa_snprintf(token_tmp, sizeof(token_tmp) - 1, "0x%x", nr); + token_tmp[sizeof(token_tmp) - 1] = '\\0'; return token_tmp; } } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index bd8d63246a..d347b4df9c 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -146,7 +146,8 @@ static unsigned long t_src(struct r300_vertex_program_code *vp, t_swizzle(GET_SWZ(src->Swizzle, 2)), t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), - src->Negate) | (src->RelAddr << 4); + src->Negate) | + (src->RelAddr << 4) | (src->Abs << 3); } static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, @@ -162,7 +163,7 @@ static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | - (src->RelAddr << 4); + (src->RelAddr << 4) | (src->Abs << 3); } static int valid_dst(struct r300_vertex_program_code *vp, @@ -487,6 +488,44 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c } } +/** + * R3xx-R4xx vertex engine does not support the Absolute source operand modifier + * and the Saturate opcode modifier. Only Absolute is currently transformed. + */ +static int transform_nonnative_modifiers( + struct radeon_compiler *c, + struct rc_instruction *inst, + void* unused) +{ + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + /* Transform ABS(a) to MAX(a, -a). */ + for (i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].Abs) { + struct rc_instruction *new_inst; + unsigned temp; + + inst->U.I.SrcReg[i].Abs = 0; + + temp = rc_find_free_temporary(c); + + new_inst = rc_insert_new_instruction(c, inst->Prev); + new_inst->U.I.Opcode = RC_OPCODE_MAX; + new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + new_inst->U.I.DstReg.Index = temp; + new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; + new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + + memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = temp; + inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; + } + } + return 1; +} /** * Vertex engine cannot read two inputs or two constants at the same time. @@ -619,15 +658,33 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) debug_program_log(compiler, "after emulate branches"); - { + if (compiler->Base.is_r500) { struct radeon_program_transformation transformations[] = { { &r300_transform_vertex_alu, 0 }, { &r300_transform_trig_scale_vertex, 0 } }; radeonLocalTransform(&compiler->Base, 2, transformations); - } - debug_program_log(compiler, "after native rewrite"); + debug_program_log(compiler, "after native rewrite"); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_vertex_alu, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(&compiler->Base, 2, transformations); + + debug_program_log(compiler, "after native rewrite"); + + /* Note: This pass has to be done seperately from ALU rewrite, + * because it needs to check every instruction. + */ + struct radeon_program_transformation transformations2[] = { + { &transform_nonnative_modifiers, 0 }, + }; + radeonLocalTransform(&compiler->Base, 1, transformations2); + + debug_program_log(compiler, "after emulate modifiers"); + } { /* Note: This pass has to be done seperately from ALU rewrite, diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 282c0e18bc..5ae9f49840 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -523,8 +523,7 @@ static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *in r300ConvertAttrib(ctx, count, input[i], &vbuf->attribs[index]); } else { if (input[i]->BufferObj->Name) { - if (stride % 4 != 0) { - assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + if (stride % 4 != 0 || (intptr_t)input[i]->Ptr % 4 != 0) { r300AlignDataToDword(ctx, input[i], count, &vbuf->attribs[index]); vbuf->attribs[index].is_named_bo = GL_FALSE; } else { diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index afe2d55dc7..8013553f67 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -46,7 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r600_context.h" #include "radeon_reg.h" #include "r600_cmdbuf.h" -#include "r600_emit.h" #include "radeon_bocs_wrapper.h" #include "radeon_reg.h" diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h index dff0009699..78fccd0b60 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h @@ -37,7 +37,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define __R600_CMDBUF_H__ #include "r600_context.h" -#include "r600_emit.h" #define RADEON_CP_PACKET3_NOP 0xC0001000 #define RADEON_CP_PACKET3_NEXT_CHAR 0xC0001900 diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index f4aed4e87f..84d9d42312 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -59,7 +59,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_buffer_objects.h" #include "radeon_span.h" #include "r600_cmdbuf.h" -#include "r600_emit.h" #include "radeon_bocs_wrapper.h" #include "radeon_queryobj.h" #include "r600_blit.h" diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index aab1a7947a..bf17a977ce 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -38,6 +38,7 @@ #include "r600_context.h" #include "r600_cmdbuf.h" +#include "r600_emit.h" #include "r700_fragprog.h" diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.h b/src/mesa/drivers/dri/r600/r700_oglprog.h index fe2e9d1974..4d42133867 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.h +++ b/src/mesa/drivers/dri/r600/r700_oglprog.h @@ -27,7 +27,7 @@ #ifndef _R700_OGLPROG_H_ #define _R700_OGLPROG_H_ -#include "r600_context.h" +#include "main/dd.h" extern void r700InitShaderFuncs(struct dd_function_table *functions); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 32f538f1c3..137f3007ce 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -42,6 +42,7 @@ #include "radeon_debug.h" #include "r600_context.h" #include "r600_cmdbuf.h" +#include "r600_emit.h" #include "program/programopt.h" #include "r700_debug.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index 6cd1d87de2..c877e6c176 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -602,17 +602,17 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t __FUNCTION__, texObj ,t->minLod, t->maxLod); radeon_mipmap_tree *dst_miptree; - dst_miptree = get_biggest_matching_miptree(t, t->minLod, t->maxLod); + dst_miptree = get_biggest_matching_miptree(t, t->base.BaseLevel, t->base.MaxLevel); + radeon_miptree_unreference(&t->mt); if (!dst_miptree) { - radeon_miptree_unreference(&t->mt); radeon_try_alloc_miptree(rmesa, t); - dst_miptree = t->mt; radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "%s: No matching miptree found, allocated new one %p\n", __FUNCTION__, t->mt); } else { + radeon_miptree_reference(dst_miptree, &t->mt); radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "%s: Using miptree %p\n", __FUNCTION__, t->mt); } @@ -629,7 +629,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t "Checking image level %d, face %d, mt %p ... ", level, face, img->mt); - if (img->mt != dst_miptree) { + if (img->mt != t->mt) { radeon_print(RADEON_TEXTURE, RADEON_TRACE, "MIGRATING\n"); @@ -637,7 +637,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t if (src_bo && radeon_bo_is_referenced_by_cs(src_bo, rmesa->cmdbuf.cs)) { radeon_firevertices(rmesa); } - migrate_image_to_miptree(dst_miptree, img, face, level); + migrate_image_to_miptree(t->mt, img, face, level); } else radeon_print(RADEON_TEXTURE, RADEON_TRACE, "OK\n"); } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c b/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c index 3ababb1ef5..f878b48e5f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c @@ -31,6 +31,7 @@ #include "radeon_common_context.h" #include "radeon_texture.h" +#include "radeon_mipmap_tree.h" #include "main/texgetimage.h" @@ -51,7 +52,15 @@ radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level, __func__, ctx, texObj, image, compressed); if (image->mt) { + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); /* Map the texture image read-only */ + if (radeon_bo_is_referenced_by_cs(image->mt->bo, rmesa->cmdbuf.cs)) { + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s: called for texture that is queued for GPU processing\n", + __func__); + radeon_firevertices(rmesa); + } + radeon_teximage_map(image, GL_FALSE); } else { /* Image hasn't been uploaded to a miptree yet */ diff --git a/src/mesa/main/enums.c b/src/mesa/main/enums.c index 456d20603d..bc18e1b113 100644 --- a/src/mesa/main/enums.c +++ b/src/mesa/main/enums.c @@ -5648,7 +5648,8 @@ const char *_mesa_lookup_enum_by_nr( int nr ) } else { /* this is not re-entrant safe, no big deal here */ - sprintf(token_tmp, "0x%x", nr); + _mesa_snprintf(token_tmp, sizeof(token_tmp) - 1, "0x%x", nr); + token_tmp[sizeof(token_tmp) - 1] = '\0'; return token_tmp; } } |