diff options
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
35 files changed, 2274 insertions, 604 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 2892b62920..dec874623e 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -27,6 +27,8 @@ C_SOURCES = \ lp_scene_queue.c \ lp_screen.c \ lp_setup.c \ + lp_setup_coef.c \ + lp_setup_coef_intrin.c \ lp_setup_line.c \ lp_setup_point.c \ lp_setup_tri.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5583fca38e..8d57db72cf 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -63,6 +63,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_setup_line.c', 'lp_setup_point.c', 'lp_setup_tri.c', + 'lp_setup_coef.c', + 'lp_setup_coef_intrin.c', 'lp_setup_vbuf.c', 'lp_state_blend.c', 'lp_state_clip.c', diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 7543bd7b2b..39f2c6085e 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -85,6 +85,14 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) align_free( llvmpipe ); } +static void +do_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence) +{ + llvmpipe_flush(pipe, flags, fence, __FUNCTION__); +} + struct pipe_context * llvmpipe_create_context( struct pipe_screen *screen, void *priv ) @@ -109,7 +117,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.destroy = llvmpipe_destroy; llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; llvmpipe->pipe.clear = llvmpipe_clear; - llvmpipe->pipe.flush = llvmpipe_flush; + llvmpipe->pipe.flush = do_flush; llvmpipe_init_blend_funcs(llvmpipe); llvmpipe_init_clip_funcs(llvmpipe); @@ -147,9 +155,13 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe); draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe); - /* convert points and lines into triangles: */ - draw_wide_point_threshold(llvmpipe->draw, 0.0); - draw_wide_line_threshold(llvmpipe->draw, 0.0); + /* convert points and lines into triangles: + * (otherwise, draw points and lines natively) + */ + draw_wide_point_sprites(llvmpipe->draw, FALSE); + draw_enable_point_sprites(llvmpipe->draw, FALSE); + draw_wide_point_threshold(llvmpipe->draw, 10000.0); + draw_wide_line_threshold(llvmpipe->draw, 10000.0); #if USE_DRAW_STAGE_PSTIPPLE /* Do polygon stipple w/ texture map + frag prog? */ diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 50f9091c3c..34fa20e204 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -101,6 +101,9 @@ struct llvmpipe_context { /** Vertex format */ struct vertex_info vertex_info; + + /** Which vertex shader output slot contains point size */ + int psize_slot; /** Fragment shader input interpolation info */ unsigned num_inputs; diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h index 92fb2b3ee5..a928ee38be 100644 --- a/src/gallium/drivers/llvmpipe/lp_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_debug.h @@ -46,6 +46,8 @@ st_print_current(void); #define DEBUG_SHOW_TILES 0x200 #define DEBUG_SHOW_SUBTILES 0x400 #define DEBUG_COUNTERS 0x800 +#define DEBUG_SCENE 0x1000 +#define DEBUG_FENCE 0x2000 #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index e73b431cb4..3af5c8d5c5 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -68,25 +68,17 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } /* Map index buffer, if present */ - if (info->indexed && lp->index_buffer.buffer) { - char *indices = (char *) llvmpipe_resource_data(lp->index_buffer.buffer); - mapped_indices = (void *) (indices + lp->index_buffer.offset); - } + if (info->indexed && lp->index_buffer.buffer) + mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer); - draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? - lp->index_buffer.index_size : 0, - info->index_bias, - info->min_index, - info->max_index, - mapped_indices); + draw_set_mapped_index_buffer(draw, mapped_indices); llvmpipe_prepare_vertex_sampling(lp, lp->num_vertex_sampler_views, lp->vertex_sampler_views); /* draw! */ - draw_arrays_instanced(draw, info->mode, info->start, info->count, - info->start_instance, info->instance_count); + draw_vbo(draw, info); /* * unmap vertex/index buffers @@ -95,7 +87,7 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) draw_set_mapped_vertex_buffer(draw, i, NULL); } if (mapped_indices) { - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + draw_set_mapped_index_buffer(draw, NULL); } llvmpipe_cleanup_vertex_sampling(lp); diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c index f9805e5d68..3a55e76bc3 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -44,6 +44,7 @@ struct lp_fence * lp_fence_create(unsigned rank) { + static int fence_id; struct lp_fence *fence = CALLOC_STRUCT(lp_fence); pipe_reference_init(&fence->reference, 1); @@ -51,8 +52,12 @@ lp_fence_create(unsigned rank) pipe_mutex_init(fence->mutex); pipe_condvar_init(fence->signalled); + fence->id = fence_id++; fence->rank = rank; + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, fence->id); + return fence; } @@ -61,6 +66,9 @@ lp_fence_create(unsigned rank) void lp_fence_destroy(struct lp_fence *fence) { + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, fence->id); + pipe_mutex_destroy(fence->mutex); pipe_condvar_destroy(fence->signalled); FREE(fence); @@ -68,82 +76,49 @@ lp_fence_destroy(struct lp_fence *fence) /** - * For reference counting. - * This is a Gallium API function. - */ -static void -llvmpipe_fence_reference(struct pipe_screen *screen, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ - struct lp_fence **old = (struct lp_fence **) ptr; - struct lp_fence *f = (struct lp_fence *) fence; - - lp_fence_reference(old, f); -} - - -/** - * Has the fence been executed/finished? - * This is a Gallium API function. - */ -static int -llvmpipe_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) -{ - struct lp_fence *f = (struct lp_fence *) fence; - - return f->count == f->rank; -} - - -/** - * Wait for the fence to finish. - * This is a Gallium API function. - */ -static int -llvmpipe_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *fence_handle, - unsigned flag) -{ - struct lp_fence *fence = (struct lp_fence *) fence_handle; - - pipe_mutex_lock(fence->mutex); - while (fence->count < fence->rank) { - pipe_condvar_wait(fence->signalled, fence->mutex); - } - pipe_mutex_unlock(fence->mutex); - - return 0; -} - - -/** * Called by the rendering threads to increment the fence counter. * When the counter == the rank, the fence is finished. */ void lp_fence_signal(struct lp_fence *fence) { + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, fence->id); + pipe_mutex_lock(fence->mutex); fence->count++; assert(fence->count <= fence->rank); - LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__, - fence->count, fence->rank); + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s count=%u rank=%u\n", __FUNCTION__, + fence->count, fence->rank); - pipe_condvar_signal(fence->signalled); + /* Wakeup all threads waiting on the mutex: + */ + pipe_condvar_broadcast(fence->signalled); pipe_mutex_unlock(fence->mutex); } +boolean +lp_fence_signalled(struct lp_fence *f) +{ + return f->count == f->rank; +} void -llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen) +lp_fence_wait(struct lp_fence *f) { - screen->fence_reference = llvmpipe_fence_reference; - screen->fence_signalled = llvmpipe_fence_signalled; - screen->fence_finish = llvmpipe_fence_finish; + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, f->id); + + pipe_mutex_lock(f->mutex); + assert(f->issued); + while (f->count < f->rank) { + pipe_condvar_wait(f->signalled, f->mutex); + } + pipe_mutex_unlock(f->mutex); } + + diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h index 13358fb99f..3c59118780 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.h +++ b/src/gallium/drivers/llvmpipe/lp_fence.h @@ -41,10 +41,12 @@ struct pipe_screen; struct lp_fence { struct pipe_reference reference; + unsigned id; pipe_mutex mutex; pipe_condvar signalled; + boolean issued; unsigned rank; unsigned count; }; @@ -57,6 +59,11 @@ lp_fence_create(unsigned rank); void lp_fence_signal(struct lp_fence *fence); +boolean +lp_fence_signalled(struct lp_fence *fence); + +void +lp_fence_wait(struct lp_fence *fence); void llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen); @@ -78,5 +85,11 @@ lp_fence_reference(struct lp_fence **ptr, *ptr = f; } +static INLINE boolean +lp_fence_issued(const struct lp_fence *fence) +{ + return fence->issued; +} + #endif /* LP_FENCE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 845292f4ab..e2c723b7a8 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -31,6 +31,7 @@ #include "pipe/p_defines.h" +#include "pipe/p_screen.h" #include "util/u_string.h" #include "draw/draw_context.h" #include "lp_flush.h" @@ -45,14 +46,15 @@ void llvmpipe_flush( struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence ) + struct pipe_fence_handle **fence, + const char *reason) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); draw_flush(llvmpipe->draw); /* ask the setup module to flush */ - lp_setup_flush(llvmpipe->setup, flags, fence); + lp_setup_flush(llvmpipe->setup, flags, fence, reason); /* Enable to dump BMPs of the color/depth buffers each frame */ if (0) { @@ -76,6 +78,17 @@ llvmpipe_flush( struct pipe_context *pipe, } } +void +llvmpipe_finish( struct pipe_context *pipe, + const char *reason ) +{ + struct pipe_fence_handle *fence = NULL; + llvmpipe_flush(pipe, 0, &fence, reason); + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } +} /** * Flush context if necessary. @@ -93,7 +106,8 @@ llvmpipe_flush_resource(struct pipe_context *pipe, unsigned flush_flags, boolean read_only, boolean cpu_access, - boolean do_not_block) + boolean do_not_block, + const char *reason) { unsigned referenced; @@ -106,31 +120,16 @@ llvmpipe_flush_resource(struct pipe_context *pipe, /* * Flush and wait. */ - - struct pipe_fence_handle *fence = NULL; - if (do_not_block) return FALSE; - /* - * Do the unswizzling in parallel. - * - * XXX: Don't abuse the PIPE_FLUSH_FRAME flag for this. - */ - flush_flags |= PIPE_FLUSH_FRAME; - - llvmpipe_flush(pipe, flush_flags, &fence); - - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } + llvmpipe_finish(pipe, reason); } else { /* * Just flush. */ - llvmpipe_flush(pipe, flush_flags, NULL); + llvmpipe_flush(pipe, flush_flags, NULL, reason); } } diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index 7b605681a9..bb538b2bd8 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -34,8 +34,14 @@ struct pipe_context; struct pipe_fence_handle; void -llvmpipe_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence); +llvmpipe_flush(struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence, + const char *reason); + +void +llvmpipe_finish( struct pipe_context *pipe, + const char *reason ); boolean llvmpipe_flush_resource(struct pipe_context *pipe, @@ -45,6 +51,7 @@ llvmpipe_flush_resource(struct pipe_context *pipe, unsigned flush_flags, boolean read_only, boolean cpu_access, - boolean do_not_block); + boolean do_not_block, + const char *reason); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c index 083e7e30a5..e22532f25c 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.c +++ b/src/gallium/drivers/llvmpipe/lp_perf.c @@ -46,7 +46,7 @@ lp_print_counters(void) { if (LP_DEBUG & DEBUG_COUNTERS) { unsigned total_64, total_16, total_4; - float p1, p2, p3, p4; + float p1, p2, p3, p5, p6; debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris); debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris); @@ -58,11 +58,15 @@ lp_print_counters(void) p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64; p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64; p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64; - p4 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64; + p5 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64; + p6 = 100.0 * (float) lp_count.nr_shade_64 / (float) total_64; debug_printf("llvmpipe: nr_64x64: %9u\n", total_64); debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64); - debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p4, total_64); + debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p5, total_64); + debug_printf("llvmpipe: nr_pure_shade_opaque: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_opaque_64, 0.0, lp_count.nr_shade_opaque_64); + debug_printf("llvmpipe: nr_shade_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_64, p6, total_64); + debug_printf("llvmpipe: nr_pure_shade: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_64, 0.0, lp_count.nr_shade_64); debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64); debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64); @@ -79,12 +83,17 @@ lp_print_counters(void) debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16); debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16); - total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4); + total_4 = (lp_count.nr_empty_4 + + lp_count.nr_fully_covered_4 + + lp_count.nr_partially_covered_4); p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4; - p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4; + p2 = 100.0 * (float) lp_count.nr_fully_covered_4 / (float) total_4; + p3 = 100.0 * (float) lp_count.nr_partially_covered_4 / (float) total_4; - debug_printf("llvmpipe: nr_4x4: %9u\n", total_4); + debug_printf("llvmpipe: nr_tri_4x4: %9u\n", total_4); + debug_printf("llvmpipe: nr_fully_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_4, p2, total_4); + debug_printf("llvmpipe: nr_partially_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_4, p3, total_4); debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4); diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h index 4774f64550..c28652fc30 100644 --- a/src/gallium/drivers/llvmpipe/lp_perf.h +++ b/src/gallium/drivers/llvmpipe/lp_perf.h @@ -44,11 +44,16 @@ struct lp_counters unsigned nr_empty_64; unsigned nr_fully_covered_64; unsigned nr_partially_covered_64; + unsigned nr_pure_shade_opaque_64; + unsigned nr_pure_shade_64; + unsigned nr_shade_64; unsigned nr_shade_opaque_64; unsigned nr_empty_16; unsigned nr_fully_covered_16; unsigned nr_partially_covered_16; unsigned nr_empty_4; + unsigned nr_fully_covered_4; + unsigned nr_partially_covered_4; unsigned nr_non_empty_4; unsigned nr_llvm_compiles; int64_t llvm_compile_time; /**< total, in microseconds */ @@ -66,9 +71,11 @@ extern struct lp_counters lp_count; #ifdef DEBUG #define LP_COUNT(counter) lp_count.counter++ #define LP_COUNT_ADD(counter, incr) lp_count.counter += (incr) +#define LP_COUNT_GET(counter) (lp_count.counter) #else #define LP_COUNT(counter) #define LP_COUNT_ADD(counter, incr) (void) incr +#define LP_COUNT_GET(counter) 0 #endif diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index 02eeaf6487..67fd797af2 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -35,9 +35,8 @@ #include "util/u_memory.h" #include "lp_context.h" #include "lp_flush.h" +#include "lp_fence.h" #include "lp_query.h" -#include "lp_rast.h" -#include "lp_rast_priv.h" #include "lp_state.h" @@ -69,12 +68,7 @@ llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) struct llvmpipe_query *pq = llvmpipe_query(q); /* query might still be in process if we never waited for the result */ if (!pq->done) { - struct pipe_fence_handle *fence = NULL; - llvmpipe_flush(pipe, 0, &fence); - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } + llvmpipe_finish(pipe, __FUNCTION__); } pipe_mutex_destroy(pq->mutex); @@ -93,16 +87,11 @@ llvmpipe_get_query_result(struct pipe_context *pipe, if (!pq->done) { if (wait) { - struct pipe_fence_handle *fence = NULL; - llvmpipe_flush(pipe, 0, &fence); - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } + llvmpipe_finish(pipe, __FUNCTION__); } /* this is a bit inconsequent but should be ok */ else { - llvmpipe_flush(pipe, 0, NULL); + llvmpipe_flush(pipe, 0, NULL, __FUNCTION__); } } @@ -125,12 +114,7 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) * frame of rendering. */ if (pq->binned) { - struct pipe_fence_handle *fence; - llvmpipe_flush(pipe, 0, &fence); - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } + llvmpipe_finish(pipe, __FUNCTION__); } lp_setup_begin_query(llvmpipe->setup, pq); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 3215d0f652..b1c306bbe9 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -316,43 +316,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, } -/** - * Load tile color from the framebuffer surface. - * This is a bin command called during bin processing. - */ -#if 0 -void -lp_rast_load_color(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) -{ - struct lp_rasterizer *rast = task->rast; - unsigned buf; - enum lp_texture_usage usage; - - LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); - - if (scene->has_color_clear) - usage = LP_TEX_USAGE_WRITE_ALL; - else - usage = LP_TEX_USAGE_READ_WRITE; - - /* Get pointers to color tile(s). - * This will convert linear data to tiled if needed. - */ - for (buf = 0; buf < rast->state.nr_cbufs; buf++) { - struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; - struct llvmpipe_texture *lpt; - assert(cbuf); - lpt = llvmpipe_texture(cbuf->texture); - task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, - cbuf->face + cbuf->zslice, - cbuf->level, - usage, - task->x, task->y); - assert(task->color_tiles[buf]); - } -} -#endif /** diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 44319a0ad6..b4564ef33b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -120,7 +120,7 @@ struct lp_rast_triangle { float v[3][2]; #endif - struct lp_rast_plane plane[7]; /* NOTE: may allocate fewer planes */ + struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */ }; @@ -236,6 +236,8 @@ void lp_rast_triangle_6( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); void lp_rast_triangle_7( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void lp_rast_triangle_8( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); void lp_rast_shade_tile( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); @@ -256,5 +258,9 @@ void lp_rast_begin_query(struct lp_rasterizer_task *, void lp_rast_end_query(struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void +lp_rast_triangle_3_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg); + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 980c18c024..dbaa8e023a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -67,7 +67,7 @@ block_full_16(struct lp_rasterizer_task *task, block_full_4(task, tri, x + ix, y + iy); } - +#if !defined(PIPE_ARCH_SSE) static INLINE unsigned build_mask(int c, int dcdx, int dcdy) { @@ -98,6 +98,7 @@ build_mask(int c, int dcdx, int dcdy) return mask; } + static INLINE unsigned build_mask_linear(int c, int dcdx, int dcdy) { @@ -129,6 +130,137 @@ build_mask_linear(int c, int dcdx, int dcdy) } +static INLINE void +build_masks(int c, + int cdiff, + int dcdx, + int dcdy, + unsigned *outmask, + unsigned *partmask) +{ + *outmask |= build_mask_linear(c, dcdx, dcdy); + *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy); +} + +#else +#include <emmintrin.h> +#include "util/u_sse.h" + + +static INLINE void +build_masks(int c, + int cdiff, + int dcdx, + int dcdy, + unsigned *outmask, + unsigned *partmask) +{ + __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3); + __m128i xdcdy = _mm_set1_epi32(dcdy); + + /* Get values across the quad + */ + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + { + __m128i cstep01, cstep23, result; + + cstep01 = _mm_packs_epi32(cstep0, cstep1); + cstep23 = _mm_packs_epi32(cstep2, cstep3); + result = _mm_packs_epi16(cstep01, cstep23); + + *outmask |= _mm_movemask_epi8(result); + } + + + { + __m128i cio4 = _mm_set1_epi32(cdiff); + __m128i cstep01, cstep23, result; + + cstep0 = _mm_add_epi32(cstep0, cio4); + cstep1 = _mm_add_epi32(cstep1, cio4); + cstep2 = _mm_add_epi32(cstep2, cio4); + cstep3 = _mm_add_epi32(cstep3, cio4); + + cstep01 = _mm_packs_epi32(cstep0, cstep1); + cstep23 = _mm_packs_epi32(cstep2, cstep3); + result = _mm_packs_epi16(cstep01, cstep23); + + *partmask |= _mm_movemask_epi8(result); + } +} + + +static INLINE unsigned +build_mask_linear(int c, int dcdx, int dcdy) +{ + __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3); + __m128i xdcdy = _mm_set1_epi32(dcdy); + + /* Get values across the quad + */ + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + /* pack pairs of results into epi16 + */ + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + + /* pack into epi8, preserving sign bits + */ + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* extract sign bits to create mask + */ + return _mm_movemask_epi8(result); +} + +static INLINE unsigned +build_mask(int c, int dcdx, int dcdy) +{ + __m128i step = _mm_setr_epi32(0, dcdx, dcdy, dcdx + dcdy); + __m128i c0 = _mm_set1_epi32(c); + + /* Get values across the quad + */ + __m128i cstep0 = _mm_add_epi32(c0, step); + + /* Scale up step for moving between quads. + */ + __m128i step4 = _mm_add_epi32(step, step); + + /* Get values for the remaining quads: + */ + __m128i cstep1 = _mm_add_epi32(cstep0, + _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1))); + __m128i cstep2 = _mm_add_epi32(cstep0, + _mm_shuffle_epi32(step4, _MM_SHUFFLE(2,2,2,2))); + __m128i cstep3 = _mm_add_epi32(cstep2, + _mm_shuffle_epi32(step4, _MM_SHUFFLE(1,1,1,1))); + + /* pack pairs of results into epi16 + */ + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + + /* pack into epi8, preserving sign bits + */ + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* extract sign bits to create mask + */ + return _mm_movemask_epi8(result); +} + +#endif + + + + #define TAG(x) x##_1 #define NR_PLANES 1 #include "lp_rast_tri_tmp.h" @@ -157,3 +289,92 @@ build_mask_linear(int c, int dcdx, int dcdy) #define NR_PLANES 7 #include "lp_rast_tri_tmp.h" +#define TAG(x) x##_8 +#define NR_PLANES 8 +#include "lp_rast_tri_tmp.h" + + +/* Special case for 3 plane triangle which is contained entirely + * within a 16x16 block. + */ +void +lp_rast_triangle_3_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = tri->plane; + unsigned mask = arg.triangle.plane_mask; + const int x = task->x + (mask & 0xf) * 16; + const int y = task->y + (mask >> 4) * 16; + unsigned outmask, inmask, partmask, partial_mask; + unsigned j; + int c[3]; + + outmask = 0; /* outside one or more trivial reject planes */ + partmask = 0; /* outside one or more trivial accept planes */ + + for (j = 0; j < 3; j++) { + c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; + + { + const int dcdx = -plane[j].dcdx * 4; + const int dcdy = plane[j].dcdy * 4; + const int cox = plane[j].eo * 4; + const int cio = plane[j].ei * 4 - 1; + + build_masks(c[j] + cox, + cio - cox, + dcdx, dcdy, + &outmask, /* sign bits from c[i][0..15] + cox */ + &partmask); /* sign bits from c[i][0..15] + cio */ + } + } + + if (outmask == 0xffff) + return; + + /* Mask of sub-blocks which are inside all trivial accept planes: + */ + inmask = ~partmask & 0xffff; + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = partmask & ~outmask; + + assert((partial_mask & inmask) == 0); + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; + int cx[3]; + + partial_mask &= ~(1 << i); + + for (j = 0; j < 3; j++) + cx[j] = (c[j] + - plane[j].dcdx * ix + + plane[j].dcdy * iy); + + do_block_4_3(task, tri, plane, px, py, cx); + } + + /* Iterate over fulls: + */ + while (inmask) { + int i = ffs(inmask) - 1; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; + + inmask &= ~(1 << i); + + block_full_4(task, tri, px, py); + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 43f72d8ca8..99a0bae45d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -32,7 +32,7 @@ /** - * Prototype for a 7 plane rasterizer function. Will codegenerate + * Prototype for a 8 plane rasterizer function. Will codegenerate * several of these. * * XXX: Varients for more/fewer planes. @@ -81,11 +81,14 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, for (j = 0; j < NR_PLANES; j++) { const int dcdx = -plane[j].dcdx * 4; const int dcdy = plane[j].dcdy * 4; - const int cox = c[j] + plane[j].eo * 4; - const int cio = c[j] + plane[j].ei * 4 - 1; - - outmask |= build_mask_linear(cox, dcdx, dcdy); - partmask |= build_mask_linear(cio, dcdx, dcdy); + const int cox = plane[j].eo * 4; + const int cio = plane[j].ei * 4 - 1; + + build_masks(c[j] + cox, + cio - cox, + dcdx, dcdy, + &outmask, /* sign bits from c[i][0..15] + cox */ + &partmask); /* sign bits from c[i][0..15] + cio */ } if (outmask == 0xffff) @@ -102,6 +105,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, assert((partial_mask & inmask) == 0); + LP_COUNT_ADD(nr_empty_4, util_bitcount(0xffff & ~(partial_mask | inmask))); + /* Iterate over partials: */ while (partial_mask) { @@ -114,6 +119,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, partial_mask &= ~(1 << i); + LP_COUNT(nr_partially_covered_4); + for (j = 0; j < NR_PLANES; j++) cx[j] = (c[j] - plane[j].dcdx * ix @@ -133,6 +140,7 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, inmask &= ~(1 << i); + LP_COUNT(nr_fully_covered_4); block_full_4(task, tri, px, py); } } @@ -166,11 +174,14 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, { const int dcdx = -plane[j].dcdx * 16; const int dcdy = plane[j].dcdy * 16; - const int cox = c[j] + plane[j].eo * 16; - const int cio = c[j] + plane[j].ei * 16 - 1; - - outmask |= build_mask_linear(cox, dcdx, dcdy); - partmask |= build_mask_linear(cio, dcdx, dcdy); + const int cox = plane[j].eo * 16; + const int cio = plane[j].ei * 16 - 1; + + build_masks(c[j] + cox, + cio - cox, + dcdx, dcdy, + &outmask, /* sign bits from c[i][0..15] + cox */ + &partmask); /* sign bits from c[i][0..15] + cio */ } j++; @@ -190,6 +201,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, assert((partial_mask & inmask) == 0); + LP_COUNT_ADD(nr_empty_16, util_bitcount(0xffff & ~(partial_mask | inmask))); + /* Iterate over partials: */ while (partial_mask) { diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index f88a759fe7..15a09b7100 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -163,12 +163,15 @@ lp_scene_reset(struct lp_scene *scene ) /* Free all but last binner command lists: */ - for (i = 0; i < TILES_X; i++) { - for (j = 0; j < TILES_Y; j++) { + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { lp_scene_bin_reset(scene, i, j); } } + /* If there are any bins which weren't cleared by the loop above, + * they will be caught (on debug builds at least) by this assert: + */ assert(lp_scene_is_empty(scene)); /* Free all but last binned data block: diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 167cb2ee2e..1e65a91fc6 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -61,6 +61,8 @@ static const struct debug_named_value lp_debug_flags[] = { { "show_tiles", DEBUG_SHOW_TILES, NULL }, { "show_subtiles", DEBUG_SHOW_SUBTILES, NULL }, { "counters", DEBUG_COUNTERS, NULL }, + { "scene", DEBUG_SCENE, NULL }, + { "fence", DEBUG_FENCE, NULL }, DEBUG_NAMED_VALUE_END }; #endif @@ -87,7 +89,14 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return PIPE_MAX_VERTEX_SAMPLERS; + /* At this time, the draw module and llvmpipe driver only + * support vertex shader texture lookups when LLVM is enabled in + * the draw module. + */ + if (debug_get_bool_option("DRAW_USE_LLVM", TRUE)) + return PIPE_MAX_VERTEX_SAMPLERS; + else + return 0; case PIPE_CAP_MAX_COMBINED_SAMPLERS: return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: @@ -230,6 +239,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_RECT || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); @@ -314,6 +324,51 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen ) + +/** + * Fence reference counting. + */ +static void +llvmpipe_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct lp_fence **old = (struct lp_fence **) ptr; + struct lp_fence *f = (struct lp_fence *) fence; + + lp_fence_reference(old, f); +} + + +/** + * Has the fence been executed/finished? + */ +static int +llvmpipe_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct lp_fence *f = (struct lp_fence *) fence; + return lp_fence_signalled(f); +} + + +/** + * Wait for the fence to finish. + */ +static int +llvmpipe_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence_handle, + unsigned flag) +{ + struct lp_fence *f = (struct lp_fence *) fence_handle; + + lp_fence_wait(f); + return 0; +} + + + /** * Create a new pipe_screen object * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen). @@ -351,9 +406,11 @@ llvmpipe_create_screen(struct sw_winsys *winsys) screen->base.context_create = llvmpipe_create_context; screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; + screen->base.fence_reference = llvmpipe_fence_reference; + screen->base.fence_signalled = llvmpipe_fence_signalled; + screen->base.fence_finish = llvmpipe_fence_finish; llvmpipe_init_screen_resource_funcs(&screen->base); - llvmpipe_init_screen_fence_funcs(&screen->base); lp_jit_screen_init(screen); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 556e571585..3da9097154 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -275,9 +275,10 @@ set_scene_state( struct lp_setup_context *setup, void lp_setup_flush( struct lp_setup_context *setup, unsigned flags, - struct pipe_fence_handle **fence) + struct pipe_fence_handle **fence, + const char *reason) { - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + LP_DBG(DEBUG_SETUP, "%s %s\n", __FUNCTION__, reason); if (setup->scene) { if (fence) { @@ -287,6 +288,8 @@ lp_setup_flush( struct lp_setup_context *setup, *fence = lp_setup_fence( setup ); } + if (setup->scene->fence) + setup->scene->fence->issued = TRUE; } set_scene_state( setup, SETUP_FLUSHED ); @@ -312,6 +315,11 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup, * scene. */ util_copy_framebuffer_state(&setup->fb, fb); + setup->framebuffer.x0 = 0; + setup->framebuffer.y0 = 0; + setup->framebuffer.x1 = fb->width-1; + setup->framebuffer.y1 = fb->height-1; + setup->dirty |= LP_SETUP_NEW_SCISSOR; } @@ -469,11 +477,35 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup, setup->ccw_is_frontface = ccw_is_frontface; setup->cullmode = cull_mode; setup->triangle = first_triangle; - setup->scissor_test = scissor; setup->pixel_offset = gl_rasterization_rules ? 0.5f : 0.0f; + + if (setup->scissor_test != scissor) { + setup->dirty |= LP_SETUP_NEW_SCISSOR; + setup->scissor_test = scissor; + } } +void +lp_setup_set_line_state( struct lp_setup_context *setup, + float line_width) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + setup->line_width = line_width; +} + +void +lp_setup_set_point_state( struct lp_setup_context *setup, + float point_size, + boolean point_size_per_vertex, + uint sprite) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + setup->point_size = point_size; + setup->sprite = sprite; + setup->point_size_per_vertex = point_size_per_vertex; +} void lp_setup_set_fs_inputs( struct lp_setup_context *setup, @@ -559,10 +591,11 @@ lp_setup_set_scissor( struct lp_setup_context *setup, assert(scissor); - if (memcmp(&setup->scissor.current, scissor, sizeof(*scissor)) != 0) { - setup->scissor.current = *scissor; /* struct copy */ - setup->dirty |= LP_SETUP_NEW_SCISSOR; - } + setup->scissor.x0 = scissor->minx; + setup->scissor.x1 = scissor->maxx-1; + setup->scissor.y0 = scissor->miny; + setup->scissor.y1 = scissor->maxy-1; + setup->dirty |= LP_SETUP_NEW_SCISSOR; } @@ -713,6 +746,12 @@ lp_setup_update_state( struct lp_setup_context *setup ) */ { struct llvmpipe_context *lp = llvmpipe_context(scene->pipe); + + /* Will probably need to move this somewhere else, just need + * to know about vertex shader point size attribute. + */ + setup->psize = lp->psize_slot; + if (lp->dirty) { llvmpipe_update_derived(lp); } @@ -806,6 +845,14 @@ lp_setup_update_state( struct lp_setup_context *setup ) } } + if (setup->dirty & LP_SETUP_NEW_SCISSOR) { + setup->draw_region = setup->framebuffer; + if (setup->scissor_test) { + u_rect_possible_intersection(&setup->scissor, + &setup->draw_region); + } + } + setup->dirty = 0; assert(setup->fs.stored); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 73b1c85325..821ebb1087 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -85,7 +85,8 @@ lp_setup_fence( struct lp_setup_context *setup ); void lp_setup_flush( struct lp_setup_context *setup, unsigned flags, - struct pipe_fence_handle **fence); + struct pipe_fence_handle **fence, + const char *reason); void @@ -99,6 +100,16 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup, boolean scissor, boolean gl_rasterization_rules ); +void +lp_setup_set_line_state( struct lp_setup_context *setup, + float line_width); + +void +lp_setup_set_point_state( struct lp_setup_context *setup, + float point_size, + boolean point_size_per_vertex, + uint sprite); + void lp_setup_set_fs_inputs( struct lp_setup_context *setup, const struct lp_shader_input *interp, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c new file mode 100644 index 0000000000..95e3e8fffe --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.c @@ -0,0 +1,258 @@ +/************************************************************************** + * + * Copyright 2010, VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for triangles + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_setup_coef.h" +#include "lp_rast.h" +#include "lp_state_fs.h" + +#if !defined(PIPE_ARCH_SSE) + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + */ +static void constant_coef( struct lp_rast_shader_inputs *inputs, + unsigned slot, + const float value, + unsigned i ) +{ + inputs->a0[slot][i] = value; + inputs->dadx[slot][i] = 0.0f; + inputs->dady[slot][i] = 0.0f; +} + + + +static void linear_coef( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + unsigned vert_attr, + unsigned i) +{ + float a0 = info->v0[vert_attr][i]; + float a1 = info->v1[vert_attr][i]; + float a2 = info->v2[vert_attr][i]; + + float da01 = a0 - a1; + float da20 = a2 - a0; + float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20); + float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01); + + inputs->dadx[slot][i] = dadx; + inputs->dady[slot][i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + inputs->a0[slot][i] = a0 - (dadx * info->x0_center + + dady * info->y0_center); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + unsigned vert_attr, + unsigned i) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a0 = info->v0[vert_attr][i] * info->v0[0][3]; + float a1 = info->v1[vert_attr][i] * info->v1[0][3]; + float a2 = info->v2[vert_attr][i] * info->v2[0][3]; + float da01 = a0 - a1; + float da20 = a2 - a0; + float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20; + float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01; + + inputs->dadx[slot][i] = dadx; + inputs->dady[slot][i] = dady; + inputs->a0[slot][i] = a0 - (dadx * info->x0_center + + dady * info->y0_center); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial + * Z and W are copied from position_coef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + unsigned usage_mask) +{ + /*X*/ + if (usage_mask & TGSI_WRITEMASK_X) { + inputs->a0[slot][0] = 0.0; + inputs->dadx[slot][0] = 1.0; + inputs->dady[slot][0] = 0.0; + } + + /*Y*/ + if (usage_mask & TGSI_WRITEMASK_Y) { + inputs->a0[slot][1] = 0.0; + inputs->dadx[slot][1] = 0.0; + inputs->dady[slot][1] = 1.0; + } + + /*Z*/ + if (usage_mask & TGSI_WRITEMASK_Z) { + linear_coef(inputs, info, slot, 0, 2); + } + + /*W*/ + if (usage_mask & TGSI_WRITEMASK_W) { + linear_coef(inputs, info, slot, 0, 3); + } +} + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface is the triangle front facing? + */ +static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, + unsigned slot, + boolean frontface, + unsigned usage_mask) +{ + /* convert TRUE to 1.0 and FALSE to -1.0 */ + if (usage_mask & TGSI_WRITEMASK_X) + constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 ); + + if (usage_mask & TGSI_WRITEMASK_Y) + constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */ + + if (usage_mask & TGSI_WRITEMASK_Z) + constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */ + + if (usage_mask & TGSI_WRITEMASK_W) + constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */ +} + + +/** + * Compute the tri->coef[] array dadx, dady, a0 values. + */ +void lp_setup_tri_coef( struct lp_setup_context *setup, + struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info) +{ + unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; + unsigned slot; + unsigned i; + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; + unsigned usage_mask = setup->fs.input[slot].usage_mask; + + switch (setup->fs.input[slot].interp) { + case LP_INTERP_CONSTANT: + if (setup->flatshade_first) { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(inputs, slot+1, info->v0[vert_attr][i], i); + } + else { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(inputs, slot+1, info->v2[vert_attr][i], i); + } + break; + + case LP_INTERP_LINEAR: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + linear_coef(inputs, info, slot+1, vert_attr, i); + break; + + case LP_INTERP_PERSPECTIVE: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + perspective_coef(inputs, info, slot+1, vert_attr, i); + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0, so all need to ensure that the usage mask is covers all + * usages. + */ + fragcoord_usage_mask |= usage_mask; + break; + + case LP_INTERP_FACING: + setup_facing_coef(inputs, slot+1, info->frontfacing, usage_mask); + break; + + default: + assert(0); + } + } + + /* The internal position input is in slot zero: + */ + setup_fragcoord_coef(inputs, info, 0, fragcoord_usage_mask); +} + +#else +extern void lp_setup_coef_dummy(void); +void lp_setup_coef_dummy(void) +{ +} + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h new file mode 100644 index 0000000000..d68b39c603 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.h @@ -0,0 +1,61 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * The setup code is concerned with point/line/triangle setup and + * putting commands/data into the bins. + */ + + +#ifndef LP_SETUP_COEF_H +#define LP_SETUP_COEF_H + + +struct lp_tri_info { + + float x0_center; + float y0_center; + + /* turn these into an aligned float[4] */ + float dy01_ooa; + float dy20_ooa; + float dx01_ooa; + float dx20_ooa; + + const float (*v0)[4]; + const float (*v1)[4]; + const float (*v2)[4]; + + boolean frontfacing; /* remove eventually */ +}; + +void lp_setup_tri_coef( struct lp_setup_context *setup, + struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info); + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c new file mode 100644 index 0000000000..73fb70599c --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c @@ -0,0 +1,207 @@ +/************************************************************************** + * + * Copyright 2010 VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for triangles + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_setup_coef.h" +#include "lp_rast.h" + +#if defined(PIPE_ARCH_SSE) +#include <emmintrin.h> + + +static void constant_coef4( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + const float *attr) +{ + *(__m128 *)inputs->a0[slot] = *(__m128 *)attr; + *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); + *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); +} + + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface is the triangle front facing? + */ +static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot ) +{ + /* XXX: just pass frontface directly to the shader, don't bother + * treating it as an input. + */ + __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0, + 0, 0, 0); + + *(__m128 *)inputs->a0[slot] = a0; + *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); + *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); +} + + + +static void calc_coef4( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + __m128 a0, + __m128 a1, + __m128 a2) +{ + __m128 da01 = _mm_sub_ps(a0, a1); + __m128 da20 = _mm_sub_ps(a2, a0); + + __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa)); + __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa)); + __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa); + + __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa)); + __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa)); + __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa); + + __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center)); + __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center)); + __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0); + __m128 attr_0 = _mm_sub_ps(a0, attr_v0); + + *(__m128 *)inputs->a0[slot] = attr_0; + *(__m128 *)inputs->dadx[slot] = dadx; + *(__m128 *)inputs->dady[slot] = dady; +} + + +static void linear_coef( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + unsigned vert_attr) +{ + __m128 a0 = *(const __m128 *)info->v0[vert_attr]; + __m128 a1 = *(const __m128 *)info->v1[vert_attr]; + __m128 a2 = *(const __m128 *)info->v2[vert_attr]; + + calc_coef4(inputs, info, slot, a0, a1, a2); +} + + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info, + unsigned slot, + unsigned vert_attr) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + __m128 a0 = *(const __m128 *)info->v0[vert_attr]; + __m128 a1 = *(const __m128 *)info->v1[vert_attr]; + __m128 a2 = *(const __m128 *)info->v2[vert_attr]; + + __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3])); + __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3])); + __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3])); + + calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow); +} + + + + + +/** + * Compute the inputs-> dadx, dady, a0 values. + */ +void lp_setup_tri_coef( struct lp_setup_context *setup, + struct lp_rast_shader_inputs *inputs, + const struct lp_tri_info *info) +{ + unsigned slot; + + /* The internal position input is in slot zero: + */ + linear_coef(inputs, info, 0, 0); + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; + + switch (setup->fs.input[slot].interp) { + case LP_INTERP_CONSTANT: + if (setup->flatshade_first) { + constant_coef4(inputs, info, slot+1, info->v0[vert_attr]); + } + else { + constant_coef4(inputs, info, slot+1, info->v2[vert_attr]); + } + break; + + case LP_INTERP_LINEAR: + linear_coef(inputs, info, slot+1, vert_attr); + break; + + case LP_INTERP_PERSPECTIVE: + perspective_coef(inputs, info, slot+1, vert_attr); + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0. + */ + break; + + case LP_INTERP_FACING: + setup_facing_coef(inputs, info, slot+1); + break; + + default: + assert(0); + } + } +} + +#else +extern void lp_setup_coef_dummy(void); +void lp_setup_coef_dummy(void) +{ +} +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index a0606f5034..877a492c6d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -41,6 +41,7 @@ #include "lp_scene.h" #include "draw/draw_vbuf.h" +#include "util/u_rect.h" #define LP_SETUP_NEW_FS 0x01 #define LP_SETUP_NEW_CONSTANTS 0x02 @@ -73,6 +74,7 @@ struct lp_setup_context uint prim; uint vertex_size; uint nr_vertices; + uint sprite; uint vertex_buffer_size; void *vertex_buffer; @@ -88,10 +90,17 @@ struct lp_setup_context boolean flatshade_first; boolean ccw_is_frontface; boolean scissor_test; + boolean point_size_per_vertex; unsigned cullmode; float pixel_offset; + float line_width; + float point_size; + float psize; struct pipe_framebuffer_state fb; + struct u_rect framebuffer; + struct u_rect scissor; + struct u_rect draw_region; /* intersection of fb & scissor */ struct { unsigned flags; @@ -127,9 +136,6 @@ struct lp_setup_context uint8_t *stored; } blend_color; - struct { - struct pipe_scissor_state current; - } scissor; unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ @@ -158,4 +164,29 @@ void lp_setup_update_state( struct lp_setup_context *setup ); void lp_setup_destroy( struct lp_setup_context *setup ); +void +lp_setup_print_triangle(struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]); + +void +lp_setup_print_vertex(struct lp_setup_context *setup, + const char *name, + const float (*v)[4]); + + +struct lp_rast_triangle * +lp_setup_alloc_triangle(struct lp_scene *scene, + unsigned nr_inputs, + unsigned nr_planes, + unsigned *tri_size); + +void +lp_setup_bin_triangle( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + const struct u_rect *bbox, + int nr_planes ); + #endif + diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index be41c44e6f..ce2da55cf4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -29,19 +29,671 @@ * Binning code for lines */ +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" #include "lp_setup_context.h" +#include "lp_rast.h" +#include "lp_state_fs.h" -static void line_nop( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4] ) +#define NUM_CHANNELS 4 + +struct lp_line_info { + + float dx; + float dy; + float oneoverarea; + + const float (*v1)[4]; + const float (*v2)[4]; +}; + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + */ +static void constant_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + unsigned slot, + const float value, + unsigned i ) +{ + tri->inputs.a0[slot][i] = value; + tri->inputs.dadx[slot][i] = 0.0f; + tri->inputs.dady[slot][i] = 0.0f; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void linear_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + struct lp_line_info *info, + unsigned slot, + unsigned vert_attr, + unsigned i) +{ + float a1 = info->v1[vert_attr][i]; + float a2 = info->v2[vert_attr][i]; + + float da21 = a1 - a2; + float dadx = da21 * info->dx * info->oneoverarea; + float dady = da21 * info->dy * info->oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + + tri->inputs.a0[slot][i] = (a1 - + (dadx * (info->v1[0][0] - setup->pixel_offset) + + dady * (info->v1[0][1] - setup->pixel_offset))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + struct lp_line_info *info, + unsigned slot, + unsigned vert_attr, + unsigned i) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = info->v1[vert_attr][i] * info->v1[0][3]; + float a2 = info->v2[vert_attr][i] * info->v2[0][3]; + + float da21 = a1 - a2; + float dadx = da21 * info->dx * info->oneoverarea; + float dady = da21 * info->dy * info->oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + + tri->inputs.a0[slot][i] = (a1 - + (dadx * (info->v1[0][0] - setup->pixel_offset) + + dady * (info->v1[0][1] - setup->pixel_offset))); +} + +static void +setup_fragcoord_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + struct lp_line_info *info, + unsigned slot, + unsigned usage_mask) +{ + /*X*/ + if (usage_mask & TGSI_WRITEMASK_X) { + tri->inputs.a0[slot][0] = 0.0; + tri->inputs.dadx[slot][0] = 1.0; + tri->inputs.dady[slot][0] = 0.0; + } + + /*Y*/ + if (usage_mask & TGSI_WRITEMASK_Y) { + tri->inputs.a0[slot][1] = 0.0; + tri->inputs.dadx[slot][1] = 0.0; + tri->inputs.dady[slot][1] = 1.0; + } + + /*Z*/ + if (usage_mask & TGSI_WRITEMASK_Z) { + linear_coef(setup, tri, info, slot, 0, 2); + } + + /*W*/ + if (usage_mask & TGSI_WRITEMASK_W) { + linear_coef(setup, tri, info, slot, 0, 3); + } +} + +/** + * Compute the tri->coef[] array dadx, dady, a0 values. + */ +static void setup_line_coefficients( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + struct lp_line_info *info) +{ + unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; + unsigned slot; + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; + unsigned usage_mask = setup->fs.input[slot].usage_mask; + unsigned i; + + switch (setup->fs.input[slot].interp) { + case LP_INTERP_CONSTANT: + if (setup->flatshade_first) { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i); + } + else { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i); + } + break; + + case LP_INTERP_LINEAR: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + linear_coef(setup, tri, info, slot+1, vert_attr, i); + break; + + case LP_INTERP_PERSPECTIVE: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + perspective_coef(setup, tri, info, slot+1, vert_attr, i); + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0, so all need to ensure that the usage mask is covers all + * usages. + */ + fragcoord_usage_mask |= usage_mask; + break; + + default: + assert(0); + } + } + + /* The internal position input is in slot zero: + */ + setup_fragcoord_coef(setup, tri, info, 0, + fragcoord_usage_mask); +} + + + +static INLINE int subpixel_snap( float a ) +{ + return util_iround(FIXED_ONE * a); +} + + +/** + * Print line vertex attribs (for debug). + */ +static void +print_line(struct lp_setup_context *setup, + const float (*v1)[4], + const float (*v2)[4]) +{ + uint i; + + debug_printf("llvmpipe line\n"); + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + debug_printf(" v1[%d]: %f %f %f %f\n", i, + v1[i][0], v1[i][1], v1[i][2], v1[i][3]); + } + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + debug_printf(" v2[%d]: %f %f %f %f\n", i, + v2[i][0], v2[i][1], v2[i][2], v2[i][3]); + } +} + + +static INLINE boolean sign(float x){ + return x >= 0; +} + + +/* Used on positive floats only: + */ +static INLINE float fracf(float f) { + return f - floorf(f); } -void -lp_setup_choose_line( struct lp_setup_context *setup ) + +static void +lp_setup_line( struct lp_setup_context *setup, + const float (*v1)[4], + const float (*v2)[4]) { - setup->line = line_nop; + struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_rast_triangle *line; + struct lp_line_info info; + float width = MAX2(1.0, setup->line_width); + struct u_rect bbox; + unsigned tri_bytes; + int x[4]; + int y[4]; + int i; + int nr_planes = 4; + + /* linewidth should be interpreted as integer */ + int fixed_width = util_iround(width) * FIXED_ONE; + + float x_offset=0; + float y_offset=0; + float x_offset_end=0; + float y_offset_end=0; + + float x1diff; + float y1diff; + float x2diff; + float y2diff; + float dx, dy; + + boolean draw_start; + boolean draw_end; + boolean will_draw_start; + boolean will_draw_end; + + if (0) + print_line(setup, v1, v2); + + if (setup->scissor_test) { + nr_planes = 8; + } + else { + nr_planes = 4; + } + + + dx = v1[0][0] - v2[0][0]; + dy = v1[0][1] - v2[0][1]; + + /* X-MAJOR LINE */ + if (fabsf(dx) >= fabsf(dy)) { + float dydx = dy / dx; + + x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; + y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; + x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; + y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; + + if (y2diff==-0.5 && dy<0){ + y2diff = 0.5; + } + + /* + * Diamond exit rule test for starting point + */ + if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { + draw_start = TRUE; + } + else if (sign(x1diff) == sign(-dx)) { + draw_start = FALSE; + } + else if (sign(-y1diff) != sign(dy)) { + draw_start = TRUE; + } + else { + /* do intersection test */ + float yintersect = fracf(v1[0][1]) + x1diff * dydx; + draw_start = (yintersect < 1.0 && yintersect > 0.0); + } + + + /* + * Diamond exit rule test for ending point + */ + if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { + draw_end = FALSE; + } + else if (sign(x2diff) != sign(-dx)) { + draw_end = FALSE; + } + else if (sign(-y2diff) == sign(dy)) { + draw_end = TRUE; + } + else { + /* do intersection test */ + float yintersect = fracf(v2[0][1]) + x2diff * dydx; + draw_end = (yintersect < 1.0 && yintersect > 0.0); + } + + /* Are we already drawing start/end? + */ + will_draw_start = sign(-x1diff) != sign(dx); + will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0; + + if (dx < 0) { + /* if v2 is to the right of v1, swap pointers */ + const float (*temp)[4] = v1; + v1 = v2; + v2 = temp; + dx = -dx; + dy = -dy; + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + x_offset_end = - x1diff - 0.5; + y_offset_end = x_offset_end * dydx; + + } + if (will_draw_end != draw_end) { + x_offset = - x2diff - 0.5; + y_offset = x_offset * dydx; + } + + } + else{ + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + x_offset = - x1diff + 0.5; + y_offset = x_offset * dydx; + } + if (will_draw_end != draw_end) { + x_offset_end = - x2diff + 0.5; + y_offset_end = x_offset_end * dydx; + } + } + + /* x/y positions in fixed point */ + x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); + x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); + x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); + x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); + + y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) - fixed_width/2; + y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2; + y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2; + y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) + fixed_width/2; + + } + else { + const float dxdy = dx / dy; + + /* Y-MAJOR LINE */ + x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; + y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; + x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; + y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; + + if (x2diff==-0.5 && dx<0) { + x2diff = 0.5; + } + + /* + * Diamond exit rule test for starting point + */ + if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { + draw_start = TRUE; + } + else if (sign(-y1diff) == sign(dy)) { + draw_start = FALSE; + } + else if (sign(x1diff) != sign(-dx)) { + draw_start = TRUE; + } + else { + /* do intersection test */ + float xintersect = fracf(v1[0][0]) + y1diff * dxdy; + draw_start = (xintersect < 1.0 && xintersect > 0.0); + } + + /* + * Diamond exit rule test for ending point + */ + if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { + draw_end = FALSE; + } + else if (sign(-y2diff) != sign(dy) ) { + draw_end = FALSE; + } + else if (sign(x2diff) == sign(-dx) ) { + draw_end = TRUE; + } + else { + /* do intersection test */ + float xintersect = fracf(v2[0][0]) + y2diff * dxdy; + draw_end = (xintersect < 1.0 && xintersect > 0.0); + } + + /* Are we already drawing start/end? + */ + will_draw_start = sign(y1diff) == sign(dy); + will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0; + + if (dy > 0) { + /* if v2 is on top of v1, swap pointers */ + const float (*temp)[4] = v1; + v1 = v2; + v2 = temp; + dx = -dx; + dy = -dy; + + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + y_offset_end = - y1diff + 0.5; + x_offset_end = y_offset_end * dxdy; + } + if (will_draw_end != draw_end) { + y_offset = - y2diff + 0.5; + x_offset = y_offset * dxdy; + } + } + else { + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + y_offset = - y1diff - 0.5; + x_offset = y_offset * dxdy; + + } + if (will_draw_end != draw_end) { + y_offset_end = - y2diff - 0.5; + x_offset_end = y_offset_end * dxdy; + } + } + + /* x/y positions in fixed point */ + x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2; + x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2; + x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2; + x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) + fixed_width/2; + + y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); + y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset); + y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset); + y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); + } + + + + LP_COUNT(nr_tris); + + + /* Bounding rectangle (in pixels) */ + { + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->pixel_offset != 0) ? 1 : 0; + + bbox.x0 = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.x1 = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y0 = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.y1 = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + + /* Inclusive coordinates: + */ + bbox.x1--; + bbox.y1--; + } + + if (bbox.x1 < bbox.x0 || + bbox.y1 < bbox.y0) { + if (0) debug_printf("empty bounding box\n"); + LP_COUNT(nr_culled_tris); + return; + } + + if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { + if (0) debug_printf("offscreen\n"); + LP_COUNT(nr_culled_tris); + return; + } + + u_rect_find_intersection(&setup->draw_region, &bbox); + + line = lp_setup_alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &tri_bytes); + if (!line) + return; + +#ifdef DEBUG + line->v[0][0] = v1[0][0]; + line->v[1][0] = v2[0][0]; + line->v[0][1] = v1[0][1]; + line->v[1][1] = v2[0][1]; +#endif + + /* calculate the deltas */ + line->plane[0].dcdy = x[0] - x[1]; + line->plane[1].dcdy = x[1] - x[2]; + line->plane[2].dcdy = x[2] - x[3]; + line->plane[3].dcdy = x[3] - x[0]; + + line->plane[0].dcdx = y[0] - y[1]; + line->plane[1].dcdx = y[1] - y[2]; + line->plane[2].dcdx = y[2] - y[3]; + line->plane[3].dcdx = y[3] - y[0]; + + + info.oneoverarea = 1.0f / (dx * dx + dy * dy); + info.dx = dx; + info.dy = dy; + info.v1 = v1; + info.v2 = v2; + + /* Setup parameter interpolants: + */ + setup_line_coefficients( setup, line, &info); + + line->inputs.facing = 1.0F; + line->inputs.state = setup->fs.stored; + + for (i = 0; i < 4; i++) { + struct lp_rast_plane *plane = &line->plane[i]; + + /* half-edge constants, will be interated over the whole render + * target. + */ + plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; + + + /* correct for top-left vs. bottom-left fill convention. + * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. + */ + if (plane->dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane->c++; + } + else if (plane->dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane->dcdy > 0) plane->c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane->dcdy < 0) plane->c++; + } + } + + plane->dcdx *= FIXED_ONE; + plane->dcdy *= FIXED_ONE; + + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane->eo = 0; + if (plane->dcdx < 0) plane->eo -= plane->dcdx; + if (plane->dcdy > 0) plane->eo += plane->dcdy; + + /* Calculate trivial accept offsets from the above. + */ + plane->ei = plane->dcdy - plane->dcdx - plane->eo; + } + + + /* + * When rasterizing scissored tris, use the intersection of the + * triangle bounding box and the scissor rect to generate the + * scissor planes. + * + * This permits us to cut off the triangle "tails" that are present + * in the intermediate recursive levels caused when two of the + * triangles edges don't diverge quickly enough to trivially reject + * exterior blocks from the triangle. + * + * It's not really clear if it's worth worrying about these tails, + * but since we generate the planes for each scissored tri, it's + * free to trim them in this case. + * + * Note that otherwise, the scissor planes only vary in 'C' value, + * and even then only on state-changes. Could alternatively store + * these planes elsewhere. + */ + if (nr_planes == 8) { + line->plane[4].dcdx = -1; + line->plane[4].dcdy = 0; + line->plane[4].c = 1-bbox.x0; + line->plane[4].ei = 0; + line->plane[4].eo = 1; + + line->plane[5].dcdx = 1; + line->plane[5].dcdy = 0; + line->plane[5].c = bbox.x1+1; + line->plane[5].ei = -1; + line->plane[5].eo = 0; + + line->plane[6].dcdx = 0; + line->plane[6].dcdy = 1; + line->plane[6].c = 1-bbox.y0; + line->plane[6].ei = 0; + line->plane[6].eo = 1; + + line->plane[7].dcdx = 0; + line->plane[7].dcdy = -1; + line->plane[7].c = bbox.y1+1; + line->plane[7].ei = -1; + line->plane[7].eo = 0; + } + + lp_setup_bin_triangle(setup, line, &bbox, nr_planes); +} + + +void lp_setup_choose_line( struct lp_setup_context *setup ) +{ + setup->line = lp_setup_line; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 9f69e6c5ce..6ae318d328 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2010, VMware Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -30,17 +30,299 @@ */ #include "lp_setup_context.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_rast.h" +#include "lp_state_fs.h" +#include "tgsi/tgsi_scan.h" + +#define NUM_CHANNELS 4 + +struct point_info { + /* x,y deltas */ + int dy01, dy12; + int dx01, dx12; + + const float (*v0)[4]; +}; + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + */ +static void constant_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *point, + unsigned slot, + const float value, + unsigned i ) +{ + point->inputs.a0[slot][i] = value; + point->inputs.dadx[slot][i] = 0.0f; + point->inputs.dady[slot][i] = 0.0f; +} + +static void perspective_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *point, + const struct point_info *info, + unsigned slot, + unsigned vert_attr, + unsigned i) +{ + if (i == 0) { + float dadx = FIXED_ONE / (float)info->dx12; + float dady = 0.0f; + point->inputs.dadx[slot][i] = dadx; + point->inputs.dady[slot][i] = dady; + point->inputs.a0[slot][i] = (0.5 - + (dadx * ((float)info->v0[0][0] - setup->pixel_offset) + + dady * ((float)info->v0[0][1] - setup->pixel_offset))); + } + + else if (i == 1) { + float dadx = 0.0f; + float dady = FIXED_ONE / (float)info->dx12; + + point->inputs.dadx[slot][i] = dadx; + point->inputs.dady[slot][i] = dady; + point->inputs.a0[slot][i] = (0.5 - + (dadx * ((float)info->v0[0][0] - setup->pixel_offset) + + dady * ((float)info->v0[0][1] - setup->pixel_offset))); + } + + else if (i == 2) { + point->inputs.a0[slot][i] = 0.0f; + point->inputs.dadx[slot][i] = 0.0f; + point->inputs.dady[slot][i] = 0.0f; + } + + else if (i == 3) { + point->inputs.a0[slot][i] = 1.0f; + point->inputs.dadx[slot][i] = 0.0f; + point->inputs.dady[slot][i] = 0.0f; + } + +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial + * Z and W are copied from position_coef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_point_fragcoord_coef(struct lp_setup_context *setup, + struct lp_rast_triangle *point, + const struct point_info *info, + unsigned slot, + unsigned usage_mask) +{ + /*X*/ + if (usage_mask & TGSI_WRITEMASK_X) { + point->inputs.a0[slot][0] = 0.0; + point->inputs.dadx[slot][0] = 1.0; + point->inputs.dady[slot][0] = 0.0; + } + + /*Y*/ + if (usage_mask & TGSI_WRITEMASK_Y) { + point->inputs.a0[slot][1] = 0.0; + point->inputs.dadx[slot][1] = 0.0; + point->inputs.dady[slot][1] = 1.0; + } + + /*Z*/ + if (usage_mask & TGSI_WRITEMASK_Z) { + constant_coef(setup, point, slot, info->v0[0][2], 2); + } + + /*W*/ + if (usage_mask & TGSI_WRITEMASK_W) { + constant_coef(setup, point, slot, info->v0[0][3], 3); + } +} + +/** + * Compute the point->coef[] array dadx, dady, a0 values. + */ +static void +setup_point_coefficients( struct lp_setup_context *setup, + struct lp_rast_triangle *point, + const struct point_info *info) +{ + unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; + unsigned slot; + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; + unsigned usage_mask = setup->fs.input[slot].usage_mask; + unsigned i; + + switch (setup->fs.input[slot].interp) { + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0, so all need to ensure that the usage mask is covers all + * usages. + */ + fragcoord_usage_mask |= usage_mask; + break; + + case LP_INTERP_PERSPECTIVE: + /* For point sprite textures */ + if (setup->fs.current.variant->shader->info.input_semantic_name[slot] + == TGSI_SEMANTIC_GENERIC) + { + int index = setup->fs.current.variant->shader->info.input_semantic_index[slot]; + + if (setup->sprite & (1 << index)) { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + perspective_coef(setup, point, info, slot+1, vert_attr, i); + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + break; + } + } + + /* Otherwise fallthrough */ + default: + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) + constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i); + } + } + } -static void point_nop( struct lp_setup_context *setup, - const float (*v0)[4] ) + /* The internal position input is in slot zero: + */ + setup_point_fragcoord_coef(setup, point, info, 0, + fragcoord_usage_mask); +} + +static INLINE int +subpixel_snap(float a) { + return util_iround(FIXED_ONE * a); +} + + +static void lp_setup_point( struct lp_setup_context *setup, + const float (*v0)[4] ) +{ + /* x/y positions in fixed point */ + const int sizeAttr = setup->psize; + const float size + = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0] + : setup->point_size; + + /* Point size as fixed point integer, remove rounding errors + * and gives minimum width for very small points + */ + int fixed_width = MAX2(FIXED_ONE, + (subpixel_snap(size) + FIXED_ONE/2 - 1) & ~(FIXED_ONE-1)); + + const int x0 = subpixel_snap(v0[0][0] - setup->pixel_offset) - fixed_width/2; + const int y0 = subpixel_snap(v0[0][1] - setup->pixel_offset) - fixed_width/2; + + struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_rast_triangle *point; + unsigned bytes; + struct u_rect bbox; + unsigned nr_planes = 4; + struct point_info info; + + + /* Bounding rectangle (in pixels) */ + { + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->pixel_offset != 0) ? 1 : 0; + + bbox.x0 = (x0 + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.x1 = (x0 + fixed_width + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.y0 = (y0 + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y1 = (y0 + fixed_width + (FIXED_ONE-1)) >> FIXED_ORDER; + + /* Inclusive coordinates: + */ + bbox.x1--; + bbox.y1--; + } + + if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { + if (0) debug_printf("offscreen\n"); + LP_COUNT(nr_culled_tris); + return; + } + + u_rect_find_intersection(&setup->draw_region, &bbox); + + point = lp_setup_alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &bytes); + if (!point) + return; + +#ifdef DEBUG + point->v[0][0] = v0[0][0]; + point->v[0][1] = v0[0][1]; +#endif + + info.v0 = v0; + info.dx01 = 0; + info.dx12 = fixed_width; + info.dy01 = fixed_width; + info.dy12 = 0; + + /* Setup parameter interpolants: + */ + setup_point_coefficients(setup, point, &info); + + point->inputs.facing = 1.0F; + point->inputs.state = setup->fs.stored; + + { + point->plane[0].dcdx = -1; + point->plane[0].dcdy = 0; + point->plane[0].c = 1-bbox.x0; + point->plane[0].ei = 0; + point->plane[0].eo = 1; + + point->plane[1].dcdx = 1; + point->plane[1].dcdy = 0; + point->plane[1].c = bbox.x1+1; + point->plane[1].ei = -1; + point->plane[1].eo = 0; + + point->plane[2].dcdx = 0; + point->plane[2].dcdy = 1; + point->plane[2].c = 1-bbox.y0; + point->plane[2].ei = 0; + point->plane[2].eo = 1; + + point->plane[3].dcdx = 0; + point->plane[3].dcdy = -1; + point->plane[3].c = bbox.y1+1; + point->plane[3].ei = -1; + point->plane[3].eo = 0; + } + + lp_setup_bin_triangle(setup, point, &bbox, nr_planes); } void lp_setup_choose_point( struct lp_setup_context *setup ) { - setup->point = point_nop; + setup->point = lp_setup_point; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 393533ebee..0180d95090 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -31,35 +31,15 @@ #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_rect.h" #include "lp_perf.h" #include "lp_setup_context.h" +#include "lp_setup_coef.h" #include "lp_rast.h" #include "lp_state_fs.h" #define NUM_CHANNELS 4 -struct tri_info { - - float pixel_offset; - - /* fixed point vertex coordinates */ - int x[3]; - int y[3]; - - /* float x,y deltas - all from the original coordinates - */ - float dy01, dy20; - float dx01, dx20; - float oneoverarea; - - const float (*v0)[4]; - const float (*v1)[4]; - const float (*v2)[4]; - - boolean frontfacing; -}; - - static INLINE int @@ -76,247 +56,6 @@ fixed_to_float(int a) -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - */ -static void constant_coef( struct lp_rast_triangle *tri, - unsigned slot, - const float value, - unsigned i ) -{ - tri->inputs.a0[slot][i] = value; - tri->inputs.dadx[slot][i] = 0.0f; - tri->inputs.dady[slot][i] = 0.0f; -} - - - -static void linear_coef( struct lp_rast_triangle *tri, - const struct tri_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) -{ - float a0 = info->v0[vert_attr][i]; - float a1 = info->v1[vert_attr][i]; - float a2 = info->v2[vert_attr][i]; - - float da01 = a0 - a1; - float da20 = a2 - a0; - float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea; - float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea; - - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - tri->inputs.a0[slot][i] = (a0 - - (dadx * (info->v0[0][0] - info->pixel_offset) + - dady * (info->v0[0][1] - info->pixel_offset))); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void perspective_coef( struct lp_rast_triangle *tri, - const struct tri_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) -{ - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a0 = info->v0[vert_attr][i] * info->v0[0][3]; - float a1 = info->v1[vert_attr][i] * info->v1[0][3]; - float a2 = info->v2[vert_attr][i] * info->v2[0][3]; - float da01 = a0 - a1; - float da20 = a2 - a0; - float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea; - float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea; - - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a0 - - (dadx * (info->v0[0][0] - info->pixel_offset) + - dady * (info->v0[0][1] - info->pixel_offset))); -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial - * Z and W are copied from position_coef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. - */ -static void -setup_fragcoord_coef(struct lp_rast_triangle *tri, - const struct tri_info *info, - unsigned slot, - unsigned usage_mask) -{ - /*X*/ - if (usage_mask & TGSI_WRITEMASK_X) { - tri->inputs.a0[slot][0] = 0.0; - tri->inputs.dadx[slot][0] = 1.0; - tri->inputs.dady[slot][0] = 0.0; - } - - /*Y*/ - if (usage_mask & TGSI_WRITEMASK_Y) { - tri->inputs.a0[slot][1] = 0.0; - tri->inputs.dadx[slot][1] = 0.0; - tri->inputs.dady[slot][1] = 1.0; - } - - /*Z*/ - if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(tri, info, slot, 0, 2); - } - - /*W*/ - if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(tri, info, slot, 0, 3); - } -} - - -/** - * Setup the fragment input attribute with the front-facing value. - * \param frontface is the triangle front facing? - */ -static void setup_facing_coef( struct lp_rast_triangle *tri, - unsigned slot, - boolean frontface, - unsigned usage_mask) -{ - /* convert TRUE to 1.0 and FALSE to -1.0 */ - if (usage_mask & TGSI_WRITEMASK_X) - constant_coef( tri, slot, 2.0f * frontface - 1.0f, 0 ); - - if (usage_mask & TGSI_WRITEMASK_Y) - constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ - - if (usage_mask & TGSI_WRITEMASK_Z) - constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ - - if (usage_mask & TGSI_WRITEMASK_W) - constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ -} - - -/** - * Compute the tri->coef[] array dadx, dady, a0 values. - */ -static void setup_tri_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - const struct tri_info *info) -{ - unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; - unsigned slot; - unsigned i; - - /* setup interpolation for all the remaining attributes: - */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; - - switch (setup->fs.input[slot].interp) { - case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - constant_coef(tri, slot+1, info->v0[vert_attr][i], i); - } - else { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - constant_coef(tri, slot+1, info->v2[vert_attr][i], i); - } - break; - - case LP_INTERP_LINEAR: - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - linear_coef(tri, info, slot+1, vert_attr, i); - break; - - case LP_INTERP_PERSPECTIVE: - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - perspective_coef(tri, info, slot+1, vert_attr, i); - fragcoord_usage_mask |= TGSI_WRITEMASK_W; - break; - - case LP_INTERP_POSITION: - /* - * The generated pixel interpolators will pick up the coeffs from - * slot 0, so all need to ensure that the usage mask is covers all - * usages. - */ - fragcoord_usage_mask |= usage_mask; - break; - - case LP_INTERP_FACING: - setup_facing_coef(tri, slot+1, info->frontfacing, usage_mask); - break; - - default: - assert(0); - } - } - - /* The internal position input is in slot zero: - */ - setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask); - - if (0) { - for (i = 0; i < NUM_CHANNELS; i++) { - float a0 = tri->inputs.a0 [0][i]; - float dadx = tri->inputs.dadx[0][i]; - float dady = tri->inputs.dady[0][i]; - - debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", - "xyzw"[i], - a0, dadx, dady); - } - - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned usage_mask = setup->fs.input[slot].usage_mask; - for (i = 0; i < NUM_CHANNELS; i++) { - if (usage_mask & (1 << i)) { - float a0 = tri->inputs.a0 [1 + slot][i]; - float dadx = tri->inputs.dadx[1 + slot][i]; - float dady = tri->inputs.dady[1 + slot][i]; - - debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", - slot, - "xyzw"[i], - a0, dadx, dady); - } - } - } - } -} - - @@ -329,11 +68,11 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, * \param nr_inputs number of fragment shader inputs * \return pointer to triangle space */ -static INLINE struct lp_rast_triangle * -alloc_triangle(struct lp_scene *scene, - unsigned nr_inputs, - unsigned nr_planes, - unsigned *tri_size) +struct lp_rast_triangle * +lp_setup_alloc_triangle(struct lp_scene *scene, + unsigned nr_inputs, + unsigned nr_planes, + unsigned *tri_size) { unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); struct lp_rast_triangle *tri; @@ -357,35 +96,71 @@ alloc_triangle(struct lp_scene *scene, return tri; } +void +lp_setup_print_vertex(struct lp_setup_context *setup, + const char *name, + const float (*v)[4]) +{ + int i, j; + + debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n", + name, + v[0][0], v[0][1], v[0][2], v[0][3]); + + for (i = 0; i < setup->fs.nr_inputs; i++) { + const float *in = v[setup->fs.input[i].src_index]; + + debug_printf(" in[%d] (%s[%d]) %s%s%s%s ", + i, + name, setup->fs.input[i].src_index, + (setup->fs.input[i].usage_mask & 0x1) ? "x" : " ", + (setup->fs.input[i].usage_mask & 0x2) ? "y" : " ", + (setup->fs.input[i].usage_mask & 0x4) ? "z" : " ", + (setup->fs.input[i].usage_mask & 0x8) ? "w" : " "); + + for (j = 0; j < 4; j++) + if (setup->fs.input[i].usage_mask & (1<<j)) + debug_printf("%.5f ", in[j]); + + debug_printf("\n"); + } +} + /** * Print triangle vertex attribs (for debug). */ -static void -print_triangle(struct lp_setup_context *setup, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4]) +void +lp_setup_print_triangle(struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) { - uint i; + debug_printf("triangle\n"); - debug_printf("llvmpipe triangle\n"); - for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { - debug_printf(" v1[%d]: %f %f %f %f\n", i, - v1[i][0], v1[i][1], v1[i][2], v1[i][3]); - } - for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { - debug_printf(" v2[%d]: %f %f %f %f\n", i, - v2[i][0], v2[i][1], v2[i][2], v2[i][3]); - } - for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { - debug_printf(" v3[%d]: %f %f %f %f\n", i, - v3[i][0], v3[i][1], v3[i][2], v3[i][3]); + { + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + const float det = ex * fy - ey * fx; + if (det < 0.0f) + debug_printf(" - ccw\n"); + else if (det > 0.0f) + debug_printf(" - cw\n"); + else + debug_printf(" - zero area\n"); } + + lp_setup_print_vertex(setup, "v0", v0); + lp_setup_print_vertex(setup, "v1", v1); + lp_setup_print_vertex(setup, "v2", v2); } -lp_rast_cmd lp_rast_tri_tab[8] = { +lp_rast_cmd lp_rast_tri_tab[9] = { NULL, /* should be impossible */ lp_rast_triangle_1, lp_rast_triangle_2, @@ -393,7 +168,8 @@ lp_rast_cmd lp_rast_tri_tab[8] = { lp_rast_triangle_4, lp_rast_triangle_5, lp_rast_triangle_6, - lp_rast_triangle_7 + lp_rast_triangle_7, + lp_rast_triangle_8 }; /** @@ -403,25 +179,27 @@ lp_rast_cmd lp_rast_tri_tab[8] = { */ static void do_triangle_ccw(struct lp_setup_context *setup, + const float (*v0)[4], const float (*v1)[4], const float (*v2)[4], - const float (*v3)[4], boolean frontfacing ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); - struct lp_fragment_shader_variant *variant = setup->fs.current.variant; struct lp_rast_triangle *tri; - struct tri_info info; + int x[3]; + int y[3]; + float dy01, dy20; + float dx01, dx20; + float oneoverarea; + struct lp_tri_info info; int area; - int minx, maxx, miny, maxy; - int ix0, ix1, iy0, iy1; + struct u_rect bbox; unsigned tri_bytes; int i; int nr_planes = 3; if (0) - print_triangle(setup, v1, v2, v3); + lp_setup_print_triangle(setup, v0, v1, v2); if (setup->scissor_test) { nr_planes = 7; @@ -430,38 +208,73 @@ do_triangle_ccw(struct lp_setup_context *setup, nr_planes = 3; } + /* x/y positions in fixed point */ + x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); + x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); + x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); + y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); + y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); + + + /* Bounding rectangle (in pixels) */ + { + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->pixel_offset != 0) ? 1 : 0; + + bbox.x0 = (MIN3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.x1 = (MAX3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y0 = (MIN3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.y1 = (MAX3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + + /* Inclusive coordinates: + */ + bbox.x1--; + bbox.y1--; + } + + if (bbox.x1 < bbox.x0 || + bbox.y1 < bbox.y0) { + if (0) debug_printf("empty bounding box\n"); + LP_COUNT(nr_culled_tris); + return; + } + + if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { + if (0) debug_printf("offscreen\n"); + LP_COUNT(nr_culled_tris); + return; + } + + u_rect_find_intersection(&setup->draw_region, &bbox); - tri = alloc_triangle(scene, - setup->fs.nr_inputs, - nr_planes, - &tri_bytes); + tri = lp_setup_alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &tri_bytes); if (!tri) return; #ifdef DEBUG - tri->v[0][0] = v1[0][0]; - tri->v[1][0] = v2[0][0]; - tri->v[2][0] = v3[0][0]; - tri->v[0][1] = v1[0][1]; - tri->v[1][1] = v2[0][1]; - tri->v[2][1] = v3[0][1]; + tri->v[0][0] = v0[0][0]; + tri->v[1][0] = v1[0][0]; + tri->v[2][0] = v2[0][0]; + tri->v[0][1] = v0[0][1]; + tri->v[1][1] = v1[0][1]; + tri->v[2][1] = v2[0][1]; #endif - /* x/y positions in fixed point */ - info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset); - info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset); - info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset); - info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset); - info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset); - info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset); - - tri->plane[0].dcdy = info.x[0] - info.x[1]; - tri->plane[1].dcdy = info.x[1] - info.x[2]; - tri->plane[2].dcdy = info.x[2] - info.x[0]; + tri->plane[0].dcdy = x[0] - x[1]; + tri->plane[1].dcdy = x[1] - x[2]; + tri->plane[2].dcdy = x[2] - x[0]; - tri->plane[0].dcdx = info.y[0] - info.y[1]; - tri->plane[1].dcdx = info.y[1] - info.y[2]; - tri->plane[2].dcdx = info.y[2] - info.y[0]; + tri->plane[0].dcdx = y[0] - y[1]; + tri->plane[1].dcdx = y[1] - y[2]; + tri->plane[2].dcdx = y[2] - y[0]; area = (tri->plane[0].dcdy * tri->plane[2].dcdx - tri->plane[2].dcdy * tri->plane[0].dcdx); @@ -478,57 +291,29 @@ do_triangle_ccw(struct lp_setup_context *setup, return; } - /* Bounding rectangle (in pixels) */ - { - /* Yes this is necessary to accurately calculate bounding boxes - * with the two fill-conventions we support. GL (normally) ends - * up needing a bottom-left fill convention, which requires - * slightly different rounding. - */ - int adj = (setup->pixel_offset != 0) ? 1 : 0; - - minx = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; - maxx = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; - miny = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - maxy = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - } - - if (setup->scissor_test) { - minx = MAX2(minx, setup->scissor.current.minx); - maxx = MIN2(maxx, setup->scissor.current.maxx); - miny = MAX2(miny, setup->scissor.current.miny); - maxy = MIN2(maxy, setup->scissor.current.maxy); - } - else { - minx = MAX2(minx, 0); - miny = MAX2(miny, 0); - maxx = MIN2(maxx, scene->fb.width); - maxy = MIN2(maxy, scene->fb.height); - } - - - if (miny >= maxy || minx >= maxx) { - lp_scene_putback_data( scene, tri_bytes ); - LP_COUNT(nr_culled_tris); - return; - } /* */ - info.pixel_offset = setup->pixel_offset; - info.v0 = v1; - info.v1 = v2; - info.v2 = v3; - info.dx01 = info.v0[0][0] - info.v1[0][0]; - info.dx20 = info.v2[0][0] - info.v0[0][0]; - info.dy01 = info.v0[0][1] - info.v1[0][1]; - info.dy20 = info.v2[0][1] - info.v0[0][1]; - info.oneoverarea = 1.0f / (info.dx01 * info.dy20 - info.dx20 * info.dy01); + dx01 = v0[0][0] - v1[0][0]; + dy01 = v0[0][1] - v1[0][1]; + dx20 = v2[0][0] - v0[0][0]; + dy20 = v2[0][1] - v0[0][1]; + oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); + + info.v0 = v0; + info.v1 = v1; + info.v2 = v2; info.frontfacing = frontfacing; + info.x0_center = v0[0][0] - setup->pixel_offset; + info.y0_center = v0[0][1] - setup->pixel_offset; + info.dx01_ooa = dx01 * oneoverarea; + info.dx20_ooa = dx20 * oneoverarea; + info.dy01_ooa = dy01 * oneoverarea; + info.dy20_ooa = dy20 * oneoverarea; /* Setup parameter interpolants: */ - setup_tri_coefficients( setup, tri, &info ); + lp_setup_tri_coef( setup, &tri->inputs, &info ); tri->inputs.facing = frontfacing ? 1.0F : -1.0F; tri->inputs.state = setup->fs.stored; @@ -541,7 +326,7 @@ do_triangle_ccw(struct lp_setup_context *setup, /* half-edge constants, will be interated over the whole render * target. */ - plane->c = plane->dcdx * info.x[i] - plane->dcdy * info.y[i]; + plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; /* correct for top-left vs. bottom-left fill convention. * @@ -612,29 +397,43 @@ do_triangle_ccw(struct lp_setup_context *setup, if (nr_planes == 7) { tri->plane[3].dcdx = -1; tri->plane[3].dcdy = 0; - tri->plane[3].c = 1-minx; + tri->plane[3].c = 1-bbox.x0; tri->plane[3].ei = 0; tri->plane[3].eo = 1; tri->plane[4].dcdx = 1; tri->plane[4].dcdy = 0; - tri->plane[4].c = maxx; + tri->plane[4].c = bbox.x1+1; tri->plane[4].ei = -1; tri->plane[4].eo = 0; tri->plane[5].dcdx = 0; tri->plane[5].dcdy = 1; - tri->plane[5].c = 1-miny; + tri->plane[5].c = 1-bbox.y0; tri->plane[5].ei = 0; tri->plane[5].eo = 1; tri->plane[6].dcdx = 0; tri->plane[6].dcdy = -1; - tri->plane[6].c = maxy; + tri->plane[6].c = bbox.y1+1; tri->plane[6].ei = -1; tri->plane[6].eo = 0; } + lp_setup_bin_triangle( setup, tri, &bbox, nr_planes ); +} + + +void +lp_setup_bin_triangle( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + const struct u_rect *bbox, + int nr_planes ) +{ + struct lp_scene *scene = setup->scene; + struct lp_fragment_shader_variant *variant = setup->fs.current.variant; + int ix0, ix1, iy0, iy1; + int i; /* * All fields of 'tri' are now set. The remaining code here is @@ -643,10 +442,30 @@ do_triangle_ccw(struct lp_setup_context *setup, /* Convert to tile coordinates, and inclusive ranges: */ - ix0 = minx / TILE_SIZE; - iy0 = miny / TILE_SIZE; - ix1 = (maxx-1) / TILE_SIZE; - iy1 = (maxy-1) / TILE_SIZE; + if (nr_planes == 3) { + int ix0 = bbox->x0 / 16; + int iy0 = bbox->y0 / 16; + int ix1 = bbox->x1 / 16; + int iy1 = bbox->y1 / 16; + + if (iy0 == iy1 && ix0 == ix1) + { + + /* Triangle is contained in a single 16x16 block: + */ + int mask = (ix0 & 3) | ((iy0 & 3) << 4); + + lp_scene_bin_command( scene, ix0/4, iy0/4, + lp_rast_triangle_3_16, + lp_rast_arg_triangle(tri, mask) ); + return; + } + } + + ix0 = bbox->x0 / TILE_SIZE; + iy0 = bbox->y0 / TILE_SIZE; + ix1 = bbox->x1 / TILE_SIZE; + iy1 = bbox->y1 / TILE_SIZE; /* * Clamp to framebuffer size @@ -799,9 +618,10 @@ static void triangle_both( struct lp_setup_context *setup, const float fy = v1[0][1] - v2[0][1]; /* det = cross(e,f).z */ - if (ex * fy - ey * fx < 0.0f) + const float det = ex * fy - ey * fx; + if (det < 0.0f) triangle_ccw( setup, v0, v1, v2 ); - else + else if (det > 0.0f) triangle_cw( setup, v0, v1, v2 ); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 77bec4640b..edd723f65f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -74,6 +74,15 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) vs_index = draw_find_shader_output(llvmpipe->draw, lpfs->info.input_semantic_name[i], lpfs->info.input_semantic_index[i]); + if (vs_index < 0) { + /* + * This can happen with sprite coordinates - the vertex + * shader doesn't need to provide an output as we generate + * them internally. However, lets keep pretending that there + * is something there to not confuse other code. + */ + vs_index = 0; + } /* This can be pre-computed, except for flatshade: */ @@ -125,6 +134,17 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) inputs[i].src_index = vinfo->num_attribs; draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); } + + /* Figure out if we need pointsize as well. + */ + vs_index = draw_find_shader_output(llvmpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); + + if (vs_index > 0) { + llvmpipe->psize_slot = vinfo->num_attribs; + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); + } + llvmpipe->num_inputs = lpfs->info.num_inputs; draw_compute_vertex_size(vinfo); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index dbca49a2ef..33c1a49efe 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -808,7 +808,7 @@ generate_variant(struct llvmpipe_context *lp, variant->list_item_local.base = variant; variant->no = shader->variants_created++; - memcpy(&variant->key, key, sizeof *key); + memcpy(&variant->key, key, shader->variant_key_size); if (gallivm_debug & GALLIVM_DEBUG_IR) { debug_printf("llvmpipe: Creating fragment shader #%u variant #%u:\n", @@ -840,6 +840,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { struct lp_fragment_shader *shader; + int nr_samplers; shader = CALLOC_STRUCT(lp_fragment_shader); if (!shader) @@ -854,6 +855,11 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); + nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + + shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, + sampler[nr_samplers]); + if (LP_DEBUG & DEBUG_TGSI) { unsigned attrib; debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); @@ -921,7 +927,6 @@ static void llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - struct pipe_fence_handle *fence = NULL; struct lp_fragment_shader *shader = fs; struct lp_fs_variant_list_item *li; @@ -934,12 +939,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) * Flushing alone might not sufficient we need to wait on it too. */ - llvmpipe_flush(pipe, 0, &fence); - - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } + llvmpipe_finish(pipe, __FUNCTION__); li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { @@ -1027,7 +1027,7 @@ make_variant_key(struct llvmpipe_context *lp, { unsigned i; - memset(key, 0, sizeof *key); + memset(key, 0, shader->variant_key_size); if (lp->framebuffer.zsbuf) { if (lp->depth_stencil->depth.enabled) { @@ -1097,9 +1097,17 @@ make_variant_key(struct llvmpipe_context *lp, } } - for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) - if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) - lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]); + /* This value will be the same for all the variants of a given shader: + */ + key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + + for(i = 0; i < key->nr_samplers; ++i) { + if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + lp_sampler_static_state(&key->sampler[i], + lp->fragment_sampler_views[i], + lp->sampler[i]); + } + } } /** @@ -1118,7 +1126,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { - if(memcmp(&li->base->key, &key, sizeof key) == 0) { + if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) { variant = li->base; break; } @@ -1134,19 +1142,14 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) unsigned i; if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) { struct pipe_context *pipe = &lp->pipe; - struct pipe_fence_handle *fence = NULL; /* * XXX: we need to flush the context until we have some sort of reference * counting in fragment shaders as they may still be binned * Flushing alone might not be sufficient we need to wait on it too. */ - llvmpipe_flush(pipe, 0, &fence); + llvmpipe_finish(pipe, __FUNCTION__); - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } for (i = 0; i < LP_MAX_SHADER_VARIANTS / 4; i++) { struct lp_fs_variant_list_item *item = last_elem(&lp->fs_variants_list); remove_shader_variant(lp, item->base); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 37900fc544..33c480010d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -53,13 +53,10 @@ struct lp_fragment_shader_variant_key struct pipe_blend_state blend; enum pipe_format zsbuf_format; unsigned nr_cbufs:8; + unsigned nr_samplers:8; /* actually derivable from just the shader */ unsigned flatshade:1; unsigned occlusion_count:1; - struct { - ubyte colormask; - } cbuf_blend[PIPE_MAX_COLOR_BUFS]; - struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; @@ -97,6 +94,7 @@ struct lp_fragment_shader struct lp_fs_variant_list_item variants; /* For debugging/profiling purposes */ + unsigned variant_key_size; unsigned no; unsigned variants_created; unsigned variants_cached; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index afd3e0b21c..0bad7320f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -73,7 +73,13 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) llvmpipe->rasterizer->gl_rasterization_rules); lp_setup_set_flatshade_first( llvmpipe->setup, llvmpipe->rasterizer->flatshade_first); - } + lp_setup_set_line_state( llvmpipe->setup, + llvmpipe->rasterizer->line_width); + lp_setup_set_point_state( llvmpipe->setup, + llvmpipe->rasterizer->point_size, + llvmpipe->rasterizer->point_size_per_vertex, + llvmpipe->rasterizer->sprite_coord_enable); + } llvmpipe->dirty |= LP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index d86e66b4fb..fb29423dd3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -100,7 +100,7 @@ llvmpipe_set_index_buffer(struct pipe_context *pipe, else memset(&llvmpipe->index_buffer, 0, sizeof(llvmpipe->index_buffer)); - /* TODO make this more like a state */ + draw_set_index_buffer(llvmpipe->draw, ib); } void diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index f761e82850..63ddc669c2 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -68,14 +68,16 @@ lp_resource_copy(struct pipe_context *pipe, 0, /* flush_flags */ FALSE, /* read_only */ TRUE, /* cpu_access */ - FALSE); /* do_not_block */ + FALSE, + "blit dst"); /* do_not_block */ llvmpipe_flush_resource(pipe, src, subsrc.face, subsrc.level, 0, /* flush_flags */ TRUE, /* read_only */ TRUE, /* cpu_access */ - FALSE); /* do_not_block */ + FALSE, + "blit src"); /* do_not_block */ /* printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n", diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 25112c10a6..5832ea2744 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -67,6 +67,7 @@ resource_is_texture(const struct pipe_resource *resource) return FALSE; case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: return TRUE; @@ -583,7 +584,8 @@ llvmpipe_get_transfer(struct pipe_context *pipe, 0, /* flush_flags */ read_only, TRUE, /* cpu_access */ - do_not_block)) { + do_not_block, + "transfer dest")) { /* * It would have blocked, but state tracker requested no to. */ |