diff options
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
26 files changed, 1043 insertions, 815 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 3173251437..41ac1cee72 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -37,6 +37,7 @@ C_SOURCES = \ lp_surface.c \ lp_tex_sample_llvm.c \ lp_texture.c \ + lp_tile_image.c \ lp_tile_soa.c CPP_SOURCES = \ diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 72d9f39658..bf4c9a5727 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -49,8 +49,6 @@ To do (probably by this order): Requirements ============ - - Linux - - A x86 or amd64 processor. 64bit mode is preferred. Support for sse2 is strongly encouraged. Support for ssse3, and sse4.1 will @@ -86,7 +84,7 @@ Building To build everything on Linux invoke scons as: - scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=xlib dri=false + scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=xlib dri=false Alternatively, you can build it with GNU make, if you prefer, by invoking it as @@ -96,7 +94,7 @@ but the rest of these instructions assume that scons is used. For windows is everything the except except the winsys: - scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=gdi dri=false + scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=gdi dri=false Using ===== diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index a39283e5e8..13c1a13e87 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -18,6 +18,13 @@ env.CodeGenerate( command = 'python $SCRIPT $SOURCE > $TARGET' ) +# XXX: Our dependency scanner only finds depended modules in relative dirs. +env.Depends('lp_tile_soa.c', [ + '#src/gallium/auxiliary/util/u_format_parse.py', + '#src/gallium/auxiliary/util/u_format_pack.py', + '#src/gallium/auxiliary/util/u_format_access.py', +]) + llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ @@ -52,6 +59,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_surface.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', + 'lp_tile_image.c', 'lp_tile_soa.c', ]) diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index ccd1cf8eec..d94efec16a 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -186,7 +186,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - llvmpipe->setup = lp_setup_create( screen, + llvmpipe->setup = lp_setup_create( &llvmpipe->pipe, llvmpipe->draw ); if (!llvmpipe->setup) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index d3d7e26882..08c8f93794 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -37,6 +37,7 @@ #include "util/u_memory.h" #include "util/u_cpu_detect.h" +#include "gallivm/lp_bld_init.h" #include "lp_debug.h" #include "lp_screen.h" #include "gallivm/lp_bld_intr.h" @@ -50,12 +51,16 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_texture */ { - LLVMTypeRef elem_types[4]; + LLVMTypeRef elem_types[6]; elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); - elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + elem_types[LP_JIT_TEXTURE_DATA] = + LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), + LP_MAX_TEXTURE_2D_LEVELS); texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -65,6 +70,12 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height, screen->target, texture_type, LP_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, depth, + screen->target, texture_type, + LP_JIT_TEXTURE_DEPTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, last_level, + screen->target, texture_type, + LP_JIT_TEXTURE_LAST_LEVEL); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride, screen->target, texture_type, LP_JIT_TEXTURE_STRIDE); @@ -148,8 +159,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) util_cpu_caps.has_sse4_1 = 0; #endif - LLVMLinkInJIT(); - LLVMInitializeNativeTarget(); + lp_build_init(); screen->module = LLVMModuleCreateWithName("llvmpipe"); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 8df3015d4b..5cc7a12c03 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -39,6 +39,7 @@ #include "gallivm/lp_bld_struct.h" #include "pipe/p_state.h" +#include "lp_texture.h" struct llvmpipe_screen; @@ -48,14 +49,18 @@ struct lp_jit_texture { uint32_t width; uint32_t height; + uint32_t depth; + uint32_t last_level; uint32_t stride; - const void *data; + const void *data[LP_MAX_TEXTURE_2D_LEVELS]; }; enum { LP_JIT_TEXTURE_WIDTH = 0, LP_JIT_TEXTURE_HEIGHT, + LP_JIT_TEXTURE_DEPTH, + LP_JIT_TEXTURE_LAST_LEVEL, LP_JIT_TEXTURE_STRIDE, LP_JIT_TEXTURE_DATA }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6dbcb3c9b3..dd9a8e8856 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -42,128 +42,73 @@ #include "lp_scene.h" -/** - * Begin the rasterization phase. - * Map the framebuffer surfaces. Initialize the 'rast' state. +/* Begin rasterizing a scene: */ static boolean lp_rast_begin( struct lp_rasterizer *rast, - const struct pipe_framebuffer_state *fb, - boolean write_color, - boolean write_zstencil ) + struct lp_scene *scene ) { - struct pipe_screen *screen = rast->screen; - struct pipe_surface *cbuf, *zsbuf; + const struct pipe_framebuffer_state *fb = &scene->fb; + boolean write_color = fb->nr_cbufs != 0; + boolean write_zstencil = fb->zsbuf != NULL; int i; - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + rast->curr_scene = scene; - util_copy_framebuffer_state(&rast->state.fb, fb); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + rast->state.nr_cbufs = scene->fb.nr_cbufs; rast->state.write_zstencil = write_zstencil; rast->state.write_color = write_color; - - rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 || - fb->height % TILE_SIZE != 0); - - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - cbuf = rast->state.fb.cbufs[i]; - if (cbuf) { - rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen, - cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - cbuf->width, - cbuf->height); - if (!rast->cbuf_transfer[i]) - goto fail; - - rast->cbuf_map[i] = screen->transfer_map(rast->screen, - rast->cbuf_transfer[i]); - if (!rast->cbuf_map[i]) - goto fail; - } + for (i = 0; i < rast->state.nr_cbufs; i++) { + rast->cbuf[i].map = scene->cbuf_map[i]; + rast->cbuf[i].format = scene->cbuf_transfer[i]->texture->format; + rast->cbuf[i].width = scene->cbuf_transfer[i]->width; + rast->cbuf[i].height = scene->cbuf_transfer[i]->height; + rast->cbuf[i].stride = scene->cbuf_transfer[i]->stride; } - zsbuf = rast->state.fb.zsbuf; - if (zsbuf) { - rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen, - zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - zsbuf->width, - zsbuf->height); - if (!rast->zsbuf_transfer) - goto fail; - - rast->zsbuf_map = screen->transfer_map(rast->screen, - rast->zsbuf_transfer); - if (!rast->zsbuf_map) - goto fail; + if (write_zstencil) { + rast->zsbuf.map = scene->zsbuf_map; + rast->zsbuf.stride = scene->zsbuf_transfer->stride; + rast->zsbuf.blocksize = + util_format_get_blocksize(scene->zsbuf_transfer->texture->format); } + lp_scene_bin_iter_begin( scene ); + return TRUE; - -fail: - /* Unmap and release transfers? - */ - return FALSE; } -/** - * Finish the rasterization phase. - * Unmap framebuffer surfaces. - */ static void lp_rast_end( struct lp_rasterizer *rast ) { - struct pipe_screen *screen = rast->screen; - unsigned i; - - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - if (rast->cbuf_map[i]) - screen->transfer_unmap(screen, rast->cbuf_transfer[i]); - - if (rast->cbuf_transfer[i]) - screen->tex_transfer_destroy(rast->cbuf_transfer[i]); - - rast->cbuf_transfer[i] = NULL; - rast->cbuf_map[i] = NULL; - } + int i; - if (rast->zsbuf_map) - screen->transfer_unmap(screen, rast->zsbuf_transfer); + lp_scene_reset( rast->curr_scene ); - if (rast->zsbuf_transfer) - screen->tex_transfer_destroy(rast->zsbuf_transfer); + for (i = 0; i < rast->state.nr_cbufs; i++) + rast->cbuf[i].map = NULL; - rast->zsbuf_transfer = NULL; - rast->zsbuf_map = NULL; + rast->zsbuf.map = NULL; + rast->curr_scene = NULL; } - /** * Begining rasterization of a tile. * \param x window X position of the tile, in pixels * \param y window Y position of the tile, in pixels */ static void -lp_rast_start_tile( struct lp_rasterizer *rast, - unsigned thread_index, - unsigned x, unsigned y ) +lp_rast_start_tile(struct lp_rasterizer_task *task, + unsigned x, unsigned y) { LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); - rast->tasks[thread_index].x = x; - rast->tasks[thread_index].y = y; + task->x = x; + task->y = y; } @@ -171,12 +116,13 @@ lp_rast_start_tile( struct lp_rasterizer *rast, * Clear the rasterizer's current color tile. * This is a bin command called during bin processing. */ -void lp_rast_clear_color( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_clear_color(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { + struct lp_rasterizer *rast = task->rast; const uint8_t *clear_color = arg.clear_color; - uint8_t **color_tile = rast->tasks[thread_index].tile.color; + uint8_t **color_tile = task->tile.color; unsigned i; LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, @@ -189,7 +135,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { /* clear to grayscale value {x, x, x, x} */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + for (i = 0; i < rast->state.nr_cbufs; i++) { memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4); } } @@ -200,7 +146,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, * works. */ const unsigned chunk = TILE_SIZE / 4; - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + for (i = 0; i < rast->state.nr_cbufs; i++) { uint8_t *c = color_tile[i]; unsigned j; for (j = 0; j < 4 * TILE_SIZE; j++) { @@ -225,24 +171,24 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, * Clear the rasterizer's current z/stencil tile. * This is a bin command called during bin processing. */ -void lp_rast_clear_zstencil( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg) +void +lp_rast_clear_zstencil(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + struct lp_rasterizer *rast = task->rast; const unsigned tile_x = task->x; const unsigned tile_y = task->y; - const unsigned height = TILE_SIZE/TILE_VECTOR_HEIGHT; - const unsigned width = TILE_SIZE*TILE_VECTOR_HEIGHT; - unsigned block_size = util_format_get_blocksize(rast->zsbuf_transfer->texture->format); + const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; + const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; + unsigned block_size = rast->zsbuf.blocksize; uint8_t *dst; - unsigned dst_stride = rast->zsbuf_transfer->stride*TILE_VECTOR_HEIGHT; + unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT; unsigned i, j; LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); - assert(rast->zsbuf_map); - if (!rast->zsbuf_map) + assert(rast->zsbuf.map); + if (!rast->zsbuf.map) return; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -278,8 +224,8 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, } break; default: - assert(0); - break; + assert(0); + break; } } @@ -288,55 +234,42 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, * Load tile color from the framebuffer surface. * This is a bin command called during bin processing. */ -void lp_rast_load_color( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg) +void +lp_rast_load_color(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; - const unsigned x = task->x; - const unsigned y = task->y; + struct lp_rasterizer *rast = task->rast; + const unsigned x = task->x, y = task->y; unsigned i; LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - struct pipe_transfer *transfer = rast->cbuf_transfer[i]; - int w = TILE_SIZE; - int h = TILE_SIZE; - - if (x >= transfer->width) - continue; - - if (y >= transfer->height) + for (i = 0; i < rast->state.nr_cbufs; i++) { + if (x >= rast->cbuf[i].width || y >= rast->cbuf[i].height) continue; - assert(w >= 0); - assert(h >= 0); - assert(w <= TILE_SIZE); - assert(h <= TILE_SIZE); - - lp_tile_read_4ub(transfer->texture->format, + lp_tile_read_4ub(rast->cbuf[i].format, task->tile.color[i], - rast->cbuf_map[i], - transfer->stride, + rast->cbuf[i].map, + rast->cbuf[i].stride, x, y, - w, h); + TILE_SIZE, TILE_SIZE); LP_COUNT(nr_color_tile_load); } } -void lp_rast_set_state( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { const struct lp_rast_state *state = arg.set_state; LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); /* just set the current state pointer for this rasterizer */ - rast->tasks[thread_index].current_state = state; + task->current_state = state; } @@ -346,16 +279,15 @@ void lp_rast_set_state( struct lp_rasterizer *rast, * completely contained inside a triangle. * This is a bin command called during bin processing. */ -void lp_rast_shade_tile( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_shade_tile(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + struct lp_rasterizer *rast = task->rast; const struct lp_rast_state *state = task->current_state; struct lp_rast_tile *tile = &task->tile; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const unsigned tile_x = task->x; - const unsigned tile_y = task->y; + const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -371,7 +303,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16); /* color buffer */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) + for (i = 0; i < rast->state.nr_cbufs; i++) color[i] = tile->color[i] + 4 * block_offset; /* depth buffer */ @@ -396,14 +328,13 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, * Compute shading for a 4x4 block of pixels. * This is a bin command called during bin processing. */ -void lp_rast_shade_quads( struct lp_rasterizer *rast, - unsigned thread_index, +void lp_rast_shade_quads( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, int32_t c1, int32_t c2, int32_t c3) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; const struct lp_rast_state *state = task->current_state; + struct lp_rasterizer *rast = task->rast; struct lp_rast_tile *tile = &task->tile; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -411,7 +342,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned ix, iy; int block_offset; -#ifdef DEBUG assert(state); /* Sanity checks */ @@ -420,7 +350,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, assert((x % 4) == 0); assert((y % 4) == 0); -#endif ix = x % TILE_SIZE; iy = y % TILE_SIZE; @@ -429,22 +358,19 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16); /* color buffer */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) + for (i = 0; i < rast->state.nr_cbufs; i++) color[i] = tile->color[i] + 4 * block_offset; /* depth buffer */ depth = lp_rast_depth_pointer(rast, x, y); - -#ifdef DEBUG assert(lp_check_alignment(tile->color[0], 16)); assert(lp_check_alignment(state->jit_context.blend_color, 16)); assert(lp_check_alignment(inputs->step[0], 16)); assert(lp_check_alignment(inputs->step[1], 16)); assert(lp_check_alignment(inputs->step[2], 16)); -#endif /* run shader */ state->jit_function[1]( &state->jit_context, @@ -515,66 +441,48 @@ outline_subtiles(uint8_t *tile) /** * Write the rasterizer's color tile to the framebuffer. */ -static void lp_rast_store_color( struct lp_rasterizer *rast, - unsigned thread_index) +static void +lp_rast_store_color(struct lp_rasterizer_task *task) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; - const unsigned x = task->x; - const unsigned y = task->y; + struct lp_rasterizer *rast = task->rast; + const unsigned x = task->x, y = task->y; unsigned i; - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - struct pipe_transfer *transfer = rast->cbuf_transfer[i]; - int w = TILE_SIZE; - int h = TILE_SIZE; - - if (x >= transfer->width) + for (i = 0; i < rast->state.nr_cbufs; i++) { + if (x >= rast->cbuf[i].width) continue; - if (y >= transfer->height) + if (y >= rast->cbuf[i].height) continue; - LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, - thread_index, x, y, w, h); + LP_DBG(DEBUG_RAST, "%s [%u] %d,%d\n", __FUNCTION__, + task->thread_index, x, y); if (LP_DEBUG & DEBUG_SHOW_SUBTILES) outline_subtiles(task->tile.color[i]); else if (LP_DEBUG & DEBUG_SHOW_TILES) outline_tile(task->tile.color[i]); - lp_tile_write_4ub(transfer->texture->format, + lp_tile_write_4ub(rast->cbuf[i].format, task->tile.color[i], - rast->cbuf_map[i], - transfer->stride, + rast->cbuf[i].map, + rast->cbuf[i].stride, x, y, - w, h); + TILE_SIZE, TILE_SIZE); LP_COUNT(nr_color_tile_store); } } -/** - * Write the rasterizer's tiles to the framebuffer. - */ -static void -lp_rast_end_tile( struct lp_rasterizer *rast, - unsigned thread_index ) -{ - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - - if (rast->state.write_color) - lp_rast_store_color(rast, thread_index); -} - /** * Signal on a fence. This is called during bin execution/rasterization. * Called per thread. */ -void lp_rast_fence( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_fence(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { struct lp_fence *fence = arg.fence; @@ -592,20 +500,6 @@ void lp_rast_fence( struct lp_rasterizer *rast, } -/** - * When all the threads are done rasterizing a scene, one thread will - * call this function to reset the scene and put it onto the empty queue. - */ -static void -release_scene( struct lp_rasterizer *rast, - struct lp_scene *scene ) -{ - util_unreference_framebuffer_state( &scene->fb ); - - lp_scene_reset( scene ); - lp_scene_enqueue( rast->empty_scenes, scene ); - rast->curr_scene = NULL; -} /** @@ -615,25 +509,31 @@ release_scene( struct lp_rasterizer *rast, * Called per thread. */ static void -rasterize_bin( struct lp_rasterizer *rast, - unsigned thread_index, - const struct cmd_bin *bin, - int x, int y) +rasterize_bin(struct lp_rasterizer_task *task, + const struct cmd_bin *bin, + int x, int y) { const struct cmd_block_list *commands = &bin->commands; struct cmd_block *block; unsigned k; - lp_rast_start_tile( rast, thread_index, x, y ); + lp_rast_start_tile( task, x * TILE_SIZE, y * TILE_SIZE ); /* simply execute each of the commands in the block list */ for (block = commands->head; block; block = block->next) { for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, thread_index, block->arg[k] ); + block->cmd[k]( task, block->arg[k] ); } } - lp_rast_end_tile( rast, thread_index ); + /* Write the rasterizer's tiles to the framebuffer. + */ + if (task->rast->state.write_color) + lp_rast_store_color(task); + + /* Free data for this bin. + */ + lp_scene_bin_reset( task->rast->curr_scene, x, y); } @@ -717,10 +617,8 @@ is_empty_bin( const struct cmd_bin *bin ) * Called per thread. */ static void -rasterize_scene( struct lp_rasterizer *rast, - unsigned thread_index, - struct lp_scene *scene, - bool write_depth ) +rasterize_scene(struct lp_rasterizer_task *task, + struct lp_scene *scene) { /* loop over scene bins, rasterize each */ #if 0 @@ -728,9 +626,8 @@ rasterize_scene( struct lp_rasterizer *rast, unsigned i, j; for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(scene, i, j); - rasterize_bin( rast, thread_index, - bin, i * TILE_SIZE, j * TILE_SIZE ); + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + rasterize_bin(task, bin, i, j); } } } @@ -742,7 +639,7 @@ rasterize_scene( struct lp_rasterizer *rast, assert(scene); while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { if (!is_empty_bin( bin )) - rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); + rasterize_bin(task, bin, x, y); } } #endif @@ -753,44 +650,20 @@ rasterize_scene( struct lp_rasterizer *rast, * Called by setup module when it has something for us to render. */ void -lp_rasterize_scene( struct lp_rasterizer *rast, - struct lp_scene *scene, - const struct pipe_framebuffer_state *fb, - bool write_depth ) +lp_rast_queue_scene( struct lp_rasterizer *rast, + struct lp_scene *scene) { - boolean debug = false; - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (debug) { - unsigned x, y; - debug_printf("rasterize scene:\n"); - debug_printf(" data size: %u\n", lp_scene_data_size(scene)); - for (y = 0; y < scene->tiles_y; y++) { - for (x = 0; x < scene->tiles_x; x++) { - debug_printf(" bin %u, %u size: %u\n", x, y, - lp_scene_bin_size(scene, x, y)); - } - } - } - - /* save framebuffer state in the bin */ - util_copy_framebuffer_state(&scene->fb, fb); - scene->write_depth = write_depth; - if (rast->num_threads == 0) { /* no threading */ - lp_rast_begin( rast, fb, - fb->nr_cbufs != 0, /* always write color if cbufs present */ - fb->zsbuf != NULL && write_depth ); + lp_rast_begin( rast, scene ); - lp_scene_bin_iter_begin( scene ); - rasterize_scene( rast, 0, scene, write_depth ); + rasterize_scene( &rast->tasks[0], scene ); - release_scene( rast, scene ); - - lp_rast_end( rast ); + lp_scene_reset( scene ); + rast->curr_scene = NULL; } else { /* threaded rendering! */ @@ -802,14 +675,26 @@ lp_rasterize_scene( struct lp_rasterizer *rast, for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_signal(&rast->tasks[i].work_ready); } + } + + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); +} + + +void +lp_rast_finish( struct lp_rasterizer *rast ) +{ + if (rast->num_threads == 0) { + /* nothing to do */ + } + else { + int i; /* wait for work to complete */ for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_wait(&rast->tasks[i].work_done); } } - - LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); } @@ -832,24 +717,16 @@ static PIPE_THREAD_ROUTINE( thread_func, init_data ) debug_printf("thread %d waiting for work\n", task->thread_index); pipe_semaphore_wait(&task->work_ready); + if (rast->exit_flag) + break; + if (task->thread_index == 0) { /* thread[0]: * - get next scene to rasterize * - map the framebuffer surfaces */ - const struct pipe_framebuffer_state *fb; - boolean write_depth; - - rast->curr_scene = lp_scene_dequeue( rast->full_scenes, TRUE ); - - lp_scene_bin_iter_begin( rast->curr_scene ); - - fb = &rast->curr_scene->fb; - write_depth = rast->curr_scene->write_depth; - - lp_rast_begin( rast, fb, - fb->nr_cbufs != 0, - fb->zsbuf != NULL && write_depth ); + lp_rast_begin( rast, + lp_scene_dequeue( rast->full_scenes, TRUE ) ); } /* Wait for all threads to get here so that threads[1+] don't @@ -860,26 +737,23 @@ static PIPE_THREAD_ROUTINE( thread_func, init_data ) /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); - rasterize_scene(rast, - task->thread_index, - rast->curr_scene, - rast->curr_scene->write_depth); + + rasterize_scene(task, + rast->curr_scene); /* wait for all threads to finish with this scene */ pipe_barrier_wait( &rast->barrier ); + /* XXX: shouldn't be necessary: + */ if (task->thread_index == 0) { - /* thread[0]: - * - release the scene object - * - unmap the framebuffer surfaces - */ - release_scene( rast, rast->curr_scene ); lp_rast_end( rast ); } /* signal done with work */ if (debug) debug_printf("thread %d done working\n", task->thread_index); + pipe_semaphore_signal(&task->work_done); } @@ -922,7 +796,7 @@ create_rast_threads(struct lp_rasterizer *rast) * processing them. */ struct lp_rasterizer * -lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) +lp_rast_create( void ) { struct lp_rasterizer *rast; unsigned i, cbuf; @@ -931,9 +805,6 @@ lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) if(!rast) return NULL; - rast->screen = screen; - - rast->empty_scenes = empty; rast->full_scenes = lp_scene_queue_create(); for (i = 0; i < Elements(rast->tasks); i++) { @@ -961,13 +832,31 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) { unsigned i, cbuf; - util_unreference_framebuffer_state(&rast->state.fb); - for (i = 0; i < Elements(rast->tasks); i++) { for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) align_free(rast->tasks[i].tile.color[cbuf]); } + /* Set exit_flag and signal each thread's work_ready semaphore. + * Each thread will be woken up, notice that the exit_flag is set and + * break out of its main loop. The thread will then exit. + */ + rast->exit_flag = TRUE; + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_signal(&rast->tasks[i].work_ready); + } + + /* Wait for threads to terminate before cleaning up per-thread data */ + for (i = 0; i < rast->num_threads; i++) { + pipe_thread_wait(rast->threads[i]); + } + + /* Clean up per-thread data */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_destroy(&rast->tasks[i].work_ready); + pipe_semaphore_destroy(&rast->tasks[i].work_done); + } + /* for synchronizing rasterization threads */ pipe_barrier_destroy( &rast->barrier ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 875f18e0c0..dc5fc5fc7d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -43,16 +43,17 @@ struct lp_rasterizer; struct lp_scene; -struct lp_scene_queue; struct lp_fence; struct cmd_bin; -struct pipe_screen; /** For sub-pixel positioning */ #define FIXED_ORDER 4 #define FIXED_ONE (1<<FIXED_ORDER) +struct lp_rasterizer_task; + + /** * Rasterization state. * Objects of this type are put into the shared data bin and pointed @@ -97,6 +98,10 @@ struct lp_rast_shader_inputs { * Objects of this type are put into the setup_context::data buffer. */ struct lp_rast_triangle { +#ifdef DEBUG + float v[3][2]; +#endif + /* one-pixel sized trivial accept offsets for each plane */ int ei1; int ei2; @@ -126,18 +131,21 @@ struct lp_rast_triangle { -struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen, - struct lp_scene_queue *empty ); +struct lp_rasterizer * +lp_rast_create( void ); -void lp_rast_destroy( struct lp_rasterizer * ); +void +lp_rast_destroy( struct lp_rasterizer * ); -unsigned lp_rast_get_num_threads( struct lp_rasterizer * ); +unsigned +lp_rast_get_num_threads( struct lp_rasterizer * ); -void lp_rasterize_scene( struct lp_rasterizer *rast, - struct lp_scene *scene, - const struct pipe_framebuffer_state *fb, - bool write_depth ); +void +lp_rast_queue_scene( struct lp_rasterizer *rast, + struct lp_scene *scene ); +void +lp_rast_finish( struct lp_rasterizer *rast ); union lp_rast_cmd_arg { @@ -201,32 +209,25 @@ lp_rast_arg_null( void ) * the bins are executed. */ -void lp_rast_clear_color( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_clear_color( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_clear_zstencil( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_clear_zstencil( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_load_color( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_load_color( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_set_state( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_set_state( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_triangle( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_triangle( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_shade_tile( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_shade_tile( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_fence( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_fence( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 5c5497e092..39bf2c2587 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -38,8 +38,6 @@ #define MAX_THREADS 8 /* XXX probably temporary here */ -struct pipe_transfer; -struct pipe_screen; struct lp_rasterizer; @@ -82,19 +80,26 @@ struct lp_rasterizer_task */ struct lp_rasterizer { - boolean clipped_tile; - boolean check_for_clipped_tiles; + boolean exit_flag; /* Framebuffer stuff */ - struct pipe_screen *screen; - struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS]; - struct pipe_transfer *zsbuf_transfer; - void *cbuf_map[PIPE_MAX_COLOR_BUFS]; - uint8_t *zsbuf_map; + struct { + void *map; + unsigned stride; + unsigned width; + unsigned height; + enum pipe_format format; + } cbuf[PIPE_MAX_COLOR_BUFS]; + + struct { + uint8_t *map; + unsigned stride; + unsigned blocksize; + } zsbuf; struct { - struct pipe_framebuffer_state fb; + unsigned nr_cbufs; boolean write_color; boolean write_zstencil; unsigned clear_color; @@ -104,7 +109,14 @@ struct lp_rasterizer /** The incoming queue of scenes ready to rasterize */ struct lp_scene_queue *full_scenes; - /** The outgoing queue of processed scenes to return to setup modulee */ + + /** + * The outgoing queue of processed scenes to return to setup module + * + * XXX: while scenes are per-context but the rasterizer is + * (potentially) shared, these empty scenes should be returned to + * the context which created them rather than retained here. + */ struct lp_scene_queue *empty_scenes; /** The scene currently being rasterized by the threads */ @@ -121,8 +133,7 @@ struct lp_rasterizer }; -void lp_rast_shade_quads( struct lp_rasterizer *rast, - unsigned thread_index, +void lp_rast_shade_quads( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, int32_t c1, int32_t c2, int32_t c3); @@ -137,17 +148,18 @@ lp_rast_depth_pointer( struct lp_rasterizer *rast, unsigned x, unsigned y ) { void * depth; + assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); - if(!rast->zsbuf_map) + + if (!rast->zsbuf.map) return NULL; - assert(rast->zsbuf_transfer); - depth = rast->zsbuf_map + - y*rast->zsbuf_transfer->stride + - TILE_VECTOR_HEIGHT*x*util_format_get_blocksize(rast->zsbuf_transfer->texture->format); -#ifdef DEBUG + + depth = (rast->zsbuf.map + + rast->zsbuf.stride * y + + rast->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT); + assert(lp_check_alignment(depth, 16)); -#endif return depth; } @@ -159,13 +171,13 @@ lp_rast_depth_pointer( struct lp_rasterizer *rast, * \param x, y location of 4x4 block in window coords */ static INLINE void -lp_rast_shade_quads_all( struct lp_rasterizer *rast, - unsigned thread_index, +lp_rast_shade_quads_all( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y ) { - const struct lp_rast_state *state = rast->tasks[thread_index].current_state; - struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; + struct lp_rasterizer *rast = task->rast; + const struct lp_rast_state *state = task->current_state; + struct lp_rast_tile *tile = &task->tile; const unsigned ix = x % TILE_SIZE, iy = y % TILE_SIZE; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -175,7 +187,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer *rast, block_offset = (iy / 4) * (16 * 16) + (ix / 4) * 16; /* color buffer */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) + for (i = 0; i < rast->state.nr_cbufs; i++) color[i] = tile->color[i] + 4 * block_offset; depth = lp_rast_depth_pointer(rast, x, y); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 3f76f159df..a5f0d14c95 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -89,14 +89,11 @@ static const int pos_table16[16][2] = { * Shade all pixels in a 4x4 block. */ static void -block_full_4( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y ) +block_full_4(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y) { - lp_rast_shade_quads_all(rast_task->rast, - rast_task->thread_index, - &tri->inputs, - x, y); + lp_rast_shade_quads_all(task, &tri->inputs, x, y); } @@ -104,16 +101,16 @@ block_full_4( struct lp_rasterizer_task *rast_task, * Shade all pixels in a 16x16 block. */ static void -block_full_16( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y ) +block_full_16(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y) { unsigned ix, iy; assert(x % 16 == 0); assert(y % 16 == 0); for (iy = 0; iy < 16; iy += 4) for (ix = 0; ix < 16; ix += 4) - block_full_4(rast_task, tri, x + ix, y + iy); + block_full_4(task, tri, x + ix, y + iy); } @@ -123,18 +120,15 @@ block_full_16( struct lp_rasterizer_task *rast_task, * will be done as part of the fragment shader. */ static void -do_block_4( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y, - int c1, - int c2, - int c3 ) +do_block_4(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y, + int c1, int c2, int c3) { - lp_rast_shade_quads(rast_task->rast, - rast_task->thread_index, - &tri->inputs, - x, y, - -c1, -c2, -c3); + assert(x >= 0); + assert(y >= 0); + + lp_rast_shade_quads(task, &tri->inputs, x, y, -c1, -c2, -c3); } @@ -143,45 +137,58 @@ do_block_4( struct lp_rasterizer_task *rast_task, * of the triangle's bounds. */ static void -do_block_16( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y, - int c1, - int c2, - int c3 ) +do_block_16(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y, + int c0, int c1, int c2) { - const int eo1 = tri->eo1 * 4; - const int eo2 = tri->eo2 * 4; - const int eo3 = tri->eo3 * 4; - const int *step0 = tri->inputs.step[0]; - const int *step1 = tri->inputs.step[1]; - const int *step2 = tri->inputs.step[2]; - int i; + unsigned mask = 0; + int eo[3]; + int c[3]; + int i, j; + assert(x >= 0); + assert(y >= 0); assert(x % 16 == 0); assert(y % 16 == 0); - for (i = 0; i < 16; i++) { - int cx1 = c1 + step0[i] * 4; - int cx2 = c2 + step1[i] * 4; - int cx3 = c3 + step2[i] * 4; + eo[0] = tri->eo1 * 4; + eo[1] = tri->eo2 * 4; + eo[2] = tri->eo3 * 4; - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) { - /* the block is completely outside the triangle - nop */ - LP_COUNT(nr_empty_4); - } - else { - int px = x + pos_table4[i][0]; - int py = y + pos_table4[i][1]; - /* Don't bother testing if the 4x4 block is entirely in/out of - * the triangle. It's a little faster to do it in the jit code. - */ - LP_COUNT(nr_non_empty_4); - do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); + c[0] = c0; + c[1] = c1; + c[2] = c2; + + for (j = 0; j < 3; j++) { + const int *step = tri->inputs.step[j]; + const int cx = c[j] + eo[j]; + + /* Mask has bits set whenever we are outside any of the edges. + */ + for (i = 0; i < 16; i++) { + int out = cx + step[i] * 4; + mask |= (out >> 31) & (1 << i); } } + + mask = ~mask & 0xffff; + while (mask) { + int i = ffs(mask) - 1; + int px = x + pos_table4[i][0]; + int py = y + pos_table4[i][1]; + int cx1 = c0 + tri->inputs.step[0][i] * 4; + int cx2 = c1 + tri->inputs.step[1][i] * 4; + int cx3 = c2 + tri->inputs.step[2][i] * 4; + + mask &= ~(1 << i); + + /* Don't bother testing if the 4x4 block is entirely in/out of + * the triangle. It's a little faster to do it in the jit code. + */ + LP_COUNT(nr_non_empty_4); + do_block_4(task, tri, px, py, cx1, cx2, cx3); + } } @@ -190,62 +197,84 @@ do_block_16( struct lp_rasterizer_task *rast_task, * for this triangle. */ void -lp_rast_triangle( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +lp_rast_triangle(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *rast_task = &rast->tasks[thread_index]; const struct lp_rast_triangle *tri = arg.triangle; + const int x = task->x, y = task->y; + int ei[3], eo[3], c[3]; + unsigned outmask, inmask, partial_mask; + unsigned i, j; - int x = rast_task->x; - int y = rast_task->y; - unsigned i; + c[0] = tri->c1 + tri->dx12 * y - tri->dy12 * x; + c[1] = tri->c2 + tri->dx23 * y - tri->dy23 * x; + c[2] = tri->c3 + tri->dx31 * y - tri->dy31 * x; - int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x; - int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x; - int c3 = tri->c3 + tri->dx31 * y - tri->dy31 * x; + eo[0] = tri->eo1 * 16; + eo[1] = tri->eo2 * 16; + eo[2] = tri->eo3 * 16; - int ei1 = tri->ei1 * 16; - int ei2 = tri->ei2 * 16; - int ei3 = tri->ei3 * 16; + ei[0] = tri->ei1 * 16; + ei[1] = tri->ei2 * 16; + ei[2] = tri->ei3 * 16; - int eo1 = tri->eo1 * 16; - int eo2 = tri->eo2 * 16; - int eo3 = tri->eo3 * 16; + outmask = 0; + inmask = 0xffff; - LP_DBG(DEBUG_RAST, "lp_rast_triangle\n"); + for (j = 0; j < 3; j++) { + const int *step = tri->inputs.step[j]; + const int cox = c[j] + eo[j]; + const int cio = ei[j]- eo[j]; - /* Walk over the tile to build a list of 4x4 pixel blocks which will - * be filled/shaded. We do this at two granularities: 16x16 blocks - * and then 4x4 blocks. - */ - for (i = 0; i < 16; i++) { - int cx1 = c1 + (tri->inputs.step[0][i] * 16); - int cx2 = c2 + (tri->inputs.step[1][i] * 16); - int cx3 = c3 + (tri->inputs.step[2][i] * 16); - - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) { - /* the block is completely outside the triangle - nop */ - LP_COUNT(nr_empty_16); - } - else { - int px = x + pos_table16[i][0]; - int py = y + pos_table16[i][1]; - - if (cx1 + ei1 > 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) { - /* the block is completely inside the triangle */ - LP_COUNT(nr_fully_covered_16); - block_full_16(rast_task, tri, px, py); - } - else { - /* the block is partially in/out of the triangle */ - LP_COUNT(nr_partially_covered_16); - do_block_16(rast_task, tri, px, py, cx1, cx2, cx3); - } + /* Outmask has bits set whenever we are outside any of the + * edges. + */ + /* Inmask has bits set whenever we are inside all of the edges. + */ + for (i = 0; i < 16; i++) { + int out = cox + step[i] * 16; + int in = out + cio; + outmask |= (out >> 31) & (1 << i); + inmask &= ~((in >> 31) & (1 << i)); } } + + assert((outmask & inmask) == 0); + + if (outmask == 0xffff) + return; + + /* Invert mask, so that bits are set whenever we are at least + * partially inside all of the edges: + */ + partial_mask = ~inmask & ~outmask & 0xffff; + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int px = x + pos_table16[i][0]; + int py = y + pos_table16[i][1]; + int cx1 = c[0] + tri->inputs.step[0][i] * 16; + int cx2 = c[1] + tri->inputs.step[1][i] * 16; + int cx3 = c[2] + tri->inputs.step[2][i] * 16; + + partial_mask &= ~(1 << i); + + LP_COUNT(nr_partially_covered_16); + do_block_16(task, tri, px, py, cx1, cx2, cx3); + } + + /* Iterate over fulls: + */ + while (inmask) { + int i = ffs(inmask) - 1; + int px = x + pos_table16[i][0]; + int py = y + pos_table16[i][1]; + + inmask &= ~(1 << i); + + LP_COUNT(nr_fully_covered_16); + block_full_16(task, tri, px, py); + } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index b7116297ec..72492c0f0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -29,44 +29,67 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_simple_list.h" +#include "util/u_surface.h" #include "lp_scene.h" +#include "lp_scene_queue.h" +#include "lp_debug.h" struct lp_scene * -lp_scene_create(void) +lp_scene_create( struct pipe_context *pipe, + struct lp_scene_queue *queue ) { + unsigned i, j; struct lp_scene *scene = CALLOC_STRUCT(lp_scene); - if (scene) - lp_scene_init(scene); + if (!scene) + return NULL; + + scene->pipe = pipe; + scene->empty_queue = queue; + + for (i = 0; i < TILES_X; i++) { + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); + } + } + + scene->data.head = + scene->data.tail = CALLOC_STRUCT(data_block); + + make_empty_list(&scene->textures); + + pipe_mutex_init(scene->mutex); + return scene; } +/** + * Free all data associated with the given scene, and free(scene). + */ void lp_scene_destroy(struct lp_scene *scene) { - lp_scene_reset(scene); - lp_scene_free_bin_data(scene); - FREE(scene); -} + unsigned i, j; + lp_scene_reset(scene); -void -lp_scene_init(struct lp_scene *scene) -{ - unsigned i, j; for (i = 0; i < TILES_X; i++) for (j = 0; j < TILES_Y; j++) { struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); + assert(bin->commands.head == bin->commands.tail); + FREE(bin->commands.head); + bin->commands.head = NULL; + bin->commands.tail = NULL; } - scene->data.head = - scene->data.tail = CALLOC_STRUCT(data_block); + FREE(scene->data.head); + scene->data.head = NULL; - make_empty_list(&scene->textures); + pipe_mutex_destroy(scene->mutex); - pipe_mutex_init(scene->mutex); + FREE(scene); } @@ -92,6 +115,9 @@ lp_scene_is_empty(struct lp_scene *scene ) } +/* Free data for one particular bin. May be called from the + * rasterizer thread(s). + */ void lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) { @@ -100,6 +126,9 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) struct cmd_block *block; struct cmd_block *tmp; + assert(x < TILES_X); + assert(y < TILES_Y); + for (block = list->head; block != list->tail; block = tmp) { tmp = block->next; FREE(block); @@ -112,7 +141,8 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) /** - * Set scene to empty state. + * Free all the temporary data in a scene. May be called from the + * rasterizer thread(s). */ void lp_scene_reset(struct lp_scene *scene ) @@ -159,40 +189,8 @@ lp_scene_reset(struct lp_scene *scene ) } -/** - * Free all data associated with the given bin, but don't free(scene). - */ -void -lp_scene_free_bin_data(struct lp_scene *scene) -{ - unsigned i, j; - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) { - struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - /* lp_reset_scene() should have been already called */ - assert(bin->commands.head == bin->commands.tail); - FREE(bin->commands.head); - bin->commands.head = NULL; - bin->commands.tail = NULL; - } - FREE(scene->data.head); - scene->data.head = NULL; - - pipe_mutex_destroy(scene->mutex); -} - - -void -lp_scene_set_framebuffer_size( struct lp_scene *scene, - unsigned width, unsigned height ) -{ - assert(lp_scene_is_empty(scene)); - - scene->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; - scene->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; -} void @@ -390,3 +388,158 @@ end: pipe_mutex_unlock(scene->mutex); return bin; } + + +/** + * Prepare this scene for the rasterizer. + * Map the framebuffer surfaces. Initialize the 'rast' state. + */ +static boolean +lp_scene_map_buffers( struct lp_scene *scene ) +{ + struct pipe_screen *screen = scene->pipe->screen; + struct pipe_surface *cbuf, *zsbuf; + int i; + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + + /* Map all color buffers + */ + for (i = 0; i < scene->fb.nr_cbufs; i++) { + cbuf = scene->fb.cbufs[i]; + if (cbuf) { + scene->cbuf_transfer[i] = screen->get_tex_transfer(screen, + cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + cbuf->width, + cbuf->height); + if (!scene->cbuf_transfer[i]) + goto fail; + + scene->cbuf_map[i] = screen->transfer_map(screen, + scene->cbuf_transfer[i]); + if (!scene->cbuf_map[i]) + goto fail; + } + } + + /* Map the zsbuffer + */ + zsbuf = scene->fb.zsbuf; + if (zsbuf) { + scene->zsbuf_transfer = screen->get_tex_transfer(screen, + zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + zsbuf->width, + zsbuf->height); + if (!scene->zsbuf_transfer) + goto fail; + + scene->zsbuf_map = screen->transfer_map(screen, + scene->zsbuf_transfer); + if (!scene->zsbuf_map) + goto fail; + } + + return TRUE; + +fail: + /* Unmap and release transfers? + */ + return FALSE; +} + + + +/** + * Called after rasterizer as finished rasterizing a scene. + * + * We want to call this from the pipe_context's current thread to + * avoid having to have mutexes on the transfer functions. + */ +static void +lp_scene_unmap_buffers( struct lp_scene *scene ) +{ + struct pipe_screen *screen = scene->pipe->screen; + unsigned i; + + for (i = 0; i < scene->fb.nr_cbufs; i++) { + if (scene->cbuf_map[i]) + screen->transfer_unmap(screen, scene->cbuf_transfer[i]); + + if (scene->cbuf_transfer[i]) + screen->tex_transfer_destroy(scene->cbuf_transfer[i]); + + scene->cbuf_transfer[i] = NULL; + scene->cbuf_map[i] = NULL; + } + + if (scene->zsbuf_map) + screen->transfer_unmap(screen, scene->zsbuf_transfer); + + if (scene->zsbuf_transfer) + screen->tex_transfer_destroy(scene->zsbuf_transfer); + + scene->zsbuf_transfer = NULL; + scene->zsbuf_map = NULL; + + util_unreference_framebuffer_state( &scene->fb ); +} + + +void lp_scene_begin_binning( struct lp_scene *scene, + struct pipe_framebuffer_state *fb ) +{ + assert(lp_scene_is_empty(scene)); + + util_copy_framebuffer_state(&scene->fb, fb); + + scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE; + scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE; +} + + +void lp_scene_rasterize( struct lp_scene *scene, + struct lp_rasterizer *rast, + boolean write_depth ) +{ + if (0) { + unsigned x, y; + debug_printf("rasterize scene:\n"); + debug_printf(" data size: %u\n", lp_scene_data_size(scene)); + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { + debug_printf(" bin %u, %u size: %u\n", x, y, + lp_scene_bin_size(scene, x, y)); + } + } + } + + + scene->write_depth = (scene->fb.zsbuf != NULL && + write_depth); + + lp_scene_map_buffers( scene ); + + /* Enqueue the scene for rasterization, then immediately wait for + * it to finish. + */ + lp_rast_queue_scene( rast, scene ); + + /* Currently just wait for the rasterizer to finish. Some + * threading interactions need to be worked out, particularly once + * transfers become per-context: + */ + lp_rast_finish( rast ); + lp_scene_unmap_buffers( scene ); + lp_scene_enqueue( scene->empty_queue, scene ); +} diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index fb478cc2eb..739ac22908 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -39,6 +39,7 @@ #include "lp_tile_soa.h" #include "lp_rast.h" +struct lp_scene_queue; /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. @@ -56,8 +57,7 @@ /* switch to a non-pointer value for this: */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, - unsigned thread_index, +typedef void (*lp_rast_cmd)( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); struct cmd_block { @@ -113,8 +113,14 @@ struct texture_ref { * scenes: */ struct lp_scene { - struct cmd_bin tile[TILES_X][TILES_Y]; - struct data_block_list data; + struct pipe_context *pipe; + struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS]; + struct pipe_transfer *zsbuf_transfer; + + /* Scene's buffers are mapped at the time the scene is enqueued: + */ + void *cbuf_map[PIPE_MAX_COLOR_BUFS]; + uint8_t *zsbuf_map; /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; @@ -132,25 +138,28 @@ struct lp_scene { int curr_x, curr_y; /**< for iterating over bins */ pipe_mutex mutex; + + /* Where to place this scene once it has been rasterized: + */ + struct lp_scene_queue *empty_queue; + + struct cmd_bin tile[TILES_X][TILES_Y]; + struct data_block_list data; }; -struct lp_scene *lp_scene_create(void); +struct lp_scene *lp_scene_create(struct pipe_context *pipe, + struct lp_scene_queue *empty_queue); void lp_scene_destroy(struct lp_scene *scene); -void lp_scene_init(struct lp_scene *scene); boolean lp_scene_is_empty(struct lp_scene *scene ); void lp_scene_reset(struct lp_scene *scene ); -void lp_scene_free_bin_data(struct lp_scene *scene); - -void lp_scene_set_framebuffer_size( struct lp_scene *scene, - unsigned width, unsigned height ); void lp_bin_new_data_block( struct data_block_list *list ); @@ -297,5 +306,13 @@ lp_scene_bin_iter_begin( struct lp_scene *scene ); struct cmd_bin * lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); +void +lp_scene_rasterize( struct lp_scene *scene, + struct lp_rasterizer *rast, + boolean write_depth ); + +void +lp_scene_begin_binning( struct lp_scene *scene, + struct pipe_framebuffer_state *fb ); #endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 1cd3ea9a84..934b0d8439 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -83,7 +83,7 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return PIPE_MAX_VERTEX_SAMPLERS; + return 0; case PIPE_CAP_MAX_COMBINED_SAMPLERS: return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: @@ -107,11 +107,11 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; /* max 4Kx4K */ + return LP_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 9; /* max 256x256x256 */ + return LP_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; /* max 4Kx4K */ + return LP_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: @@ -194,9 +194,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, format_desc->block.height != 1) return FALSE; - if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return FALSE; if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && @@ -224,33 +222,22 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, format_desc->block.height != 1) return FALSE; - if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return FALSE; if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && - format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB && format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) return FALSE; + + /* not supported yet */ + if (format == PIPE_FORMAT_Z16_UNORM) + return FALSE; } return TRUE; } -static struct pipe_buffer * -llvmpipe_surface_buffer_create(struct pipe_screen *screen, - unsigned width, unsigned height, - enum pipe_format format, - unsigned tex_usage, - unsigned usage, - unsigned *stride) -{ - /* This function should never be used */ - assert(0); - return NULL; -} static void @@ -310,7 +297,6 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys) screen->base.get_paramf = llvmpipe_get_paramf; screen->base.is_format_supported = llvmpipe_is_format_supported; - screen->base.surface_buffer_create = llvmpipe_surface_buffer_create; screen->base.context_create = llvmpipe_create_context; screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index cb873667a2..cc7c18c8c0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -64,11 +64,10 @@ lp_setup_get_current_scene(struct setup_context *setup) */ setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE); - if(0)lp_scene_reset( setup->scene ); /* XXX temporary? */ + assert(lp_scene_is_empty(setup->scene)); - lp_scene_set_framebuffer_size(setup->scene, - setup->fb.width, - setup->fb.height); + lp_scene_begin_binning(setup->scene, + &setup->fb ); } return setup->scene; } @@ -133,13 +132,12 @@ static void reset_context( struct setup_context *setup ) /** Rasterize all scene's bins */ static void lp_setup_rasterize_scene( struct setup_context *setup, - boolean write_depth ) + boolean write_depth ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); - lp_rasterize_scene(setup->rast, - scene, - &setup->fb, + lp_scene_rasterize(scene, + setup->rast, write_depth); reset_context( setup ); @@ -244,19 +242,16 @@ void lp_setup_bind_framebuffer( struct setup_context *setup, const struct pipe_framebuffer_state *fb ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + /* Flush any old scene. + */ set_scene_state( setup, SETUP_FLUSHED ); - /* re-get scene pointer, may have a new scene after flushing */ - (void) scene; - scene = lp_setup_get_current_scene(setup); - + /* Set new state. This will be picked up later when we next need a + * scene. + */ util_copy_framebuffer_state(&setup->fb, fb); - - lp_scene_set_framebuffer_size(scene, setup->fb.width, setup->fb.height); } @@ -474,20 +469,28 @@ lp_setup_set_sampler_textures( struct setup_context *setup, jit_tex = &setup->fs.current.jit_context.textures[i]; jit_tex->width = tex->width0; jit_tex->height = tex->height0; + jit_tex->depth = tex->depth0; + jit_tex->last_level = tex->last_level; jit_tex->stride = lp_tex->stride[0]; - if(!lp_tex->dt) { - jit_tex->data = lp_tex->data; + if (!lp_tex->dt) { + /* regular texture - setup array of mipmap level pointers */ + int j; + for (j = 0; j < LP_MAX_TEXTURE_2D_LEVELS; j++) { + jit_tex->data[j] = + (ubyte *) lp_tex->data + lp_tex->level_offset[j]; + } } else { + /* display target texture/surface */ /* * XXX: Where should this be unmapped? */ struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen); struct llvmpipe_winsys *winsys = screen->winsys; - jit_tex->data = winsys->displaytarget_map(winsys, lp_tex->dt, - PIPE_BUFFER_USAGE_CPU_READ); - assert(jit_tex->data); + jit_tex->data[0] = winsys->displaytarget_map(winsys, lp_tex->dt, + PIPE_BUFFER_USAGE_CPU_READ); + assert(jit_tex->data[0]); } /* the scene references this texture */ @@ -681,7 +684,7 @@ lp_setup_destroy( struct setup_context *setup ) * it. */ struct setup_context * -lp_setup_create( struct pipe_screen *screen, +lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ) { unsigned i; @@ -696,7 +699,9 @@ lp_setup_create( struct pipe_screen *screen, if (!setup->empty_scenes) goto fail; - setup->rast = lp_rast_create( screen, setup->empty_scenes ); + /* XXX: move this to the screen and share between contexts: + */ + setup->rast = lp_rast_create(); if (!setup->rast) goto fail; @@ -709,7 +714,8 @@ lp_setup_create( struct pipe_screen *screen, /* create some empty scenes */ for (i = 0; i < MAX_SCENES; i++) { - setup->scenes[i] = lp_scene_create(); + setup->scenes[i] = lp_scene_create( pipe, setup->empty_scenes ); + lp_scene_enqueue(setup->empty_scenes, setup->scenes[i]); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 0e155a7dc3..17c112b528 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -62,7 +62,7 @@ struct lp_fragment_shader; struct lp_jit_context; struct setup_context * -lp_setup_create( struct pipe_screen *screen, +lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ); void diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index a8bf540803..e75412ac9a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -297,6 +297,15 @@ do_triangle_ccw(struct setup_context *setup, tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes); +#ifdef DEBUG + tri->v[0][0] = v1[0][0]; + tri->v[1][0] = v2[0][0]; + tri->v[2][0] = v3[0][0]; + tri->v[0][1] = v1[0][1]; + tri->v[1][1] = v2[0][1]; + tri->v[2][1] = v3[0][1]; +#endif + tri->dx12 = x1 - x2; tri->dx23 = x2 - x3; tri->dx31 = x3 - x1; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 24291da91e..671e74465c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -231,57 +231,29 @@ lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_QUADS: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); } break; case PIPE_PRIM_QUAD_STRIP: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride)); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); } break; @@ -415,57 +387,28 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_QUADS: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); } break; case PIPE_PRIM_QUAD_STRIP: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - setup->triangle( setup, - - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-0, stride) ); - } + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-0, stride) ); } break; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 90dae3f910..c4b79dd415 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -974,6 +974,13 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) assert(fs != llvmpipe->fs); (void) llvmpipe; + /* + * XXX: we need to flush the context until we have some sort of reference + * counting in fragment shaders as they may still be binned + */ + draw_flush(llvmpipe->draw); + lp_setup_flush(llvmpipe->setup, 0); + variant = shader->variants; while(variant) { struct lp_fragment_shader_variant *next = variant->next; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 48828bd0a0..d05157991b 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -52,25 +52,25 @@ struct pixel_test_case struct pixel_test_case test_cases[] = { - {PIPE_FORMAT_R5G6B5_UNORM, 0x0000, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_R5G6B5_UNORM, 0x001f, {0.0, 0.0, 1.0, 1.0}}, - {PIPE_FORMAT_R5G6B5_UNORM, 0x07e0, {0.0, 1.0, 0.0, 1.0}}, - {PIPE_FORMAT_R5G6B5_UNORM, 0xf800, {1.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_R5G6B5_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}}, - - {PIPE_FORMAT_A1R5G5B5_UNORM, 0x0000, {0.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_A1R5G5B5_UNORM, 0x001f, {0.0, 0.0, 1.0, 0.0}}, - {PIPE_FORMAT_A1R5G5B5_UNORM, 0x03e0, {0.0, 1.0, 0.0, 0.0}}, - {PIPE_FORMAT_A1R5G5B5_UNORM, 0x7c00, {1.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_A1R5G5B5_UNORM, 0x8000, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_A1R5G5B5_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, 0x0000, {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, 0x001f, {0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, 0x07e0, {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, 0xf800, {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_B5G5R5A1_UNORM, 0x0000, {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, 0x001f, {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, 0x03e0, {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, 0x7c00, {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, 0x8000, {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x000000ff, {0.0, 0.0, 1.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x0000ff00, {0.0, 1.0, 0.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00ff0000, {1.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0xff000000, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0x000000ff, {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0x0000ff00, {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00ff0000, {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0xff000000, {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, #if 0 {PIPE_FORMAT_R8G8B8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, @@ -81,12 +81,12 @@ struct pixel_test_case test_cases[] = {PIPE_FORMAT_R8G8B8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, #endif - {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0x000000ff, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0x0000ff00, {1.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00ff0000, {0.0, 1.0, 0.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0xff000000, {0.0, 0.0, 1.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0x000000ff, {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0x0000ff00, {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00ff0000, {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0xff000000, {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, }; diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 14ff00469b..f9dce8b9c2 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -37,7 +37,7 @@ #include "util/u_cpu_detect.h" #include "gallivm/lp_bld_const.h" -#include "gallivm/lp_bld_misc.h" +#include "gallivm/lp_bld_init.h" #include "lp_test.h" @@ -380,8 +380,7 @@ int main(int argc, char **argv) n = atoi(argv[i]); } - LLVMLinkInJIT(); - LLVMInitializeNativeTarget(); + lp_build_init(); util_cpu_detect(); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c index 2533275dc1..5a3cf37d6d 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -51,10 +51,11 @@ /** - * This provides the bridge between the sampler state store in lp_jit_context - * and lp_jit_texture and the sampler code generator. It provides the - * texture layout information required by the texture sampler code generator - * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime. + * This provides the bridge between the sampler state store in + * lp_jit_context and lp_jit_texture and the sampler code + * generator. It provides the texture layout information required by + * the texture sampler code generator in terms of the state stored in + * lp_jit_context and lp_jit_texture in runtime. */ struct llvmpipe_sampler_dynamic_state { @@ -79,6 +80,9 @@ struct lp_llvm_sampler_soa /** * Fetch the specified member of the lp_jit_texture structure. + * \param emit_load if TRUE, emit the LLVM load instruction to actually + * fetch the field's value. Otherwise, just emit the + * GEP code to address the field. * * @sa http://llvm.org/docs/GetElementPtr.html */ @@ -87,9 +91,11 @@ lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, LLVMBuilderRef builder, unsigned unit, unsigned member_index, - const char *member_name) + const char *member_name, + boolean emit_load) { - struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base; + struct llvmpipe_sampler_dynamic_state *state = + (struct llvmpipe_sampler_dynamic_state *)base; LLVMValueRef indices[4]; LLVMValueRef ptr; LLVMValueRef res; @@ -107,7 +113,10 @@ lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); - res = LLVMBuildLoad(builder, ptr, ""); + if (emit_load) + res = LLVMBuildLoad(builder, ptr, ""); + else + res = ptr; lp_build_name(res, "context.texture%u.%s", unit, member_name); @@ -116,26 +125,30 @@ lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, /** - * Helper macro to instantiate the functions that generate the code to fetch - * the members of lp_jit_texture to fulfill the sampler code generator requests. + * Helper macro to instantiate the functions that generate the code to + * fetch the members of lp_jit_texture to fulfill the sampler code + * generator requests. * - * This complexity is the price we have to pay to keep the texture sampler code - * generator a reusable module without dependencies to llvmpipe internals. + * This complexity is the price we have to pay to keep the texture + * sampler code generator a reusable module without dependencies to + * llvmpipe internals. */ -#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \ +#define LP_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \ static LLVMValueRef \ lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \ LLVMBuilderRef builder, \ unsigned unit) \ { \ - return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \ + return lp_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \ } -LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH) -LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT) -LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE) -LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA) +LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH, TRUE) +LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT, TRUE) +LP_LLVM_TEXTURE_MEMBER(depth, LP_JIT_TEXTURE_DEPTH, TRUE) +LP_LLVM_TEXTURE_MEMBER(last_level, LP_JIT_TEXTURE_LAST_LEVEL, TRUE) +LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE, TRUE) +LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA, FALSE) static void @@ -145,6 +158,10 @@ lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) } +/** + * Fetch filtered values from texture. + * The 'texel' parameter returns four vectors corresponding to R, G, B, A. + */ static void lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, LLVMBuilderRef builder, @@ -185,6 +202,8 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel; sampler->dynamic_state.base.width = lp_llvm_texture_width; sampler->dynamic_state.base.height = lp_llvm_texture_height; + sampler->dynamic_state.base.depth = lp_llvm_texture_depth; + sampler->dynamic_state.base.last_level = lp_llvm_texture_last_level; sampler->dynamic_state.base.stride = lp_llvm_texture_stride; sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr; sampler->dynamic_state.static_state = static_state; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 022bf92cb4..e41d6238ed 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -124,14 +124,9 @@ llvmpipe_texture_create(struct pipe_screen *_screen, pipe_reference_init(&lpt->base.reference, 1); lpt->base.screen = &screen->base; - /* XXX: The xlib state tracker is brain-dead and will request - * PIPE_FORMAT_Z16_UNORM no matter how much we tell it we don't support it. - */ - if (lpt->base.format == PIPE_FORMAT_Z16_UNORM) - lpt->base.format = PIPE_FORMAT_Z32_UNORM; - if (lpt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY)) { + PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED)) { if (!llvmpipe_displaytarget_layout(screen, lpt)) goto fail; } @@ -148,43 +143,6 @@ llvmpipe_texture_create(struct pipe_screen *_screen, } -static struct pipe_texture * -llvmpipe_texture_blanket(struct pipe_screen * screen, - const struct pipe_texture *base, - const unsigned *stride, - struct pipe_buffer *buffer) -{ - /* FIXME */ -#if 0 - struct llvmpipe_texture *lpt; - assert(screen); - - /* Only supports one type */ - if (base->target != PIPE_TEXTURE_2D || - base->last_level != 0 || - base->depth0 != 1) { - return NULL; - } - - lpt = CALLOC_STRUCT(llvmpipe_texture); - if (!lpt) - return NULL; - - lpt->base = *base; - pipe_reference_init(&lpt->base.reference, 1); - lpt->base.screen = screen; - lpt->stride[0] = stride[0]; - - pipe_buffer_reference(&lpt->buffer, buffer); - - return &lpt->base; -#else - debug_printf("llvmpipe_texture_blanket() not implemented!"); - return NULL; -#endif -} - - static void llvmpipe_texture_destroy(struct pipe_texture *pt) { @@ -414,7 +372,6 @@ void llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = llvmpipe_texture_create; - screen->texture_blanket = llvmpipe_texture_blanket; screen->texture_destroy = llvmpipe_texture_destroy; screen->get_tex_surface = llvmpipe_get_tex_surface; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 87c905bc02..c511a01298 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -32,6 +32,10 @@ #include "pipe/p_state.h" +#define LP_MAX_TEXTURE_2D_LEVELS 13 /* 4K x 4K for now */ +#define LP_MAX_TEXTURE_3D_LEVELS 10 /* 512 x 512 x 512 for now */ + + struct pipe_context; struct pipe_screen; struct llvmpipe_context; @@ -42,8 +46,8 @@ struct llvmpipe_texture { struct pipe_texture base; - unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long level_offset[LP_MAX_TEXTURE_2D_LEVELS]; + unsigned stride[LP_MAX_TEXTURE_2D_LEVELS]; /** * Display target, for textures with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c new file mode 100644 index 0000000000..c1980b316d --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c @@ -0,0 +1,126 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "lp_tile_soa.h" +#include "lp_tile_image.h" + + +#define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4) + + +/** + * Convert a tiled image into a linear image. + * \param src_stride source row stride in bytes (bytes per row of tiles) + * \param dst_stride dest row stride in bytes + */ +void +lp_tiled_to_linear(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride) +{ + const unsigned tiles_per_row = src_stride / BYTES_PER_TILE; + unsigned i, j; + + for (j = 0; j < height; j += TILE_SIZE) { + for (i = 0; i < width; i += TILE_SIZE) { + unsigned tile_offset = + ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); + unsigned byte_offset = tile_offset * BYTES_PER_TILE; + const uint8_t *src_tile = src + byte_offset; + + lp_tile_write_4ub(format, + src_tile, + dst, + dst_stride, + i, j, TILE_SIZE, TILE_SIZE); + } + } +} + + +/** + * Convert a linear image into a tiled image. + * \param src_stride source row stride in bytes + * \param dst_stride dest row stride in bytes (bytes per row of tiles) + */ +void +lp_linear_to_tiled(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride) +{ + const unsigned tiles_per_row = dst_stride / BYTES_PER_TILE; + unsigned i, j; + + for (j = 0; j < height; j += TILE_SIZE) { + for (i = 0; i < width; i += TILE_SIZE) { + unsigned tile_offset = + ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); + unsigned byte_offset = tile_offset * BYTES_PER_TILE; + uint8_t *dst_tile = dst + byte_offset; + + lp_tile_read_4ub(format, + dst_tile, + src, + src_stride, + i, j, TILE_SIZE, TILE_SIZE); + } + } +} + + +/** + * For testing only. + */ +void +test_tiled_linear_conversion(uint8_t *data, + enum pipe_format format, + unsigned width, unsigned height, + unsigned stride) +{ + /* size in tiles */ + unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE; + unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE; + + uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4); + + unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4; + + lp_linear_to_tiled(data, tiled, width, height, format, + stride, tiled_stride); + + lp_tiled_to_linear(tiled, data, width, height, format, + tiled_stride, stride); + + free(tiled); +} + diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.h b/src/gallium/drivers/llvmpipe/lp_tile_image.h new file mode 100644 index 0000000000..60d472e8c5 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef LP_TILE_IMAGE_H +#define LP_TILE_IMAGE_H + + +void +lp_tiled_to_linear(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride); + + +void +lp_linear_to_tiled(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride); + + +void +test_tiled_linear_conversion(uint8_t *data, + enum pipe_format format, + unsigned width, unsigned height, + unsigned stride); + + +#endif /* LP_TILE_IMAGE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 5d53689a3d..00b8d4fc38 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -45,10 +45,10 @@ sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/u from u_format_access import * -def generate_format_read(format, dst_type, dst_native_type, dst_suffix): +def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): '''Generate the function to read pixels from a particular format''' - name = short_name(format) + name = format.short_name() src_native_type = native_type(format) @@ -64,11 +64,11 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): names = ['']*4 if format.colorspace == 'rgb': for i in range(4): - swizzle = format.out_swizzle[i] + swizzle = format.swizzles[i] if swizzle < 4: names[swizzle] += 'rgba'[i] elif format.colorspace == 'zs': - swizzle = format.out_swizzle[0] + swizzle = format.swizzles[0] if swizzle < 4: names[swizzle] = 'z' else: @@ -76,48 +76,49 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): else: assert False - if format.layout == ARITH: - print ' %s pixel = *src_pixel++;' % src_native_type - shift = 0; - for i in range(4): - src_type = format.in_types[i] - width = src_type.size - if names[i]: - value = 'pixel' - mask = (1 << width) - 1 - if shift: - value = '(%s >> %u)' % (value, shift) - if shift + width < format.block_size(): - value = '(%s & 0x%x)' % (value, mask) - value = conversion_expr(src_type, dst_type, dst_native_type, value) - print ' %s %s = %s;' % (dst_native_type, names[i], value) - shift += width - elif format.layout == ARRAY: - for i in range(4): - src_type = format.in_types[i] - if names[i]: - value = '(*src_pixel++)' - value = conversion_expr(src_type, dst_type, dst_native_type, value) - print ' %s %s = %s;' % (dst_native_type, names[i], value) + if format.layout == PLAIN: + if not format.is_array(): + print ' %s pixel = *src_pixel++;' % src_native_type + shift = 0; + for i in range(4): + src_channel = format.channels[i] + width = src_channel.size + if names[i]: + value = 'pixel' + mask = (1 << width) - 1 + if shift: + value = '(%s >> %u)' % (value, shift) + if shift + width < format.block_size(): + value = '(%s & 0x%x)' % (value, mask) + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + print ' %s %s = %s;' % (dst_native_type, names[i], value) + shift += width + else: + for i in range(4): + src_channel = format.channels[i] + if names[i]: + value = '(*src_pixel++)' + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + print ' %s %s = %s;' % (dst_native_type, names[i], value) else: assert False for i in range(4): if format.colorspace == 'rgb': - swizzle = format.out_swizzle[i] + swizzle = format.swizzles[i] if swizzle < 4: value = names[swizzle] elif swizzle == SWIZZLE_0: value = '0' elif swizzle == SWIZZLE_1: - value = '1' + value = get_one(dst_channel) else: assert False elif format.colorspace == 'zs': if i < 3: value = 'z' else: - value = '1' + value = get_one(dst_channel) else: assert False print ' TILE_PIXEL(dst, x, y, %u) = %s; /* %s */' % (i, value, 'rgba'[i]) @@ -129,31 +130,16 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): print -def compute_inverse_swizzle(format): - '''Return an array[4] of inverse swizzle terms''' - inv_swizzle = [None]*4 - if format.colorspace == 'rgb': - for i in range(4): - swizzle = format.out_swizzle[i] - if swizzle < 4: - inv_swizzle[swizzle] = i - elif format.colorspace == 'zs': - swizzle = format.out_swizzle[0] - if swizzle < 4: - inv_swizzle[swizzle] = 0 - return inv_swizzle - - -def pack_rgba(format, src_type, r, g, b, a): +def pack_rgba(format, src_channel, r, g, b, a): """Return an expression for packing r, g, b, a into a pixel of the given format. Ex: '(b << 24) | (g << 16) | (r << 8) | (a << 0)' """ assert format.colorspace == 'rgb' - inv_swizzle = compute_inverse_swizzle(format) + inv_swizzle = format.inv_swizzles() shift = 0 expr = None for i in range(4): - # choose r, g, b, or a depending on the inverse swizzle term + # choose r, g, b, or a depending on the inverse swizzle term if inv_swizzle[i] == 0: value = r elif inv_swizzle[i] == 1: @@ -166,25 +152,25 @@ def pack_rgba(format, src_type, r, g, b, a): value = None if value: - dst_type = format.in_types[i] + dst_channel = format.channels[i] dst_native_type = native_type(format) - value = conversion_expr(src_type, dst_type, dst_native_type, value) + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) term = "((%s) << %d)" % (value, shift) if expr: expr = expr + " | " + term else: expr = term - width = format.in_types[i].size + width = format.channels[i].size shift = shift + width return expr -def emit_unrolled_write_code(format, src_type): +def emit_unrolled_write_code(format, src_channel): '''Emit code for writing a block based on unrolled loops. This is considerably faster than the TILE_PIXEL-based code below. ''' - dst_native_type = native_type(format) + dst_native_type = 'uint%u_t' % format.block_size() print ' const unsigned dstpix_stride = dst_stride / %d;' % format.stride() print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type) print ' unsigned int qx, qy, i;' @@ -199,8 +185,8 @@ def emit_unrolled_write_code(format, src_type): print ' const uint8_t *a = src + 3 * TILE_C_STRIDE;' print ' (void) r; (void) g; (void) b; (void) a; /* silence warnings */' print ' for (i = 0; i < TILE_C_STRIDE; i += 2) {' - print ' const uint32_t pixel0 = %s;' % pack_rgba(format, src_type, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]") - print ' const uint32_t pixel1 = %s;' % pack_rgba(format, src_type, "r[i+1]", "g[i+1]", "b[i+1]", "a[i+1]") + print ' const uint32_t pixel0 = %s;' % pack_rgba(format, src_channel, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]") + print ' const uint32_t pixel1 = %s;' % pack_rgba(format, src_channel, "r[i+1]", "g[i+1]", "b[i+1]", "a[i+1]") print ' const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);' print ' dstpix[offset + 0] = pixel0;' print ' dstpix[offset + 1] = pixel1;' @@ -210,11 +196,11 @@ def emit_unrolled_write_code(format, src_type): print ' }' -def emit_tile_pixel_write_code(format, src_type): +def emit_tile_pixel_write_code(format, src_channel): '''Emit code for writing a block based on the TILE_PIXEL macro.''' dst_native_type = native_type(format) - inv_swizzle = compute_inverse_swizzle(format) + inv_swizzle = format.inv_swizzles() print ' unsigned x, y;' print ' uint8_t *dst_row = dst + y0*dst_stride;' @@ -222,27 +208,28 @@ def emit_tile_pixel_write_code(format, src_type): print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) print ' for (x = 0; x < w; ++x) {' - if format.layout == ARITH: - print ' %s pixel = 0;' % dst_native_type - shift = 0; - for i in range(4): - dst_type = format.in_types[i] - width = dst_type.size - if inv_swizzle[i] is not None: - value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] - value = conversion_expr(src_type, dst_type, dst_native_type, value) - if shift: - value = '(%s << %u)' % (value, shift) - print ' pixel |= %s;' % value - shift += width - print ' *dst_pixel++ = pixel;' - elif format.layout == ARRAY: - for i in range(4): - dst_type = format.in_types[i] - if inv_swizzle[i] is not None: - value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] - value = conversion_expr(src_type, dst_type, dst_native_type, value) - print ' *dst_pixel++ = %s;' % value + if format.layout == PLAIN: + if not format.is_array(): + print ' %s pixel = 0;' % dst_native_type + shift = 0; + for i in range(4): + dst_channel = format.channels[i] + width = dst_channel.size + if inv_swizzle[i] is not None: + value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + if shift: + value = '(%s << %u)' % (value, shift) + print ' pixel |= %s;' % value + shift += width + print ' *dst_pixel++ = pixel;' + else: + for i in range(4): + dst_channel = format.channels[i] + if inv_swizzle[i] is not None: + value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + print ' *dst_pixel++ = %s;' % value else: assert False @@ -251,28 +238,33 @@ def emit_tile_pixel_write_code(format, src_type): print ' }' -def generate_format_write(format, src_type, src_native_type, src_suffix): +def generate_format_write(format, src_channel, src_native_type, src_suffix): '''Generate the function to write pixels to a particular format''' - name = short_name(format) + name = format.short_name() print 'static void' print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) print '{' - if format.layout == ARITH and format.colorspace == 'rgb': - emit_unrolled_write_code(format, src_type) + if format.layout == PLAIN \ + and format.colorspace == 'rgb' \ + and format.block_size() <= 32 \ + and format.is_pot() \ + and not format.is_mixed() \ + and format.channels[0].type == UNSIGNED: + emit_unrolled_write_code(format, src_channel) else: - emit_tile_pixel_write_code(format, src_type) + emit_tile_pixel_write_code(format, src_channel) print '}' print -def generate_read(formats, dst_type, dst_native_type, dst_suffix): +def generate_read(formats, dst_channel, dst_native_type, dst_suffix): '''Generate the dispatch function to read pixels from any format''' for format in formats: if is_format_supported(format): - generate_format_read(format, dst_type, dst_native_type, dst_suffix) + generate_format_read(format, dst_channel, dst_native_type, dst_suffix) print 'void' print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) @@ -282,7 +274,7 @@ def generate_read(formats, dst_type, dst_native_type, dst_suffix): for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_read_%s;' % (short_name(format), dst_suffix) + print ' func = &lp_tile_%s_read_%s;' % (format.short_name(), dst_suffix) print ' break;' print ' default:' print ' debug_printf("unsupported format\\n");' @@ -293,12 +285,12 @@ def generate_read(formats, dst_type, dst_native_type, dst_suffix): print -def generate_write(formats, src_type, src_native_type, src_suffix): +def generate_write(formats, src_channel, src_native_type, src_suffix): '''Generate the dispatch function to write pixels to any format''' for format in formats: if is_format_supported(format): - generate_format_write(format, src_type, src_native_type, src_suffix) + generate_format_write(format, src_channel, src_native_type, src_suffix) print 'void' print 'lp_tile_write_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) @@ -309,7 +301,7 @@ def generate_write(formats, src_type, src_native_type, src_suffix): for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_write_%s;' % (short_name(format), src_suffix) + print ' func = &lp_tile_%s_write_%s;' % (format.short_name(), src_suffix) print ' break;' print ' default:' print ' debug_printf("unsupported format\\n");' @@ -359,12 +351,12 @@ def main(): generate_clamp() - type = Type(UNSIGNED, True, 8) + channel = Channel(UNSIGNED, True, 8) native_type = 'uint8_t' suffix = '4ub' - generate_read(formats, type, native_type, suffix) - generate_write(formats, type, native_type, suffix) + generate_read(formats, channel, native_type, suffix) + generate_write(formats, channel, native_type, suffix) if __name__ == '__main__': |