From 0639765b2850739af1678f10fc0c5706d5827776 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 16 Apr 2010 09:10:54 -0600 Subject: Merge the lp-surface-tiling branch into master. This branch implemented dual representations of texture/drawing surfaces: one in the conventional linear layout and the other the tiled layout which is used by the fragment shader pipe. Per-tile flags indicate the layout of each image tile. In many situations this lets us avoid converting image data between the two layouts. Squashed commit of the following: commit 563a7e3cc552fdcfcaf9ac0d4b1683c3ba2ae732 Author: Brian Paul Date: Thu Apr 8 14:48:21 2010 -0600 llvmpipe: convert points/lines to triangles with draw module This isn't the most efficient way to render points/lines but it allows us to run more tests. commit a8aa763e8a717533f2b13bb6ea53cbccbede68c9 Author: Brian Paul Date: Thu Apr 8 14:47:28 2010 -0600 llvmpipe: call llvmpipe_get_texture_tile() for depth/stencil The returned pointer isn't used, but the tile status/layout info gets updated. Helps to fix glReadPixels(DEPTH / STENCIL). commit 463bc64af266194acbea71cd52e26a79b8c8a260 Author: Brian Paul Date: Thu Apr 8 10:58:48 2010 -0600 llvmpipe: add store_color to debug cmd_names list commit 784cc73fb334a9d7b7c93cbd8a1445cdf742ff58 Author: Brian Paul Date: Thu Apr 8 10:57:43 2010 -0600 llvmpipe: fix debug build commit 792c93171ec075664f55720ffed397ac2834a4fc Author: Brian Paul Date: Thu Apr 8 10:49:01 2010 -0600 llvmpipe: fix cube mapping commit 882b1035db88c3dd8aebe28dc971ac30a9ee39e3 Author: Brian Paul Date: Thu Apr 8 09:53:30 2010 -0600 llvmpipe: remove some older/unused code commit b807d32b23145301e8842824664d9f06b9c5502e Author: Brian Paul Date: Thu Apr 8 09:29:50 2010 -0600 llvmpipe: silence warning commit 7b337e64fec92836ccdf9d96216289dd58418e35 Author: Brian Paul Date: Wed Apr 7 17:06:08 2010 -0600 llvmpipe: clean-up, comments in lp_surface_copy() commit c52fa36f249cc652fa8d5fdd94d6574127c08c41 Author: Brian Paul Date: Wed Apr 7 16:51:42 2010 -0600 llvmpipe: overhaul tiled/linear memory management Now we keep per-tile layout info (linear vs. tiled (or neither or both) and convert from one layout to the other on demand. commit 4a50ccfd470547c9be0704005818a87014e9c0e9 Author: Brian Paul Date: Wed Apr 7 16:51:27 2010 -0600 llvmpipe: added tile read/write counters commit b7d0ea9c687ac8773b083791623826fa604adf21 Author: Brian Paul Date: Mon Apr 5 14:54:04 2010 -0600 llvmpipe: rename some functions commit ee45c6e5b95cbd3c8cccc9aa4d45d8aef11e20c4 Author: Brian Paul Date: Mon Apr 5 14:42:15 2010 -0600 llvmpipe: re-org some get block/tile pointer code commit 26ce97c16c0b6520ff1538803baa772d8c3b1280 Author: Brian Paul Date: Mon Apr 5 14:34:13 2010 -0600 llvmpipe: disable bad assertions commit 5c670481248c4d46f87f13bf3af5655925e7002d Author: Brian Paul Date: Fri Apr 2 16:36:11 2010 -0600 llvmpipe: add a special-case optimization to lp_surface_copy() Be more efficient when copying tiled image to linear image. Before, the fallback path was always converting the whole source image to linear. Now we can convert just a sub region. commit faa684645e64d6024b3a11e4e08da825e8220b2e Author: Brian Paul Date: Fri Apr 2 16:15:16 2010 -0600 llvmpipe: assorted texture and tile/line conversion code change s The tiled/linear conversion functions take x/y positions now to allow converting only sub-regions. More texture-related helper functions. commit baad81ec5318d44bfac1e37c7643afc0836607bb Author: Brian Paul Date: Tue Mar 30 13:18:40 2010 -0600 llvmpipe: convert tiled->linear upon PIPE_FLUSH_SWAPBUFFERS If we know we're about to do a swapbuffers we should immediately convert the tiled color tiles to linear instead of later in llvmpipe_texture_unmap() since we can take advantage of threading/ parallelism here. commit 928dd41256811daeddb7506a49a34dbad04beaf8 Author: Brian Paul Date: Tue Mar 30 09:16:58 2010 -0600 llvmpipe: polish-up the llvmpipe_flush() code commit dd6014abcf86c517d159b8175e0eaeb167ea2ef6 Author: Brian Paul Date: Tue Mar 30 09:15:17 2010 -0600 llvmpipe: SETUP_x enum clean-up commit 0b1ce6da2b28a41f3389685ab93e10b43c950f5d Author: Brian Paul Date: Fri Mar 26 10:43:37 2010 -0600 llvmpipe: remove unused vars commit 4562663480f88162ed4452cb05569eecb67f9f39 Author: Brian Paul Date: Fri Mar 26 10:31:55 2010 -0600 llvmpipe: cope with non-existant color/depth buffers The fragment jit functions always grab these pointers, even if they're not used. commit df4329edbaf204ed501f1eac0698b8198178f9af Author: Brian Paul Date: Thu Mar 25 15:20:15 2010 -0600 llvmpipe: do all render target surface mapping/unmapping in the rast code commit 3d0c25d5ba8b8f61e8366d4c97324e45d526ff41 Author: Brian Paul Date: Thu Mar 25 14:31:21 2010 -0600 llvmpipe: map z/stencil buffer on demand like color buffers Plus lots of code clean-up and loose ends taken care of. commit c3b6fddd788aef09b4b84b843b7b1272231151e8 Author: Brian Paul Date: Thu Mar 25 13:15:03 2010 -0600 llvmpipe: remove unused write_zstencil field commit 63374d97836926a6357e9d6dd24a509a8e155c56 Author: Brian Paul Date: Thu Mar 25 09:45:59 2010 -0600 llvmpipe: add missing lp_rast_end() call Fixes crash on window resize when LP_NUM_THREADS=0. commit 92fe9952161cc06f6edc58778e9e5a8b9ea447dc Author: Brian Paul Date: Wed Mar 24 10:15:19 2010 -0600 llvmpipe: add tiled/linear conversion for 16-bit Z images commit 6605fa28c147f30df351da0e4413cab33e4db5da Author: Brian Paul Date: Tue Mar 23 16:06:41 2010 -0600 llvmpipe: implement tiled/linear conversion for Z/stencil images commit 804528d84ffa292ef9d49d3666cdd3fa099ff3ff Author: Brian Paul Date: Tue Mar 23 16:05:45 2010 -0600 llvmpipe: added texture stride comment commit 66a88c012edf670c4ac887a912f02dcff93266dd Author: Brian Paul Date: Tue Mar 23 16:04:07 2010 -0600 llvmpipe: remove unused vars commit e2ca8d1328316dc8b36d5f688c16d109e49a6870 Author: Brian Paul Date: Mon Mar 22 18:53:11 2010 -0600 llvmpipe: checkpoint WIP: overhaul texture/surface mapping Conversion between tiled and linear surfaces is working everywhere now. The LP_TEXTURE_READ/READ_WRITE/WRITE_ALL flags let us avoid unnecessary image layout conversions. Still some loose ends, temporary/debug code, etc. Need to implement tiled/linear conversion for depth/stencil images. commit f2730a03839ee8984c1f537b7cbebba24961397a Author: Brian Paul Date: Mon Mar 22 14:41:58 2010 -0600 llvmpipe: rename/repurpose lp_rast_store_color() commit e192a47552c5d20d2caef452ca7697e2cd852c9b Author: Brian Paul Date: Mon Mar 22 14:38:51 2010 -0600 llvmpipe: remove lp_rast_load_color() commit 3cff0bde4b4ab980e1c3e812700419091527c76b Author: Brian Paul Date: Mon Mar 22 14:11:38 2010 -0600 llvmpipe: remove/consolidate texture image code commit 3a2f08b6a550c69ef5e874f482be30252cbf8bfa Author: Brian Paul Date: Fri Mar 19 17:03:14 2010 -0600 llvmpipe: checkpoint WIP: directly render to tiled texture buffers We're now directly writing colors into the tiled texture image buffers. This is a checkpoint commit with lots of dead code and temporary hacks. Everything will get cleaned up eventually. commit c5ca987e03870849514d4e3c99af143722a09695 Author: Brian Paul Date: Fri Mar 19 16:41:14 2010 -0600 llvmpipe: refactor code, create tile_pixel_offset() commit 2133e8273e937cbac09cd7264d6ce53af9764ddb Author: Brian Paul Date: Fri Mar 19 14:55:11 2010 -0600 llvmpipe: pass LP_TEXTURE_LINEAR/TILED flags around commit b9b9d4b82b01f4588721fdc8444740f859b4a021 Author: Brian Paul Date: Fri Mar 19 14:51:05 2010 -0600 llvmpipe: checkpoint WIP: hanlde co-existing tiled/linear texture data Cube maps are temporarily broken, maybe other things. commit 4cd322e6889940b5f155fcb69041b685b9ef9273 Author: Brian Paul Date: Fri Mar 19 11:34:43 2010 -0600 progs/demos: add other modes/patterns to dissolve demo --- src/gallium/drivers/llvmpipe/lp_context.c | 4 + src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 4 +- src/gallium/drivers/llvmpipe/lp_flush.c | 15 +- src/gallium/drivers/llvmpipe/lp_jit.c | 4 +- src/gallium/drivers/llvmpipe/lp_jit.h | 4 +- src/gallium/drivers/llvmpipe/lp_rast.c | 322 +++++++----- src/gallium/drivers/llvmpipe/lp_rast.h | 7 +- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 105 ++-- src/gallium/drivers/llvmpipe/lp_scene.c | 49 +- src/gallium/drivers/llvmpipe/lp_scene.h | 8 +- src/gallium/drivers/llvmpipe/lp_setup.c | 54 +- src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 +- src/gallium/drivers/llvmpipe/lp_surface.c | 104 +++- src/gallium/drivers/llvmpipe/lp_texture.c | 648 ++++++++++++++++++++++-- src/gallium/drivers/llvmpipe/lp_texture.h | 100 +++- src/gallium/drivers/llvmpipe/lp_tile_image.c | 296 +++++++++-- src/gallium/drivers/llvmpipe/lp_tile_image.h | 18 +- src/gallium/drivers/llvmpipe/lp_tile_soa.h | 25 +- src/gallium/drivers/llvmpipe/lp_tile_soa.py | 4 + 20 files changed, 1428 insertions(+), 347 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index c7acb0c545..e63720c99a 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -184,6 +184,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe); + /* convert points and lines into triangles: */ + draw_wide_point_threshold(llvmpipe->draw, 0.0); + draw_wide_line_threshold(llvmpipe->draw, 0.0); + #if USE_DRAW_STAGE_PSTIPPLE /* Do polygon stipple w/ texture map + frag prog? */ draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe); diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index a9b8ba258b..86525eea9e 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -74,13 +74,13 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, * Map vertex buffers */ for (i = 0; i < lp->num_vertex_buffers; i++) { - void *buf = llvmpipe_resource(lp->vertex_buffer[i].buffer)->data; + void *buf = llvmpipe_resource_data(lp->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes = llvmpipe_resource(indexBuffer)->data; + void *mapped_indexes = llvmpipe_resource_data(indexBuffer); draw_set_mapped_element_buffer_range(draw, indexSize, min_index, max_index, diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index f1533f8f70..a248142b1d 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -37,6 +37,10 @@ #include "lp_setup.h" +/** + * \param flags bitmask of PIPE_FLUSH_x flags + * \param fence if non-null, returns pointer to a fench which can be waited on + */ void llvmpipe_flush( struct pipe_context *pipe, unsigned flags, @@ -60,14 +64,9 @@ llvmpipe_flush( struct pipe_context *pipe, } } - /* XXX the lp_setup_flush(flags) param is not a bool, and it's ignored - * at this time! - */ - if (flags & PIPE_FLUSH_SWAPBUFFERS) { - lp_setup_flush( llvmpipe->setup, FALSE ); - } - else if (flags & PIPE_FLUSH_RENDER_CACHE) { - lp_setup_flush( llvmpipe->setup, TRUE ); + /* ask the setup module to flush */ + if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_RENDER_CACHE)) { + lp_setup_flush(llvmpipe->setup, flags); } /* Enable to dump BMPs of the color/depth buffers each frame */ diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 2f804bb11c..7e8a117cc8 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -58,10 +58,10 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) elem_types[LP_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); elem_types[LP_JIT_TEXTURE_ROW_STRIDE] = - LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_2D_LEVELS); + LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_LEVELS); elem_types[LP_JIT_TEXTURE_DATA] = LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), - LP_MAX_TEXTURE_2D_LEVELS); + LP_MAX_TEXTURE_LEVELS); texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 4930ff02e6..3790a71eab 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -51,8 +51,8 @@ struct lp_jit_texture uint32_t height; uint32_t depth; uint32_t last_level; - uint32_t row_stride[LP_MAX_TEXTURE_2D_LEVELS]; - const void *data[LP_MAX_TEXTURE_2D_LEVELS]; + uint32_t row_stride[LP_MAX_TEXTURE_LEVELS]; + const void *data[LP_MAX_TEXTURE_LEVELS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index fccc63c28f..4574f41145 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -42,15 +42,15 @@ #include "lp_scene.h" -/* Begin rasterizing a scene: +/** + * Begin rasterizing a scene. + * Called once per scene by one thread. */ -static boolean +static void lp_rast_begin( struct lp_rasterizer *rast, struct lp_scene *scene ) { const struct pipe_framebuffer_state *fb = &scene->fb; - boolean write_color = fb->nr_cbufs != 0; - boolean write_zstencil = fb->zsbuf != NULL; int i; rast->curr_scene = scene; @@ -58,59 +58,147 @@ lp_rast_begin( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); rast->state.nr_cbufs = scene->fb.nr_cbufs; - rast->state.write_zstencil = write_zstencil; - rast->state.write_color = write_color; for (i = 0; i < rast->state.nr_cbufs; i++) { struct pipe_surface *cbuf = scene->fb.cbufs[i]; - rast->cbuf[i].map = scene->cbuf_map[i]; rast->cbuf[i].format = cbuf->texture->format; - rast->cbuf[i].width = cbuf->width; - rast->cbuf[i].height = cbuf->height; - rast->cbuf[i].stride = llvmpipe_resource_stride(cbuf->texture, cbuf->level); + rast->cbuf[i].tiles_per_row = align(cbuf->width, TILE_SIZE) / TILE_SIZE; + rast->cbuf[i].blocksize = + util_format_get_blocksize(cbuf->texture->format); + rast->cbuf[i].map = llvmpipe_resource_map(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_LAYOUT_NONE); } - if (write_zstencil) { + if (fb->zsbuf) { struct pipe_surface *zsbuf = scene->fb.zsbuf; - rast->zsbuf.map = scene->zsbuf_map; rast->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level); rast->zsbuf.blocksize = util_format_get_blocksize(zsbuf->texture->format); + + rast->zsbuf.map = llvmpipe_resource_map(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_LAYOUT_NONE); + assert(rast->zsbuf.map); } lp_scene_bin_iter_begin( scene ); - - return TRUE; } static void lp_rast_end( struct lp_rasterizer *rast ) { - int i; - - lp_scene_reset( rast->curr_scene ); + struct lp_scene *scene = rast->curr_scene; + unsigned i; - for (i = 0; i < rast->state.nr_cbufs; i++) + /* Unmap color buffers */ + for (i = 0; i < rast->state.nr_cbufs; i++) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + llvmpipe_resource_unmap(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice); rast->cbuf[i].map = NULL; + } + + /* Unmap z/stencil buffer */ + if (rast->zsbuf.map) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + llvmpipe_resource_unmap(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice); + rast->zsbuf.map = NULL; + } + + lp_scene_reset( rast->curr_scene ); - rast->zsbuf.map = NULL; rast->curr_scene = NULL; + + if (0) + printf("Post render scene: tile read: %d tile write: %d\n", + tile_read_count, tile_write_count); } + /** * Begining rasterization of a tile. * \param x window X position of the tile, in pixels * \param y window Y position of the tile, in pixels */ static void -lp_rast_start_tile(struct lp_rasterizer_task *task, +lp_rast_tile_begin(struct lp_rasterizer_task *task, unsigned x, unsigned y) { + struct lp_rasterizer *rast = task->rast; + struct lp_scene *scene = rast->curr_scene; + enum lp_texture_usage usage; + unsigned buf; + LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + task->x = x; task->y = y; + + if (scene->has_color_clear) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + + /* get pointers to color tile(s) */ + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; + struct llvmpipe_resource *lpt; + assert(cbuf); + lpt = llvmpipe_resource(cbuf->texture); + task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, + cbuf->face, + cbuf->level, + usage, + x, y); + assert(task->color_tiles[buf]); + } + + /* get pointer to depth/stencil tile */ + { + struct pipe_surface *zsbuf = rast->curr_scene->fb.zsbuf; + if (zsbuf) { + struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture); + + if (scene->has_depth_clear) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + + /* "prime" the tile: convert data from linear to tiled if necessary + * and update the tile's layout info. + */ + (void) llvmpipe_get_texture_tile(lpt, + zsbuf->face, + zsbuf->level, + usage, + x, y); + /* Get actual pointer to the tile data. Note that depth/stencil + * data is tiled differently than color data. + */ + task->depth_tile = lp_rast_get_depth_block_pointer(rast, x, y); + + assert(task->depth_tile); + } + else { + task->depth_tile = NULL; + } + } } @@ -124,7 +212,7 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, { struct lp_rasterizer *rast = task->rast; const uint8_t *clear_color = arg.clear_color; - uint8_t **color_tile = task->tile.color; + unsigned i; LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, @@ -138,7 +226,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, clear_color[2] == clear_color[3]) { /* clear to grayscale value {x, x, x, x} */ for (i = 0; i < rast->state.nr_cbufs; i++) { - memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4); + uint8_t *ptr = task->color_tiles[i]; + memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4); } } else { @@ -149,8 +238,9 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, */ const unsigned chunk = TILE_SIZE / 4; for (i = 0; i < rast->state.nr_cbufs; i++) { - uint8_t *c = color_tile[i]; + uint8_t *c = task->color_tiles[i]; unsigned j; + for (j = 0; j < 4 * TILE_SIZE; j++) { memset(c, clear_color[0], chunk); c += chunk; @@ -161,7 +251,6 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, memset(c, clear_color[3], chunk); c += chunk; } - assert(c - color_tile[i] == TILE_SIZE * TILE_SIZE * 4); } } @@ -178,23 +267,15 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { struct lp_rasterizer *rast = task->rast; - const unsigned tile_x = task->x; - const unsigned tile_y = task->y; const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; - unsigned block_size = rast->zsbuf.blocksize; + const unsigned block_size = rast->zsbuf.blocksize; + const unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT; uint8_t *dst; - unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT; unsigned i, j; LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); - /*assert(rast->zsbuf.map);*/ - if (!rast->zsbuf.map) - return; - - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - /* * Clear the aera of the swizzled depth/depth buffer matching this tile, in * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time. @@ -203,7 +284,9 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets. */ - dst = lp_rast_depth_pointer(rast, tile_x, tile_y); + dst = task->depth_tile; + + assert(dst == lp_rast_get_depth_block_pointer(rast, task->x, task->y)); switch (block_size) { case 1: @@ -236,32 +319,73 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, * Load tile color from the framebuffer surface. * This is a bin command called during bin processing. */ +#if 0 void lp_rast_load_color(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { struct lp_rasterizer *rast = task->rast; - const unsigned x = task->x, y = task->y; - unsigned i; + unsigned buf; + enum lp_texture_usage usage; LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); - for (i = 0; i < rast->state.nr_cbufs; i++) { - if (x >= rast->cbuf[i].width || y >= rast->cbuf[i].height) - continue; + if (scene->has_color_clear) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + + /* Get pointers to color tile(s). + * This will convert linear data to tiled if needed. + */ + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; + struct llvmpipe_texture *lpt; + assert(cbuf); + lpt = llvmpipe_texture(cbuf->texture); + task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, + cbuf->face, + cbuf->level, + usage, + task->x, task->y); + assert(task->color_tiles[buf]); + } +} +#endif - lp_tile_read_4ub(rast->cbuf[i].format, - task->tile.color[i], - rast->cbuf[i].map, - rast->cbuf[i].stride, - x, y, - TILE_SIZE, TILE_SIZE); - LP_COUNT(nr_color_tile_load); +/** + * Convert the color tile from tiled to linear layout. + * This is generally only done when we're flushing the scene just prior to + * SwapBuffers. If we didn't do this here, we'd have to convert the entire + * tiled color buffer to linear layout in the llvmpipe_texture_unmap() + * function. It's better to do it here to take advantage of + * threading/parallelism. + * This is a bin command which is stored in all bins. + */ +void +lp_rast_store_color( struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + struct lp_rasterizer *rast = task->rast; + struct lp_scene *scene = rast->curr_scene; + unsigned buf; + + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + struct pipe_surface *cbuf = scene->fb.cbufs[buf]; + const unsigned face = cbuf->face, level = cbuf->level; + struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); + /* this will convert the tiled data to linear if needed */ + (void) llvmpipe_get_texture_tile_linear(lpt, face,level, + LP_TEX_USAGE_READ, + task->x, task->y); } } +/** + * This is a bin command called during bin processing. + */ void lp_rast_set_state(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) @@ -275,7 +399,6 @@ lp_rast_set_state(struct lp_rasterizer_task *task, } - /** * Run the shader on all blocks in a tile. This is used when a tile is * completely contained inside a triangle. @@ -287,7 +410,6 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, { struct lp_rasterizer *rast = task->rast; const struct lp_rast_state *state = task->current_state; - struct lp_rast_tile *tile = &task->tile; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; @@ -299,19 +421,17 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, for (x = 0; x < TILE_SIZE; x += 4) { uint8_t *color[PIPE_MAX_COLOR_BUFS]; uint32_t *depth; - unsigned block_offset, i; - - /* offset of the 16x16 pixel block within the tile */ - block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16); + unsigned i; /* color buffer */ for (i = 0; i < rast->state.nr_cbufs; i++) - color[i] = tile->color[i] + 4 * block_offset; + color[i] = lp_rast_get_color_block_pointer(task, i, + tile_x + x, tile_y + y); /* depth buffer */ - depth = lp_rast_depth_pointer(rast, tile_x + x, tile_y + y); + depth = lp_rast_get_depth_block_pointer(rast, tile_x + x, tile_y + y); - /* run shader */ + /* run shader on 4x4 block */ state->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, inputs->facing, @@ -330,6 +450,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, /** * Compute shading for a 4x4 block of pixels. * This is a bin command called during bin processing. + * \param x X position of quad in window coords + * \param y Y position of quad in window coords */ void lp_rast_shade_quads( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, @@ -338,12 +460,9 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, { const struct lp_rast_state *state = task->current_state; struct lp_rasterizer *rast = task->rast; - struct lp_rast_tile *tile = &task->tile; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; unsigned i; - unsigned ix, iy; - int block_offset; assert(state); @@ -354,28 +473,23 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, assert((x % 4) == 0); assert((y % 4) == 0); - ix = x % TILE_SIZE; - iy = y % TILE_SIZE; - - /* offset of the 16x16 pixel block within the tile */ - block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16); - /* color buffer */ - for (i = 0; i < rast->state.nr_cbufs; i++) - color[i] = tile->color[i] + 4 * block_offset; + for (i = 0; i < rast->state.nr_cbufs; i++) { + color[i] = lp_rast_get_color_block_pointer(task, i, x, y); + assert(lp_check_alignment(color[i], 16)); + } /* depth buffer */ - depth = lp_rast_depth_pointer(rast, x, y); + depth = lp_rast_get_depth_block_pointer(rast, x, y); - assert(lp_check_alignment(tile->color[0], 16)); assert(lp_check_alignment(state->jit_context.blend_color, 16)); assert(lp_check_alignment(inputs->step[0], 16)); assert(lp_check_alignment(inputs->step[1], 16)); assert(lp_check_alignment(inputs->step[2], 16)); - /* run shader */ + /* run shader on 4x4 block */ state->jit_function[RAST_EDGE_TEST]( &state->jit_context, x, y, inputs->facing, @@ -445,39 +559,31 @@ outline_subtiles(uint8_t *tile) /** - * Write the rasterizer's color tile to the framebuffer. + * Called when we're done writing to a color tile. */ static void -lp_rast_store_color(struct lp_rasterizer_task *task) +lp_rast_tile_end(struct lp_rasterizer_task *task) { +#if DEBUG struct lp_rasterizer *rast = task->rast; - const unsigned x = task->x, y = task->y; - unsigned i; - - for (i = 0; i < rast->state.nr_cbufs; i++) { - if (x >= rast->cbuf[i].width) - continue; - - if (y >= rast->cbuf[i].height) - continue; + unsigned buf; - LP_DBG(DEBUG_RAST, "%s [%u] %d,%d\n", __FUNCTION__, - task->thread_index, x, y); + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + uint8_t *color = lp_rast_get_color_block_pointer(task, buf, + task->x, task->y); if (LP_DEBUG & DEBUG_SHOW_SUBTILES) - outline_subtiles(task->tile.color[i]); + outline_subtiles(color); else if (LP_DEBUG & DEBUG_SHOW_TILES) - outline_tile(task->tile.color[i]); - - lp_tile_write_4ub(rast->cbuf[i].format, - task->tile.color[i], - rast->cbuf[i].map, - rast->cbuf[i].stride, - x, y, - TILE_SIZE, TILE_SIZE); - - LP_COUNT(nr_color_tile_store); + outline_tile(color); } +#else + (void) outline_subtiles; +#endif + + /* debug */ + memset(task->color_tiles, 0, sizeof(task->color_tiles)); + task->depth_tile = NULL; } @@ -512,7 +618,7 @@ rasterize_bin(struct lp_rasterizer_task *task, struct cmd_block *block; unsigned k; - lp_rast_start_tile( task, x * TILE_SIZE, y * TILE_SIZE ); + lp_rast_tile_begin( task, x * TILE_SIZE, y * TILE_SIZE ); /* simply execute each of the commands in the block list */ for (block = commands->head; block; block = block->next) { @@ -521,10 +627,7 @@ rasterize_bin(struct lp_rasterizer_task *task, } } - /* Write the rasterizer's tiles to the framebuffer. - */ - if (task->rast->state.write_color) - lp_rast_store_color(task); + lp_rast_tile_end(task); /* Free data for this bin. */ @@ -539,12 +642,12 @@ static struct { const char *name; } cmd_names[] = { - RAST(load_color), RAST(clear_color), RAST(clear_zstencil), RAST(triangle), RAST(shade_tile), RAST(set_state), + RAST(store_color), RAST(fence), }; @@ -597,8 +700,7 @@ is_empty_bin( const struct cmd_bin *bin ) } for (i = 0; i < head->count; i++) - if (head->cmd[i] != lp_rast_load_color && - head->cmd[i] != lp_rast_set_state) { + if (head->cmd[i] != lp_rast_set_state) { return FALSE; } @@ -658,6 +760,9 @@ lp_rast_queue_scene( struct lp_rasterizer *rast, rasterize_scene( &rast->tasks[0], scene ); lp_scene_reset( scene ); + + lp_rast_end( rast ); + rast->curr_scene = NULL; } else { @@ -798,7 +903,7 @@ struct lp_rasterizer * lp_rast_create( void ) { struct lp_rasterizer *rast; - unsigned i, cbuf; + unsigned i; rast = CALLOC_STRUCT(lp_rasterizer); if(!rast) @@ -808,10 +913,6 @@ lp_rast_create( void ) for (i = 0; i < Elements(rast->tasks); i++) { struct lp_rasterizer_task *task = &rast->tasks[i]; - - for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) - task->tile.color[cbuf] = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16); - task->rast = rast; task->thread_index = i; } @@ -829,12 +930,7 @@ lp_rast_create( void ) */ void lp_rast_destroy( struct lp_rasterizer *rast ) { - unsigned i, cbuf; - - for (i = 0; i < Elements(rast->tasks); i++) { - for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) - align_free(rast->tasks[i].tile.color[cbuf]); - } + unsigned i; /* Set exit_flag and signal each thread's work_ready semaphore. * Each thread will be woken up, notice that the exit_flag is set and diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index ae838f3fbe..a0ecb2fc47 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -217,9 +217,6 @@ void lp_rast_clear_color( struct lp_rasterizer_task *, void lp_rast_clear_zstencil( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_load_color( struct lp_rasterizer_task *, - const union lp_rast_cmd_arg ); - void lp_rast_set_state( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); @@ -232,4 +229,8 @@ void lp_rast_shade_tile( struct lp_rasterizer_task *, void lp_rast_fence( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void lp_rast_store_color( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 6ee9bcaae3..8bf2b92a6a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -32,6 +32,8 @@ #include "util/u_format.h" #include "gallivm/lp_bld_debug.h" #include "lp_rast.h" +#include "lp_scene.h" +#include "lp_texture.h" #include "lp_tile_soa.h" @@ -41,25 +43,16 @@ struct lp_rasterizer; -/** - * A tile's color and depth memory. - * We can choose whatever layout for the internal tile storage we prefer. - */ -struct lp_rast_tile -{ - uint8_t *color[PIPE_MAX_COLOR_BUFS]; -}; - - /** * Per-thread rasterization state */ struct lp_rasterizer_task { - struct lp_rast_tile tile; /** Tile color/z/stencil memory */ - unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ + uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS]; + uint8_t *depth_tile; + const struct lp_rast_state *current_state; /** "back" pointer */ @@ -86,9 +79,8 @@ struct lp_rasterizer */ struct { void *map; - unsigned stride; - unsigned width; - unsigned height; + unsigned tiles_per_row; + unsigned blocksize; enum pipe_format format; } cbuf[PIPE_MAX_COLOR_BUFS]; @@ -100,8 +92,6 @@ struct lp_rasterizer struct { unsigned nr_cbufs; - boolean write_color; - boolean write_zstencil; unsigned clear_color; unsigned clear_depth; char clear_stencil; @@ -140,18 +130,23 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, /** - * Get the pointer to the depth buffer for a block. + * Get the pointer to a 4x4 depth/stencil block. + * We'll map the z/stencil buffer on demand here. + * Note that this may be called even when there's no z/stencil buffer - return + * NULL in that case. * \param x, y location of 4x4 block in window coords */ static INLINE void * -lp_rast_depth_pointer( struct lp_rasterizer *rast, - unsigned x, unsigned y ) +lp_rast_get_depth_block_pointer(const struct lp_rasterizer *rast, + unsigned x, unsigned y) { - void * depth; + void *depth; assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); + assert(rast->zsbuf.map || !rast->curr_scene->fb.zsbuf); + if (!rast->zsbuf.map) return NULL; @@ -164,6 +159,37 @@ lp_rast_depth_pointer( struct lp_rasterizer *rast, } +/** + * Get the pointer to a 4x4 color block (within a 64x64 tile). + * We'll map the color buffer on demand here. + * Note that this may be called even when there's no color buffers - return + * NULL in that case. + * \param x, y location of 4x4 block in window coords + */ +static INLINE uint8_t * +lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task, + unsigned buf, unsigned x, unsigned y) +{ + unsigned px, py, pixel_offset; + uint8_t *color; + + assert((x % TILE_VECTOR_WIDTH) == 0); + assert((y % TILE_VECTOR_HEIGHT) == 0); + + color = task->color_tiles[buf]; + assert(color); + + px = x % TILE_SIZE; + py = y % TILE_SIZE; + pixel_offset = tile_pixel_offset(px, py, 0); + + color = color + pixel_offset; + + assert(lp_check_alignment(color, 16)); + return color; +} + + /** * Shade all pixels in a 4x4 block. The fragment code omits the @@ -177,32 +203,27 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, { struct lp_rasterizer *rast = task->rast; const struct lp_rast_state *state = task->current_state; - struct lp_rast_tile *tile = &task->tile; - const unsigned ix = x % TILE_SIZE, iy = y % TILE_SIZE; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; - unsigned block_offset, i; - - /* offset of the containing 16x16 pixel block within the tile */ - block_offset = (iy / 4) * (16 * 16) + (ix / 4) * 16; + unsigned i; /* color buffer */ for (i = 0; i < rast->state.nr_cbufs; i++) - color[i] = tile->color[i] + 4 * block_offset; - - depth = lp_rast_depth_pointer(rast, x, y); - - /* run shader */ - state->jit_function[0]( &state->jit_context, - x, y, - inputs->facing, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - INT_MIN, INT_MIN, INT_MIN, - NULL, NULL, NULL ); + color[i] = lp_rast_get_color_block_pointer(task, i, x, y); + + depth = lp_rast_get_depth_block_pointer(rast, x, y); + + /* run shader on 4x4 block */ + state->jit_function[RAST_WHOLE]( &state->jit_context, + x, y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + INT_MIN, INT_MIN, INT_MIN, + NULL, NULL, NULL ); } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 245d387578..182e7cb230 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -186,6 +186,9 @@ lp_scene_reset(struct lp_scene *scene ) } make_empty_list(ref_list); } + + scene->has_color_clear = FALSE; + scene->has_depth_clear = FALSE; } @@ -390,6 +393,7 @@ end: } + /** * Prepare this scene for the rasterizer. * Map the framebuffer surfaces. Initialize the 'rast' state. @@ -397,50 +401,12 @@ end: static boolean lp_scene_map_buffers( struct lp_scene *scene ) { - struct pipe_surface *cbuf, *zsbuf; - unsigned usage; - int i; - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - /* XXX: try to improve on this: - */ - usage = PIPE_TRANSFER_READ_WRITE; - - /* Map all color buffers - */ - for (i = 0; i < scene->fb.nr_cbufs; i++) { - cbuf = scene->fb.cbufs[i]; - if (cbuf) { - scene->cbuf_map[i] = llvmpipe_resource_map(cbuf->texture, - usage, - cbuf->face, - cbuf->level, - cbuf->zslice); - if (!scene->cbuf_map[i]) - goto fail; - } - } - - /* Map the zsbuffer - */ - zsbuf = scene->fb.zsbuf; - if (zsbuf) { - scene->zsbuf_map = llvmpipe_resource_map(zsbuf->texture, - usage, - zsbuf->face, - zsbuf->level, - zsbuf->zslice); - if (!scene->zsbuf_map) - goto fail; - } + /* XXX framebuffer surfaces are no longer mapped here */ + /* XXX move all map/unmap stuff into rast module... */ return TRUE; - -fail: - /* Unmap and release transfers? - */ - return FALSE; } @@ -454,6 +420,7 @@ fail: static void lp_scene_unmap_buffers( struct lp_scene *scene ) { +#if 0 unsigned i; for (i = 0; i < scene->fb.nr_cbufs; i++) { @@ -475,6 +442,7 @@ lp_scene_unmap_buffers( struct lp_scene *scene ) zsbuf->zslice); scene->zsbuf_map = NULL; } +#endif util_unreference_framebuffer_state( &scene->fb ); } @@ -511,7 +479,6 @@ void lp_scene_rasterize( struct lp_scene *scene, } } - scene->write_depth = (scene->fb.zsbuf != NULL && write_depth); diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index a1fb8bf541..ac0717db6a 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -115,11 +115,6 @@ struct texture_ref { struct lp_scene { struct pipe_context *pipe; - /* Scene's buffers are mapped at the time the scene is enqueued: - */ - void *cbuf_map[PIPE_MAX_COLOR_BUFS]; - uint8_t *zsbuf_map; - /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; @@ -127,6 +122,8 @@ struct lp_scene { struct texture_ref textures; boolean write_depth; + boolean has_color_clear; + boolean has_depth_clear; /** * Number of active tiles in each dimension. @@ -304,6 +301,7 @@ lp_scene_bin_iter_begin( struct lp_scene *scene ); struct cmd_bin * lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); + void lp_scene_rasterize( struct lp_scene *scene, struct lp_rasterizer *rast, diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b8abdfa114..97a6b5422b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -51,7 +51,7 @@ #include "draw/draw_vbuf.h" -static void set_scene_state( struct lp_setup_context *, unsigned ); +static void set_scene_state( struct lp_setup_context *, enum setup_state ); struct lp_scene * @@ -156,21 +156,21 @@ begin_binning( struct lp_setup_context *setup ) (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) ? "clear": "load"); if (setup->fb.nr_cbufs) { - if (setup->clear.flags & PIPE_CLEAR_COLOR) + if (setup->clear.flags & PIPE_CLEAR_COLOR) { lp_scene_bin_everywhere( scene, lp_rast_clear_color, setup->clear.color ); - else - lp_scene_bin_everywhere( scene, - lp_rast_load_color, - lp_rast_arg_null() ); + scene->has_color_clear = TRUE; + } } if (setup->fb.zsbuf) { - if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) { lp_scene_bin_everywhere( scene, lp_rast_clear_zstencil, setup->clear.zstencil ); + scene->has_depth_clear = TRUE; + } } LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); @@ -194,7 +194,7 @@ execute_clears( struct lp_setup_context *setup ) static void set_scene_state( struct lp_setup_context *setup, - unsigned new_state ) + enum setup_state new_state ) { unsigned old_state = setup->state; @@ -221,18 +221,39 @@ set_scene_state( struct lp_setup_context *setup, else lp_setup_rasterize_scene( setup, TRUE ); break; + + default: + assert(0 && "invalid setup state mode"); } setup->state = new_state; } +/** + * \param flags bitmask of PIPE_FLUSH_x flags + */ void lp_setup_flush( struct lp_setup_context *setup, unsigned flags ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + if (setup->scene) { + struct lp_scene *scene = lp_setup_get_current_scene(setup); + union lp_rast_cmd_arg dummy; + + if (flags & (PIPE_FLUSH_SWAPBUFFERS | + PIPE_FLUSH_FRAME)) { + /* Store colors in the linear color buffer(s). + * If we don't do this here, we'll end up converting the tiled + * data to linear in the texture_unmap() function, which will + * not be a parallel/threaded operation as here. + */ + lp_scene_bin_everywhere(scene, lp_rast_store_color, dummy); + } + } + set_scene_state( setup, SETUP_FLUSHED ); } @@ -286,15 +307,20 @@ lp_setup_clear( struct lp_setup_context *setup, * binned scene and start again, but I don't see that as being * a common usage. */ - if (flags & PIPE_CLEAR_COLOR) + if (flags & PIPE_CLEAR_COLOR) { lp_scene_bin_everywhere( scene, lp_rast_clear_color, setup->clear.color ); + scene->has_color_clear = TRUE; + } - if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) { lp_scene_bin_everywhere( scene, lp_rast_clear_zstencil, setup->clear.zstencil ); + scene->has_depth_clear = TRUE; + } + } else { /* Put ourselves into the 'pre-clear' state, specifically to try @@ -498,8 +524,14 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, /* regular texture - setup array of mipmap level pointers */ int j; for (j = 0; j <= tex->last_level; j++) { +#if 0 jit_tex->data[j] = (ubyte *) lp_tex->data + lp_tex->level_offset[j]; +#else + jit_tex->data[j] = + llvmpipe_get_texture_image(lp_tex, 0, j, LP_TEX_USAGE_READ, + LP_TEX_LAYOUT_LINEAR); +#endif jit_tex->row_stride[j] = lp_tex->stride[j]; } } @@ -610,7 +642,7 @@ lp_setup_update_state( struct lp_setup_context *setup ) if(buffer) { unsigned current_size = buffer->width0; - const void *current_data = llvmpipe_resource(buffer)->data; + const void *current_data = llvmpipe_resource_data(buffer); /* TODO: copy only the actually used constants? */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index ed21ec0f75..4594f7597d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -99,7 +99,7 @@ struct lp_setup_context union lp_rast_cmd_arg zstencil; /**< lp_rast_clear_zstencil() cmd */ } clear; - enum { + enum setup_state { SETUP_FLUSHED, SETUP_CLEARED, SETUP_ACTIVE diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index e82364d4b6..59ba0be2b9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -1047,7 +1047,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); unsigned size = constants ? constants->width0 : 0; - const void *data = constants ? llvmpipe_resource(constants)->data : NULL; + const void *data = constants ? llvmpipe_resource_data(constants) : NULL; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index ca3d62c361..381c58ecee 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -29,16 +29,35 @@ #include "lp_context.h" #include "lp_flush.h" #include "lp_surface.h" +#include "lp_texture.h" +#include "lp_tile_image.h" +#include "lp_tile_size.h" + + +/** + * Adjust x, y, width, height to lie on tile bounds. + */ +static void +adjust_to_tile_bounds(unsigned x, unsigned y, unsigned width, unsigned height, + unsigned *x_tile, unsigned *y_tile, + unsigned *w_tile, unsigned *h_tile) +{ + *x_tile = x & ~(TILE_SIZE - 1); + *y_tile = y & ~(TILE_SIZE - 1); + *w_tile = ((x + width + TILE_SIZE - 1) & ~(TILE_SIZE - 1)) - *x_tile; + *h_tile = ((y + height + TILE_SIZE - 1) & ~(TILE_SIZE - 1)) - *y_tile; +} + static void lp_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *dst, unsigned dstx, unsigned dsty, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { llvmpipe_flush_texture(pipe, - dest->texture, dest->face, dest->level, + dst->texture, dst->face, dst->level, 0, /* flush_flags */ FALSE, /* read_only */ FALSE, /* cpu_access */ @@ -51,8 +70,87 @@ lp_surface_copy(struct pipe_context *pipe, FALSE, /* cpu_access */ FALSE); /* do_not_flush */ + /* Look for special case in which we're copying from a tiled image + * to a linear image. + */ + { + struct llvmpipe_resource *src_tex = llvmpipe_resource(src->texture); + struct llvmpipe_resource *dst_tex = llvmpipe_resource(dst->texture); + enum pipe_format format = src_tex->base.format; + + /* + printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n", + src_tex->id, dst_tex->id, + srcx, srcy, dstx, dsty, width, height); + */ + + /* set src tiles to linear layout */ + { + unsigned tx, ty, tw, th; + unsigned x, y; + + adjust_to_tile_bounds(srcx, srcy, width, height, &tx, &ty, &tw, &th); + + for (y = 0; y < th; y += TILE_SIZE) { + for (x = 0; x < tw; x += TILE_SIZE) { + (void) llvmpipe_get_texture_tile_linear(src_tex, + src->face, src->level, + LP_TEX_USAGE_READ, + tx + x, ty + y); + } + } + } + + /* set dst tiles to linear layout */ + { + unsigned tx, ty, tw, th; + unsigned x, y; + enum lp_texture_usage usage; + + /* XXX for the tiles which are completely contained by the + * dest rectangle, we could set the usage mode to WRITE_ALL. + * Just test for the case of replacing the whole dest region for now. + */ + if (width == dst_tex->base.width0 && height == dst_tex->base.height0) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + + adjust_to_tile_bounds(dstx, dsty, width, height, &tx, &ty, &tw, &th); + + for (y = 0; y < th; y += TILE_SIZE) { + for (x = 0; x < tw; x += TILE_SIZE) { + (void) llvmpipe_get_texture_tile_linear(dst_tex, + dst->face, dst->level, + usage, + tx + x, ty + y); + } + } + } + + /* copy */ + { + const ubyte *src_linear_ptr + = llvmpipe_get_texture_image_address(src_tex, src->face, + src->level, + LP_TEX_LAYOUT_LINEAR); + ubyte *dst_linear_ptr + = llvmpipe_get_texture_image_address(dst_tex, dst->face, + dst->level, + LP_TEX_LAYOUT_LINEAR); + + util_copy_rect(dst_linear_ptr, format, + dst_tex->stride[dst->level], + dstx, dsty, + width, height, + src_linear_ptr, src_tex->stride[src->level], + srcx, srcy); + } + return; + } + util_surface_copy(pipe, FALSE, - dest, destx, desty, + dst, dstx, dsty, src, srcx, srcy, width, height); } diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 61210de890..7e4e4d5f0b 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -30,64 +30,101 @@ * Michel Dänzer */ +#include + #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "util/u_inlines.h" +#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_transfer.h" #include "lp_context.h" -#include "lp_screen.h" #include "lp_flush.h" +#include "lp_screen.h" +#include "lp_tile_image.h" #include "lp_texture.h" #include "lp_setup.h" #include "lp_tile_size.h" + #include "state_tracker/sw_winsys.h" +static INLINE boolean +resource_is_texture(const struct pipe_resource *resource) +{ + const unsigned tex_binds = (PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED | + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_SAMPLER_VIEW); + const struct llvmpipe_resource *lpt = llvmpipe_resource_const(resource); + + return (lpt->base.bind & tex_binds) ? TRUE : FALSE; +} + + + +/** + * Allocate storage for llvmpipe_texture::layout array. + * The number of elements is width_in_tiles * height_in_tiles. + */ +static enum lp_texture_layout * +alloc_layout_array(unsigned width, unsigned height) +{ + const unsigned tx = align(width, TILE_SIZE) / TILE_SIZE; + const unsigned ty = align(height, TILE_SIZE) / TILE_SIZE; + + assert(tx * ty > 0); + assert(LP_TEX_LAYOUT_NONE == 0); /* calloc'ing LP_TEX_LAYOUT_NONE here */ + + return (enum lp_texture_layout *) + calloc(tx * ty, sizeof(enum lp_texture_layout)); +} + + + /** * Conventional allocation path for non-display textures: - * Simple, maximally packed layout. + * Just compute row strides here. Storage is allocated on demand later. */ static boolean -llvmpipe_resource_layout(struct llvmpipe_screen *screen, +llvmpipe_texture_layout(struct llvmpipe_screen *screen, struct llvmpipe_resource *lpt) { struct pipe_resource *pt = &lpt->base; unsigned level; unsigned width = pt->width0; unsigned height = pt->height0; - unsigned depth = pt->depth0; - unsigned buffer_size = 0; + + assert(LP_MAX_TEXTURE_2D_LEVELS <= LP_MAX_TEXTURE_LEVELS); + assert(LP_MAX_TEXTURE_3D_LEVELS <= LP_MAX_TEXTURE_LEVELS); for (level = 0; level <= pt->last_level; level++) { - unsigned nblocksx, nblocksy; + const unsigned num_faces = lpt->base.target == PIPE_TEXTURE_CUBE ? 6 : 1; + unsigned nblocksx, face; /* Allocate storage for whole quads. This is particularly important * for depth surfaces, which are currently stored in a swizzled format. */ nblocksx = util_format_get_nblocksx(pt->format, align(width, TILE_SIZE)); - nblocksy = util_format_get_nblocksy(pt->format, align(height, TILE_SIZE)); - lpt->stride[level] = align(nblocksx * util_format_get_blocksize(pt->format), 16); + lpt->stride[level] = + align(nblocksx * util_format_get_blocksize(pt->format), 16); - lpt->level_offset[level] = buffer_size; + lpt->tiles_per_row[level] = align(width, TILE_SIZE) / TILE_SIZE; - buffer_size += (nblocksy * - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - lpt->stride[level]); + for (face = 0; face < num_faces; face++) { + lpt->layout[face][level] = alloc_layout_array(width, height); + } width = u_minify(width, 1); height = u_minify(height, 1); - depth = u_minify(depth, 1); } - lpt->data = align_malloc(buffer_size, 16); - - return lpt->data != NULL; + return TRUE; } @@ -104,6 +141,10 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, unsigned width = align(lpt->base.width0, TILE_SIZE); unsigned height = align(lpt->base.height0, TILE_SIZE); + lpt->tiles_per_row[0] = align(width, TILE_SIZE) / TILE_SIZE; + + lpt->layout[0][0] = alloc_layout_array(width, height); + lpt->dt = winsys->displaytarget_create(winsys, lpt->base.bind, lpt->base.format, @@ -117,8 +158,9 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, static struct pipe_resource * llvmpipe_resource_create(struct pipe_screen *_screen, - const struct pipe_resource *templat) + const struct pipe_resource *templat) { + static unsigned id_counter = 0; struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct llvmpipe_resource *lpt = CALLOC_STRUCT(llvmpipe_resource); if (!lpt) @@ -128,17 +170,38 @@ llvmpipe_resource_create(struct pipe_screen *_screen, pipe_reference_init(&lpt->base.reference, 1); lpt->base.screen = &screen->base; + assert(lpt->base.bind); + if (lpt->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) { + /* displayable surface */ if (!llvmpipe_displaytarget_layout(screen, lpt)) goto fail; + assert(lpt->layout[0][0][0] == LP_TEX_LAYOUT_NONE); + } + else if (lpt->base.bind & (PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_DEPTH_STENCIL)) { + /* texture map */ + if (!llvmpipe_texture_layout(screen, lpt)) + goto fail; + assert(lpt->layout[0][0][0] == LP_TEX_LAYOUT_NONE); } else { - if (!llvmpipe_resource_layout(screen, lpt)) + /* other data (vertex buffer, const buffer, etc) */ + const enum pipe_format format = templat->format; + const uint w = templat->width0 / util_format_get_blockheight(format); + const uint h = templat->height0 / util_format_get_blockwidth(format); + const uint d = templat->depth0; + const uint bpp = util_format_get_blocksize(format); + const uint bytes = w * h * d * bpp; + lpt->data = align_malloc(bytes, 16); + if (!lpt->data) goto fail; } + lpt->id = id_counter++; + return &lpt->base; fail: @@ -159,8 +222,37 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen, struct sw_winsys *winsys = screen->winsys; winsys->displaytarget_destroy(winsys, lpt->dt); } - else if (!lpt->userBuffer) { + else if (resource_is_texture(pt)) { /* regular texture */ + const uint num_faces = pt->target == PIPE_TEXTURE_CUBE ? 6 : 1; + uint level, face; + + /* free linear image data */ + for (level = 0; level < Elements(lpt->linear); level++) { + if (lpt->linear[level].data) { + align_free(lpt->linear[level].data); + lpt->linear[level].data = NULL; + } + } + + /* free tiled image data */ + for (level = 0; level < Elements(lpt->tiled); level++) { + if (lpt->tiled[level].data) { + align_free(lpt->tiled[level].data); + lpt->tiled[level].data = NULL; + } + } + + /* free layout flag arrays */ + for (level = 0; level < Elements(lpt->tiled); level++) { + for (face = 0; face < num_faces; face++) { + free(lpt->layout[face][level]); + lpt->layout[face][level] = NULL; + } + } + } + else if (!lpt->userBuffer) { + assert(lpt->data); align_free(lpt->data); } @@ -169,63 +261,88 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen, /** - * Map a texture. Without any synchronization. + * Map a texture for read/write (rendering). Without any synchronization. */ void * llvmpipe_resource_map(struct pipe_resource *texture, - unsigned usage, unsigned face, unsigned level, - unsigned zslice) + unsigned zslice, + enum lp_texture_usage tex_usage, + enum lp_texture_layout layout) { struct llvmpipe_resource *lpt = llvmpipe_resource(texture); uint8_t *map; + assert(face < 6); + assert(level < LP_MAX_TEXTURE_LEVELS); + + assert(tex_usage == LP_TEX_USAGE_READ || + tex_usage == LP_TEX_USAGE_READ_WRITE || + tex_usage == LP_TEX_USAGE_WRITE_ALL); + + assert(layout == LP_TEX_LAYOUT_NONE || + layout == LP_TEX_LAYOUT_TILED || + layout == LP_TEX_LAYOUT_LINEAR); + if (lpt->dt) { /* display target */ struct llvmpipe_screen *screen = llvmpipe_screen(texture->screen); struct sw_winsys *winsys = screen->winsys; + unsigned dt_usage; + + if (tex_usage == LP_TEX_USAGE_READ) { + dt_usage = PIPE_TRANSFER_READ; + } + else { + dt_usage = PIPE_TRANSFER_READ_WRITE; + } assert(face == 0); assert(level == 0); assert(zslice == 0); /* FIXME: keep map count? */ - map = winsys->displaytarget_map(winsys, lpt->dt, usage); - } - else { - /* regular texture */ - unsigned offset; - unsigned stride; + map = winsys->displaytarget_map(winsys, lpt->dt, dt_usage); - map = lpt->data; + /* install this linear image in texture data structure */ + lpt->linear[level].data = map; - assert(level < LP_MAX_TEXTURE_2D_LEVELS); + map = llvmpipe_get_texture_image(lpt, face, level, tex_usage, layout); + assert(map); - offset = lpt->level_offset[level]; - stride = lpt->stride[level]; + return map; + } + else if (resource_is_texture(texture)) { + /* regular texture */ + const unsigned tex_height = u_minify(texture->height0, level); + const unsigned nblocksy = + util_format_get_nblocksy(texture->format, tex_height); + const unsigned stride = lpt->stride[level]; + unsigned offset = 0; - /* XXX shouldn't that rather be - tex_height = align(u_minify(texture->height0, level), 2) - to account for alignment done in llvmpipe_resource_layout ? - */ if (texture->target == PIPE_TEXTURE_CUBE) { - unsigned tex_height = u_minify(texture->height0, level); - offset += face * util_format_get_nblocksy(texture->format, tex_height) * stride; + /* XXX incorrect + offset = face * nblocksy * stride; + */ } else if (texture->target == PIPE_TEXTURE_3D) { - unsigned tex_height = u_minify(texture->height0, level); - offset += zslice * util_format_get_nblocksy(texture->format, tex_height) * stride; + offset = zslice * nblocksy * stride; } else { assert(face == 0); assert(zslice == 0); + offset = 0; } + map = llvmpipe_get_texture_image(lpt, face, level, tex_usage, layout); + assert(map); map += offset; + return map; + } + else { + return lpt->data; } - - return map; } @@ -249,11 +366,30 @@ llvmpipe_resource_unmap(struct pipe_resource *texture, assert(level == 0); assert(zslice == 0); + /* make sure linear image is up to date */ + (void) llvmpipe_get_texture_image(lpt, 0, 0, + LP_TEX_USAGE_READ, + LP_TEX_LAYOUT_LINEAR); + winsys->displaytarget_unmap(winsys, lpt->dt); } } +void * +llvmpipe_resource_data(struct pipe_resource *resource) +{ + struct llvmpipe_resource *lpt = llvmpipe_resource(resource); + + assert((lpt->base.bind & (PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED | + PIPE_BIND_SAMPLER_VIEW)) == 0); + + return lpt->data; +} + + static struct pipe_resource * llvmpipe_resource_from_handle(struct pipe_screen *screen, const struct pipe_resource *template, @@ -303,7 +439,7 @@ static struct pipe_surface * llvmpipe_get_tex_surface(struct pipe_screen *screen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, - unsigned usage) + enum lp_texture_usage usage) { struct pipe_surface *ps; @@ -389,6 +525,34 @@ llvmpipe_transfer_map( struct pipe_context *pipe, ubyte *map; struct llvmpipe_resource *lpt; enum pipe_format format; + enum lp_texture_usage tex_usage; + const char *mode; + + assert(transfer->sr.face < 6); + assert(transfer->sr.level < LP_MAX_TEXTURE_LEVELS); + + /* + printf("tex_transfer_map(%d, %d %d x %d of %d x %d, usage %d )\n", + transfer->x, transfer->y, transfer->width, transfer->height, + transfer->texture->width0, + transfer->texture->height0, + transfer->usage); + */ + + if (transfer->usage == PIPE_TRANSFER_READ) { + tex_usage = LP_TEX_USAGE_READ; + mode = "read"; + } + else { + tex_usage = LP_TEX_USAGE_READ_WRITE; + mode = "read/write"; + } + + if (0) { + struct llvmpipe_resource *lpt = llvmpipe_resource(transfer->resource); + printf("transfer map tex %u mode %s\n", lpt->id, mode); + } + assert(transfer->resource); lpt = llvmpipe_resource(transfer->resource); @@ -408,12 +572,13 @@ llvmpipe_transfer_map( struct pipe_context *pipe, FALSE); /* do_not_flush */ map = llvmpipe_resource_map(transfer->resource, - transfer->usage, transfer->sr.face, transfer->sr.level, - transfer->box.z); + transfer->box.z, + tex_usage, LP_TEX_LAYOUT_LINEAR); + - /* May want to different things here depending on read/write nature + /* May want to do different things here depending on read/write nature * of the map: */ if (transfer->usage & PIPE_TRANSFER_WRITE) { @@ -488,6 +653,395 @@ llvmpipe_user_buffer_create(struct pipe_screen *screen, } +/** + * Compute size (in bytes) need to store a texture image / mipmap level, + * for just one cube face. + */ +static unsigned +tex_image_face_size(const struct llvmpipe_resource *lpt, unsigned level, + enum lp_texture_layout layout) +{ + /* for tiled layout, force a 32bpp format */ + enum pipe_format format = layout == LP_TEX_LAYOUT_TILED + ? PIPE_FORMAT_B8G8R8A8_UNORM : lpt->base.format; + const unsigned height = u_minify(lpt->base.height0, level); + const unsigned depth = u_minify(lpt->base.depth0, level); + const unsigned nblocksy = + util_format_get_nblocksy(format, align(height, TILE_SIZE)); + const unsigned buffer_size = + nblocksy * lpt->stride[level] * + (lpt->base.target == PIPE_TEXTURE_3D ? depth : 1); + return buffer_size; +} + + +/** + * Compute size (in bytes) need to store a texture image / mipmap level, + * including all cube faces. + */ +static unsigned +tex_image_size(const struct llvmpipe_resource *lpt, unsigned level, + enum lp_texture_layout layout) +{ + const unsigned buf_size = tex_image_face_size(lpt, level, layout); + const unsigned num_faces = lpt->base.target == PIPE_TEXTURE_CUBE ? 6 : 1; + return buf_size * num_faces; +} + + +/** + * This function encapsulates some complicated logic for determining + * how to convert a tile of image data from linear layout to tiled + * layout, or vice versa. + * \param cur_layout the current tile layout + * \param target_layout the desired tile layout + * \param usage how the tile will be accessed (R/W vs. read-only, etc) + * \param new_layout_return returns the new layout mode + * \param convert_return returns TRUE if image conversion is needed + */ +static void +layout_logic(enum lp_texture_layout cur_layout, + enum lp_texture_layout target_layout, + enum lp_texture_usage usage, + enum lp_texture_layout *new_layout_return, + boolean *convert) +{ + enum lp_texture_layout other_layout, new_layout; + + *convert = FALSE; + + new_layout = 99; /* debug check */ + + if (target_layout == LP_TEX_LAYOUT_LINEAR) { + other_layout = LP_TEX_LAYOUT_TILED; + } + else { + assert(target_layout == LP_TEX_LAYOUT_TILED); + other_layout = LP_TEX_LAYOUT_LINEAR; + } + + new_layout = target_layout; /* may get changed below */ + + if (cur_layout == LP_TEX_LAYOUT_BOTH) { + if (usage == LP_TEX_USAGE_READ) { + new_layout = LP_TEX_LAYOUT_BOTH; + } + } + else if (cur_layout == other_layout) { + if (usage != LP_TEX_USAGE_WRITE_ALL) { + /* need to convert tiled data to linear or vice versa */ + *convert = TRUE; + + if (usage == LP_TEX_USAGE_READ) + new_layout = LP_TEX_LAYOUT_BOTH; + } + } + else { + assert(cur_layout == LP_TEX_LAYOUT_NONE || + cur_layout == target_layout); + } + + assert(new_layout == LP_TEX_LAYOUT_BOTH || + new_layout == target_layout); + + *new_layout_return = new_layout; +} + + +/** + * Return pointer to a texture image. No tiled/linear conversion is done. + */ +void * +llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpt, + unsigned face, unsigned level, + enum lp_texture_layout layout) +{ + struct llvmpipe_texture_image *img; + unsigned face_offset; + + if (layout == LP_TEX_LAYOUT_LINEAR) { + img = &lpt->linear[level]; + } + else { + assert (layout == LP_TEX_LAYOUT_TILED); + img = &lpt->tiled[level]; + } + + if (face > 0) + face_offset = face * tex_image_face_size(lpt, level, layout); + else + face_offset = 0; + + return (ubyte *) img->data + face_offset; +} + + + +/** + * Return pointer to texture image data (either linear or tiled layout). + * \param usage one of LP_TEX_USAGE_READ/WRITE_ALL/READ_WRITE + * \param layout either LP_TEX_LAYOUT_LINEAR or LP_TEX_LAYOUT_TILED + */ +void * +llvmpipe_get_texture_image(struct llvmpipe_resource *lpt, + unsigned face, unsigned level, + enum lp_texture_usage usage, + enum lp_texture_layout layout) +{ + /* + * 'target' refers to the image which we're retrieving (either in + * tiled or linear layout). + * 'other' refers to the same image but in the other layout. (it may + * or may not exist. + */ + struct llvmpipe_texture_image *target_img; + struct llvmpipe_texture_image *other_img; + void *target_data; + void *other_data; + const unsigned width = u_minify(lpt->base.width0, level); + const unsigned height = u_minify(lpt->base.height0, level); + const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE; + const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE; + enum lp_texture_layout other_layout; + + assert(layout == LP_TEX_LAYOUT_NONE || + layout == LP_TEX_LAYOUT_TILED || + layout == LP_TEX_LAYOUT_LINEAR); + + assert(usage == LP_TEX_USAGE_READ || + usage == LP_TEX_USAGE_READ_WRITE || + usage == LP_TEX_USAGE_WRITE_ALL); + + if (lpt->dt) { + assert(lpt->linear[level].data); + } + + /* which is target? which is other? */ + if (layout == LP_TEX_LAYOUT_LINEAR) { + target_img = &lpt->linear[level]; + other_img = &lpt->tiled[level]; + other_layout = LP_TEX_LAYOUT_TILED; + } + else { + target_img = &lpt->tiled[level]; + other_img = &lpt->linear[level]; + other_layout = LP_TEX_LAYOUT_LINEAR; + } + + target_data = target_img->data; + other_data = other_img->data; + + if (!target_data) { + /* allocate memory for the target image now */ + unsigned buffer_size = tex_image_size(lpt, level, layout); + target_img->data = align_malloc(buffer_size, 16); + target_data = target_img->data; + } + + if (face > 0) { + unsigned offset = face * tex_image_face_size(lpt, level, layout); + if (target_data) { + target_data = (uint8_t *) target_data + offset; + } + if (other_data) { + other_data = (uint8_t *) other_data + offset; + } + } + + if (layout == LP_TEX_LAYOUT_NONE) { + /* just allocating memory */ + return target_data; + } + + if (other_data) { + /* may need to convert other data to the requested layout */ + enum lp_texture_layout new_layout; + unsigned x, y, i = 0; + + /* loop over all image tiles, doing layout conversion where needed */ + for (y = 0; y < height_t; y++) { + for (x = 0; x < width_t; x++) { + enum lp_texture_layout cur_layout = lpt->layout[face][level][i]; + boolean convert; + + layout_logic(cur_layout, layout, usage, &new_layout, &convert); + + if (convert) { + if (layout == LP_TEX_LAYOUT_TILED) { + lp_linear_to_tiled(other_data, target_data, + x * TILE_SIZE, y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + lpt->base.format, + lpt->stride[level]); + } + else { + lp_tiled_to_linear(other_data, target_data, + x * TILE_SIZE, y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + lpt->base.format, + lpt->stride[level]); + } + } + + lpt->layout[face][level][i] = new_layout; + i++; + } + } + } + else { + /* no other data */ + unsigned i; + for (i = 0; i < width_t * height_t; i++) { + lpt->layout[face][level][i] = layout; + } + } + + assert(target_data); + + return target_data; +} + + +static INLINE enum lp_texture_layout +llvmpipe_get_texture_tile_layout(const struct llvmpipe_resource *lpt, + unsigned face, unsigned level, + unsigned x, unsigned y) +{ + uint i; + assert(resource_is_texture(&lpt->base)); + assert(x < lpt->tiles_per_row[level]); + i = y * lpt->tiles_per_row[level] + x; + return lpt->layout[face][level][i]; +} + + +static INLINE void +llvmpipe_set_texture_tile_layout(struct llvmpipe_resource *lpt, + unsigned face, unsigned level, + unsigned x, unsigned y, + enum lp_texture_layout layout) +{ + uint i; + assert(resource_is_texture(&lpt->base)); + assert(x < lpt->tiles_per_row[level]); + i = y * lpt->tiles_per_row[level] + x; + lpt->layout[face][level][i] = layout; +} + + +/** + * Get pointer to a linear image where the tile at (x,y) is known to be + * in linear layout. + * Conversion from tiled to linear will be done if necessary. + * \return pointer to start of image/face (not the tile) + */ +ubyte * +llvmpipe_get_texture_tile_linear(struct llvmpipe_resource *lpt, + unsigned face, unsigned level, + enum lp_texture_usage usage, + unsigned x, unsigned y) +{ + struct llvmpipe_texture_image *tiled_img = &lpt->tiled[level]; + struct llvmpipe_texture_image *linear_img = &lpt->linear[level]; + enum lp_texture_layout cur_layout, new_layout; + const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE; + boolean convert; + + assert(resource_is_texture(&lpt->base)); + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + + if (!linear_img->data) { + /* allocate memory for the tiled image now */ + unsigned buffer_size = tex_image_size(lpt, level, LP_TEX_LAYOUT_LINEAR); + linear_img->data = align_malloc(buffer_size, 16); + } + + cur_layout = llvmpipe_get_texture_tile_layout(lpt, face, level, tx, ty); + + layout_logic(cur_layout, LP_TEX_LAYOUT_LINEAR, usage, + &new_layout, &convert); + + if (convert) { + lp_tiled_to_linear(tiled_img->data, linear_img->data, + x, y, TILE_SIZE, TILE_SIZE, lpt->base.format, + lpt->stride[level]); + } + + if (new_layout != cur_layout) + llvmpipe_set_texture_tile_layout(lpt, face, level, tx, ty, new_layout); + + if (face > 0) { + unsigned offset + = face * tex_image_face_size(lpt, level, LP_TEX_LAYOUT_LINEAR); + return (ubyte *) linear_img->data + offset; + } + else { + return linear_img->data; + } +} + + +/** + * Get pointer to tiled data for rendering. + * \return pointer to the tiled data at the given tile position + */ +ubyte * +llvmpipe_get_texture_tile(struct llvmpipe_resource *lpt, + unsigned face, unsigned level, + enum lp_texture_usage usage, + unsigned x, unsigned y) +{ + const unsigned width = u_minify(lpt->base.width0, level); + struct llvmpipe_texture_image *tiled_img = &lpt->tiled[level]; + struct llvmpipe_texture_image *linear_img = &lpt->linear[level]; + enum lp_texture_layout cur_layout, new_layout; + const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE; + boolean convert; + + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + + if (!tiled_img->data) { + /* allocate memory for the tiled image now */ + unsigned buffer_size = tex_image_size(lpt, level, LP_TEX_LAYOUT_TILED); + tiled_img->data = align_malloc(buffer_size, 16); + } + + cur_layout = llvmpipe_get_texture_tile_layout(lpt, face, level, tx, ty); + + layout_logic(cur_layout, LP_TEX_LAYOUT_TILED, usage, &new_layout, &convert); + if (convert) { + lp_linear_to_tiled(linear_img->data, tiled_img->data, + x, y, TILE_SIZE, TILE_SIZE, lpt->base.format, + lpt->stride[level]); + } + + if (new_layout != cur_layout) + llvmpipe_set_texture_tile_layout(lpt, face, level, tx, ty, new_layout); + + /* compute, return address of the 64x64 tile */ + { + unsigned tiles_per_row, tile_offset, face_offset; + + tiles_per_row = align(width, TILE_SIZE) / TILE_SIZE; + + assert(tiles_per_row == lpt->tiles_per_row[level]); + + tile_offset = ty * tiles_per_row + tx; + tile_offset *= TILE_SIZE * TILE_SIZE * 4; + + assert(tiled_img->data); + + face_offset = (face > 0) + ? (face * tex_image_face_size(lpt, level, LP_TEX_LAYOUT_TILED)) + : 0; + + return (ubyte *) tiled_img->data + face_offset + tile_offset; + } +} + + void llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen) { diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 0226867840..8920209245 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -36,6 +36,26 @@ #define LP_MAX_TEXTURE_2D_LEVELS 12 /* 2K x 2K for now */ #define LP_MAX_TEXTURE_3D_LEVELS 10 /* 512 x 512 x 512 for now */ +#define LP_MAX_TEXTURE_LEVELS LP_MAX_TEXTURE_2D_LEVELS + + +enum lp_texture_usage +{ + LP_TEX_USAGE_READ = 100, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_USAGE_WRITE_ALL +}; + + +/** Per-tile layout mode */ +enum lp_texture_layout +{ + LP_TEX_LAYOUT_NONE = 0, /**< no layout for the tile data yet */ + LP_TEX_LAYOUT_TILED, /**< the tile data is in tiled layout */ + LP_TEX_LAYOUT_LINEAR, /**< the tile data is in linear layout */ + LP_TEX_LAYOUT_BOTH /**< the tile data is in both modes */ +}; + struct pipe_context; struct pipe_screen; @@ -44,12 +64,36 @@ struct llvmpipe_context; struct sw_displaytarget; +/** + * We keep one or two copies of the texture image data: one in a simple + * linear layout (for texture sampling) and another in a tiled layout (for + * render targets). We keep track of whether each image tile is linear + * or tiled on a per-tile basis. + */ + + +/** A 1D/2D/3D image, one mipmap level */ +struct llvmpipe_texture_image +{ + void *data; +}; + + +/** + * llvmpipe subclass of pipe_resource. A texture, drawing surface, + * vertex buffer, const buffer, etc. + * Textures are stored differently than othere types of objects such as + * vertex buffers and const buffers. + * The former are tiled and have per-tile layout flags. + * The later are simple malloc'd blocks of memory. + */ struct llvmpipe_resource { struct pipe_resource base; - unsigned long level_offset[LP_MAX_TEXTURE_2D_LEVELS]; - unsigned stride[LP_MAX_TEXTURE_2D_LEVELS]; + /** Row stride in bytes */ + unsigned stride[LP_MAX_TEXTURE_LEVELS]; + unsigned tiles_per_row[LP_MAX_TEXTURE_LEVELS]; /** * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET @@ -60,10 +104,21 @@ struct llvmpipe_resource /** * Malloc'ed data for regular textures, or a mapping to dt above. */ + struct llvmpipe_texture_image tiled[LP_MAX_TEXTURE_LEVELS]; + struct llvmpipe_texture_image linear[LP_MAX_TEXTURE_LEVELS]; + + /** + * Data for non-texture resources. + */ void *data; + /** per-tile layout info */ + enum lp_texture_layout *layout[PIPE_TEX_FACE_MAX][LP_MAX_TEXTURE_LEVELS]; + boolean userBuffer; /** Is this a user-space buffer? */ unsigned timestamp; + + unsigned id; /**< temporary, for debugging */ }; @@ -112,10 +167,11 @@ llvmpipe_resource_stride(struct pipe_resource *texture, void * llvmpipe_resource_map(struct pipe_resource *texture, - unsigned usage, unsigned face, unsigned level, - unsigned zslice); + unsigned zslice, + enum lp_texture_usage tex_usage, + enum lp_texture_layout layout); void llvmpipe_resource_unmap(struct pipe_resource *texture, @@ -124,4 +180,40 @@ llvmpipe_resource_unmap(struct pipe_resource *texture, unsigned zslice); +void * +llvmpipe_resource_data(struct pipe_resource *resource); + + +void * +llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpt, + unsigned face, unsigned level, + enum lp_texture_layout layout); + +void * +llvmpipe_get_texture_image(struct llvmpipe_resource *resource, + unsigned face, unsigned level, + enum lp_texture_usage usage, + enum lp_texture_layout layout); + + +ubyte * +llvmpipe_get_texture_tile_linear(struct llvmpipe_resource *lpt, + unsigned face, unsigned level, + enum lp_texture_usage usage, + unsigned x, unsigned y); + +ubyte * +llvmpipe_get_texture_tile(struct llvmpipe_resource *lpt, + unsigned face, unsigned level, + enum lp_texture_usage usage, + unsigned x, unsigned y); + + + +extern void +llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen); + +extern void +llvmpipe_init_context_texture_funcs(struct pipe_context *pipe); + #endif /* LP_TEXTURE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c index c1980b316d..0852150ba7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c @@ -25,6 +25,14 @@ **************************************************************************/ +/** + * Code to convert images from tiled to linear and back. + * XXX there are quite a few assumptions about color and z/stencil being + * 32bpp. + */ + + +#include "util/u_format.h" #include "lp_tile_soa.h" #include "lp_tile_image.h" @@ -32,34 +40,173 @@ #define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4) +/** + * Untile a 4x4 block of 32-bit words (all contiguous) to linear layout + * at dst, with dst_stride words between rows. + */ +static void +untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride) +{ + uint32_t *d0 = dst; + uint32_t *d1 = d0 + dst_stride; + uint32_t *d2 = d1 + dst_stride; + uint32_t *d3 = d2 + dst_stride; + + d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5]; + d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7]; + d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13]; + d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15]; +} + + + +/** + * Untile a 4x4 block of 16-bit words (all contiguous) to linear layout + * at dst, with dst_stride words between rows. + */ +static void +untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride) +{ + uint16_t *d0 = dst; + uint16_t *d1 = d0 + dst_stride; + uint16_t *d2 = d1 + dst_stride; + uint16_t *d3 = d2 + dst_stride; + + d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5]; + d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7]; + d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13]; + d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15]; +} + + + +/** + * Convert a 4x4 rect of 32-bit words from a linear layout into tiled + * layout (in which all 16 words are contiguous). + */ +static void +tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride) +{ + const uint32_t *s0 = src; + const uint32_t *s1 = s0 + src_stride; + const uint32_t *s2 = s1 + src_stride; + const uint32_t *s3 = s2 + src_stride; + + dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3]; + dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3]; + dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3]; + dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3]; +} + + + +/** + * Convert a 4x4 rect of 16-bit words from a linear layout into tiled + * layout (in which all 16 words are contiguous). + */ +static void +tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride) +{ + const uint16_t *s0 = src; + const uint16_t *s1 = s0 + src_stride; + const uint16_t *s2 = s1 + src_stride; + const uint16_t *s3 = s2 + src_stride; + + dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3]; + dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3]; + dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3]; + dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3]; +} + + + /** * Convert a tiled image into a linear image. * \param src_stride source row stride in bytes (bytes per row of tiles) * \param dst_stride dest row stride in bytes */ void -lp_tiled_to_linear(const uint8_t *src, - uint8_t *dst, +lp_tiled_to_linear(const void *src, void *dst, + unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, - unsigned src_stride, - unsigned dst_stride) + enum pipe_format format, unsigned dst_stride) { - const unsigned tiles_per_row = src_stride / BYTES_PER_TILE; - unsigned i, j; - - for (j = 0; j < height; j += TILE_SIZE) { - for (i = 0; i < width; i += TILE_SIZE) { - unsigned tile_offset = - ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); - unsigned byte_offset = tile_offset * BYTES_PER_TILE; - const uint8_t *src_tile = src + byte_offset; - - lp_tile_write_4ub(format, - src_tile, - dst, - dst_stride, - i, j, TILE_SIZE, TILE_SIZE); + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + /*assert(width % TILE_SIZE == 0); + assert(height % TILE_SIZE == 0);*/ + + /* Note that Z/stencil surfaces use a different tiling size than + * color surfaces. + */ + if (util_format_is_depth_or_stencil(format)) { + const uint bpp = util_format_get_blocksize(format); + const uint src_stride = dst_stride * TILE_VECTOR_WIDTH; + const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT; + const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp); + + dst_stride /= bpp; /* convert from bytes to words */ + + if (bpp == 4) { + const uint32_t *src32 = (const uint32_t *) src; + uint32_t *dst32 = (uint32_t *) dst; + uint i, j; + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 32-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + uint dst_offset = jj * dst_stride + ii; + untile_4_4_uint32(src32 + src_offset, + dst32 + dst_offset, + dst_stride); + } + } + } + else { + const uint16_t *src16 = (const uint16_t *) src; + uint16_t *dst16 = (uint16_t *) dst; + uint i, j; + + assert(bpp == 2); + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 16-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + uint dst_offset = jj * dst_stride + ii; + untile_4_4_uint16(src16 + src_offset, + dst16 + dst_offset, + dst_stride); + } + } + } + } + else { + /* color image */ + const uint bpp = 4; + const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; + const uint bytes_per_tile = tile_w * tile_h * bpp; + const uint src_stride = dst_stride * tile_w; + const uint tiles_per_row = src_stride / bytes_per_tile; + uint i, j; + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + uint ii = i + x, jj = j + y; + uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w); + uint byte_offset = tile_offset * bytes_per_tile; + const uint8_t *src_tile = (uint8_t *) src + byte_offset; + + lp_tile_write_4ub(format, + src_tile, + dst, dst_stride, + ii, jj, tile_w, tile_h); + } } } } @@ -71,28 +218,85 @@ lp_tiled_to_linear(const uint8_t *src, * \param dst_stride dest row stride in bytes (bytes per row of tiles) */ void -lp_linear_to_tiled(const uint8_t *src, - uint8_t *dst, +lp_linear_to_tiled(const void *src, void *dst, + unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, - unsigned src_stride, - unsigned dst_stride) + enum pipe_format format, unsigned src_stride) { - const unsigned tiles_per_row = dst_stride / BYTES_PER_TILE; - unsigned i, j; - - for (j = 0; j < height; j += TILE_SIZE) { - for (i = 0; i < width; i += TILE_SIZE) { - unsigned tile_offset = - ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); - unsigned byte_offset = tile_offset * BYTES_PER_TILE; - uint8_t *dst_tile = dst + byte_offset; - - lp_tile_read_4ub(format, - dst_tile, - src, - src_stride, - i, j, TILE_SIZE, TILE_SIZE); + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + /* + assert(width % TILE_SIZE == 0); + assert(height % TILE_SIZE == 0); + */ + + if (util_format_is_depth_or_stencil(format)) { + const uint bpp = util_format_get_blocksize(format); + const uint dst_stride = src_stride * TILE_VECTOR_WIDTH; + const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT; + const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp); + + src_stride /= bpp; /* convert from bytes to words */ + + if (bpp == 4) { + const uint32_t *src32 = (const uint32_t *) src; + uint32_t *dst32 = (uint32_t *) dst; + uint i, j; + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 32-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = jj * src_stride + ii; + uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + tile_4_4_uint32(src32 + src_offset, + dst32 + dst_offset, + src_stride); + } + } + } + else { + const uint16_t *src16 = (const uint16_t *) src; + uint16_t *dst16 = (uint16_t *) dst; + uint i, j; + + assert(bpp == 2); + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 16-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = jj * src_stride + ii; + uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + tile_4_4_uint16(src16 + src_offset, + dst16 + dst_offset, + src_stride); + } + } + } + } + else { + const uint bpp = 4; + const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; + const uint bytes_per_tile = tile_w * tile_h * bpp; + const uint dst_stride = src_stride * tile_w; + const uint tiles_per_row = dst_stride / bytes_per_tile; + uint i, j; + + for (j = 0; j < height; j += TILE_SIZE) { + for (i = 0; i < width; i += TILE_SIZE) { + uint ii = i + x, jj = j + y; + uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w); + uint byte_offset = tile_offset * bytes_per_tile; + uint8_t *dst_tile = (uint8_t *) dst + byte_offset; + + lp_tile_read_4ub(format, + dst_tile, + src, src_stride, + ii, jj, tile_w, tile_h); + } } } } @@ -102,7 +306,7 @@ lp_linear_to_tiled(const uint8_t *src, * For testing only. */ void -test_tiled_linear_conversion(uint8_t *data, +test_tiled_linear_conversion(void *data, enum pipe_format format, unsigned width, unsigned height, unsigned stride) @@ -113,13 +317,13 @@ test_tiled_linear_conversion(uint8_t *data, uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4); - unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4; + /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/ - lp_linear_to_tiled(data, tiled, width, height, format, - stride, tiled_stride); + lp_linear_to_tiled(data, tiled, 0, 0, width, height, format, + stride); - lp_tiled_to_linear(tiled, data, width, height, format, - tiled_stride, stride); + lp_tiled_to_linear(tiled, data, 0, 0, width, height, format, + stride); free(tiled); } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.h b/src/gallium/drivers/llvmpipe/lp_tile_image.h index 60d472e8c5..d74621925d 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.h @@ -30,25 +30,21 @@ void -lp_tiled_to_linear(const uint8_t *src, - uint8_t *dst, +lp_tiled_to_linear(const void *src, void *dst, + unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, - unsigned src_stride, - unsigned dst_stride); + enum pipe_format format, unsigned dst_stride); void -lp_linear_to_tiled(const uint8_t *src, - uint8_t *dst, +lp_linear_to_tiled(const void *src, void *dst, + unsigned x, unsigned y, unsigned width, unsigned height, - enum pipe_format format, - unsigned src_stride, - unsigned dst_stride); + enum pipe_format format, unsigned src_stride); void -test_tiled_linear_conversion(uint8_t *data, +test_tiled_linear_conversion(void *data, enum pipe_format format, unsigned width, unsigned height, unsigned stride); diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index eea3ab8499..9d6a88afec 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -50,11 +50,26 @@ tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; #define TILE_X_STRIDE (NUM_CHANNELS * TILE_C_STRIDE) //64 #define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT * TILE_SIZE * NUM_CHANNELS) //1024 -#define TILE_PIXEL(_p, _x, _y, _c) \ - ((_p)[((_y) / TILE_VECTOR_HEIGHT) * TILE_Y_STRIDE + \ - ((_x) / TILE_VECTOR_WIDTH) * TILE_X_STRIDE + \ - (_c) * TILE_C_STRIDE + \ - tile_offset[(_y) % TILE_VECTOR_HEIGHT][(_x) % TILE_VECTOR_WIDTH]]) + +extern int tile_write_count, tile_read_count; + + +/** + * Return offset of the given pixel (and color channel) from the start + * of a tile, in bytes. + */ +static INLINE unsigned +tile_pixel_offset(unsigned x, unsigned y, unsigned c) +{ + unsigned ix = (x / TILE_VECTOR_WIDTH) * TILE_X_STRIDE; + unsigned iy = (y / TILE_VECTOR_HEIGHT) * TILE_Y_STRIDE; + unsigned offset = iy + ix + c * TILE_C_STRIDE + + tile_offset[y % TILE_VECTOR_HEIGHT][x % TILE_VECTOR_WIDTH]; + return offset; +} + + +#define TILE_PIXEL(_p, _x, _y, _c) ((_p)[tile_pixel_offset(_x, _y, _c)]) void diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index c1226e499c..65810b6f8f 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -300,6 +300,7 @@ def generate_read(formats, dst_channel, dst_native_type, dst_suffix): print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) print '{' print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type + print ' tile_read_count += 1;' print ' switch(format) {' for format in formats: if is_format_supported(format): @@ -327,6 +328,7 @@ def generate_write(formats, src_channel, src_native_type, src_suffix): print '{' print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type + print ' tile_write_count += 1;' print ' switch(format) {' for format in formats: if is_format_supported(format): @@ -358,6 +360,8 @@ def main(): print '#include "util/u_half.h"' print '#include "lp_tile_soa.h"' print + print 'int tile_write_count=0, tile_read_count=0;' + print print 'const unsigned char' print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {' print ' { 0, 1, 4, 5},' -- cgit v1.2.3