From 3a06c113c76355fc9622adfe7565c18d9787e9a8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Dec 2009 17:02:17 -0700 Subject: llvmpipe: repartition lp_rasterizer state for threading Some of the state is per-thread. Put that state in new lp_rasterizer_task struct. --- src/gallium/drivers/llvmpipe/lp_bin.h | 4 +- src/gallium/drivers/llvmpipe/lp_rast.c | 89 +++++++++++++++++++---------- src/gallium/drivers/llvmpipe/lp_rast.h | 7 +++ src/gallium/drivers/llvmpipe/lp_rast_priv.h | 43 +++++++++----- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 62 ++++++++++---------- 5 files changed, 133 insertions(+), 72 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h index 24e599ea66..b07ff64e62 100644 --- a/src/gallium/drivers/llvmpipe/lp_bin.h +++ b/src/gallium/drivers/llvmpipe/lp_bin.h @@ -56,7 +56,9 @@ /* switch to a non-pointer value for this: */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg ); +typedef void (*lp_rast_cmd)( struct lp_rasterizer *, + unsigned thread_index, + const union lp_rast_cmd_arg ); struct cmd_block { lp_rast_cmd cmd[CMD_BLOCK_MAX]; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a6192e589d..37cc28e938 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -39,14 +39,18 @@ struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen ) { struct lp_rasterizer *rast; + unsigned i; rast = CALLOC_STRUCT(lp_rasterizer); if(!rast) return NULL; rast->screen = screen; - rast->tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); - rast->tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + + for (i = 0; i < Elements(rast->tasks); i++) { + rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + } return rast; } @@ -153,12 +157,13 @@ lp_rast_end( struct 
lp_rasterizer *rast ) */ static void lp_rast_start_tile( struct lp_rasterizer *rast, + unsigned thread_index, unsigned x, unsigned y ) { LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); - rast->x = x; - rast->y = y; + rast->tasks[thread_index].x = x; + rast->tasks[thread_index].y = y; } @@ -167,9 +172,11 @@ lp_rast_start_tile( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_clear_color( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { const uint8_t *clear_color = arg.clear_color; + uint8_t *color_tile = rast->tasks[thread_index].tile.color; LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, clear_color[0], @@ -180,14 +187,14 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, if (clear_color[0] == clear_color[1] && clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { - memset(rast->tile.color, clear_color[0], TILE_SIZE * TILE_SIZE * 4); + memset(color_tile, clear_color[0], TILE_SIZE * TILE_SIZE * 4); } else { unsigned x, y, chan; for (y = 0; y < TILE_SIZE; y++) for (x = 0; x < TILE_SIZE; x++) for (chan = 0; chan < 4; ++chan) - TILE_PIXEL(rast->tile.color, x, y, chan) = clear_color[chan]; + TILE_PIXEL(color_tile, x, y, chan) = clear_color[chan]; } } @@ -197,15 +204,17 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg) { unsigned i, j; + uint32_t *depth_tile = rast->tasks[thread_index].tile.depth; LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) - rast->tile.depth[i*TILE_SIZE + j] = arg.clear_zstencil; + depth_tile[i*TILE_SIZE + j] = arg.clear_zstencil; } @@ -214,6 +223,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, * This is a bin command called during bin processing. 
*/ void lp_rast_load_color( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg) { LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -227,6 +237,7 @@ void lp_rast_load_color( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_load_zstencil( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -236,6 +247,7 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast, void lp_rast_set_state( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { const struct lp_rast_state *state = arg.set_state; @@ -243,7 +255,7 @@ void lp_rast_set_state( struct lp_rasterizer *rast, LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); /* just set the current state pointer for this rasterizer */ - rast->current_state = state; + rast->tasks[thread_index].current_state = state; } @@ -257,9 +269,12 @@ void lp_rast_set_state( struct lp_rasterizer *rast, * This is a bin command called during bin processing. */ void lp_rast_shade_tile( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + const unsigned tile_x = rast->tasks[thread_index].x; + const unsigned tile_y = rast->tasks[thread_index].y; const unsigned mask = ~0; unsigned x, y; @@ -269,7 +284,12 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, */ for (y = 0; y < TILE_SIZE; y += 4) for (x = 0; x < TILE_SIZE; x += 4) - lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, mask); + lp_rast_shade_quads( rast, + thread_index, + inputs, + tile_x + x, + tile_y + y, + mask); } @@ -278,13 +298,14 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, * This is a bin command called during bin processing. 
*/ void lp_rast_shade_quads( struct lp_rasterizer *rast, + unsigned thread_index, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, unsigned mask) { #if 1 - const struct lp_rast_state *state = rast->current_state; - struct lp_rast_tile *tile = &rast->tile; + const struct lp_rast_state *state = rast->tasks[thread_index].current_state; + struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; void *color; void *depth; uint32_t ALIGN16_ATTRIB masks[2][2][2][2]; @@ -388,10 +409,11 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, /** * Write the rasterizer's color tile to the framebuffer. */ -static void lp_rast_store_color( struct lp_rasterizer *rast ) +static void lp_rast_store_color( struct lp_rasterizer *rast, + unsigned thread_index) { - const unsigned x = rast->x; - const unsigned y = rast->y; + const unsigned x = rast->tasks[thread_index].x; + const unsigned y = rast->tasks[thread_index].y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; @@ -404,7 +426,7 @@ static void lp_rast_store_color( struct lp_rasterizer *rast ) LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); lp_tile_write_4ub(rast->cbuf_transfer->format, - rast->tile.color, + rast->tasks[thread_index].tile.color, rast->cbuf_map, rast->cbuf_transfer->stride, x, y, @@ -430,10 +452,11 @@ lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride, /** * Write the rasterizer's z/stencil tile to the framebuffer. 
*/ -static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) +static void lp_rast_store_zstencil( struct lp_rasterizer *rast, + unsigned thread_index ) { - const unsigned x = rast->x; - const unsigned y = rast->y; + const unsigned x = rast->tasks[thread_index].x; + const unsigned y = rast->tasks[thread_index].y; unsigned w = TILE_SIZE; unsigned h = TILE_SIZE; @@ -446,7 +469,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM); - lp_tile_write_z32(rast->tile.depth, + lp_tile_write_z32(rast->tasks[thread_index].tile.depth, rast->zsbuf_map, rast->zsbuf_transfer->stride, x, y, w, h); @@ -457,15 +480,16 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast ) * Write the rasterizer's tiles to the framebuffer. */ static void -lp_rast_end_tile( struct lp_rasterizer *rast ) +lp_rast_end_tile( struct lp_rasterizer *rast, + unsigned thread_index ) { LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); if (rast->state.write_color) - lp_rast_store_color(rast); + lp_rast_store_color(rast, thread_index); if (rast->state.write_zstencil) - lp_rast_store_zstencil(rast); + lp_rast_store_zstencil(rast, thread_index); } @@ -476,6 +500,7 @@ lp_rast_end_tile( struct lp_rasterizer *rast ) */ static void rasterize_bin( struct lp_rasterizer *rast, + unsigned thread_index, const struct cmd_bin *bin, int x, int y) { @@ -483,16 +508,16 @@ rasterize_bin( struct lp_rasterizer *rast, struct cmd_block *block; unsigned k; - lp_rast_start_tile( rast, x, y ); + lp_rast_start_tile( rast, thread_index, x, y ); /* simply execute each of the commands in the block list */ for (block = commands->head; block; block = block->next) { for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, block->arg[k] ); + block->cmd[k]( rast, thread_index, block->arg[k] ); } } - lp_rast_end_tile( rast ); + lp_rast_end_tile( rast, thread_index ); } @@ -522,7 +547,7 @@
lp_rasterize_bins( struct lp_rasterizer *rast, for (i = 0; i < bins->tiles_x; i++) { for (j = 0; j < bins->tiles_y; j++) { struct cmd_bin *bin = lp_get_bin(bins, i, j); - rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE ); + rasterize_bin( rast, 0, bin, i * TILE_SIZE, j * TILE_SIZE ); } } } @@ -534,7 +559,7 @@ lp_rasterize_bins( struct lp_rasterizer *rast, lp_bin_iter_begin( bins ); while ((bin = lp_bin_iter_next(bins, &x, &y))) { - rasterize_bin( rast, bin, x * TILE_SIZE, y * TILE_SIZE); + rasterize_bin( rast, 0, bin, x * TILE_SIZE, y * TILE_SIZE); } } #endif @@ -550,10 +575,16 @@ lp_rasterize_bins( struct lp_rasterizer *rast, */ void lp_rast_destroy( struct lp_rasterizer *rast ) { + unsigned i; + pipe_surface_reference(&rast->state.cbuf, NULL); pipe_surface_reference(&rast->state.zsbuf, NULL); - align_free(rast->tile.depth); - align_free(rast->tile.color); + + for (i = 0; i < Elements(rast->tasks); i++) { + align_free(rast->tasks[i].tile.depth); + align_free(rast->tasks[i].tile.color); + } + FREE(rast); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index e77c77b776..25e7f8e008 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -193,24 +193,31 @@ lp_rast_arg_null( void ) */ void lp_rast_clear_color( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_clear_zstencil( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_load_color( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_load_zstencil( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_set_state( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_triangle( struct lp_rasterizer *, + unsigned thread_index, const union lp_rast_cmd_arg ); void lp_rast_shade_tile( struct lp_rasterizer *, + unsigned 
thread_index, const union lp_rast_cmd_arg ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 98111edff7..9e7cbd7912 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -30,6 +30,10 @@ #include "lp_rast.h" + +#define MAX_THREADS 4 /* XXX probably temporary here */ + + struct pipe_transfer; struct pipe_screen; @@ -47,14 +51,34 @@ struct lp_rast_tile /** - * This is the state required while rasterizing a tile. - * The tile size is TILE_SIZE x TILE_SIZE pixels. + * Per-thread rasterization state */ -struct lp_rasterizer +struct lp_rasterizer_task { struct lp_rast_tile tile; /** Tile color/z/stencil memory */ unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ + + /* Pixel blocks produced during rasterization + */ + unsigned nr_blocks; + struct { + unsigned x; + unsigned y; + unsigned mask; + } blocks[256]; + + const struct lp_rast_state *current_state; +}; + + +/** + * This is the state required while rasterizing tiles. + * Note that this contains per-thread information too. + * The tile size is TILE_SIZE x TILE_SIZE pixels. 
+ */ +struct lp_rasterizer +{ unsigned width, height; /**< Size of framebuffer, in pixels */ boolean clipped_tile; @@ -78,20 +102,13 @@ struct lp_rasterizer char clear_stencil; } state; - /* Pixel blocks produced during rasterization - */ - unsigned nr_blocks; - struct { - unsigned x; - unsigned y; - unsigned mask; - } blocks[256]; - - const struct lp_rast_state *current_state; + /** A task object for each rasterization thread */ + struct lp_rasterizer_task tasks[MAX_THREADS]; }; void lp_rast_shade_quads( struct lp_rasterizer *rast, + unsigned thread_index, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, unsigned masks); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 81a9c1c142..6c96010c52 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -40,15 +40,15 @@ * All pixels are known to be inside the triangle's bounds. */ static void -block_full_4( struct lp_rasterizer *rast, int x, int y ) +block_full_4( struct lp_rasterizer_task *rast_task, int x, int y ) { - const unsigned i = rast->nr_blocks; + const unsigned i = rast_task->nr_blocks; assert(x % 4 == 0); assert(y % 4 == 0); - rast->blocks[i].x = x; - rast->blocks[i].y = y; - rast->blocks[i].mask = ~0; - rast->nr_blocks++; + rast_task->blocks[i].x = x; + rast_task->blocks[i].y = y; + rast_task->blocks[i].mask = ~0; + rast_task->nr_blocks++; } @@ -57,14 +57,14 @@ block_full_4( struct lp_rasterizer *rast, int x, int y ) * All pixels are known to be inside the triangle's bounds. 
*/ static void -block_full_16( struct lp_rasterizer *rast, int x, int y ) +block_full_16( struct lp_rasterizer_task *rast_task, int x, int y ) { unsigned ix, iy; assert(x % 16 == 0); assert(y % 16 == 0); for (iy = 0; iy < 16; iy += 4) for (ix = 0; ix < 16; ix += 4) - block_full_4(rast, x + ix, y + iy); + block_full_4(rast_task, x + ix, y + iy); } @@ -74,7 +74,7 @@ block_full_16( struct lp_rasterizer *rast, int x, int y ) * Generate a mask of in/out flags and add the block to the blocks list. */ static void -do_block_4( struct lp_rasterizer *rast, +do_block_4( struct lp_rasterizer_task *rast_task, const struct lp_rast_triangle *tri, int x, int y, int c1, @@ -97,11 +97,11 @@ do_block_4( struct lp_rasterizer *rast, /* As we do trivial reject already, masks should rarely be all zero: */ if (mask) { - const unsigned i = rast->nr_blocks; - rast->blocks[i].x = x; - rast->blocks[i].y = y; - rast->blocks[i].mask = mask; - rast->nr_blocks++; + const unsigned i = rast_task->nr_blocks; + rast_task->blocks[i].x = x; + rast_task->blocks[i].y = y; + rast_task->blocks[i].mask = mask; + rast_task->nr_blocks++; } } @@ -111,7 +111,7 @@ do_block_4( struct lp_rasterizer *rast, * of the triangle's bounds. 
*/ static void -do_block_16( struct lp_rasterizer *rast, +do_block_16( struct lp_rasterizer_task *rast_task, const struct lp_rast_triangle *tri, int x, int y, int c1, @@ -146,11 +146,11 @@ do_block_16( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { /* the block is completely inside the triangle */ - block_full_4(rast, x+ix, y+iy); + block_full_4(rast_task, x+ix, y+iy); } else { /* the block is partially in/out of the triangle */ - do_block_4(rast, tri, x+ix, y+iy, cx1, cx2, cx3); + do_block_4(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); } } } @@ -163,12 +163,14 @@ do_block_16( struct lp_rasterizer *rast, */ void lp_rast_triangle( struct lp_rasterizer *rast, + unsigned thread_index, const union lp_rast_cmd_arg arg ) { + struct lp_rasterizer_task *rast_task = &rast->tasks[thread_index]; const struct lp_rast_triangle *tri = arg.triangle; - int x = rast->x; - int y = rast->y; + int x = rast_task->x; + int y = rast_task->y; int ix, iy; unsigned i = 0; @@ -184,11 +186,11 @@ lp_rast_triangle( struct lp_rasterizer *rast, int eo2 = tri->eo2 * 16; int eo3 = tri->eo3 * 16; - assert(Elements(rast->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); + assert(Elements(rast_task->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4)); LP_DBG(DEBUG_RAST, "lp_rast_triangle\n"); - rast->nr_blocks = 0; + rast_task->nr_blocks = 0; /* Walk over the tile to build a list of 4x4 pixel blocks which will * be filled/shaded. 
We do this at two granularities: 16x16 blocks @@ -209,21 +211,23 @@ lp_rast_triangle( struct lp_rasterizer *rast, cx2 + ei2 > 0 && cx3 + ei3 > 0) { /* the block is completely inside the triangle */ - block_full_16(rast, x+ix, y+iy); + block_full_16(rast_task, x+ix, y+iy); } else { /* the block is partially in/out of the triangle */ - do_block_16(rast, tri, x+ix, y+iy, cx1, cx2, cx3); + do_block_16(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3); } } } - assert(rast->nr_blocks <= Elements(rast->blocks)); + assert(rast_task->nr_blocks <= Elements(rast_task->blocks)); /* Shade the 4x4 pixel blocks */ - for (i = 0; i < rast->nr_blocks; i++) - lp_rast_shade_quads(rast, &tri->inputs, - rast->blocks[i].x, - rast->blocks[i].y, - rast->blocks[i].mask); + for (i = 0; i < rast_task->nr_blocks; i++) + lp_rast_shade_quads(rast, + thread_index, + &tri->inputs, + rast_task->blocks[i].x, + rast_task->blocks[i].y, + rast_task->blocks[i].mask); } -- cgit v1.2.3