summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Brian Paul <brianp@vmware.com> 2009-12-07 17:02:17 -0700
committer: Brian Paul <brianp@vmware.com> 2009-12-07 18:04:54 -0700
commit: 3a06c113c76355fc9622adfe7565c18d9787e9a8 (patch)
tree: 7020ba41b1fda67c9f4adb157fba7ec2f8cca1d9 /src/gallium/drivers
parent: cdaea049c95031338040b31ff31944c8a001a1dd (diff)
llvmpipe: repartition lp_rasterizer state for threading
Some of the state is per-thread. Put that state in new lp_rasterizer_task struct.
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_bin.h | 4
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast.c | 89
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast.h | 7
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 43
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 62
5 files changed, 133 insertions, 72 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bin.h b/src/gallium/drivers/llvmpipe/lp_bin.h
index 24e599ea66..b07ff64e62 100644
--- a/src/gallium/drivers/llvmpipe/lp_bin.h
+++ b/src/gallium/drivers/llvmpipe/lp_bin.h
@@ -56,7 +56,9 @@
/* switch to a non-pointer value for this:
*/
-typedef void (*lp_rast_cmd)( struct lp_rasterizer *, const union lp_rast_cmd_arg );
+typedef void (*lp_rast_cmd)( struct lp_rasterizer *,
+ unsigned thread_index,
+ const union lp_rast_cmd_arg );
struct cmd_block {
lp_rast_cmd cmd[CMD_BLOCK_MAX];
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index a6192e589d..37cc28e938 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -39,14 +39,18 @@
struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen )
{
struct lp_rasterizer *rast;
+ unsigned i;
rast = CALLOC_STRUCT(lp_rasterizer);
if(!rast)
return NULL;
rast->screen = screen;
- rast->tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
- rast->tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
+
+ for (i = 0; i < Elements(rast->tasks); i++) {
+ rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
+ rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
+ }
return rast;
}
@@ -153,12 +157,13 @@ lp_rast_end( struct lp_rasterizer *rast )
*/
static void
lp_rast_start_tile( struct lp_rasterizer *rast,
+ unsigned thread_index,
unsigned x, unsigned y )
{
LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
- rast->x = x;
- rast->y = y;
+ rast->tasks[thread_index].x = x;
+ rast->tasks[thread_index].y = y;
}
@@ -167,9 +172,11 @@ lp_rast_start_tile( struct lp_rasterizer *rast,
* This is a bin command called during bin processing.
*/
void lp_rast_clear_color( struct lp_rasterizer *rast,
+ unsigned thread_index,
const union lp_rast_cmd_arg arg )
{
const uint8_t *clear_color = arg.clear_color;
+ uint8_t *color_tile = rast->tasks[thread_index].tile.color;
LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
clear_color[0],
@@ -180,14 +187,14 @@ void lp_rast_clear_color( struct lp_rasterizer *rast,
if (clear_color[0] == clear_color[1] &&
clear_color[1] == clear_color[2] &&
clear_color[2] == clear_color[3]) {
- memset(rast->tile.color, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
+ memset(color_tile, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
}
else {
unsigned x, y, chan;
for (y = 0; y < TILE_SIZE; y++)
for (x = 0; x < TILE_SIZE; x++)
for (chan = 0; chan < 4; ++chan)
- TILE_PIXEL(rast->tile.color, x, y, chan) = clear_color[chan];
+ TILE_PIXEL(color_tile, x, y, chan) = clear_color[chan];
}
}
@@ -197,15 +204,17 @@ void lp_rast_clear_color( struct lp_rasterizer *rast,
* This is a bin command called during bin processing.
*/
void lp_rast_clear_zstencil( struct lp_rasterizer *rast,
+ unsigned thread_index,
const union lp_rast_cmd_arg arg)
{
unsigned i, j;
+ uint32_t *depth_tile = rast->tasks[thread_index].tile.depth;
LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil);
for (i = 0; i < TILE_SIZE; i++)
for (j = 0; j < TILE_SIZE; j++)
- rast->tile.depth[i*TILE_SIZE + j] = arg.clear_zstencil;
+ depth_tile[i*TILE_SIZE + j] = arg.clear_zstencil;
}
@@ -214,6 +223,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast,
* This is a bin command called during bin processing.
*/
void lp_rast_load_color( struct lp_rasterizer *rast,
+ unsigned thread_index,
const union lp_rast_cmd_arg arg)
{
LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
@@ -227,6 +237,7 @@ void lp_rast_load_color( struct lp_rasterizer *rast,
* This is a bin command called during bin processing.
*/
void lp_rast_load_zstencil( struct lp_rasterizer *rast,
+ unsigned thread_index,
const union lp_rast_cmd_arg arg )
{
LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
@@ -236,6 +247,7 @@ void lp_rast_load_zstencil( struct lp_rasterizer *rast,
void lp_rast_set_state( struct lp_rasterizer *rast,
+ unsigned thread_index,
const union lp_rast_cmd_arg arg )
{
const struct lp_rast_state *state = arg.set_state;
@@ -243,7 +255,7 @@ void lp_rast_set_state( struct lp_rasterizer *rast,
LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
/* just set the current state pointer for this rasterizer */
- rast->current_state = state;
+ rast->tasks[thread_index].current_state = state;
}
@@ -257,9 +269,12 @@ void lp_rast_set_state( struct lp_rasterizer *rast,
* This is a bin command called during bin processing.
*/
void lp_rast_shade_tile( struct lp_rasterizer *rast,
+ unsigned thread_index,
const union lp_rast_cmd_arg arg )
{
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
+ const unsigned tile_x = rast->tasks[thread_index].x;
+ const unsigned tile_y = rast->tasks[thread_index].y;
const unsigned mask = ~0;
unsigned x, y;
@@ -269,7 +284,12 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
*/
for (y = 0; y < TILE_SIZE; y += 4)
for (x = 0; x < TILE_SIZE; x += 4)
- lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, mask);
+ lp_rast_shade_quads( rast,
+ thread_index,
+ inputs,
+ tile_x + x,
+ tile_y + y,
+ mask);
}
@@ -278,13 +298,14 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
* This is a bin command called during bin processing.
*/
void lp_rast_shade_quads( struct lp_rasterizer *rast,
+ unsigned thread_index,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
unsigned mask)
{
#if 1
- const struct lp_rast_state *state = rast->current_state;
- struct lp_rast_tile *tile = &rast->tile;
+ const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
+ struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
void *color;
void *depth;
uint32_t ALIGN16_ATTRIB masks[2][2][2][2];
@@ -388,10 +409,11 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
/**
* Write the rasterizer's color tile to the framebuffer.
*/
-static void lp_rast_store_color( struct lp_rasterizer *rast )
+static void lp_rast_store_color( struct lp_rasterizer *rast,
+ unsigned thread_index)
{
- const unsigned x = rast->x;
- const unsigned y = rast->y;
+ const unsigned x = rast->tasks[thread_index].x;
+ const unsigned y = rast->tasks[thread_index].y;
unsigned w = TILE_SIZE;
unsigned h = TILE_SIZE;
@@ -404,7 +426,7 @@ static void lp_rast_store_color( struct lp_rasterizer *rast )
LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
lp_tile_write_4ub(rast->cbuf_transfer->format,
- rast->tile.color,
+ rast->tasks[thread_index].tile.color,
rast->cbuf_map,
rast->cbuf_transfer->stride,
x, y,
@@ -430,10 +452,11 @@ lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride,
/**
* Write the rasterizer's z/stencil tile to the framebuffer.
*/
-static void lp_rast_store_zstencil( struct lp_rasterizer *rast )
+static void lp_rast_store_zstencil( struct lp_rasterizer *rast,
+ unsigned thread_index )
{
- const unsigned x = rast->x;
- const unsigned y = rast->y;
+ const unsigned x = rast->tasks[thread_index].x;
+ const unsigned y = rast->tasks[thread_index].y;
unsigned w = TILE_SIZE;
unsigned h = TILE_SIZE;
@@ -446,7 +469,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast )
LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM);
- lp_tile_write_z32(rast->tile.depth,
+ lp_tile_write_z32(rast->tasks[thread_index].tile.depth,
rast->zsbuf_map,
rast->zsbuf_transfer->stride,
x, y, w, h);
@@ -457,15 +480,16 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast )
* Write the rasterizer's tiles to the framebuffer.
*/
static void
-lp_rast_end_tile( struct lp_rasterizer *rast )
+lp_rast_end_tile( struct lp_rasterizer *rast,
+ unsigned thread_index )
{
LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
if (rast->state.write_color)
- lp_rast_store_color(rast);
+ lp_rast_store_color(rast, thread_index);
if (rast->state.write_zstencil)
- lp_rast_store_zstencil(rast);
+ lp_rast_store_zstencil(rast, thread_index);
}
@@ -476,6 +500,7 @@ lp_rast_end_tile( struct lp_rasterizer *rast )
*/
static void
rasterize_bin( struct lp_rasterizer *rast,
+ unsigned thread_index,
const struct cmd_bin *bin,
int x, int y)
{
@@ -483,16 +508,16 @@ rasterize_bin( struct lp_rasterizer *rast,
struct cmd_block *block;
unsigned k;
- lp_rast_start_tile( rast, x, y );
+ lp_rast_start_tile( rast, thread_index, x, y );
/* simply execute each of the commands in the block list */
for (block = commands->head; block; block = block->next) {
for (k = 0; k < block->count; k++) {
- block->cmd[k]( rast, block->arg[k] );
+ block->cmd[k]( rast, thread_index, block->arg[k] );
}
}
- lp_rast_end_tile( rast );
+ lp_rast_end_tile( rast, thread_index );
}
@@ -522,7 +547,7 @@ lp_rasterize_bins( struct lp_rasterizer *rast,
for (i = 0; i < bins->tiles_x; i++) {
for (j = 0; j < bins->tiles_y; j++) {
struct cmd_bin *bin = lp_get_bin(bins, i, j);
- rasterize_bin( rast, bin, i * TILE_SIZE, j * TILE_SIZE );
+ rasterize_bin( rast, 0, bin, i * TILE_SIZE, j * TILE_SIZE );
}
}
}
@@ -534,7 +559,7 @@ lp_rasterize_bins( struct lp_rasterizer *rast,
lp_bin_iter_begin( bins );
while ((bin = lp_bin_iter_next(bins, &x, &y))) {
- rasterize_bin( rast, bin, x * TILE_SIZE, y * TILE_SIZE);
+ rasterize_bin( rast, 0, bin, x * TILE_SIZE, y * TILE_SIZE);
}
}
#endif
@@ -550,10 +575,16 @@ lp_rasterize_bins( struct lp_rasterizer *rast,
*/
void lp_rast_destroy( struct lp_rasterizer *rast )
{
+ unsigned i;
+
pipe_surface_reference(&rast->state.cbuf, NULL);
pipe_surface_reference(&rast->state.zsbuf, NULL);
- align_free(rast->tile.depth);
- align_free(rast->tile.color);
+
+ for (i = 0; i < Elements(rast->tasks); i++) {
+ align_free(rast->tasks[i].tile.depth);
+ align_free(rast->tasks[i].tile.color);
+ }
+
FREE(rast);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index e77c77b776..25e7f8e008 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -193,24 +193,31 @@ lp_rast_arg_null( void )
*/
void lp_rast_clear_color( struct lp_rasterizer *,
+ unsigned thread_index,
const union lp_rast_cmd_arg );
void lp_rast_clear_zstencil( struct lp_rasterizer *,
+ unsigned thread_index,
const union lp_rast_cmd_arg );
void lp_rast_load_color( struct lp_rasterizer *,
+ unsigned thread_index,
const union lp_rast_cmd_arg );
void lp_rast_load_zstencil( struct lp_rasterizer *,
+ unsigned thread_index,
const union lp_rast_cmd_arg );
void lp_rast_set_state( struct lp_rasterizer *,
+ unsigned thread_index,
const union lp_rast_cmd_arg );
void lp_rast_triangle( struct lp_rasterizer *,
+ unsigned thread_index,
const union lp_rast_cmd_arg );
void lp_rast_shade_tile( struct lp_rasterizer *,
+ unsigned thread_index,
const union lp_rast_cmd_arg );
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 98111edff7..9e7cbd7912 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -30,6 +30,10 @@
#include "lp_rast.h"
+
+#define MAX_THREADS 4 /* XXX probably temporary here */
+
+
struct pipe_transfer;
struct pipe_screen;
@@ -47,14 +51,34 @@ struct lp_rast_tile
/**
- * This is the state required while rasterizing a tile.
- * The tile size is TILE_SIZE x TILE_SIZE pixels.
+ * Per-thread rasterization state
*/
-struct lp_rasterizer
+struct lp_rasterizer_task
{
struct lp_rast_tile tile; /** Tile color/z/stencil memory */
unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */
+
+ /* Pixel blocks produced during rasterization
+ */
+ unsigned nr_blocks;
+ struct {
+ unsigned x;
+ unsigned y;
+ unsigned mask;
+ } blocks[256];
+
+ const struct lp_rast_state *current_state;
+};
+
+
+/**
+ * This is the state required while rasterizing tiles.
+ * Note that this contains per-thread information too.
+ * The tile size is TILE_SIZE x TILE_SIZE pixels.
+ */
+struct lp_rasterizer
+{
unsigned width, height; /**< Size of framebuffer, in pixels */
boolean clipped_tile;
@@ -78,20 +102,13 @@ struct lp_rasterizer
char clear_stencil;
} state;
- /* Pixel blocks produced during rasterization
- */
- unsigned nr_blocks;
- struct {
- unsigned x;
- unsigned y;
- unsigned mask;
- } blocks[256];
-
- const struct lp_rast_state *current_state;
+ /** A task object for each rasterization thread */
+ struct lp_rasterizer_task tasks[MAX_THREADS];
};
void lp_rast_shade_quads( struct lp_rasterizer *rast,
+ unsigned thread_index,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
unsigned masks);
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 81a9c1c142..6c96010c52 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -40,15 +40,15 @@
* All pixels are known to be inside the triangle's bounds.
*/
static void
-block_full_4( struct lp_rasterizer *rast, int x, int y )
+block_full_4( struct lp_rasterizer_task *rast_task, int x, int y )
{
- const unsigned i = rast->nr_blocks;
+ const unsigned i = rast_task->nr_blocks;
assert(x % 4 == 0);
assert(y % 4 == 0);
- rast->blocks[i].x = x;
- rast->blocks[i].y = y;
- rast->blocks[i].mask = ~0;
- rast->nr_blocks++;
+ rast_task->blocks[i].x = x;
+ rast_task->blocks[i].y = y;
+ rast_task->blocks[i].mask = ~0;
+ rast_task->nr_blocks++;
}
@@ -57,14 +57,14 @@ block_full_4( struct lp_rasterizer *rast, int x, int y )
* All pixels are known to be inside the triangle's bounds.
*/
static void
-block_full_16( struct lp_rasterizer *rast, int x, int y )
+block_full_16( struct lp_rasterizer_task *rast_task, int x, int y )
{
unsigned ix, iy;
assert(x % 16 == 0);
assert(y % 16 == 0);
for (iy = 0; iy < 16; iy += 4)
for (ix = 0; ix < 16; ix += 4)
- block_full_4(rast, x + ix, y + iy);
+ block_full_4(rast_task, x + ix, y + iy);
}
@@ -74,7 +74,7 @@ block_full_16( struct lp_rasterizer *rast, int x, int y )
* Generate a mask of in/out flags and add the block to the blocks list.
*/
static void
-do_block_4( struct lp_rasterizer *rast,
+do_block_4( struct lp_rasterizer_task *rast_task,
const struct lp_rast_triangle *tri,
int x, int y,
int c1,
@@ -97,11 +97,11 @@ do_block_4( struct lp_rasterizer *rast,
/* As we do trivial reject already, masks should rarely be all zero:
*/
if (mask) {
- const unsigned i = rast->nr_blocks;
- rast->blocks[i].x = x;
- rast->blocks[i].y = y;
- rast->blocks[i].mask = mask;
- rast->nr_blocks++;
+ const unsigned i = rast_task->nr_blocks;
+ rast_task->blocks[i].x = x;
+ rast_task->blocks[i].y = y;
+ rast_task->blocks[i].mask = mask;
+ rast_task->nr_blocks++;
}
}
@@ -111,7 +111,7 @@ do_block_4( struct lp_rasterizer *rast,
* of the triangle's bounds.
*/
static void
-do_block_16( struct lp_rasterizer *rast,
+do_block_16( struct lp_rasterizer_task *rast_task,
const struct lp_rast_triangle *tri,
int x, int y,
int c1,
@@ -146,11 +146,11 @@ do_block_16( struct lp_rasterizer *rast,
cx2 + ei2 > 0 &&
cx3 + ei3 > 0) {
/* the block is completely inside the triangle */
- block_full_4(rast, x+ix, y+iy);
+ block_full_4(rast_task, x+ix, y+iy);
}
else {
/* the block is partially in/out of the triangle */
- do_block_4(rast, tri, x+ix, y+iy, cx1, cx2, cx3);
+ do_block_4(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3);
}
}
}
@@ -163,12 +163,14 @@ do_block_16( struct lp_rasterizer *rast,
*/
void
lp_rast_triangle( struct lp_rasterizer *rast,
+ unsigned thread_index,
const union lp_rast_cmd_arg arg )
{
+ struct lp_rasterizer_task *rast_task = &rast->tasks[thread_index];
const struct lp_rast_triangle *tri = arg.triangle;
- int x = rast->x;
- int y = rast->y;
+ int x = rast_task->x;
+ int y = rast_task->y;
int ix, iy;
unsigned i = 0;
@@ -184,11 +186,11 @@ lp_rast_triangle( struct lp_rasterizer *rast,
int eo2 = tri->eo2 * 16;
int eo3 = tri->eo3 * 16;
- assert(Elements(rast->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4));
+ assert(Elements(rast_task->blocks) == (TILE_SIZE * TILE_SIZE) / (4*4));
LP_DBG(DEBUG_RAST, "lp_rast_triangle\n");
- rast->nr_blocks = 0;
+ rast_task->nr_blocks = 0;
/* Walk over the tile to build a list of 4x4 pixel blocks which will
* be filled/shaded. We do this at two granularities: 16x16 blocks
@@ -209,21 +211,23 @@ lp_rast_triangle( struct lp_rasterizer *rast,
cx2 + ei2 > 0 &&
cx3 + ei3 > 0) {
/* the block is completely inside the triangle */
- block_full_16(rast, x+ix, y+iy);
+ block_full_16(rast_task, x+ix, y+iy);
}
else {
/* the block is partially in/out of the triangle */
- do_block_16(rast, tri, x+ix, y+iy, cx1, cx2, cx3);
+ do_block_16(rast_task, tri, x+ix, y+iy, cx1, cx2, cx3);
}
}
}
- assert(rast->nr_blocks <= Elements(rast->blocks));
+ assert(rast_task->nr_blocks <= Elements(rast_task->blocks));
/* Shade the 4x4 pixel blocks */
- for (i = 0; i < rast->nr_blocks; i++)
- lp_rast_shade_quads(rast, &tri->inputs,
- rast->blocks[i].x,
- rast->blocks[i].y,
- rast->blocks[i].mask);
+ for (i = 0; i < rast_task->nr_blocks; i++)
+ lp_rast_shade_quads(rast,
+ thread_index,
+ &tri->inputs,
+ rast_task->blocks[i].x,
+ rast_task->blocks[i].y,
+ rast_task->blocks[i].mask);
}