From d23869a88a1e9e41c9ebbd5f918ede16a8ee838f Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 9 Jan 2008 17:49:43 -0700 Subject: Cell: initial implementation of tile status optimizations Tiles are marked as CLEAR, DEFINED or DIRTY to avoid making unnecessary get_tile() and put_tile() calls. --- src/mesa/pipe/cell/spu/main.c | 128 ++++++++++++++++++++++++++++++++++++------ src/mesa/pipe/cell/spu/main.h | 22 +++++++- src/mesa/pipe/cell/spu/tri.c | 47 +++++++++++----- 3 files changed, 164 insertions(+), 33 deletions(-) diff --git a/src/mesa/pipe/cell/spu/main.c b/src/mesa/pipe/cell/spu/main.c index 7b63e85ae2..5d47ca85d3 100644 --- a/src/mesa/pipe/cell/spu/main.c +++ b/src/mesa/pipe/cell/spu/main.c @@ -54,7 +54,9 @@ struct framebuffer fb; uint ctile[TILE_SIZE][TILE_SIZE] ALIGN16_ATTRIB; ushort ztile[TILE_SIZE][TILE_SIZE] ALIGN16_ATTRIB; -int DefaultTag; +ubyte tile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; +ubyte tile_status_z[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + @@ -120,13 +122,91 @@ put_tile(const struct framebuffer *fb, uint tx, uint ty, const uint *tile, } +void +clear_tile(uint tile[TILE_SIZE][TILE_SIZE], uint value) +{ + uint i, j; + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile[i][j] = value; + } + } +} + +void +clear_tile_z(ushort tile[TILE_SIZE][TILE_SIZE], uint value) +{ + uint i, j; + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile[i][j] = value; + } + } +} + + +/** + * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled + * tiles back to the main framebuffer. 
+ */ +static void +really_clear_tiles(uint surfaceIndex) +{ + const uint num_tiles = fb.width_tiles * fb.height_tiles; + uint i, j; + + if (surfaceIndex == 0) { + for (i = 0; i < TILE_SIZE; i++) + for (j = 0; j < TILE_SIZE; j++) + ctile[i][j] = fb.color_clear_value; + + for (i = init.id; i < num_tiles; i += init.num_spus) { + uint tx = i % fb.width_tiles; + uint ty = i / fb.width_tiles; + if (tile_status[ty][tx] == TILE_STATUS_CLEAR) { + put_tile(&fb, tx, ty, (uint *) ctile, TAG_SURFACE_CLEAR, 0); + } + } + } + else { + for (i = 0; i < TILE_SIZE; i++) + for (j = 0; j < TILE_SIZE; j++) + ztile[i][j] = fb.depth_clear_value; + + for (i = init.id; i < num_tiles; i += init.num_spus) { + uint tx = i % fb.width_tiles; + uint ty = i / fb.width_tiles; + if (tile_status_z[ty][tx] == TILE_STATUS_CLEAR) + put_tile(&fb, tx, ty, (uint *) ztile, TAG_SURFACE_CLEAR, 1); + } + } + +#if 0 + wait_on_mask(1 << TAG_SURFACE_CLEAR); +#endif +} + static void clear_surface(const struct cell_command_clear_surface *clear) { - uint num_tiles = fb.width_tiles * fb.height_tiles; + const uint num_tiles = fb.width_tiles * fb.height_tiles; uint i, j; +#define CLEAR_OPT 1 +#if CLEAR_OPT + /* set all tiles' status to CLEAR */ + if (clear->surface == 0) { + memset(tile_status, TILE_STATUS_CLEAR, sizeof(tile_status)); + fb.color_clear_value = clear->value; + } + else { + memset(tile_status_z, TILE_STATUS_CLEAR, sizeof(tile_status_z)); + fb.depth_clear_value = clear->value; + } + return; +#endif + if (clear->surface == 0) { for (i = 0; i < TILE_SIZE; i++) for (j = 0; j < TILE_SIZE; j++) @@ -195,7 +275,6 @@ tile_bounding_box(const struct cell_command_render *render, } - static void render(const struct cell_command_render *render) { @@ -242,9 +321,14 @@ render(const struct cell_command_render *render) /* Start fetching color/z tiles. We'll wait for completion when * we need read/write to them later in triangle rasterization. 
*/ - get_tile(&fb, tx, ty, (uint *) ctile, TAG_READ_TILE_COLOR, 0); if (fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - get_tile(&fb, tx, ty, (uint *) ztile, TAG_READ_TILE_Z, 1); + if (tile_status_z[ty][tx] != TILE_STATUS_CLEAR) { + get_tile(&fb, tx, ty, (uint *) ztile, TAG_READ_TILE_Z, 1); + } + } + + if (tile_status[ty][tx] != TILE_STATUS_CLEAR) { + get_tile(&fb, tx, ty, (uint *) ctile, TAG_READ_TILE_COLOR, 0); } assert(render->prim_type == PIPE_PRIM_TRIANGLES); @@ -277,23 +361,22 @@ render(const struct cell_command_render *render) tri_draw(&prim, tx, ty); } - /* in case nothing was drawn, wait now for completion */ - /* XXX temporary */ - wait_on_mask(1 << TAG_READ_TILE_COLOR); - if (fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - wait_on_mask(1 << TAG_READ_TILE_Z); /* XXX temporary */ + /* write color/z tiles back to main framebuffer, if dirtied */ + if (tile_status[ty][tx] == TILE_STATUS_DIRTY) { + put_tile(&fb, tx, ty, (uint *) ctile, TAG_WRITE_TILE_COLOR, 0); + tile_status[ty][tx] = TILE_STATUS_DEFINED; } - - /* XXX IF we wrote anything into the tile... */ - - put_tile(&fb, tx, ty, (uint *) ctile, TAG_WRITE_TILE_COLOR, 0); if (fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - put_tile(&fb, tx, ty, (uint *) ztile, TAG_WRITE_TILE_Z, 1); + if (tile_status_z[ty][tx] == TILE_STATUS_DIRTY) { + put_tile(&fb, tx, ty, (uint *) ztile, TAG_WRITE_TILE_Z, 1); + tile_status_z[ty][tx] = TILE_STATUS_DEFINED; + } } - wait_on_mask(1 << TAG_WRITE_TILE_COLOR); /* XXX temp */ + /* XXX move these... 
*/ + wait_on_mask(1 << TAG_WRITE_TILE_COLOR); if (fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - wait_on_mask(1 << TAG_WRITE_TILE_Z); /* XXX temporary */ + wait_on_mask(1 << TAG_WRITE_TILE_Z); } } } @@ -380,6 +463,7 @@ main_loop(void) case CELL_CMD_FINISH: if (Debug) printf("SPU %u: FINISH\n", init.id); + really_clear_tiles(0); /* wait for all outstanding DMAs to finish */ mfc_write_tag_mask(~0); mfc_read_tag_status_all(); @@ -398,6 +482,14 @@ main_loop(void) +static void +one_time_init(void) +{ + memset(tile_status, TILE_STATUS_DEFINED, sizeof(tile_status)); + memset(tile_status_z, TILE_STATUS_DEFINED, sizeof(tile_status_z)); +} + + /** * SPE entrypoint. * Note: example programs declare params as 'unsigned long long' but @@ -410,7 +502,7 @@ main(unsigned long speid, unsigned long argp) (void) speid; - DefaultTag = 1; + one_time_init(); if (Debug) printf("SPU: main() speid=%lu\n", speid); diff --git a/src/mesa/pipe/cell/spu/main.h b/src/mesa/pipe/cell/spu/main.h index 656d28ea0e..ee4248ead6 100644 --- a/src/mesa/pipe/cell/spu/main.h +++ b/src/mesa/pipe/cell/spu/main.h @@ -34,6 +34,10 @@ #include "pipe/cell/common.h" +#define MAX_WIDTH 1024 +#define MAX_HEIGHT 1024 + + extern volatile struct cell_init_info init; struct framebuffer { @@ -43,6 +47,9 @@ struct framebuffer { enum pipe_format depth_format; uint width, height; /**< size in pixels */ uint width_tiles, height_tiles; /**< width and height in tiles */ + + uint color_clear_value; + uint depth_clear_value; }; /* XXX Collect these globals in a struct: */ @@ -52,8 +59,6 @@ extern struct framebuffer fb; extern uint ctile[TILE_SIZE][TILE_SIZE] ALIGN16_ATTRIB; extern ushort ztile[TILE_SIZE][TILE_SIZE] ALIGN16_ATTRIB; -extern int DefaultTag; - /* DMA TAGS */ @@ -66,6 +71,13 @@ extern int DefaultTag; +#define TILE_STATUS_CLEAR 1 +#define TILE_STATUS_DEFINED 2 /**< defined pixel data */ +#define TILE_STATUS_DIRTY 3 /**< modified, but not put back yet */ + +extern ubyte 
tile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; +extern ubyte tile_status_z[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + void wait_on_mask(unsigned tag); @@ -78,5 +90,11 @@ void put_tile(const struct framebuffer *fb, uint tx, uint ty, const uint *tile, int tag, int zBuf); +void +clear_tile(uint tile[TILE_SIZE][TILE_SIZE], uint value); + +void +clear_tile_z(ushort tile[TILE_SIZE][TILE_SIZE], uint value); + #endif /* MAIN_H */ diff --git a/src/mesa/pipe/cell/spu/tri.c b/src/mesa/pipe/cell/spu/tri.c index b7dfd6ab8c..78cc7a591f 100644 --- a/src/mesa/pipe/cell/spu/tri.c +++ b/src/mesa/pipe/cell/spu/tri.c @@ -122,6 +122,8 @@ struct setup_stage { float oneoverarea; + uint tx, ty; + int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; #if 0 @@ -287,8 +289,15 @@ emit_quad( struct setup_stage *setup, int x, int y, unsigned mask ) float zvals[4]; eval_z(setup, (float) x, (float) y, zvals); - wait_on_mask(1 << TAG_READ_TILE_Z); /* XXX temporary */ - + if (tile_status_z[setup->ty][setup->tx] == TILE_STATUS_CLEAR) { + /* now, _really_ clear the tile */ + clear_tile_z(ztile, fb.depth_clear_value); + } + else { + /* make sure we've got the tile from main mem */ + wait_on_mask(1 << TAG_READ_TILE_Z); + } + tile_status_z[setup->ty][setup->tx] = TILE_STATUS_DIRTY; if (mask & MASK_TOP_LEFT) { z = (uint) (zvals[0] * 65535.0); @@ -323,17 +332,26 @@ emit_quad( struct setup_stage *setup, int x, int y, unsigned mask ) } } - if (mask) - wait_on_mask(1 << TAG_READ_TILE_COLOR); - - if (mask & MASK_TOP_LEFT) - ctile[iy][ix] = pack_color(colors[QUAD_TOP_LEFT]); - if (mask & MASK_TOP_RIGHT) - ctile[iy][ix+1] = pack_color(colors[QUAD_TOP_RIGHT]); - if (mask & MASK_BOTTOM_LEFT) - ctile[iy+1][ix] = pack_color(colors[QUAD_BOTTOM_LEFT]); - if (mask & MASK_BOTTOM_RIGHT) - ctile[iy+1][ix+1] = pack_color(colors[QUAD_BOTTOM_RIGHT]); + if (mask) { + if (tile_status[setup->ty][setup->tx] == TILE_STATUS_CLEAR) { + /* now, _really_ clear the tile */ + 
clear_tile(ctile, fb.color_clear_value); + } + else { + /* make sure we've got the tile from main mem */ + wait_on_mask(1 << TAG_READ_TILE_COLOR); + } + tile_status[setup->ty][setup->tx] = TILE_STATUS_DIRTY; + + if (mask & MASK_TOP_LEFT) + ctile[iy][ix] = pack_color(colors[QUAD_TOP_LEFT]); + if (mask & MASK_TOP_RIGHT) + ctile[iy][ix+1] = pack_color(colors[QUAD_TOP_RIGHT]); + if (mask & MASK_BOTTOM_LEFT) + ctile[iy+1][ix] = pack_color(colors[QUAD_BOTTOM_LEFT]); + if (mask & MASK_BOTTOM_RIGHT) + ctile[iy+1][ix+1] = pack_color(colors[QUAD_BOTTOM_RIGHT]); + } #endif } @@ -937,6 +955,9 @@ tri_draw(struct prim_header *tri, uint tx, uint ty) { struct setup_stage setup; + setup.tx = tx; + setup.ty = ty; + /* set clipping bounds to tile bounds */ setup.cliprect_minx = tx * TILE_SIZE; setup.cliprect_miny = ty * TILE_SIZE; -- cgit v1.2.3