From 53951531ae7bfd64afae1ae55aac7f6ebd3fe4f5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 12:35:51 -0600 Subject: cell: propogate blend color to SPUs for the fallback fragment ops code --- src/gallium/drivers/cell/common.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers/cell/common.h') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index b0169b8e32..3b5a25e165 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -118,12 +118,16 @@ /** * Command to specify per-fragment operations state and generated code. + * Note that the dsa, blend, blend_color fields are really only needed + * for the fallback/C per-pixel code. They're not used when we generate + * dynamic SPU fragment code (which is the normal case). */ struct cell_command_fragment_ops { uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ struct pipe_depth_stencil_alpha_state dsa; struct pipe_blend_state blend; + struct pipe_blend_color blend_color; unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS]; }; -- cgit v1.2.3 From ddeec1ed10d6c12403fe8d30c072ea68f044db99 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 13:55:18 -0600 Subject: cell: simplify spu debug code --- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_context.c | 1 + src/gallium/drivers/cell/spu/spu_command.c | 47 +++++++++++++---------------- src/gallium/drivers/cell/spu/spu_debug.h | 9 ------ src/gallium/drivers/cell/spu/spu_main.c | 9 +----- src/gallium/drivers/cell/spu/spu_main.h | 15 +++++++-- src/gallium/drivers/cell/spu/spu_render.c | 7 +++-- 7 files changed, 41 insertions(+), 48 deletions(-) (limited to 'src/gallium/drivers/cell/common.h') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 3b5a25e165..8ae78265f2 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -111,6 +111,7 @@ #define CELL_DEBUG_SYNC (1 << 2) 
#define CELL_DEBUG_FRAGMENT_OPS (1 << 3) #define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) +#define CELL_DEBUG_CMD (1 << 5) /** Max instructions for doing per-fragment operations */ #define SPU_MAX_FRAGMENT_OPS_INSTS 64 diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index b66aa9c9d9..f8d5eef3ac 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -93,6 +93,7 @@ static const struct debug_named_value cell_debug_flags[] = { {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ + {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */ {NULL, 0} }; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index b521c3aecf..ebbed3d1dc 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -44,7 +44,6 @@ #include "spu_tile.h" #include "spu_vertex_shader.h" #include "spu_dcache.h" -#include "spu_debug.h" #include "cell/common.h" @@ -97,7 +96,7 @@ release_buffer(uint buffer) static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { - DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); if (clear->surface == 0) { spu.fb.color_clear_value = clear->value; @@ -165,14 +164,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) #endif /* CLEAR_OPT */ - DEBUG_PRINTF("CLEAR SURF done\n"); + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n"); } static void cmd_release_verts(const struct cell_command_release_verts *release) { - DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf); + D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", 
release->vertex_buf); ASSERT(release->vertex_buf != ~0U); release_buffer(release->vertex_buf); } @@ -189,7 +188,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) { static int warned = 0; - DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n"); + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); /* Copy state info (for fallback case only) */ @@ -229,7 +228,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) static void cmd_state_fragment_program(const struct cell_command_fragment_program *fp) { - DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n"); + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_program_code, fp->code, SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); @@ -247,11 +246,11 @@ cmd_state_fs_constants(const uint64_t *buffer, uint pos) const float *constants = (const float *) &buffer[pos + 2]; uint i; - DEBUG_PRINTF("CMD_STATE_FS_CONSTANTS (%u)\n", num_const); + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const); /* Expand each float to float[4] for SOA execution */ for (i = 0; i < num_const; i++) { - DEBUG_PRINTF(" const[%u] = %f\n", i, constants[i]); + D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]); spu.constants[i] = spu_splats(constants[i]); } @@ -263,7 +262,7 @@ cmd_state_fs_constants(const uint64_t *buffer, uint pos) static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { - DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", + D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", cmd->width, cmd->height, cmd->color_start, @@ -352,7 +351,7 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) { uint unit = sampler->unit; - DEBUG_PRINTF("SAMPLER [%u]\n", unit); + D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit); 
spu.sampler[unit] = sampler->state; @@ -404,9 +403,7 @@ cmd_state_texture(const struct cell_command_texture *texture) const uint unit = texture->unit; uint i; - //if (spu.init.id==0) Debug=1; - - DEBUG_PRINTF("TEXTURE [%u]\n", texture->unit); + D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit); spu.texture[unit].max_level = 0; spu.texture[unit].target = texture->target; @@ -416,7 +413,7 @@ cmd_state_texture(const struct cell_command_texture *texture) uint height = texture->height[i]; uint depth = texture->depth[i]; - DEBUG_PRINTF(" LEVEL %u: at %p size[0] %u x %u\n", i, + D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i, texture->start[i], texture->width[i], texture->height[i]); spu.texture[unit].level[i].start = texture->start[i]; @@ -438,15 +435,13 @@ cmd_state_texture(const struct cell_command_texture *texture) } update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit); - - //Debug=0; } static void cmd_state_vertex_info(const struct vertex_info *vinfo) { - DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); + D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); ASSERT(vinfo->num_attribs >= 1); ASSERT(vinfo->num_attribs <= 8); memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); @@ -485,7 +480,7 @@ cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) static void cmd_finish(void) { - DEBUG_PRINTF("FINISH\n"); + D_PRINTF(CELL_DEBUG_CMD, "FINISH\n"); really_clear_tiles(0); /* wait for all outstanding DMAs to finish */ mfc_write_tag_mask(~0); @@ -510,7 +505,7 @@ cmd_batch(uint opcode) const unsigned usize = size / sizeof(buffer[0]); uint pos; - DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n", + D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n", buf, size, spu.init.buffers[buf]); ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); @@ -530,7 +525,7 @@ cmd_batch(uint opcode) wait_on_mask(1 << TAG_BATCH_BUFFER); /* Tell PPU we're done copying the buffer to local store */ 
- DEBUG_PRINTF("release batch buf %u\n", buf); + D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf); release_buffer(buf); /* @@ -663,7 +658,7 @@ cmd_batch(uint opcode) } } - DEBUG_PRINTF("BATCH complete\n"); + D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n"); } @@ -677,7 +672,7 @@ command_loop(void) struct cell_command cmd; int exitFlag = 0; - DEBUG_PRINTF("Enter command loop\n"); + D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); ASSERT((sizeof(struct cell_command) & 0xf) == 0); ASSERT_ALIGN16(&cmd); @@ -686,12 +681,12 @@ command_loop(void) unsigned opcode; int tag = 0; - DEBUG_PRINTF("Wait for cmd...\n"); + D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); /* read/wait from mailbox */ opcode = (unsigned int) spu_read_in_mbox(); - DEBUG_PRINTF("got cmd 0x%x\n", opcode); + D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); /* command payload */ mfc_get(&cmd, /* dest */ @@ -708,7 +703,7 @@ command_loop(void) switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: - DEBUG_PRINTF("EXIT\n"); + D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); exitFlag = 1; break; case CELL_CMD_VS_EXECUTE: @@ -725,7 +720,7 @@ command_loop(void) } - DEBUG_PRINTF("Exit command loop\n"); + D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); spu_dcache_report(); } diff --git a/src/gallium/drivers/cell/spu/spu_debug.h b/src/gallium/drivers/cell/spu/spu_debug.h index eeec052655..25653dcdcd 100644 --- a/src/gallium/drivers/cell/spu/spu_debug.h +++ b/src/gallium/drivers/cell/spu/spu_debug.h @@ -30,28 +30,19 @@ #define SPU_DEBUG_H -/* Set to 0 to disable all extraneous debugging code */ -#define DEBUG 1 - #if DEBUG -extern boolean Debug; -extern boolean force_fragment_ops_fallback; /* These debug macros use the unusual construction ", ##__VA_ARGS__" * which expands to the expected comma + args if variadic arguments * are supplied, but swallows the comma if there are no variadic * arguments (which avoids syntax errors that would otherwise occur). */ -#define DEBUG_PRINTF(format,...) 
\ - if (Debug) \ - printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) #define D_PRINTF(flag, format,...) \ if (spu.init.debug_flags & (flag)) \ printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) #else -#define DEBUG_PRINTF(...) #define D_PRINTF(...) #endif diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 4becd0f92a..c8bb251905 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -40,7 +40,6 @@ #include "spu_per_fragment_op.h" #include "spu_texture.h" //#include "spu_test.h" -#include "spu_debug.h" #include "cell/common.h" @@ -53,12 +52,6 @@ helpful headers: struct spu_global spu; -#if DEBUG -boolean Debug = FALSE; -boolean force_fragment_ops_fallback = TRUE; -#endif - - static void one_time_init(void) { @@ -102,7 +95,7 @@ main(main_param_t speid, main_param_t argp) one_time_init(); - DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid); + D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); /* get initialization data */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index ca72baea8b..569b9e45d4 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -36,6 +36,19 @@ #include "pipe/p_state.h" +#if DEBUG +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + */ +#define D_PRINTF(flag, format,...) \ + if (spu.init.debug_flags & (flag)) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) +#else +#define D_PRINTF(...) 
+#endif + #define MAX_WIDTH 1024 #define MAX_HEIGHT 1024 @@ -187,8 +200,6 @@ struct spu_global extern struct spu_global spu; -extern boolean Debug; - diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 82dbeb26b7..cfff19b6c0 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -177,7 +177,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) uint i, j; - if (Debug) { +#if 0 printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " "inline_vert=%u\n", spu.init.id, @@ -190,7 +190,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) printf(" bound: %g, %g .. %g, %g\n", render->xmin, render->ymin, render->xmax, render->ymax); */ - } +#endif ASSERT(sizeof(*render) % 4 == 0); ASSERT(total_vertex_bytes % 16 == 0); @@ -293,7 +293,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) spu.ztile_status[ty][tx] = spu.cur_ztile_status; } - if (Debug) +#if 0 printf("SPU %u: RENDER done\n", spu.init.id); +#endif } -- cgit v1.2.3 From 79e96b3a77f7d5c7136b380abcc675c7242d0ffe Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 13:58:58 -0600 Subject: cell: move some CELL_MAX constants --- src/gallium/drivers/cell/common.h | 6 +++++- src/gallium/drivers/cell/spu/spu_main.h | 11 ++--------- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers/cell/common.h') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 8ae78265f2..d716a26175 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -68,6 +68,9 @@ #define CELL_MAX_SAMPLERS 4 #define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */ +#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */ +#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */ +#define CELL_MAX_HEIGHT 1024 /**< max framebuffer height */ #define TILE_SIZE 32 @@ -99,13 +102,14 @@ #define 
CELL_CMD_VS_EXECUTE 22 #define CELL_CMD_FLUSH_BUFFER_RANGE 23 - +/** Command/batch buffers */ #define CELL_NUM_BUFFERS 4 #define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */ #define CELL_BUFFER_STATUS_FREE 10 #define CELL_BUFFER_STATUS_USED 20 +/** Debug flags */ #define CELL_DEBUG_CHECKER (1 << 0) #define CELL_DEBUG_ASM (1 << 1) #define CELL_DEBUG_SYNC (1 << 2) diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 569b9e45d4..f87495b72d 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -50,13 +50,6 @@ #endif -#define MAX_WIDTH 1024 -#define MAX_HEIGHT 1024 - - -#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */ - - /** * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. * The data may be addressed through several different types. @@ -175,8 +168,8 @@ struct spu_global ubyte cur_ctile_status, cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; /** Current fragment ops machine code, at 8-byte boundary */ uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB; -- cgit v1.2.3 From 0eb0b0a816764a323af7a8d2b5cb6792f886ce04 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 14:12:55 -0600 Subject: cell: remove some old, pre-batchbuffer stuff --- src/gallium/drivers/cell/common.h | 14 -------------- src/gallium/drivers/cell/ppu/cell_spu.c | 5 +---- src/gallium/drivers/cell/ppu/cell_spu.h | 3 +-- src/gallium/drivers/cell/spu/spu_command.c | 19 ------------------- 4 files changed, 2 insertions(+), 39 deletions(-) (limited to 'src/gallium/drivers/cell/common.h') diff 
--git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d716a26175..600f1b37a2 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -269,19 +269,6 @@ struct cell_command_texture }; -/** XXX unions don't seem to work */ -/* XXX this should go away; all commands should be placed in batch buffers */ -struct cell_command -{ -#if 0 - struct cell_command_framebuffer fb; - struct cell_command_clear_surface clear; - struct cell_command_render render; -#endif - struct cell_command_vs vs; -} ALIGN16_ATTRIB; - - #define MAX_SPU_FUNCTIONS 12 /** * Used to tell the PPU about the address of particular functions in the @@ -302,7 +289,6 @@ struct cell_init_info unsigned id; unsigned num_spus; unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ - struct cell_command *cmd; /** Buffers for command batches, vertex/index data */ ubyte *buffers[CELL_NUM_BUFFERS]; diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index df020c4146..90745da3d2 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -126,9 +126,6 @@ cell_start_spus(struct cell_context *cell) assert(cell->num_spus <= MAX_SPUS); - ASSERT_ALIGN16(&cell_global.command[0]); - ASSERT_ALIGN16(&cell_global.command[1]); - ASSERT_ALIGN16(&cell_global.inits[0]); ASSERT_ALIGN16(&cell_global.inits[1]); @@ -141,7 +138,7 @@ cell_start_spus(struct cell_context *cell) cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; cell_global.inits[i].debug_flags = cell->debug_flags; - cell_global.inits[i].cmd = &cell_global.command[i]; + for (j = 0; j < CELL_NUM_BUFFERS; j++) { cell_global.inits[i].buffers[j] = cell->buffer[j]; } diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h index 137f26612e..3443331b01 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -50,10 +50,9 @@ 
struct cell_global_info pthread_t spe_threads[MAX_SPUS]; /** - * Data sent to SPUs + * Data sent to SPUs at start-up */ struct cell_init_info inits[MAX_SPUS]; - struct cell_command command[MAX_SPUS]; }; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index ebbed3d1dc..4febd5385b 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -669,38 +669,19 @@ cmd_batch(uint opcode) void command_loop(void) { - struct cell_command cmd; int exitFlag = 0; D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); - ASSERT((sizeof(struct cell_command) & 0xf) == 0); - ASSERT_ALIGN16(&cmd); - while (!exitFlag) { unsigned opcode; - int tag = 0; D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); /* read/wait from mailbox */ opcode = (unsigned int) spu_read_in_mbox(); - D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); - /* command payload */ - mfc_get(&cmd, /* dest */ - (unsigned int) spu.init.cmd, /* src */ - sizeof(struct cell_command), /* bytes */ - tag, - 0, /* tid */ - 0 /* rid */); - wait_on_mask( 1 << tag ); - - /* - * NOTE: most commands should be contained in a batch buffer - */ - switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); -- cgit v1.2.3 From 67f615681c569264eab1bc901473c86cfc54e480 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 14:18:51 -0600 Subject: cell: use CELL_MAX_SPUS consistently. 
--- src/gallium/drivers/cell/common.h | 2 +- src/gallium/drivers/cell/ppu/cell_spu.c | 2 +- src/gallium/drivers/cell/ppu/cell_spu.h | 8 +++----- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers/cell/common.h') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 600f1b37a2..1f6f2d494b 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -64,7 +64,7 @@ #define ROUNDUP16(k) (((k) + 0xf) & ~0xf) -#define CELL_MAX_SPUS 6 +#define CELL_MAX_SPUS 8 #define CELL_MAX_SAMPLERS 4 #define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 90745da3d2..a6e268b362 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -124,7 +124,7 @@ cell_start_spus(struct cell_context *cell) one_time_init = TRUE; - assert(cell->num_spus <= MAX_SPUS); + assert(cell->num_spus <= CELL_MAX_SPUS); ASSERT_ALIGN16(&cell_global.inits[0]); ASSERT_ALIGN16(&cell_global.inits[1]); diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h index 3443331b01..2e965c6301 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -36,8 +36,6 @@ #include "cell_context.h" -#define MAX_SPUS 8 - /** * Global vars, for now anyway. 
*/ @@ -46,13 +44,13 @@ struct cell_global_info /** * SPU/SPE handles, etc */ - spe_context_ptr_t spe_contexts[MAX_SPUS]; - pthread_t spe_threads[MAX_SPUS]; + spe_context_ptr_t spe_contexts[CELL_MAX_SPUS]; + pthread_t spe_threads[CELL_MAX_SPUS]; /** * Data sent to SPUs at start-up */ - struct cell_init_info inits[MAX_SPUS]; + struct cell_init_info inits[CELL_MAX_SPUS]; }; -- cgit v1.2.3 From ec7d6c656178babdf143faa242f7a3df9d0bc22c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 14:39:16 -0600 Subject: cell: send rasterizer state to SPUs in proper way, remove front_winding hack --- src/gallium/drivers/cell/common.h | 18 ++++++++++++++---- src/gallium/drivers/cell/ppu/cell_state_emit.c | 7 +++++++ src/gallium/drivers/cell/ppu/cell_vbuf.c | 1 - src/gallium/drivers/cell/spu/spu_command.c | 8 ++++++++ src/gallium/drivers/cell/spu/spu_main.h | 1 + src/gallium/drivers/cell/spu/spu_render.c | 2 +- src/gallium/drivers/cell/spu/spu_tri.c | 4 ++-- src/gallium/drivers/cell/spu/spu_tri.h | 2 +- 8 files changed, 34 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/cell/common.h') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 1f6f2d494b..0ff2c491fb 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -99,8 +99,9 @@ #define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 #define CELL_CMD_STATE_ATTRIB_FETCH 20 #define CELL_CMD_STATE_FS_CONSTANTS 21 -#define CELL_CMD_VS_EXECUTE 22 -#define CELL_CMD_FLUSH_BUFFER_RANGE 23 +#define CELL_CMD_STATE_RASTERIZER 22 +#define CELL_CMD_VS_EXECUTE 23 +#define CELL_CMD_FLUSH_BUFFER_RANGE 24 /** Command/batch buffers */ #define CELL_NUM_BUFFERS 4 @@ -156,13 +157,23 @@ struct cell_command_fragment_program */ struct cell_command_framebuffer { - uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */ + uint64_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */ int width, height; void *color_start, *depth_start; enum pipe_format color_format, depth_format; }; +/** + * 
Tell SPUs about rasterizer state. + */ +struct cell_command_rasterizer +{ + uint64_t opcode; /**< CELL_CMD_STATE_RASTERIZER */ + struct pipe_rasterizer_state rasterizer; +}; + + /** * Clear framebuffer to the given value/color. */ @@ -238,7 +249,6 @@ struct cell_command_render float xmin, ymin, xmax, ymax; /* XXX another dummy field */ uint min_index; boolean inline_verts; - uint front_winding; /* the rasterizer needs to be able to determine facing to apply front/back-facing stencil */ }; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index d2427584ba..e6387382f2 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -147,6 +147,13 @@ cell_emit_state(struct cell_context *cell) #endif } + if (cell->dirty & (CELL_NEW_RASTERIZER)) { + struct cell_command_rasterizer *rast = + cell_batch_alloc(cell, sizeof(*rast)); + rast->opcode = CELL_CMD_STATE_RASTERIZER; + rast->rasterizer = *cell->rasterizer; + } + if (cell->dirty & (CELL_NEW_FS)) { /* Send new fragment program to SPUs */ struct cell_command_fragment_program *fp diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index 578ddf62dc..aa63435b93 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -214,7 +214,6 @@ cell_vbuf_draw(struct vbuf_render *vbr, render->opcode = CELL_CMD_RENDER; render->prim_type = cvbr->prim; - render->front_winding = cell->rasterizer->front_winding; render->num_indexes = nr_indices; render->min_index = min_index; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 4febd5385b..d2c282a022 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -583,6 +583,14 @@ cmd_batch(uint opcode) case CELL_CMD_STATE_FS_CONSTANTS: pos = cmd_state_fs_constants(buffer, pos); break; + case 
CELL_CMD_STATE_RASTERIZER: + { + struct cell_command_rasterizer *rast = + (struct cell_command_rasterizer *) &buffer[pos]; + spu.rasterizer = rast->rasterizer; + pos += sizeof(*rast) / 8; + } + break; case CELL_CMD_STATE_SAMPLER: { struct cell_command_sampler *sampler diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index f87495b72d..4099e52699 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -153,6 +153,7 @@ struct spu_global struct pipe_blend_state blend; struct pipe_blend_color blend_color; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; + struct pipe_rasterizer_state rasterizer; struct spu_texture texture[PIPE_MAX_SAMPLERS]; struct vertex_info vertex_info; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index cfff19b6c0..75a7f75abc 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -279,7 +279,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) v1 = (const float *) (vertices + indexes[j+1] * vertex_size); v2 = (const float *) (vertices + indexes[j+2] * vertex_size); - drawn += tri_draw(v0, v1, v2, tx, ty, render->front_winding); + drawn += tri_draw(v0, v1, v2, tx, ty); } //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 2417db8960..1519b8cd7e 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -775,7 +775,7 @@ determinant(const float *v0, const float *v1, const float *v2) */ boolean tri_draw(const float *v0, const float *v1, const float *v2, - uint tx, uint ty, uint front_winding) + uint tx, uint ty) { setup.tx = tx; setup.ty = ty; @@ -790,7 +790,7 @@ tri_draw(const float *v0, const float *v1, const float *v2, * which will be needed for front/back-face 
stencil application */ float det = determinant(v0, v1, v2); - setup.facing = (det > 0.0) ^ (front_winding == PIPE_WINDING_CW); + setup.facing = (det > 0.0) ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); if (!setup_sort_vertices((struct vertex_header *) v0, (struct vertex_header *) v1, diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h index abc3d35160..aa694dd7c9 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.h +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -31,7 +31,7 @@ extern boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding); +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); #endif /* SPU_TRI_H */ -- cgit v1.2.3 From 0116ee1d1c341726b6ed23c2dddc4515e8a34385 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 20:46:43 -0600 Subject: cell: start some performance measurements Use the spu_write_decrementer() and spu_read_decrementer() functions to measure time. Convert to milliseconds according to the system timebase value. 
--- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_spu.c | 31 ++++++++++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_command.c | 15 +++++++++++++++ src/gallium/drivers/cell/spu/spu_render.c | 9 ++++++++- 4 files changed, 55 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/cell/common.h') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 0ff2c491fb..469d56cda8 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -299,6 +299,7 @@ struct cell_init_info unsigned id; unsigned num_spus; unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ + float inv_timebase; /**< 1.0/timebase, for perf measurement */ /** Buffers for command batches, vertex/index data */ ubyte *buffers[CELL_NUM_BUFFERS]; diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index a6e268b362..28e5e6d706 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -52,6 +52,35 @@ helpful headers: struct cell_global_info cell_global; +/** + * Scan /proc/cpuinfo to determine the timebase for the system. + * This is used by the SPUs to convert 'decrementer' ticks to seconds. + * There may be a better way to get this value... + */ +static unsigned +get_timebase(void) +{ + FILE *f = fopen("/proc/cpuinfo", "r"); + unsigned timebase; + + assert(f); + while (!feof(f)) { + char line[80]; + fgets(line, sizeof(line), f); + if (strncmp(line, "timebase", 8) == 0) { + char *colon = strchr(line, ':'); + if (colon) { + timebase = atoi(colon + 2); + break; + } + } + } + fclose(f); + + return timebase; +} + + /** * Write a 1-word message to the given SPE mailbox. 
*/ @@ -115,6 +144,7 @@ cell_start_spus(struct cell_context *cell) { static boolean one_time_init = FALSE; uint i, j; + uint timebase = get_timebase(); if (one_time_init) { fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " @@ -138,6 +168,7 @@ cell_start_spus(struct cell_context *cell) cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; cell_global.inits[i].debug_flags = cell->debug_flags; + cell_global.inits[i].inv_timebase = 1000.0f / timebase; for (j = 0; j < CELL_NUM_BUFFERS; j++) { cell_global.inits[i].buffers[j] = cell->buffer[j]; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index d2c282a022..57d265fef7 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -670,6 +670,8 @@ cmd_batch(uint opcode) } +#define PERF 0 + /** * Main loop for SPEs: Get a command, execute it, repeat. @@ -678,6 +680,7 @@ void command_loop(void) { int exitFlag = 0; + uint t0, t1; D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); @@ -686,10 +689,16 @@ command_loop(void) D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); + if (PERF) + spu_write_decrementer(~0); + /* read/wait from mailbox */ opcode = (unsigned int) spu_read_in_mbox(); D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); + if (PERF) + t0 = spu_read_decrementer(); + switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); @@ -707,6 +716,12 @@ command_loop(void) printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); } + if (PERF) { + t1 = spu_read_decrementer(); + printf("wait mbox time: %gms batch time: %gms\n", + (~0u - t0) * spu.init.inv_timebase, + (t0 - t1) * spu.init.inv_timebase); + } } D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 802455bf79..5515bb55c9 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ 
b/src/gallium/drivers/cell/spu/spu_render.c @@ -175,6 +175,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) const ubyte *vertices; const ushort *indexes; uint i, j; + uint num_tiles; D_PRINTF(CELL_DEBUG_CMD, "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", @@ -242,6 +243,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + num_tiles = 0; + /** ** loop over tiles, rendering tris **/ @@ -255,6 +258,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) if (!my_tile(tx, ty)) continue; + num_tiles++; + spu.cur_ctile_status = spu.ctile_status[ty][tx]; spu.cur_ztile_status = spu.ztile_status[ty][tx]; @@ -284,5 +289,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) spu.ztile_status[ty][tx] = spu.cur_ztile_status; } - D_PRINTF(CELL_DEBUG_CMD, "RENDER done\n"); + D_PRINTF(CELL_DEBUG_CMD, + "RENDER done (%u tiles hit)\n", + num_tiles); } -- cgit v1.2.3 From 9fa8671c73fa44a95e2ea7fed6047bddb042796f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 20:25:28 -0600 Subject: cell: add new debug flag (cache) to report texture cache stats on exit --- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_context.c | 1 + src/gallium/drivers/cell/spu/spu_command.c | 3 ++- src/gallium/drivers/cell/spu/spu_dcache.c | 4 +++- 4 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/cell/common.h') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 469d56cda8..9ca4e9d67e 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -117,6 +117,7 @@ #define CELL_DEBUG_FRAGMENT_OPS (1 << 3) #define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) #define CELL_DEBUG_CMD (1 << 5) +#define CELL_DEBUG_CACHE (1 << 6) /** Max instructions for doing per-fragment operations */ #define SPU_MAX_FRAGMENT_OPS_INSTS 64 diff --git 
a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 4dad490ce1..7a2d93ecb4 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -94,6 +94,7 @@ static const struct debug_named_value cell_debug_flags[] = { {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */ + {"cache", CELL_DEBUG_CACHE}, /**< report texture cache stats on exit */ {NULL, 0} }; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index ff4a52d79a..9c853c0961 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -720,5 +720,6 @@ command_loop(void) D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); - spu_dcache_report(); + if (spu.init.debug_flags & CELL_DEBUG_CACHE) + spu_dcache_report(); } diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c index 167404cdc5..a6d67634fd 100644 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -36,7 +36,9 @@ #define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0) #define CACHE_LOG2NNWAY 2 #define CACHE_LOG2NSETS 6 -/*#define CACHE_STATS 1*/ +#ifdef DEBUG +#define CACHE_STATS 1 +#endif #include /* Yes folks, this is ugly. -- cgit v1.2.3 From 70dd4379d2cd54f229c3940312537912470218d3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 10:34:13 -0600 Subject: cell: implement fencing for texture buffers If we delete a texture, we need to keep the underlying tiled data buffer around until any rendering that references it has completed. Keep a list of buffers referenced by a rendering batch. Unref/free them when the associated batch's fence is executed/signalled. 
--- src/gallium/drivers/cell/common.h | 25 ++++ src/gallium/drivers/cell/ppu/Makefile | 1 + src/gallium/drivers/cell/ppu/cell_batch.c | 32 +++++ src/gallium/drivers/cell/ppu/cell_context.c | 6 + src/gallium/drivers/cell/ppu/cell_context.h | 21 ++++ src/gallium/drivers/cell/ppu/cell_fence.c | 158 +++++++++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_fence.h | 57 +++++++++ src/gallium/drivers/cell/ppu/cell_state_emit.c | 2 +- src/gallium/drivers/cell/ppu/cell_texture.c | 33 ++++-- src/gallium/drivers/cell/ppu/cell_texture.h | 5 +- src/gallium/drivers/cell/ppu/cell_vbuf.c | 6 + src/gallium/drivers/cell/spu/spu_command.c | 38 +++++- src/gallium/drivers/cell/spu/spu_main.h | 2 +- 13 files changed, 367 insertions(+), 19 deletions(-) create mode 100644 src/gallium/drivers/cell/ppu/cell_fence.c create mode 100644 src/gallium/drivers/cell/ppu/cell_fence.h (limited to 'src/gallium/drivers/cell/common.h') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 9ca4e9d67e..23fb0b0831 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -102,6 +102,8 @@ #define CELL_CMD_STATE_RASTERIZER 22 #define CELL_CMD_VS_EXECUTE 23 #define CELL_CMD_FLUSH_BUFFER_RANGE 24 +#define CELL_CMD_FENCE 25 + /** Command/batch buffers */ #define CELL_NUM_BUFFERS 4 @@ -123,6 +125,29 @@ #define SPU_MAX_FRAGMENT_OPS_INSTS 64 + +#define CELL_FENCE_IDLE 0 +#define CELL_FENCE_EMITTED 1 +#define CELL_FENCE_SIGNALLED 2 + +struct cell_fence +{ + /** There's a 16-byte status qword per SPU */ + volatile uint status[CELL_MAX_SPUS][4]; +}; + + +/** + * Fence command sent to SPUs. In response, the SPUs will write + * CELL_FENCE_SIGNALLED back to the fence status word in main memory. + */ +struct cell_command_fence +{ + uint64_t opcode; /**< CELL_CMD_FENCE */ + struct cell_fence *fence; +}; + + /** * Command to specify per-fragment operations state and generated code. 
* Note that the dsa, blend, blend_color fields are really only needed diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index b28f4c5c31..9358a47284 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -24,6 +24,7 @@ SOURCES = \ cell_clear.c \ cell_context.c \ cell_draw_arrays.c \ + cell_fence.c \ cell_flush.c \ cell_gen_fragment.c \ cell_gen_fp.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c index 01254aed60..448b723d85 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -28,6 +28,7 @@ #include "cell_context.h" #include "cell_batch.h" +#include "cell_fence.h" #include "cell_spu.h" @@ -63,6 +64,10 @@ cell_get_empty_buffer(struct cell_context *cell) printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries); */ prev_buffer = buf; + + /* release tex buffer associated w/ prev use of this batch buf */ + cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]); + return buf; } } @@ -84,6 +89,26 @@ cell_get_empty_buffer(struct cell_context *cell) } +/** + * Append a fence command to the current batch buffer. + * Note that we're sure there's always room for this because of the + * adjusted size check in cell_batch_free_space(). + */ +static void +emit_fence(struct cell_context *cell) +{ + const uint batch = cell->cur_batch; + const uint size = cell->buffer_size[batch]; + struct cell_command_fence *fence_cmd; + + ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE); + + fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size); + fence_cmd->opcode = CELL_CMD_FENCE; + fence_cmd->fence = &cell->fenced_buffers[batch].fence; +} + + /** * Flush the current batch buffer to the SPUs. 
* An empty buffer will be found and set as the new current batch buffer @@ -102,6 +127,12 @@ cell_batch_flush(struct cell_context *cell) if (size == 0) return; + /* Before we use this batch buffer, make sure any fenced texture buffers + * are released. + */ + if (cell->fenced_buffers[batch].head) + emit_fence(cell); + flushing = TRUE; assert(batch < CELL_NUM_BUFFERS); @@ -142,6 +173,7 @@ uint cell_batch_free_space(const struct cell_context *cell) { uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; + free -= sizeof(struct cell_command_fence); return free; } diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 7a2d93ecb4..22d552d8e3 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -47,6 +47,7 @@ #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" +#include "cell_fence.h" #include "cell_flush.h" #include "cell_state.h" #include "cell_surface.h" @@ -104,6 +105,7 @@ cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws) { struct cell_context *cell; + uint i; /* some fields need to be 16-byte aligned, so align the whole object */ cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); @@ -151,6 +153,10 @@ cell_create_context(struct pipe_screen *screen, cell_debug_flags, 0 ); + for (i = 0; i < CELL_NUM_BUFFERS; i++) + cell_fence_init(&cell->fenced_buffers[i].fence); + + /* * SPU stuff */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index ad1f4829a4..4491ae8cdf 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -81,6 +81,19 @@ struct cell_fragment_ops_key }; +struct cell_buffer_node; + +/** + * Fenced buffer list. List of buffers which can be unreferenced after + * the fence has been executed/signalled. 
+ */ +struct cell_buffer_list +{ + struct cell_fence fence; + struct cell_buffer_node *head; +}; + + /** * Per-context state, subclass of pipe_context. */ @@ -154,6 +167,14 @@ struct cell_context uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + /** Associated with each command/batch buffer is a list of pipe_buffers + * that are fenced. When the last command in a buffer is executed, the + * fence will be signalled, indicating that any pipe_buffers preceding + * that fence can be unreferenced (and probably freed). + */ + struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS]; + + struct spe_function attrib_fetch; unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c new file mode 100644 index 0000000000..ffb3bea12b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -0,0 +1,158 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <unistd.h> +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_fence.h" +#include "cell_texture.h" + + +void +cell_fence_init(struct cell_fence *fence) +{ + uint i; + for (i = 0; i < CELL_MAX_SPUS; i++) { + fence->status[i][0] = CELL_FENCE_IDLE; + } +} + + +boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence) +{ + uint i; + for (i = 0; i < cell->num_spus; i++) { + //ASSERT(fence->status[i][0] != CELL_FENCE_IDLE); + if (fence->status[i][0] == CELL_FENCE_EMITTED) + return FALSE; + } + return TRUE; +} + + +void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence) +{ + while (!cell_fence_signalled(cell, fence)) { + usleep(10); + } +} + + + + +struct cell_buffer_node +{ + struct pipe_buffer *buffer; + struct cell_buffer_node *next; +}; + + +static void +cell_add_buffer_to_list(struct cell_context *cell, + struct cell_buffer_list *list, + struct pipe_buffer *buffer) +{ + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node); + /* create new list node which references the buffer, insert at head */ + if (node) { + pipe_buffer_reference(ps, &node->buffer, buffer); + node->next = list->head; + list->head = node; + } +} + + +/** + * Wait for completion of the given fence, then unreference any buffers + * on the list. + * This typically unrefs/frees texture buffers after any rendering which uses + * them has completed. 
+ */ +void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list) +{ + if (list->head) { + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node; + + cell_fence_finish(cell, &list->fence); + + /* traverse the list, unreferencing buffers, freeing nodes */ + node = list->head; + while (node) { + struct cell_buffer_node *next = node->next; + assert(node->buffer); + pipe_buffer_unmap(ps, node->buffer); +#if 0 + printf("Unref buffer %p\n", node->buffer); + if (node->buffer->refcount == 1) + printf(" Delete!\n"); +#endif + pipe_buffer_reference(ps, &node->buffer, NULL); + FREE(node); + node = next; + } + list->head = NULL; + } +} + + +/** + * This should be called for each render command. + * Any texture buffers that are currently bound will be added to a fenced + * list to be freed later when the fence is executed/signalled. + */ +void +cell_add_fenced_textures(struct cell_context *cell) +{ + struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch]; + uint i; + + for (i = 0; i < cell->num_textures; i++) { + struct cell_texture *ct = cell->texture[i]; + if (ct) { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + if (ct->tiled_buffer[level]) { +#if 0 + printf("Adding texture %p buffer %p to list\n", + ct, ct->tiled_buffer[level]); +#endif + cell_add_buffer_to_list(cell, list, ct->tiled_buffer[level]); + } + } + } + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h new file mode 100644 index 0000000000..536b4ba411 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef CELL_FENCE_H +#define CELL_FENCE_H + + +extern void +cell_fence_init(struct cell_fence *fence); + + +extern boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence); + + +extern void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence); + + + +extern void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list); + + +extern void +cell_add_fenced_textures(struct cell_context *cell); + + +#endif /* CELL_FENCE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index effcd2a1e1..dd2d7f7d1e 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -225,7 +225,7 @@ cell_emit_state(struct cell_context *cell) if (cell->texture[i]) { uint level; for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { - texture->start[level] = cell->texture[i]->tiled_data[level]; + texture->start[level] = cell->texture[i]->tiled_mapped[level]; texture->width[level] = cell->texture[i]->base.width[level]; texture->height[level] = cell->texture[i]->base.height[level]; texture->depth[level] = cell->texture[i]->base.depth[level]; diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 9c6741f1bc..9ac2f3bbb9 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -136,6 +136,9 @@ cell_texture_release(struct pipe_screen *screen, __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); */ if (--(*pt)->refcount <= 0) { + /* Delete this texture now. + * But note that the underlying pipe_buffer may linger... 
+ */ struct cell_texture *ct = cell_texture(*pt); uint i; @@ -146,14 +149,12 @@ cell_texture_release(struct pipe_screen *screen, pipe_buffer_reference(screen, &ct->buffer, NULL); for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { - if (ct->tiled_data[i]) { - /* XXX need to use a fenced buffer for tiled data so that - * it's properly freed after rendering has completed. - * Disabling this free() allows glDrawPixels to work for now. - */ -#if 0 - align_free(ct->tiled_data[i]); -#endif + /* Unreference the tiled image buffer. + * It may not actually be deleted until a fence is hit. + */ + if (ct->tiled_buffer[i]) { + ct->tiled_mapped[i] = NULL; + winsys_buffer_reference(screen->winsys, &ct->tiled_buffer[i], NULL); } } @@ -234,12 +235,18 @@ cell_twiddle_texture(struct pipe_screen *screen, int offset = bufWidth * bufHeight * 4 * surface->face; uint *dst; - if (!ct->tiled_data[level]) { - ct->tiled_data[level] = - align_malloc(bufWidth * bufHeight * 4 * numFaces, 16); + if (!ct->tiled_buffer[level]) { + /* allocate buffer for tiled data now */ + struct pipe_winsys *ws = screen->winsys; + uint bytes = bufWidth * bufHeight * 4 * numFaces; + ct->tiled_buffer[level] = ws->buffer_create(ws, 16, + PIPE_BUFFER_USAGE_PIXEL, + bytes); + /* and map it */ + ct->tiled_mapped[level] = ws->buffer_map(ws, ct->tiled_buffer[level], + PIPE_BUFFER_USAGE_GPU_READ); } - - dst = (uint *) ((ubyte *) ct->tiled_data[level] + offset); + dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset); twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, surface->stride, src); diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index a0757091b0..2f5fe0dd1b 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -48,7 +48,10 @@ struct cell_texture struct pipe_buffer *buffer; unsigned long buffer_size; - void *tiled_data[CELL_MAX_TEXTURE_LEVELS]; /* XXX this may be temporary */ /*ALIGN16*/ + /** 
Texture data in tiled layout is held here */ + struct pipe_buffer *tiled_buffer[CELL_MAX_TEXTURE_LEVELS]; + /** Mapped, tiled texture data */ + void *tiled_mapped[CELL_MAX_TEXTURE_LEVELS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index aa63435b93..65ba51b6bb 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -38,6 +38,7 @@ #include "cell_batch.h" #include "cell_context.h" +#include "cell_fence.h" #include "cell_flush.h" #include "cell_spu.h" #include "cell_vbuf.h" @@ -108,6 +109,11 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, __FUNCTION__, cvbr->vertex_buf, vertices_used); */ + /* Make sure texture buffers aren't released until we're done rendering + * with them. + */ + cell_add_fenced_textures(cell); + /* Tell SPUs they can release the vert buf */ if (cvbr->vertex_buf != ~0U) { struct cell_command_release_verts *release diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 9c853c0961..a6ed29ea63 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -76,9 +76,10 @@ static void release_buffer(uint buffer) { /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const uint status[4] ALIGN16_ATTRIB - = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; - + static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE}; const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); uint *dst = spu.init.buffer_status + index; @@ -93,6 +94,29 @@ release_buffer(uint buffer) } +/** + * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory. + * There's a qword of status per SPU. 
+ */ +static void +cmd_fence(struct cell_command_fence *fence_cmd) +{ + static const vector unsigned int status = {CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED}; + uint *dst = (uint *) fence_cmd->fence; + dst += 4 * spu.init.id; /* main store/memory address, not local store */ + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_FENCE, /* tag */ + 0, /* tid */ + 0 /* rid */); +} + + static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { @@ -637,6 +661,14 @@ cmd_batch(uint opcode) cmd_finish(); pos += 1; break; + case CELL_CMD_FENCE: + { + struct cell_command_fence *fence_cmd = + (struct cell_command_fence *) &buffer[pos]; + cmd_fence(fence_cmd); + pos += sizeof(*fence_cmd) / 8; + } + break; case CELL_CMD_RELEASE_VERTS: { struct cell_command_release_verts *release diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 95ef4c9244..668af10be2 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -210,7 +210,7 @@ extern struct spu_global spu; #define TAG_DCACHE1 21 #define TAG_DCACHE2 22 #define TAG_DCACHE3 23 - +#define TAG_FENCE 24 static INLINE void -- cgit v1.2.3