From 4e506f422a13b20fcc95edb6c7048a9de6e32efa Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 10:53:48 -0600 Subject: cell: fix/add some fallback blend cases --- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 49 ++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index d252fa6dc1..9404704abf 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -40,6 +40,24 @@ #define LINEAR_QUAD_LAYOUT 1 +static INLINE vector float +spu_min(vector float a, vector float b) +{ + vector unsigned int m; + m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ + return spu_sel(a, b, m); +} + + +static INLINE vector float +spu_max(vector float a, vector float b) +{ + vector unsigned int m; + m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ + return spu_sel(b, a, m); +} + + /** * Called by rasterizer for each quad after the shader has run. Do * all the per-fragment operations including alpha test, z test, @@ -293,9 +311,9 @@ spu_fallback_fragment_ops(uint x, uint y, */ switch (spu.blend.rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: - term2r = fragR; - term2g = fragG; - term2b = fragB; + term2r = fbRGBA[0]; + term2g = fbRGBA[1]; + term2b = fbRGBA[2]; break; case PIPE_BLENDFACTOR_ZERO: term2r = @@ -361,8 +379,24 @@ spu_fallback_fragment_ops(uint x, uint y, fragG = spu_sub(term1g, term2g); fragB = spu_sub(term1b, term2b); break; + case PIPE_BLEND_REVERSE_SUBTRACT: + fragR = spu_sub(term2r, term1r); + fragG = spu_sub(term2g, term1g); + fragB = spu_sub(term2b, term1b); + break; + case PIPE_BLEND_MIN: + fragR = spu_min(term1r, term2r); + fragG = spu_min(term1g, term2g); + fragB = spu_min(term1b, term2b); + break; + case PIPE_BLEND_MAX: + fragR = spu_max(term1r, term2r); + fragG = spu_max(term1g, term2g); + fragB = spu_max(term1b, term2b); + break; /* XXX more cases */ default: + printf("unsup 0x%x\n", spu.blend.rgb_func); ASSERT(0); } @@ -376,6 +410,15 @@ spu_fallback_fragment_ops(uint x, uint y, case PIPE_BLEND_SUBTRACT: fragA = spu_sub(term1a, term2a); break; + case PIPE_BLEND_REVERSE_SUBTRACT: + fragA = spu_sub(term2a, term1a); + break; + case PIPE_BLEND_MIN: + fragA = spu_min(term1a, term2a); + break; + case PIPE_BLEND_MAX: + fragA = spu_max(term1a, term2a); + break; /* XXX more cases */ default: ASSERT(0); -- cgit v1.2.3 From f60c756ed14f25731ff2a52d6b695ceb5b7a6f6b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 10:54:06 -0600 Subject: cell: additional debug --- src/gallium/drivers/cell/spu/spu_command.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index c28677ebf8..a07b312111 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -250,6 +250,7 @@ cmd_state_fs_constants(const uint64_t *buffer, uint pos) /* Expand each float to float[4] for SOA execution */ for (i = 0; i < num_const; i++) { + DEBUG_PRINTF(" const[%u] = %f\n", i, constants[i]); spu.constants[i] = spu_splats(constants[i]); } -- cgit v1.2.3 From 9382a7100fd6de6e615dc661ed813bf43e24ec15 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 10:54:36 -0600 Subject: cell: updated vertex dump/debug code --- src/gallium/drivers/cell/spu/spu_tri.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 03f094373d..2417db8960 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -404,11 +404,14 @@ flush_spans(void) static void print_vertex(const struct vertex_header *v) { - int i; - fprintf(stderr, "Vertex: (%p)\n", v); - for (i = 0; i < setup.quad.nr_attrs; i++) { - fprintf(stderr, " %d: %f %f %f %f\n", i, - v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); + uint i; + fprintf(stderr, " Vertex: (%p)\n", v); + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + fprintf(stderr, " %d: %f %f %f %f\n", i, + spu_extract(v->data[i], 0), + spu_extract(v->data[i], 1), + spu_extract(v->data[i], 2), + spu_extract(v->data[i], 3)); } } #endif @@ -420,10 +423,12 @@ setup_sort_vertices(const struct vertex_header *v0, const struct vertex_header *v2) { #if DEBUG_VERTS - fprintf(stderr, "Triangle:\n"); - print_vertex(v0); - print_vertex(v1); - print_vertex(v2); + if (spu.init.id==0) { + fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id); + print_vertex(v0); + print_vertex(v1); + print_vertex(v2); + } #endif setup.vprovoke = v2; -- cgit v1.2.3 From 53951531ae7bfd64afae1ae55aac7f6ebd3fe4f5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 12:35:51 -0600 Subject: cell: propogate blend color to SPUs for the fallback fragment ops code --- src/gallium/drivers/cell/common.h | 4 ++ src/gallium/drivers/cell/ppu/cell_context.h | 1 + src/gallium/drivers/cell/ppu/cell_state_emit.c | 1 + src/gallium/drivers/cell/spu/spu_command.c | 1 + src/gallium/drivers/cell/spu/spu_main.h | 1 + src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 75 +++++++++++++++++++--- 6 files changed, 74 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index b0169b8e32..3b5a25e165 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -118,12 +118,16 @@ /** * Command to specify per-fragment operations state and generated code. + * Note that the dsa, blend, blend_color fields are really only needed + * for the fallback/C per-pixel code. They're not used when we generate + * dynamic SPU fragment code (which is the normal case). */ struct cell_command_fragment_ops { uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ struct pipe_depth_stencil_alpha_state dsa; struct pipe_blend_state blend; + struct pipe_blend_color blend_color; unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 80a9b3d7e1..1fcf03c2b8 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -74,6 +74,7 @@ struct cell_fragment_shader_state struct cell_fragment_ops_key { struct pipe_blend_state blend; + struct pipe_blend_color blend_color; struct pipe_depth_stencil_alpha_state dsa; enum pipe_format color_format; enum pipe_format zs_format; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index bb694aa107..d2427584ba 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -52,6 +52,7 @@ lookup_fragment_ops(struct cell_context *cell) */ memset(&key, 0, sizeof(key)); key.blend = *cell->blend; + key.blend_color = cell->blend_color; key.dsa = *cell->depth_stencil; if (cell->framebuffer.cbufs[0]) diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index a07b312111..b521c3aecf 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -195,6 +195,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) /* Copy state info (for fallback case only) */ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); + memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color)); /* Parity twist! For now, always use the fallback code by default, * only switching to codegen when specifically requested. This diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index eff43b870c..ca72baea8b 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -145,6 +145,7 @@ struct spu_global struct spu_framebuffer fb; struct pipe_depth_stencil_alpha_state depth_stencil_alpha; struct pipe_blend_state blend; + struct pipe_blend_color blend_color; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; struct spu_texture texture[PIPE_MAX_SAMPLERS]; struct vertex_info vertex_info; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index 9404704abf..f8ffc70492 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -260,7 +260,7 @@ spu_fallback_fragment_ops(uint x, uint y, } /* - * Compute Src RGB terms + * Compute Src RGB terms (fragment color * factor) */ switch (spu.blend.rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: @@ -283,13 +283,33 @@ spu_fallback_fragment_ops(uint x, uint y, term1g = spu_mul(fragG, fragA); term1b = spu_mul(fragB, fragA); break; + case PIPE_BLENDFACTOR_DST_COLOR: + term1r = spu_mul(fragR, fbRGBA[0]); + term1g = spu_mul(fragG, fbRGBA[1]); + term1b = spu_mul(fragB, fbRGBA[1]); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + term1r = spu_mul(fragR, fbRGBA[3]); + term1g = spu_mul(fragG, fbRGBA[3]); + term1b = spu_mul(fragB, fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0])); + term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1])); + term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2])); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); + term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3])); + term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3])); + break; /* XXX more cases */ default: ASSERT(0); } /* - * Compute Src Alpha term + * Compute Src Alpha term (fragment alpha * factor) */ switch (spu.blend.alpha_src_factor) { case PIPE_BLENDFACTOR_ONE: @@ -301,13 +321,23 @@ spu_fallback_fragment_ops(uint x, uint y, case PIPE_BLENDFACTOR_SRC_ALPHA: term1a = spu_mul(fragA, fragA); break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + term1a = spu_mul(fragA, fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); + break; /* XXX more cases */ default: ASSERT(0); } /* - * Compute Dest RGB terms + * Compute Dest RGB terms (framebuffer color * factor) */ switch (spu.blend.rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: @@ -337,17 +367,37 @@ spu_fallback_fragment_ops(uint x, uint y, term2g = spu_mul(fbRGBA[1], tmp); term2b = spu_mul(fbRGBA[2], tmp); break; - /* XXX more cases */ + case PIPE_BLENDFACTOR_DST_COLOR: + term2r = spu_mul(fbRGBA[0], fbRGBA[0]); + term2g = spu_mul(fbRGBA[1], fbRGBA[1]); + term2b = spu_mul(fbRGBA[2], fbRGBA[2]); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + term2r = spu_mul(fbRGBA[0], fbRGBA[3]); + term2g = spu_mul(fbRGBA[1], fbRGBA[3]); + term2b = spu_mul(fbRGBA[2], fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0])); + term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1])); + term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2])); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3])); + term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3])); + term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3])); + break; + /* XXX more cases */ default: ASSERT(0); } /* - * Compute Dest Alpha term + * Compute Dest Alpha term (framebuffer alpha * factor) */ switch (spu.blend.alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: - term2a = fragA; + term2a = fbRGBA[3]; break; case PIPE_BLENDFACTOR_SRC_COLOR: term2a = spu_splats(0.0f); @@ -360,6 +410,16 @@ spu_fallback_fragment_ops(uint x, uint y, tmp = spu_sub(one, fragA); term2a = spu_mul(fbRGBA[3], tmp); break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + term2a = spu_mul(fbRGBA[3], fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3])); + break; /* XXX more cases */ default: ASSERT(0); @@ -394,9 +454,7 @@ spu_fallback_fragment_ops(uint x, uint y, fragG = spu_max(term1g, term2g); fragB = spu_max(term1b, term2b); break; - /* XXX more cases */ default: - printf("unsup 0x%x\n", spu.blend.rgb_func); ASSERT(0); } @@ -419,7 +477,6 @@ spu_fallback_fragment_ops(uint x, uint y, case PIPE_BLEND_MAX: fragA = spu_max(term1a, term2a); break; - /* XXX more cases */ default: ASSERT(0); } -- cgit v1.2.3 From ddeec1ed10d6c12403fe8d30c072ea68f044db99 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 13:55:18 -0600 Subject: cell: simplify spu debug code --- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_context.c | 1 + src/gallium/drivers/cell/spu/spu_command.c | 47 +++++++++++++---------------- src/gallium/drivers/cell/spu/spu_debug.h | 9 ------ src/gallium/drivers/cell/spu/spu_main.c | 9 +----- src/gallium/drivers/cell/spu/spu_main.h | 15 +++++++-- src/gallium/drivers/cell/spu/spu_render.c | 7 +++-- 7 files changed, 41 insertions(+), 48 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 3b5a25e165..8ae78265f2 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -111,6 +111,7 @@ #define CELL_DEBUG_SYNC (1 << 2) #define CELL_DEBUG_FRAGMENT_OPS (1 << 3) #define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) +#define CELL_DEBUG_CMD (1 << 5) /** Max instructions for doing per-fragment operations */ #define SPU_MAX_FRAGMENT_OPS_INSTS 64 diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index b66aa9c9d9..f8d5eef3ac 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -93,6 +93,7 @@ static const struct debug_named_value cell_debug_flags[] = { {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ + {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */ {NULL, 0} }; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index b521c3aecf..ebbed3d1dc 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -44,7 +44,6 @@ #include "spu_tile.h" #include "spu_vertex_shader.h" #include "spu_dcache.h" -#include "spu_debug.h" #include "cell/common.h" @@ -97,7 +96,7 @@ release_buffer(uint buffer) static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { - DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); if (clear->surface == 0) { spu.fb.color_clear_value = clear->value; @@ -165,14 +164,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) #endif /* CLEAR_OPT */ - DEBUG_PRINTF("CLEAR SURF done\n"); + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n"); } static void cmd_release_verts(const struct cell_command_release_verts *release) { - DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf); + D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf); ASSERT(release->vertex_buf != ~0U); release_buffer(release->vertex_buf); } @@ -189,7 +188,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) { static int warned = 0; - DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n"); + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); /* Copy state info (for fallback case only) */ @@ -229,7 +228,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) static void cmd_state_fragment_program(const struct cell_command_fragment_program *fp) { - DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n"); + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_program_code, fp->code, SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); @@ -247,11 +246,11 @@ cmd_state_fs_constants(const uint64_t *buffer, uint pos) const float *constants = (const float *) &buffer[pos + 2]; uint i; - DEBUG_PRINTF("CMD_STATE_FS_CONSTANTS (%u)\n", num_const); + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const); /* Expand each float to float[4] for SOA execution */ for (i = 0; i < num_const; i++) { - DEBUG_PRINTF(" const[%u] = %f\n", i, constants[i]); + D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]); spu.constants[i] = spu_splats(constants[i]); } @@ -263,7 +262,7 @@ cmd_state_fs_constants(const uint64_t *buffer, uint pos) static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { - DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", + D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", cmd->width, cmd->height, cmd->color_start, @@ -352,7 +351,7 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) { uint unit = sampler->unit; - DEBUG_PRINTF("SAMPLER [%u]\n", unit); + D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit); spu.sampler[unit] = sampler->state; @@ -404,9 +403,7 @@ cmd_state_texture(const struct cell_command_texture *texture) const uint unit = texture->unit; uint i; - //if (spu.init.id==0) Debug=1; - - DEBUG_PRINTF("TEXTURE [%u]\n", texture->unit); + D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit); spu.texture[unit].max_level = 0; spu.texture[unit].target = texture->target; @@ -416,7 +413,7 @@ cmd_state_texture(const struct cell_command_texture *texture) uint height = texture->height[i]; uint depth = texture->depth[i]; - DEBUG_PRINTF(" LEVEL %u: at %p size[0] %u x %u\n", i, + D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i, texture->start[i], texture->width[i], texture->height[i]); spu.texture[unit].level[i].start = texture->start[i]; @@ -438,15 +435,13 @@ cmd_state_texture(const struct cell_command_texture *texture) } update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit); - - //Debug=0; } static void cmd_state_vertex_info(const struct vertex_info *vinfo) { - DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); + D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); ASSERT(vinfo->num_attribs >= 1); ASSERT(vinfo->num_attribs <= 8); memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); @@ -485,7 +480,7 @@ cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) static void cmd_finish(void) { - DEBUG_PRINTF("FINISH\n"); + D_PRINTF(CELL_DEBUG_CMD, "FINISH\n"); really_clear_tiles(0); /* wait for all outstanding DMAs to finish */ mfc_write_tag_mask(~0); @@ -510,7 +505,7 @@ cmd_batch(uint opcode) const unsigned usize = size / sizeof(buffer[0]); uint pos; - DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n", + D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n", buf, size, spu.init.buffers[buf]); ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); @@ -530,7 +525,7 @@ cmd_batch(uint opcode) wait_on_mask(1 << TAG_BATCH_BUFFER); /* Tell PPU we're done copying the buffer to local store */ - DEBUG_PRINTF("release batch buf %u\n", buf); + D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf); release_buffer(buf); /* @@ -663,7 +658,7 @@ cmd_batch(uint opcode) } } - DEBUG_PRINTF("BATCH complete\n"); + D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n"); } @@ -677,7 +672,7 @@ command_loop(void) struct cell_command cmd; int exitFlag = 0; - DEBUG_PRINTF("Enter command loop\n"); + D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); ASSERT((sizeof(struct cell_command) & 0xf) == 0); ASSERT_ALIGN16(&cmd); @@ -686,12 +681,12 @@ command_loop(void) unsigned opcode; int tag = 0; - DEBUG_PRINTF("Wait for cmd...\n"); + D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); /* read/wait from mailbox */ opcode = (unsigned int) spu_read_in_mbox(); - DEBUG_PRINTF("got cmd 0x%x\n", opcode); + D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); /* command payload */ mfc_get(&cmd, /* dest */ @@ -708,7 +703,7 @@ command_loop(void) switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: - DEBUG_PRINTF("EXIT\n"); + D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); exitFlag = 1; break; case CELL_CMD_VS_EXECUTE: @@ -725,7 +720,7 @@ command_loop(void) } - DEBUG_PRINTF("Exit command loop\n"); + D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); spu_dcache_report(); } diff --git a/src/gallium/drivers/cell/spu/spu_debug.h b/src/gallium/drivers/cell/spu/spu_debug.h index eeec052655..25653dcdcd 100644 --- a/src/gallium/drivers/cell/spu/spu_debug.h +++ b/src/gallium/drivers/cell/spu/spu_debug.h @@ -30,28 +30,19 @@ #define SPU_DEBUG_H -/* Set to 0 to disable all extraneous debugging code */ -#define DEBUG 1 - #if DEBUG -extern boolean Debug; -extern boolean force_fragment_ops_fallback; /* These debug macros use the unusual construction ", ##__VA_ARGS__" * which expands to the expected comma + args if variadic arguments * are supplied, but swallows the comma if there are no variadic * arguments (which avoids syntax errors that would otherwise occur). */ -#define DEBUG_PRINTF(format,...) \ - if (Debug) \ - printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) #define D_PRINTF(flag, format,...) \ if (spu.init.debug_flags & (flag)) \ printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) #else -#define DEBUG_PRINTF(...) #define D_PRINTF(...) #endif diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 4becd0f92a..c8bb251905 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -40,7 +40,6 @@ #include "spu_per_fragment_op.h" #include "spu_texture.h" //#include "spu_test.h" -#include "spu_debug.h" #include "cell/common.h" @@ -53,12 +52,6 @@ helpful headers: struct spu_global spu; -#if DEBUG -boolean Debug = FALSE; -boolean force_fragment_ops_fallback = TRUE; -#endif - - static void one_time_init(void) { @@ -102,7 +95,7 @@ main(main_param_t speid, main_param_t argp) one_time_init(); - DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid); + D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); /* get initialization data */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index ca72baea8b..569b9e45d4 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -36,6 +36,19 @@ #include "pipe/p_state.h" +#if DEBUG +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + */ +#define D_PRINTF(flag, format,...) \ + if (spu.init.debug_flags & (flag)) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) +#else +#define D_PRINTF(...) +#endif + #define MAX_WIDTH 1024 #define MAX_HEIGHT 1024 @@ -187,8 +200,6 @@ struct spu_global extern struct spu_global spu; -extern boolean Debug; - diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 82dbeb26b7..cfff19b6c0 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -177,7 +177,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) uint i, j; - if (Debug) { +#if 0 printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " "inline_vert=%u\n", spu.init.id, @@ -190,7 +190,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) printf(" bound: %g, %g .. %g, %g\n", render->xmin, render->ymin, render->xmax, render->ymax); */ - } +#endif ASSERT(sizeof(*render) % 4 == 0); ASSERT(total_vertex_bytes % 16 == 0); @@ -293,7 +293,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) spu.ztile_status[ty][tx] = spu.cur_ztile_status; } - if (Debug) +#if 0 printf("SPU %u: RENDER done\n", spu.init.id); +#endif } -- cgit v1.2.3 From 708f046c215d070e82f40eee895a8d312b1a64c7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 13:56:00 -0600 Subject: cell: remove obsolete spu_debug.h file --- src/gallium/drivers/cell/spu/spu_debug.h | 51 -------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 src/gallium/drivers/cell/spu/spu_debug.h (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_debug.h b/src/gallium/drivers/cell/spu/spu_debug.h deleted file mode 100644 index 25653dcdcd..0000000000 --- a/src/gallium/drivers/cell/spu/spu_debug.h +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef SPU_DEBUG_H -#define SPU_DEBUG_H - - -#if DEBUG - -/* These debug macros use the unusual construction ", ##__VA_ARGS__" - * which expands to the expected comma + args if variadic arguments - * are supplied, but swallows the comma if there are no variadic - * arguments (which avoids syntax errors that would otherwise occur). - */ -#define D_PRINTF(flag, format,...) \ - if (spu.init.debug_flags & (flag)) \ - printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) - -#else - -#define D_PRINTF(...) - -#endif - - -#endif /* SPU_DEBUG_H */ -- cgit v1.2.3 From 79e96b3a77f7d5c7136b380abcc675c7242d0ffe Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 13:58:58 -0600 Subject: cell: move some CELL_MAX constants --- src/gallium/drivers/cell/common.h | 6 +++++- src/gallium/drivers/cell/spu/spu_main.h | 11 ++--------- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 8ae78265f2..d716a26175 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -68,6 +68,9 @@ #define CELL_MAX_SAMPLERS 4 #define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */ +#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */ +#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */ +#define CELL_MAX_HEIGHT 1024 /**< max framebuffer width */ #define TILE_SIZE 32 @@ -99,13 +102,14 @@ #define CELL_CMD_VS_EXECUTE 22 #define CELL_CMD_FLUSH_BUFFER_RANGE 23 - +/** Command/batch buffers */ #define CELL_NUM_BUFFERS 4 #define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */ #define CELL_BUFFER_STATUS_FREE 10 #define CELL_BUFFER_STATUS_USED 20 +/** Debug flags */ #define CELL_DEBUG_CHECKER (1 << 0) #define CELL_DEBUG_ASM (1 << 1) #define CELL_DEBUG_SYNC (1 << 2) diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 569b9e45d4..f87495b72d 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -50,13 +50,6 @@ #endif -#define MAX_WIDTH 1024 -#define MAX_HEIGHT 1024 - - -#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */ - - /** * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. * The data may be addressed through several different types. @@ -175,8 +168,8 @@ struct spu_global ubyte cur_ctile_status, cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; /** Current fragment ops machine code, at 8-byte boundary */ uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB; -- cgit v1.2.3 From 0eb0b0a816764a323af7a8d2b5cb6792f886ce04 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 14:12:55 -0600 Subject: cell: remove some old, pre-batchbuffer stuff --- src/gallium/drivers/cell/common.h | 14 -------------- src/gallium/drivers/cell/ppu/cell_spu.c | 5 +---- src/gallium/drivers/cell/ppu/cell_spu.h | 3 +-- src/gallium/drivers/cell/spu/spu_command.c | 19 ------------------- 4 files changed, 2 insertions(+), 39 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d716a26175..600f1b37a2 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -269,19 +269,6 @@ struct cell_command_texture }; -/** XXX unions don't seem to work */ -/* XXX this should go away; all commands should be placed in batch buffers */ -struct cell_command -{ -#if 0 - struct cell_command_framebuffer fb; - struct cell_command_clear_surface clear; - struct cell_command_render render; -#endif - struct cell_command_vs vs; -} ALIGN16_ATTRIB; - - #define MAX_SPU_FUNCTIONS 12 /** * Used to tell the PPU about the address of particular functions in the @@ -302,7 +289,6 @@ struct cell_init_info unsigned id; unsigned num_spus; unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ - struct cell_command *cmd; /** Buffers for command batches, vertex/index data */ ubyte *buffers[CELL_NUM_BUFFERS]; diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index df020c4146..90745da3d2 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -126,9 +126,6 @@ cell_start_spus(struct cell_context *cell) assert(cell->num_spus <= MAX_SPUS); - ASSERT_ALIGN16(&cell_global.command[0]); - ASSERT_ALIGN16(&cell_global.command[1]); - ASSERT_ALIGN16(&cell_global.inits[0]); ASSERT_ALIGN16(&cell_global.inits[1]); @@ -141,7 +138,7 @@ cell_start_spus(struct cell_context *cell) cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; cell_global.inits[i].debug_flags = cell->debug_flags; - cell_global.inits[i].cmd = &cell_global.command[i]; + for (j = 0; j < CELL_NUM_BUFFERS; j++) { cell_global.inits[i].buffers[j] = cell->buffer[j]; } diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h index 137f26612e..3443331b01 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -50,10 +50,9 @@ struct cell_global_info pthread_t spe_threads[MAX_SPUS]; /** - * Data sent to SPUs + * Data sent to SPUs at start-up */ struct cell_init_info inits[MAX_SPUS]; - struct cell_command command[MAX_SPUS]; }; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index ebbed3d1dc..4febd5385b 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -669,38 +669,19 @@ cmd_batch(uint opcode) void command_loop(void) { - struct cell_command cmd; int exitFlag = 0; D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); - ASSERT((sizeof(struct cell_command) & 0xf) == 0); - ASSERT_ALIGN16(&cmd); - while (!exitFlag) { unsigned opcode; - int tag = 0; D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); /* read/wait from mailbox */ opcode = (unsigned int) spu_read_in_mbox(); - D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); - /* command payload */ - mfc_get(&cmd, /* dest */ - (unsigned int) spu.init.cmd, /* src */ - sizeof(struct cell_command), /* bytes */ - tag, - 0, /* tid */ - 0 /* rid */); - wait_on_mask( 1 << tag ); - - /* - * NOTE: most commands should be contained in a batch buffer - */ - switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); -- cgit v1.2.3 From ec7d6c656178babdf143faa242f7a3df9d0bc22c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 14:39:16 -0600 Subject: cell: send rasterizer state to SPUs in proper way, remove front_winding hack --- src/gallium/drivers/cell/common.h | 18 ++++++++++++++---- src/gallium/drivers/cell/ppu/cell_state_emit.c | 7 +++++++ src/gallium/drivers/cell/ppu/cell_vbuf.c | 1 - src/gallium/drivers/cell/spu/spu_command.c | 8 ++++++++ src/gallium/drivers/cell/spu/spu_main.h | 1 + src/gallium/drivers/cell/spu/spu_render.c | 2 +- src/gallium/drivers/cell/spu/spu_tri.c | 4 ++-- src/gallium/drivers/cell/spu/spu_tri.h | 2 +- 8 files changed, 34 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 1f6f2d494b..0ff2c491fb 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -99,8 +99,9 @@ #define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 #define CELL_CMD_STATE_ATTRIB_FETCH 20 #define CELL_CMD_STATE_FS_CONSTANTS 21 -#define CELL_CMD_VS_EXECUTE 22 -#define CELL_CMD_FLUSH_BUFFER_RANGE 23 +#define CELL_CMD_STATE_RASTERIZER 22 +#define CELL_CMD_VS_EXECUTE 23 +#define CELL_CMD_FLUSH_BUFFER_RANGE 24 /** Command/batch buffers */ #define CELL_NUM_BUFFERS 4 @@ -156,13 +157,23 @@ struct cell_command_fragment_program */ struct cell_command_framebuffer { - uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */ + uint64_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */ int width, height; void *color_start, *depth_start; enum pipe_format color_format, depth_format; }; +/** + * Tell SPUs about rasterizer state. + */ +struct cell_command_rasterizer +{ + uint64_t opcode; /**< CELL_CMD_STATE_RASTERIZER */ + struct pipe_rasterizer_state rasterizer; +}; + + /** * Clear framebuffer to the given value/color. */ @@ -238,7 +249,6 @@ struct cell_command_render float xmin, ymin, xmax, ymax; /* XXX another dummy field */ uint min_index; boolean inline_verts; - uint front_winding; /* the rasterizer needs to be able to determine facing to apply front/back-facing stencil */ }; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index d2427584ba..e6387382f2 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -147,6 +147,13 @@ cell_emit_state(struct cell_context *cell) #endif } + if (cell->dirty & (CELL_NEW_RASTERIZER)) { + struct cell_command_rasterizer *rast = + cell_batch_alloc(cell, sizeof(*rast)); + rast->opcode = CELL_CMD_STATE_RASTERIZER; + rast->rasterizer = *cell->rasterizer; + } + if (cell->dirty & (CELL_NEW_FS)) { /* Send new fragment program to SPUs */ struct cell_command_fragment_program *fp diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index 578ddf62dc..aa63435b93 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -214,7 +214,6 @@ cell_vbuf_draw(struct vbuf_render *vbr, render->opcode = CELL_CMD_RENDER; render->prim_type = cvbr->prim; - render->front_winding = cell->rasterizer->front_winding; render->num_indexes = nr_indices; render->min_index = min_index; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 4febd5385b..d2c282a022 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -583,6 +583,14 @@ cmd_batch(uint opcode) case CELL_CMD_STATE_FS_CONSTANTS: pos = cmd_state_fs_constants(buffer, pos); break; + case CELL_CMD_STATE_RASTERIZER: + { + struct cell_command_rasterizer *rast = + (struct cell_command_rasterizer *) &buffer[pos]; + spu.rasterizer = rast->rasterizer; + pos += sizeof(*rast) / 8; + } + break; case CELL_CMD_STATE_SAMPLER: { struct cell_command_sampler *sampler diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index f87495b72d..4099e52699 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -153,6 +153,7 @@ struct spu_global struct pipe_blend_state blend; struct pipe_blend_color blend_color; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; + struct pipe_rasterizer_state rasterizer; struct spu_texture texture[PIPE_MAX_SAMPLERS]; struct vertex_info vertex_info; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index cfff19b6c0..75a7f75abc 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -279,7 +279,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) v1 = (const float *) (vertices + indexes[j+1] * vertex_size); v2 = (const float *) (vertices + indexes[j+2] * vertex_size); - drawn += tri_draw(v0, v1, v2, tx, ty, render->front_winding); + drawn += tri_draw(v0, v1, v2, tx, ty); } //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 2417db8960..1519b8cd7e 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -775,7 +775,7 @@ determinant(const float *v0, const float *v1, const float *v2) */ boolean tri_draw(const float *v0, const float *v1, const float *v2, - uint tx, uint ty, uint front_winding) + uint tx, uint ty) { setup.tx = tx; setup.ty = ty; @@ -790,7 +790,7 @@ tri_draw(const float *v0, const float *v1, const float *v2, * which will be needed for front/back-face stencil application */ float det = determinant(v0, v1, v2); - setup.facing = (det > 0.0) ^ (front_winding == PIPE_WINDING_CW); + setup.facing = (det > 0.0) ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); if (!setup_sort_vertices((struct vertex_header *) v0, (struct vertex_header *) v1, diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h index abc3d35160..aa694dd7c9 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.h +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -31,7 +31,7 @@ extern boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding); +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); #endif /* SPU_TRI_H */ -- cgit v1.2.3 From 30d3b581124a9fa5fbc7aa8404f717c5c2a6ab15 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 15:20:09 -0600 Subject: cell: simplify triangle front/back face determination --- src/gallium/drivers/cell/spu/spu_tri.c | 69 ++++++++++++---------------------- 1 file changed, 23 insertions(+), 46 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 1519b8cd7e..bd7547353d 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -116,7 +116,7 @@ struct setup_stage { struct edge etop; struct edge emaj; - float oneOverArea; + float oneOverArea; /* XXX maybe make into vector? */ uint facing; @@ -417,11 +417,19 @@ print_vertex(const struct vertex_header *v) #endif +/** + * Sort vertices from top to bottom. + * Compute area and determine front vs. back facing. + * Do coarse clip test against tile bounds + * \return FALSE if tri is totally outside tile, TRUE otherwise + */ static boolean setup_sort_vertices(const struct vertex_header *v0, const struct vertex_header *v1, const struct vertex_header *v2) { + float area, sign; + #if DEBUG_VERTS if (spu.init.id==0) { fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id); @@ -431,8 +439,6 @@ setup_sort_vertices(const struct vertex_header *v0, } #endif - setup.vprovoke = v2; - /* determine bottom to top order of vertices */ { float y0 = spu_extract(v0->data[0], 1); @@ -444,18 +450,21 @@ setup_sort_vertices(const struct vertex_header *v0, setup.vmin = v0; setup.vmid = v1; setup.vmax = v2; + sign = -1.0f; } else if (y2 <= y0) { /* y2<=y0<=y1 */ setup.vmin = v2; setup.vmid = v0; setup.vmax = v1; + sign = -1.0f; } else { /* y0<=y2<=y1 */ setup.vmin = v0; setup.vmid = v2; setup.vmax = v1; + sign = 1.0f; } } else { @@ -464,18 +473,21 @@ setup_sort_vertices(const struct vertex_header *v0, setup.vmin = v1; setup.vmid = v0; setup.vmax = v2; + sign = 1.0f; } else if (y2 <= y1) { /* y2<=y1<=y0 */ setup.vmin = v2; setup.vmid = v1; setup.vmax = v0; + sign = 1.0f; } else { /* y1<=y2<=y0 */ setup.vmin = v1; setup.vmid = v2; setup.vmax = v0; + sign = -1.0f; } } } @@ -504,31 +516,16 @@ setup_sort_vertices(const struct vertex_header *v0, /* * Compute triangle's area. Use 1/area to compute partial * derivatives of attributes later. - * - * The area will be the same as prim->det, but the sign may be - * different depending on how the vertices get sorted above. - * - * To determine whether the primitive is front or back facing we - * use the prim->det value because its sign is correct. */ - { - const float area = (setup.emaj.dx * setup.ebot.dy - - setup.ebot.dx * setup.emaj.dy); - - setup.oneOverArea = 1.0f / area; - /* - _mesa_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup.oneOverArea, area, prim->det ); - */ - } + area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy; -#if 0 - /* We need to know if this is a front or back-facing triangle for: - * - the GLSL gl_FrontFacing fragment attribute (bool) - * - two-sided stencil test - */ - setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW); -#endif + setup.oneOverArea = 1.0f / area; + + /* The product of area * sign indicates front/back orientation (0/1) */ + setup.facing = (area * sign > 0.0f) + ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); + + setup.vprovoke = v2; return TRUE; } @@ -755,20 +752,6 @@ subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) } -static float -determinant(const float *v0, const float *v1, const float *v2) -{ - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0] - v2[0]; - const float ey = v0[1] - v2[1]; - const float fx = v1[0] - v2[0]; - const float fy = v1[1] - v2[1]; - - /* det = cross(e,f).z */ - return ex * fy - ey * fx; -} - - /** * Draw triangle into tile at (tx, ty) (tile coords) * The tile data should have already been fetched. @@ -786,12 +769,6 @@ tri_draw(const float *v0, const float *v1, const float *v2, setup.cliprect_maxx = (tx + 1) * TILE_SIZE; setup.cliprect_maxy = (ty + 1) * TILE_SIZE; - /* Before we sort vertices, determine the facing of the triangle, - * which will be needed for front/back-face stencil application - */ - float det = determinant(v0, v1, v2); - setup.facing = (det > 0.0) ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); - if (!setup_sort_vertices((struct vertex_header *) v0, (struct vertex_header *) v1, (struct vertex_header *) v2)) { -- cgit v1.2.3 From 224c19a758466cdfb821e1a40db4928311278e90 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 15:34:02 -0600 Subject: cell: get rid of last usage of float4 union/typedef Results in slightly tighter code. --- src/gallium/drivers/cell/spu/spu_tri.c | 63 ++++++++++++++++------------------ 1 file changed, 29 insertions(+), 34 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index bd7547353d..d83085d0f9 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -43,11 +43,6 @@ /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ typedef vector unsigned int mask_t; -typedef union -{ - vector float v; - float f[4]; -} float4; /** @@ -91,9 +86,9 @@ struct edge { struct interp_coef { - float4 a0; - float4 dadx; - float4 dady; + vector float a0; + vector float dadx; + vector float dady; }; @@ -152,14 +147,14 @@ eval_coeff(uint slot, float x, float y, vector float w, vector float result[4]) result[QUAD_TOP_LEFT] = result[QUAD_TOP_RIGHT] = result[QUAD_BOTTOM_LEFT] = - result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v; + result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0; break; case INTERP_LINEAR: { - vector float dadx = setup.coef[slot].dadx.v; - vector float dady = setup.coef[slot].dady.v; + vector float dadx = setup.coef[slot].dadx; + vector float dady = setup.coef[slot].dady; vector float topLeft = - spu_add(setup.coef[slot].a0.v, + spu_add(setup.coef[slot].a0, spu_add(spu_mul(spu_splats(x), dadx), spu_mul(spu_splats(y), dady))); @@ -171,10 +166,10 @@ eval_coeff(uint slot, float x, float y, vector float w, vector float result[4]) break; case INTERP_PERSPECTIVE: { - vector float dadx = setup.coef[slot].dadx.v; - vector float dady = setup.coef[slot].dady.v; + vector float dadx = setup.coef[slot].dadx; + vector float dady = setup.coef[slot].dady; vector float topLeft = - spu_add(setup.coef[slot].a0.v, + spu_add(setup.coef[slot].a0, spu_add(spu_mul(spu_splats(x), dadx), spu_mul(spu_splats(y), dady))); @@ -212,9 +207,9 @@ static INLINE vector float eval_z(float x, float y) { const uint slot = 0; - const float dzdx = setup.coef[slot].dadx.f[2]; - const float dzdy = setup.coef[slot].dady.f[2]; - const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy; + const float dzdx = spu_extract(setup.coef[slot].dadx, 2); + const float dzdy = spu_extract(setup.coef[slot].dady, 2); + const float topLeft = spu_extract(setup.coef[slot].a0, 2) + x * dzdx + y * dzdy; const vector float topLeftv = spu_splats(topLeft); const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; return spu_add(topLeftv, derivs); @@ -226,9 +221,9 @@ static INLINE vector float eval_w(float x, float y) { const uint slot = 0; - const float dwdx = setup.coef[slot].dadx.f[3]; - const float dwdy = setup.coef[slot].dady.f[3]; - const float topLeft = setup.coef[slot].a0.f[3] + x * dwdx + y * dwdy; + const float dwdx = spu_extract(setup.coef[slot].dadx, 3); + const float dwdy = spu_extract(setup.coef[slot].dady, 3); + const float topLeft = spu_extract(setup.coef[slot].a0, 3) + x * dwdx + y * dwdy; const vector float topLeftv = spu_splats(topLeft); const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy }; return spu_add(topLeftv, derivs); @@ -540,9 +535,9 @@ setup_sort_vertices(const struct vertex_header *v0, static INLINE void const_coeff4(uint slot) { - setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].a0.v = setup.vprovoke->data[slot]; + setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].a0 = setup.vprovoke->data[slot]; } @@ -566,13 +561,13 @@ tri_linear_coeff4(uint slot) vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), spu_mul(majda, spu_splats(setup.ebot.dx))); - setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneOverArea)); - setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneOverArea)); + setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); - vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); - vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); + vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); - setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy)); + setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); } @@ -610,13 +605,13 @@ tri_persp_coeff4(uint slot) vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), spu_mul(majda, spu_splats(setup.ebot.dx))); - setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneOverArea)); - setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneOverArea)); + setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); - vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); - vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); + vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); - setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy)); + setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); } -- cgit v1.2.3 From 1c915b14a545ffb10cc1c98cc69f997b6471617f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 19:40:51 -0600 Subject: cell: updated debug code --- src/gallium/drivers/cell/spu/spu_render.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 75a7f75abc..802455bf79 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -176,21 +176,12 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) const ushort *indexes; uint i, j; - -#if 0 - printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " - "inline_vert=%u\n", - spu.init.id, - render->prim_type, - render->num_verts, - render->num_indexes, - render->inline_verts); - - /* - printf(" bound: %g, %g .. %g, %g\n", - render->xmin, render->ymin, render->xmax, render->ymax); - */ -#endif + D_PRINTF(CELL_DEBUG_CMD, + "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", + render->prim_type, + render->num_verts, + render->num_indexes, + render->inline_verts); ASSERT(sizeof(*render) % 4 == 0); ASSERT(total_vertex_bytes % 16 == 0); @@ -293,8 +284,5 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) spu.ztile_status[ty][tx] = spu.cur_ztile_status; } -#if 0 - printf("SPU %u: RENDER done\n", - spu.init.id); -#endif + D_PRINTF(CELL_DEBUG_CMD, "RENDER done\n"); } -- cgit v1.2.3 From 0116ee1d1c341726b6ed23c2dddc4515e8a34385 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 20:46:43 -0600 Subject: cell: start some performance measurements Use the spu_write_decrementer() and spu_read_decrementer() functions to measure time. Convert to milliseconds according to the system timebase value. --- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_spu.c | 31 ++++++++++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_command.c | 15 +++++++++++++++ src/gallium/drivers/cell/spu/spu_render.c | 9 ++++++++- 4 files changed, 55 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 0ff2c491fb..469d56cda8 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -299,6 +299,7 @@ struct cell_init_info unsigned id; unsigned num_spus; unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ + float inv_timebase; /**< 1.0/timebase, for perf measurement */ /** Buffers for command batches, vertex/index data */ ubyte *buffers[CELL_NUM_BUFFERS]; diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index a6e268b362..28e5e6d706 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -52,6 +52,35 @@ helpful headers: struct cell_global_info cell_global; +/** + * Scan /proc/cpuinfo to determine the timebase for the system. + * This is used by the SPUs to convert 'decrementer' ticks to seconds. + * There may be a better way to get this value... + */ +static unsigned +get_timebase(void) +{ + FILE *f = fopen("/proc/cpuinfo", "r"); + unsigned timebase; + + assert(f); + while (!feof(f)) { + char line[80]; + fgets(line, sizeof(line), f); + if (strncmp(line, "timebase", 8) == 0) { + char *colon = strchr(line, ':'); + if (colon) { + timebase = atoi(colon + 2); + break; + } + } + } + fclose(f); + + return timebase; +} + + /** * Write a 1-word message to the given SPE mailbox. */ @@ -115,6 +144,7 @@ cell_start_spus(struct cell_context *cell) { static boolean one_time_init = FALSE; uint i, j; + uint timebase = get_timebase(); if (one_time_init) { fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " @@ -138,6 +168,7 @@ cell_start_spus(struct cell_context *cell) cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; cell_global.inits[i].debug_flags = cell->debug_flags; + cell_global.inits[i].inv_timebase = 1000.0f / timebase; for (j = 0; j < CELL_NUM_BUFFERS; j++) { cell_global.inits[i].buffers[j] = cell->buffer[j]; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index d2c282a022..57d265fef7 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -670,6 +670,8 @@ cmd_batch(uint opcode) } +#define PERF 0 + /** * Main loop for SPEs: Get a command, execute it, repeat. @@ -678,6 +680,7 @@ void command_loop(void) { int exitFlag = 0; + uint t0, t1; D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); @@ -686,10 +689,16 @@ command_loop(void) D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); + if (PERF) + spu_write_decrementer(~0); + /* read/wait from mailbox */ opcode = (unsigned int) spu_read_in_mbox(); D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); + if (PERF) + t0 = spu_read_decrementer(); + switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); @@ -707,6 +716,12 @@ command_loop(void) printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); } + if (PERF) { + t1 = spu_read_decrementer(); + printf("wait mbox time: %gms batch time: %gms\n", + (~0u - t0) * spu.init.inv_timebase, + (t0 - t1) * spu.init.inv_timebase); + } } D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 802455bf79..5515bb55c9 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -175,6 +175,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) const ubyte *vertices; const ushort *indexes; uint i, j; + uint num_tiles; D_PRINTF(CELL_DEBUG_CMD, "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", @@ -242,6 +243,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + num_tiles = 0; + /** ** loop over tiles, rendering tris **/ @@ -255,6 +258,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) if (!my_tile(tx, ty)) continue; + num_tiles++; + spu.cur_ctile_status = spu.ctile_status[ty][tx]; spu.cur_ztile_status = spu.ztile_status[ty][tx]; @@ -284,5 +289,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) spu.ztile_status[ty][tx] = spu.cur_ztile_status; } - D_PRINTF(CELL_DEBUG_CMD, "RENDER done\n"); + D_PRINTF(CELL_DEBUG_CMD, + "RENDER done (%u tiles hit)\n", + num_tiles); } -- cgit v1.2.3 From 926b8dbb3e86360e5968882df94785ae84d0ad43 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 09:00:05 -0600 Subject: cell: clean up various texture-related things Distinguish among texture targets in codegen. progs/demos/cubemap.c runs correctly now too. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 29 ++++++++++++++--- src/gallium/drivers/cell/spu/spu_command.c | 24 ++++++-------- src/gallium/drivers/cell/spu/spu_funcs.c | 34 +++++++++++++++++--- src/gallium/drivers/cell/spu/spu_main.h | 16 +++++----- src/gallium/drivers/cell/spu/spu_texture.c | 50 ++++++++++++++---------------- src/gallium/drivers/cell/spu/spu_texture.h | 34 +++++++++----------- 6 files changed, 107 insertions(+), 80 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 3dfd5f673d..2b34cf1e23 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1337,16 +1337,33 @@ emit_function_call(struct codegen *gen, static boolean -emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) { - const uint addr = lookup_function(gen->cell, "spu_txp"); + const uint target = inst->InstructionExtTexture.Texture; const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint addr; int ch; int coord_regs[4], d_regs[4]; + switch (target) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_2D: + addr = lookup_function(gen->cell, "spu_tex_2d"); + break; + case TGSI_TEXTURE_3D: + addr = lookup_function(gen->cell, "spu_tex_3d"); + break; + case TGSI_TEXTURE_CUBE: + addr = lookup_function(gen->cell, "spu_tex_cube"); + break; + default: + ASSERT(0 && "unsupported texture target"); + return FALSE; + } + assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); - spe_comment(gen->f, -4, "CALL txp:"); + spe_comment(gen->f, -4, "CALL tex:"); /* get src/dst reg info */ for (ch = 0; ch < 4; ch++) { @@ -1368,7 +1385,7 @@ emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); } - /* setup function arguments */ + /* setup function arguments (XXX depends on target) */ for (i = 0; i < 4; i++) { spe_move(gen->f, 3 + i, coord_regs[i]); } @@ -1674,8 +1691,10 @@ emit_instruction(struct codegen *gen, /* fall-through for now */ case TGSI_OPCODE_TXB: /* fall-through for now */ + case TGSI_OPCODE_TXL: + /* fall-through for now */ case TGSI_OPCODE_TXP: - return emit_TXP(gen, inst); + return emit_TEX(gen, inst); case TGSI_OPCODE_IF: return emit_IF(gen, inst); diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 57d265fef7..ff4a52d79a 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -310,8 +310,7 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) */ static void update_tex_masks(struct spu_texture *texture, - const struct pipe_sampler_state *sampler, - uint unit) + const struct pipe_sampler_state *sampler) { uint i; @@ -338,11 +337,6 @@ update_tex_masks(struct spu_texture *texture, texture->level[i].scale_t = spu_splats(1.0f); } } - - /* XXX temporary hack */ - if (texture->target == PIPE_TEXTURE_CUBE) { - spu.sample_texture4[unit] = sample_texture4_cube; - } } @@ -357,12 +351,12 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) switch (spu.sampler[unit].min_img_filter) { case PIPE_TEX_FILTER_LINEAR: - spu.min_sample_texture4[unit] = sample_texture4_bilinear; + spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; case PIPE_TEX_FILTER_ANISO: /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: - spu.min_sample_texture4[unit] = sample_texture4_nearest; + spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; break; default: ASSERT(0); @@ -370,12 +364,12 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) switch (spu.sampler[sampler->unit].mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: - spu.mag_sample_texture4[unit] = sample_texture4_bilinear; + spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; case PIPE_TEX_FILTER_ANISO: /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: - spu.mag_sample_texture4[unit] = sample_texture4_nearest; + spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; break; default: ASSERT(0); @@ -384,16 +378,16 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) switch (spu.sampler[sampler->unit].min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: case PIPE_TEX_MIPFILTER_LINEAR: - spu.sample_texture4[unit] = sample_texture4_lod; + spu.sample_texture_2d[unit] = sample_texture_2d_lod; break; case PIPE_TEX_MIPFILTER_NONE: - spu.sample_texture4[unit] = spu.mag_sample_texture4[unit]; + spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit]; break; default: ASSERT(0); } - update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit); + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); } @@ -434,7 +428,7 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].max_level = i; } - update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit); + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); } diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 5c3ee305d4..3534b35000 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -43,6 +43,7 @@ #include "cell/common.h" #include "spu_main.h" #include "spu_funcs.h" +#include "spu_texture.h" /** For "return"-ing four vectors */ @@ -102,11 +103,34 @@ spu_log2(vector float x) static struct vec_4x4 -spu_txp(vector float s, vector float t, vector float r, vector float q, - unsigned unit) +spu_tex_2d(vector float s, vector float t, vector float r, vector float q, + unsigned unit) { struct vec_4x4 colors; - spu.sample_texture4[unit](s, t, r, q, unit, 0, 0, colors.v); + (void) r; + (void) q; + spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); + return colors; +} + +static struct vec_4x4 +spu_tex_3d(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) r; + (void) q; + spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); + return colors; +} + +static struct vec_4x4 +spu_tex_cube(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) q; + sample_texture_cube(s, t, r, unit, colors.v); return colors; } @@ -147,7 +171,9 @@ return_function_info(void) export_func(&funcs, "spu_pow", &spu_pow); export_func(&funcs, "spu_exp2", &spu_exp2); export_func(&funcs, "spu_log2", &spu_log2); - export_func(&funcs, "spu_txp", &spu_txp); + export_func(&funcs, "spu_tex_2d", &spu_tex_2d); + export_func(&funcs, "spu_tex_3d", &spu_tex_3d); + export_func(&funcs, "spu_tex_cube", &spu_tex_cube); /* Send the function info back to the PPU / main memory */ mfc_put((void *) &funcs, /* src in local store */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 4099e52699..80e9c696f8 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -70,12 +70,10 @@ typedef union { /** Function for sampling textures */ -typedef void (*spu_sample_texture4_func)(vector float s, - vector float t, - vector float r, - vector float q, - uint unit, uint level, uint face, - vector float colors[4]); +typedef void (*spu_sample_texture_2d_func)(vector float s, + vector float t, + uint unit, uint level, uint face, + vector float colors[4]); /** Function for performing per-fragment ops */ @@ -183,9 +181,9 @@ struct spu_global spu_fragment_program_func fragment_program; /** Current texture sampler function */ - spu_sample_texture4_func sample_texture4[CELL_MAX_SAMPLERS]; - spu_sample_texture4_func min_sample_texture4[CELL_MAX_SAMPLERS]; - spu_sample_texture4_func mag_sample_texture4[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS]; /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 42eb06a362..04202a7657 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -126,10 +126,9 @@ spu_clamp(vector signed int vec, vector signed int max) * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). */ void -sample_texture4_nearest(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]) +sample_texture_2d_nearest(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; vector float ss = spu_mul(s, tlevel->scale_s); @@ -158,10 +157,9 @@ sample_texture4_nearest(vector float s, vector float t, * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). */ void -sample_texture4_bilinear(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]) +sample_texture_2d_bilinear(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; @@ -308,10 +306,9 @@ transpose(vector unsigned int *mOut0, * Bilinear filtering, using int intead of float arithmetic */ void -sample_texture4_bilinear_2(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]) +sample_texture_2d_bilinear_int(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; @@ -444,10 +441,9 @@ compute_lambda(uint unit, vector float s, vector float t) * Texture sampling with level of detail selection. */ void -sample_texture4_lod(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level_ignored, uint face, - vector float colors[4]) +sample_texture_2d_lod(vector float s, vector float t, + uint unit, uint level_ignored, uint face, + vector float colors[4]) { /* * Note that we're computing a lambda/lod here that's used for all @@ -455,6 +451,9 @@ sample_texture4_lod(vector float s, vector float t, */ float lambda = compute_lambda(unit, s, t); + (void) face; + (void) level_ignored; + /* apply lod bias */ lambda += spu.sampler[unit].lod_bias; @@ -466,14 +465,14 @@ sample_texture4_lod(vector float s, vector float t, if (lambda <= 0.0f) { /* magnify */ - spu.mag_sample_texture4[unit](s, t, r, q, unit, 0, 0, colors); + spu.mag_sample_texture_2d[unit](s, t, unit, 0, 0, colors); } else { /* minify */ int level = (int) (lambda + 0.5f); if (level > (int) spu.texture[unit].max_level) level = spu.texture[unit].max_level; - spu.min_sample_texture4[unit](s, t, r, q, unit, level, 0, colors); + spu.min_sample_texture_2d[unit](s, t, unit, level, 0, colors); /* XXX to do: mipmap level interpolation */ } } @@ -552,13 +551,10 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) void -sample_texture4_cube(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face_ignored, - vector float colors[4]) +sample_texture_cube(vector float s, vector float t, vector float r, + uint unit, vector float colors[4]) { - static const vector float zero = {0.0f, 0.0f, 0.0f, 0.0f}; - uint p, faces[4]; + uint p, faces[4], level = 0; float newS[4], newT[4]; /* Compute cube face referenced by the four sets of texcoords. @@ -577,15 +573,15 @@ sample_texture4_cube(vector float s, vector float t, /* GOOD! All four texcoords refer to the same cube face */ s = (vector float) {newS[0], newS[1], newS[2], newS[3]}; t = (vector float) {newT[0], newT[1], newT[2], newT[3]}; - sample_texture4_nearest(s, t, zero, zero, unit, level, faces[0], colors); + sample_texture_2d_nearest(s, t, unit, level, faces[0], colors); } else { /* BAD! The four texcoords refer to different faces */ for (p = 0; p < 4; p++) { vector float c[4]; - sample_texture4_nearest(spu_splats(newS[p]), spu_splats(newT[p]), - zero, zero, unit, level, faces[p], c); + sample_texture_2d_nearest(spu_splats(newS[p]), spu_splats(newT[p]), + unit, level, faces[p], c); float red = spu_extract(c[0], p); float green = spu_extract(c[1], p); diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index 387484c3ad..7b75b007b5 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -37,37 +37,31 @@ invalidate_tex_cache(void); extern void -sample_texture4_nearest(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]); +sample_texture_2d_nearest(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); extern void -sample_texture4_bilinear(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]); - -extern void -sample_texture4_bilinear_2(vector float s, vector float t, - vector float r, vector float q, +sample_texture_2d_bilinear(vector float s, vector float t, uint unit, uint level, uint face, vector float colors[4]); +extern void +sample_texture_2d_bilinear_int(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + extern void -sample_texture4_lod(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, uint face, - vector float colors[4]); +sample_texture_2d_lod(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); extern void -sample_texture4_cube(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level_ignored, uint face_ignored, - vector float colors[4]); +sample_texture_cube(vector float s, vector float t, vector float r, + uint unit, vector float colors[4]); #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From 1da8f9b005a197214532e124c764a4e04e835519 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 09:33:45 -0600 Subject: cell: call proper sampler function in sample_texture_cube() --- src/gallium/drivers/cell/spu/spu_texture.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 04202a7657..b2d5d4aef8 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -557,7 +557,7 @@ sample_texture_cube(vector float s, vector float t, vector float r, uint p, faces[4], level = 0; float newS[4], newT[4]; - /* Compute cube face referenced by the four sets of texcoords. + /* Compute cube faces referenced by the four sets of texcoords. * XXX we should SIMD-ize this. */ for (p = 0; p < 4; p++) { @@ -573,15 +573,15 @@ sample_texture_cube(vector float s, vector float t, vector float r, /* GOOD! All four texcoords refer to the same cube face */ s = (vector float) {newS[0], newS[1], newS[2], newS[3]}; t = (vector float) {newT[0], newT[1], newT[2], newT[3]}; - sample_texture_2d_nearest(s, t, unit, level, faces[0], colors); + spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors); } else { /* BAD! The four texcoords refer to different faces */ for (p = 0; p < 4; p++) { vector float c[4]; - sample_texture_2d_nearest(spu_splats(newS[p]), spu_splats(newT[p]), - unit, level, faces[p], c); + spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]), + unit, level, faces[p], c); float red = spu_extract(c[0], p); float green = spu_extract(c[1], p); -- cgit v1.2.3 From f0c70f9aabcb8e7c57c71eac2bd4dc86a2f86a0e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 09:52:02 -0600 Subject: cell: update comments --- src/gallium/drivers/cell/spu/spu_texture.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index b2d5d4aef8..19c17c9118 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -193,10 +193,6 @@ sample_texture_2d_bilinear(vector float s, vector float t, get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */ get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */ - /* XXX possibly rework following code to compute the weighted sample - * colors with integer arithmetic for fewer int->float conversions. - */ - /* convert packed int texels to float colors */ vector float ftexels[16]; spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0); @@ -303,7 +299,8 @@ transpose(vector unsigned int *mOut0, /** - * Bilinear filtering, using int intead of float arithmetic + * Bilinear filtering, using int instead of float arithmetic for computing + * sample weights. */ void sample_texture_2d_bilinear_int(vector float s, vector float t, -- cgit v1.2.3 From 5191429b15a3e7a7ef7cda499de8074c2c0df94f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 11:19:22 -0600 Subject: cell: trilinear mipmap interpolation --- src/gallium/drivers/cell/spu/spu_texture.c | 55 +++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 19c17c9118..e3d9a49dc4 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -415,7 +415,7 @@ sample_texture_2d_bilinear_int(vector float s, vector float t, * Compute level of detail factor from texcoords. */ static float -compute_lambda(uint unit, vector float s, vector float t) +compute_lambda_2d(uint unit, vector float s, vector float t) { uint baseLevel = 0; float width = spu.texture[unit].level[baseLevel].width; @@ -433,9 +433,27 @@ compute_lambda(uint unit, vector float s, vector float t) } +/** + * Blend two sets of colors according to weight. + */ +static void +blend_colors(vector float c0[4], const vector float c1[4], float weight) +{ + vector float t = spu_splats(weight); + vector float dc0 = spu_sub(c1[0], c0[0]); + vector float dc1 = spu_sub(c1[1], c0[1]); + vector float dc2 = spu_sub(c1[2], c0[2]); + vector float dc3 = spu_sub(c1[3], c0[3]); + c0[0] = spu_madd(dc0, t, c0[0]); + c0[1] = spu_madd(dc1, t, c0[1]); + c0[2] = spu_madd(dc2, t, c0[2]); + c0[3] = spu_madd(dc3, t, c0[3]); +} + /** - * Texture sampling with level of detail selection. + * Texture sampling with level of detail selection and possibly mipmap + * interpolation. */ void sample_texture_2d_lod(vector float s, vector float t, @@ -446,7 +464,7 @@ sample_texture_2d_lod(vector float s, vector float t, * Note that we're computing a lambda/lod here that's used for all * four pixels in the quad. */ - float lambda = compute_lambda(unit, s, t); + float lambda = compute_lambda_2d(unit, s, t); (void) face; (void) level_ignored; @@ -462,15 +480,34 @@ sample_texture_2d_lod(vector float s, vector float t, if (lambda <= 0.0f) { /* magnify */ - spu.mag_sample_texture_2d[unit](s, t, unit, 0, 0, colors); + spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors); } else { /* minify */ - int level = (int) (lambda + 0.5f); - if (level > (int) spu.texture[unit].max_level) - level = spu.texture[unit].max_level; - spu.min_sample_texture_2d[unit](s, t, unit, level, 0, colors); - /* XXX to do: mipmap level interpolation */ + if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { + /* sample two mipmap levels and interpolate */ + int level = (int) lambda; + if (level > (int) spu.texture[unit].max_level) + level = spu.texture[unit].max_level; + spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); + if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { + /* sample second mipmap level */ + float weight = lambda - (float) level; + level++; + if (level <= (int) spu.texture[unit].max_level) { + vector float colors2[4]; + spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2); + blend_colors(colors, colors2, weight); + } + } + } + else { + /* sample one mipmap level */ + int level = (int) (lambda + 0.5f); + if (level > (int) spu.texture[unit].max_level) + level = spu.texture[unit].max_level; + spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); + } } } -- cgit v1.2.3 From 033c90f4c16c1da517d676282508208319bd5ec5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 13:49:42 -0600 Subject: cell: implement KIL instruction --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 80 ++++++++++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_main.h | 6 +-- src/gallium/drivers/cell/spu/spu_tri.c | 5 +- 3 files changed, 87 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 2b34cf1e23..493ee1a0c9 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -84,6 +84,9 @@ struct codegen /** Index of execution mask register */ int exec_mask_reg; + /** KIL mask: indicates which fragments have been killed */ + int kill_mask_reg; + int frame_size; /**< Stack frame size, in words */ struct spe_function *f; @@ -431,8 +434,21 @@ emit_prologue(struct codegen *gen) static void emit_epilogue(struct codegen *gen) { + const int return_reg = 3; + spe_comment(gen->f, -4, "Function epilogue:"); + spe_comment(gen->f, 0, "return the killed mask"); + if (gen->kill_mask_reg > 0) { + /* shader called KIL, return the "alive" mask */ + spe_move(gen->f, return_reg, gen->kill_mask_reg); + } + else { + /* return {0,0,0,0} */ + spe_load_uint(gen->f, return_reg, 0); + } + + spe_comment(gen->f, 0, "restore stack and return"); if (gen->frame_size >= 512) { /* offset is too large for ai instruction */ int offset_reg = spe_allocate_available_register(gen->f); @@ -1423,6 +1439,68 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) } +/** + * KILL if any of src reg values are less than zero. + */ +static boolean +emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + int s_regs[4], kil_reg = -1, cmp_reg, zero_reg; + + spe_comment(gen->f, -4, "CALL kil:"); + + /* zero = {0,0,0,0} */ + zero_reg = get_itemp(gen); + spe_load_uint(gen->f, zero_reg, 0); + + cmp_reg = get_itemp(gen); + + /* get src regs */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + } + } + + /* test if any src regs are < 0 */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + if (kil_reg >= 0) { + /* cmp = 0 > src ? : ~0 : 0 */ + spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); + /* kil = kil | cmp */ + spe_or(gen->f, kil_reg, kil_reg, cmp_reg); + } + else { + kil_reg = get_itemp(gen); + /* kil = 0 > src ? : ~0 : 0 */ + spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); + } + } + } + + if (gen->if_nesting) { + /* may have been a conditional kil */ + spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg); + } + + /* allocate the kill mask reg if needed */ + if (gen->kill_mask_reg <= 0) { + gen->kill_mask_reg = spe_allocate_available_register(gen->f); + spe_move(gen->f, gen->kill_mask_reg, kil_reg); + } + else { + spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg); + } + + free_itemps(gen); + + return TRUE; +} + + + /** * Emit max. See emit_SGT for comments. */ @@ -1695,6 +1773,8 @@ emit_instruction(struct codegen *gen, /* fall-through for now */ case TGSI_OPCODE_TXP: return emit_TEX(gen, inst); + case TGSI_OPCODE_KIL: + return emit_KIL(gen, inst); case TGSI_OPCODE_IF: return emit_IF(gen, inst); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 80e9c696f8..95ef4c9244 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -89,9 +89,9 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y, uint facing); /** Function for running fragment program */ -typedef void (*spu_fragment_program_func)(vector float *inputs, - vector float *outputs, - vector float *constants); +typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, + vector float *outputs, + vector float *constants); struct spu_framebuffer diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index d83085d0f9..4caf7d6b61 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -254,6 +254,7 @@ emit_quad( int x, int y, mask_t mask) vector float inputs[4*4], outputs[2*4]; vector float fragZ = eval_z((float) x, (float) y); vector float fragW = eval_w((float) x, (float) y); + vector unsigned int kill_mask; /* setup inputs */ #if 0 @@ -268,7 +269,9 @@ emit_quad( int x, int y, mask_t mask) ASSERT(spu.fragment_ops); /* Execute the current fragment program */ - spu.fragment_program(inputs, outputs, spu.constants); + kill_mask = spu.fragment_program(inputs, outputs, spu.constants); + + mask = spu_andc(mask, kill_mask); /* Execute per-fragment/quad operations, including: * alpha test, z test, stencil test, blend and framebuffer writing. -- cgit v1.2.3 From 51ffab362b27997f9c6c60bf9bace1b1854817db Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 13:54:17 -0600 Subject: cell: pass spu_texture_level ptr to get_four_texels() --- src/gallium/drivers/cell/spu/spu_texture.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index e3d9a49dc4..c0af05e46e 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -72,10 +72,10 @@ invalidate_tex_cache(void) * a time. */ static void -get_four_texels(uint unit, uint level, uint face, vec_int4 x, vec_int4 y, +get_four_texels(const struct spu_texture_level *tlevel, uint face, + vec_int4 x, vec_int4 y, vec_uint4 *texels) { - const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; unsigned texture_ea = (uintptr_t) tlevel->start; const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ @@ -145,7 +145,7 @@ sample_texture_2d_nearest(vector float s, vector float t, is = spu_clamp(is, tlevel->max_s); it = spu_clamp(it, tlevel->max_t); - get_four_texels(unit, level, face, is, it, texels); + get_four_texels(tlevel, face, is, it, texels); /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ spu_unpack_A8R8G8B8_transpose4(texels, colors); @@ -188,10 +188,10 @@ sample_texture_2d_bilinear(vector float s, vector float t, /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */ + get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */ + get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ + get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ + get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ /* convert packed int texels to float colors */ vector float ftexels[16]; @@ -346,10 +346,10 @@ sample_texture_2d_bilinear_int(vector float s, vector float t, /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */ + get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */ + get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ + get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ + get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ { -- cgit v1.2.3 From fa7b8388066651c5cfafd4ce6461fc43c982d8c7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 15:48:04 -0600 Subject: cell: use 7-bit weights in sample_texture_2d_bilinear_int() This allows us to use 16-bit signed mul/add instructions. Had to used unsigned mul before and there's no unsigned mul/add instruction. --- src/gallium/drivers/cell/spu/spu_texture.c | 62 +++++++++++++++--------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index c0af05e46e..4e12a116cd 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -314,19 +314,19 @@ sample_texture_2d_bilinear_int(vector float s, vector float t, vector float ss = spu_madd(s, tlevel->scale_s, half); vector float tt = spu_madd(t, tlevel->scale_t, half); - /* convert float coords to fixed-pt coords with 8 fraction bits */ - vector signed int is = spu_convts(ss, 8); - vector signed int it = spu_convts(tt, 8); + /* convert float coords to fixed-pt coords with 7 fraction bits */ + vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */ + vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */ - /* compute integer texel weights in [0, 255] */ - vector signed int sWeights0 = spu_and(is, 255); - vector signed int tWeights0 = spu_and(it, 255); - vector signed int sWeights1 = spu_sub(255, sWeights0); - vector signed int tWeights1 = spu_sub(255, tWeights0); + /* compute integer texel weights in [0, 127] */ + vector signed int sWeights0 = spu_and(is, 127); + vector signed int tWeights0 = spu_and(it, 127); + vector signed int sWeights1 = spu_sub(127, sWeights0); + vector signed int tWeights1 = spu_sub(127, tWeights0); - /* texel coords: is0 = is / 256, it0 = is / 256 */ - vector signed int is0 = spu_rlmask(is, -8); - vector signed int it0 = spu_rlmask(it, -8); + /* texel coords: is0 = is / 128, it0 = is / 128 */ + vector signed int is0 = spu_rlmask(is, -7); + vector signed int it0 = spu_rlmask(it, -7); /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */ vector signed int is1 = spu_add(is0, 1); @@ -377,36 +377,36 @@ sample_texture_2d_bilinear_int(vector float s, vector float t, vector unsigned int c0, c1, c2, c3, cSum; /* red */ - c0 = (vector unsigned int) si_mpyu((qword) texel0, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpyu((qword) texel4, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpyu((qword) texel8, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpyu((qword) texel12, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[0] = spu_convtf(cSum, 24); + colors[0] = spu_convtf(cSum, 22); /* green */ - c0 = (vector unsigned int) si_mpyu((qword) texel1, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpyu((qword) texel5, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpyu((qword) texel9, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpyu((qword) texel13, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[1] = spu_convtf(cSum, 24); + colors[1] = spu_convtf(cSum, 22); /* blue */ - c0 = (vector unsigned int) si_mpyu((qword) texel2, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpyu((qword) texel6, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpyu((qword) texel10, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpyu((qword) texel14, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[2] = spu_convtf(cSum, 24); + colors[2] = spu_convtf(cSum, 22); /* alpha */ - c0 = (vector unsigned int) si_mpyu((qword) texel3, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpyu((qword) texel7, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpyu((qword) texel11, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpyu((qword) texel15, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[3] = spu_convtf(cSum, 24); + colors[3] = spu_convtf(cSum, 22); } -- cgit v1.2.3 From 9fa8671c73fa44a95e2ea7fed6047bddb042796f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 16 Oct 2008 20:25:28 -0600 Subject: cell: add new debug flag (cache) to report texture cache stats on exit --- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_context.c | 1 + src/gallium/drivers/cell/spu/spu_command.c | 3 ++- src/gallium/drivers/cell/spu/spu_dcache.c | 4 +++- 4 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 469d56cda8..9ca4e9d67e 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -117,6 +117,7 @@ #define CELL_DEBUG_FRAGMENT_OPS (1 << 3) #define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) #define CELL_DEBUG_CMD (1 << 5) +#define CELL_DEBUG_CACHE (1 << 6) /** Max instructions for doing per-fragment operations */ #define SPU_MAX_FRAGMENT_OPS_INSTS 64 diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 4dad490ce1..7a2d93ecb4 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -94,6 +94,7 @@ static const struct debug_named_value cell_debug_flags[] = { {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */ + {"cache", CELL_DEBUG_CACHE}, /**< report texture cache stats on exit */ {NULL, 0} }; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index ff4a52d79a..9c853c0961 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -720,5 +720,6 @@ command_loop(void) D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); - spu_dcache_report(); + if (spu.init.debug_flags & CELL_DEBUG_CACHE) + spu_dcache_report(); } diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c index 167404cdc5..a6d67634fd 100644 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -36,7 +36,9 @@ #define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0) #define CACHE_LOG2NNWAY 2 #define CACHE_LOG2NSETS 6 -/*#define CACHE_STATS 1*/ +#ifdef DEBUG +#define CACHE_STATS 1 +#endif #include /* Yes folks, this is ugly. -- cgit v1.2.3 From 81724da4f61f2ba678e2e0376209e1b754e1ecab Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 17 Oct 2008 09:09:57 -0600 Subject: cell: use an approximation in compute_lambda_2d() to avoid sqrt Though, the logf() call still needs attention. --- src/gallium/drivers/cell/spu/spu_texture.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 4e12a116cd..69784c8978 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -414,7 +414,7 @@ sample_texture_2d_bilinear_int(vector float s, vector float t, /** * Compute level of detail factor from texcoords. */ -static float +static INLINE float compute_lambda_2d(uint unit, vector float s, vector float t) { uint baseLevel = 0; @@ -424,11 +424,21 @@ compute_lambda_2d(uint unit, vector float s, vector float t) float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0)); float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0)); float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0)); +#if 0 + /* ideal value */ float x = dsdx * dsdx + dtdx * dtdx; float y = dsdy * dsdy + dtdy * dtdy; float rho = x > y ? x : y; rho = sqrtf(rho); - float lambda = logf(rho) * 1.442695f; +#else + /* approximation */ + dsdx = fabsf(dsdx); + dsdy = fabsf(dsdy); + dtdx = fabsf(dtdx); + dtdy = fabsf(dtdy); + float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5; +#endif + float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */ return lambda; } -- cgit v1.2.3 From 70dd4379d2cd54f229c3940312537912470218d3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 10:34:13 -0600 Subject: cell: implement fencing for texture buffers If we delete a texture, we need to keep the underlying tiled data buffer around until any rendering that references it has completed. Keep a list of buffers referenced by a rendering batch. Unref/free them when the associated batch's fence is executed/signalled. --- src/gallium/drivers/cell/common.h | 25 ++++ src/gallium/drivers/cell/ppu/Makefile | 1 + src/gallium/drivers/cell/ppu/cell_batch.c | 32 +++++ src/gallium/drivers/cell/ppu/cell_context.c | 6 + src/gallium/drivers/cell/ppu/cell_context.h | 21 ++++ src/gallium/drivers/cell/ppu/cell_fence.c | 158 +++++++++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_fence.h | 57 +++++++++ src/gallium/drivers/cell/ppu/cell_state_emit.c | 2 +- src/gallium/drivers/cell/ppu/cell_texture.c | 33 ++++-- src/gallium/drivers/cell/ppu/cell_texture.h | 5 +- src/gallium/drivers/cell/ppu/cell_vbuf.c | 6 + src/gallium/drivers/cell/spu/spu_command.c | 38 +++++- src/gallium/drivers/cell/spu/spu_main.h | 2 +- 13 files changed, 367 insertions(+), 19 deletions(-) create mode 100644 src/gallium/drivers/cell/ppu/cell_fence.c create mode 100644 src/gallium/drivers/cell/ppu/cell_fence.h (limited to 'src/gallium/drivers/cell/spu') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 9ca4e9d67e..23fb0b0831 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -102,6 +102,8 @@ #define CELL_CMD_STATE_RASTERIZER 22 #define CELL_CMD_VS_EXECUTE 23 #define CELL_CMD_FLUSH_BUFFER_RANGE 24 +#define CELL_CMD_FENCE 25 + /** Command/batch buffers */ #define CELL_NUM_BUFFERS 4 @@ -123,6 +125,29 @@ #define SPU_MAX_FRAGMENT_OPS_INSTS 64 + +#define CELL_FENCE_IDLE 0 +#define CELL_FENCE_EMITTED 1 +#define CELL_FENCE_SIGNALLED 2 + +struct cell_fence +{ + /** There's a 16-byte status qword per SPU */ + volatile uint status[CELL_MAX_SPUS][4]; +}; + + +/** + * Fence command sent to SPUs. In response, the SPUs will write + * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory. + */ +struct cell_command_fence +{ + uint64_t opcode; /**< CELL_CMD_FENCE */ + struct cell_fence *fence; +}; + + /** * Command to specify per-fragment operations state and generated code. * Note that the dsa, blend, blend_color fields are really only needed diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index b28f4c5c31..9358a47284 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -24,6 +24,7 @@ SOURCES = \ cell_clear.c \ cell_context.c \ cell_draw_arrays.c \ + cell_fence.c \ cell_flush.c \ cell_gen_fragment.c \ cell_gen_fp.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c index 01254aed60..448b723d85 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -28,6 +28,7 @@ #include "cell_context.h" #include "cell_batch.h" +#include "cell_fence.h" #include "cell_spu.h" @@ -63,6 +64,10 @@ cell_get_empty_buffer(struct cell_context *cell) printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries); */ prev_buffer = buf; + + /* release tex buffer associated w/ prev use of this batch buf */ + cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]); + return buf; } } @@ -84,6 +89,26 @@ cell_get_empty_buffer(struct cell_context *cell) } +/** + * Append a fence command to the current batch buffer. + * Note that we're sure there's always room for this because of the + * adjusted size check in cell_batch_free_space(). + */ +static void +emit_fence(struct cell_context *cell) +{ + const uint batch = cell->cur_batch; + const uint size = cell->buffer_size[batch]; + struct cell_command_fence *fence_cmd; + + ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE); + + fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size); + fence_cmd->opcode = CELL_CMD_FENCE; + fence_cmd->fence = &cell->fenced_buffers[batch].fence; +} + + /** * Flush the current batch buffer to the SPUs. * An empty buffer will be found and set as the new current batch buffer @@ -102,6 +127,12 @@ cell_batch_flush(struct cell_context *cell) if (size == 0) return; + /* Before we use this batch buffer, make sure any fenced texture buffers + * are released. + */ + if (cell->fenced_buffers[batch].head) + emit_fence(cell); + flushing = TRUE; assert(batch < CELL_NUM_BUFFERS); @@ -142,6 +173,7 @@ uint cell_batch_free_space(const struct cell_context *cell) { uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; + free -= sizeof(struct cell_command_fence); return free; } diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 7a2d93ecb4..22d552d8e3 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -47,6 +47,7 @@ #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" +#include "cell_fence.h" #include "cell_flush.h" #include "cell_state.h" #include "cell_surface.h" @@ -104,6 +105,7 @@ cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws) { struct cell_context *cell; + uint i; /* some fields need to be 16-byte aligned, so align the whole object */ cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); @@ -151,6 +153,10 @@ cell_create_context(struct pipe_screen *screen, cell_debug_flags, 0 ); + for (i = 0; i < CELL_NUM_BUFFERS; i++) + cell_fence_init(&cell->fenced_buffers[i].fence); + + /* * SPU stuff */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index ad1f4829a4..4491ae8cdf 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -81,6 +81,19 @@ struct cell_fragment_ops_key }; +struct cell_buffer_node; + +/** + * Fenced buffer list. List of buffers which can be unreferenced after + * the fence has been executed/signalled. + */ +struct cell_buffer_list +{ + struct cell_fence fence; + struct cell_buffer_node *head; +}; + + /** * Per-context state, subclass of pipe_context. */ @@ -154,6 +167,14 @@ struct cell_context uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + /** Associated with each command/batch buffer is a list of pipe_buffers + * that are fenced. When the last command in a buffer is executed, the + * fence will be signalled, indicating that any pipe_buffers preceeding + * that fence can be unreferenced (and probably freed). + */ + struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS]; + + struct spe_function attrib_fetch; unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c new file mode 100644 index 0000000000..ffb3bea12b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -0,0 +1,158 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_fence.h" +#include "cell_texture.h" + + +void +cell_fence_init(struct cell_fence *fence) +{ + uint i; + for (i = 0; i < CELL_MAX_SPUS; i++) { + fence->status[i][0] = CELL_FENCE_IDLE; + } +} + + +boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence) +{ + uint i; + for (i = 0; i < cell->num_spus; i++) { + //ASSERT(fence->status[i][0] != CELL_FENCE_IDLE); + if (fence->status[i][0] == CELL_FENCE_EMITTED) + return FALSE; + } + return TRUE; +} + + +void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence) +{ + while (!cell_fence_signalled(cell, fence)) { + usleep(10); + } +} + + + + +struct cell_buffer_node +{ + struct pipe_buffer *buffer; + struct cell_buffer_node *next; +}; + + +static void +cell_add_buffer_to_list(struct cell_context *cell, + struct cell_buffer_list *list, + struct pipe_buffer *buffer) +{ + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node); + /* create new list node which references the buffer, insert at head */ + if (node) { + pipe_buffer_reference(ps, &node->buffer, buffer); + node->next = list->head; + list->head = node; + } +} + + +/** + * Wait for completion of the given fence, then unreference any buffers + * on the list. + * This typically unrefs/frees texture buffers after any rendering which uses + * them has completed. + */ +void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list) +{ + if (list->head) { + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node; + + cell_fence_finish(cell, &list->fence); + + /* traverse the list, unreferencing buffers, freeing nodes */ + node = list->head; + while (node) { + struct cell_buffer_node *next = node->next; + assert(node->buffer); + pipe_buffer_unmap(ps, node->buffer); +#if 0 + printf("Unref buffer %p\n", node->buffer); + if (node->buffer->refcount == 1) + printf(" Delete!\n"); +#endif + pipe_buffer_reference(ps, &node->buffer, NULL); + FREE(node); + node = next; + } + list->head = NULL; + } +} + + +/** + * This should be called for each render command. + * Any texture buffers that are current bound will be added to a fenced + * list to be freed later when the fence is executed/signalled. + */ +void +cell_add_fenced_textures(struct cell_context *cell) +{ + struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch]; + uint i; + + for (i = 0; i < cell->num_textures; i++) { + struct cell_texture *ct = cell->texture[i]; + if (ct) { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + if (ct->tiled_buffer[level]) { +#if 0 + printf("Adding texture %p buffer %p to list\n", + ct, ct->tiled_buffer[level]); +#endif + cell_add_buffer_to_list(cell, list, ct->tiled_buffer[level]); + } + } + } + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h new file mode 100644 index 0000000000..536b4ba411 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_FENCE_H +#define CELL_FENCE_H + + +extern void +cell_fence_init(struct cell_fence *fence); + + +extern boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence); + + +extern void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence); + + + +extern void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list); + + +extern void +cell_add_fenced_textures(struct cell_context *cell); + + +#endif /* CELL_FENCE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index effcd2a1e1..dd2d7f7d1e 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -225,7 +225,7 @@ cell_emit_state(struct cell_context *cell) if (cell->texture[i]) { uint level; for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { - texture->start[level] = cell->texture[i]->tiled_data[level]; + texture->start[level] = cell->texture[i]->tiled_mapped[level]; texture->width[level] = cell->texture[i]->base.width[level]; texture->height[level] = cell->texture[i]->base.height[level]; texture->depth[level] = cell->texture[i]->base.depth[level]; diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 9c6741f1bc..9ac2f3bbb9 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -136,6 +136,9 @@ cell_texture_release(struct pipe_screen *screen, __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); */ if (--(*pt)->refcount <= 0) { + /* Delete this texture now. + * But note that the underlying pipe_buffer may linger... + */ struct cell_texture *ct = cell_texture(*pt); uint i; @@ -146,14 +149,12 @@ cell_texture_release(struct pipe_screen *screen, pipe_buffer_reference(screen, &ct->buffer, NULL); for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { - if (ct->tiled_data[i]) { - /* XXX need to use a fenced buffer for tiled data so that - * it's properly freed after rendering has completed. - * Disabling this free() allows glDrawPixels to work for now. - */ -#if 0 - align_free(ct->tiled_data[i]); -#endif + /* Unreference the tiled image buffer. + * It may not actually be deleted until a fence is hit. + */ + if (ct->tiled_buffer[i]) { + ct->tiled_mapped[i] = NULL; + winsys_buffer_reference(screen->winsys, &ct->tiled_buffer[i], NULL); } } @@ -234,12 +235,18 @@ cell_twiddle_texture(struct pipe_screen *screen, int offset = bufWidth * bufHeight * 4 * surface->face; uint *dst; - if (!ct->tiled_data[level]) { - ct->tiled_data[level] = - align_malloc(bufWidth * bufHeight * 4 * numFaces, 16); + if (!ct->tiled_buffer[level]) { + /* allocate buffer for tiled data now */ + struct pipe_winsys *ws = screen->winsys; + uint bytes = bufWidth * bufHeight * 4 * numFaces; + ct->tiled_buffer[level] = ws->buffer_create(ws, 16, + PIPE_BUFFER_USAGE_PIXEL, + bytes); + /* and map it */ + ct->tiled_mapped[level] = ws->buffer_map(ws, ct->tiled_buffer[level], + PIPE_BUFFER_USAGE_GPU_READ); } - - dst = (uint *) ((ubyte *) ct->tiled_data[level] + offset); + dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset); twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, surface->stride, src); diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index a0757091b0..2f5fe0dd1b 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -48,7 +48,10 @@ struct cell_texture struct pipe_buffer *buffer; unsigned long buffer_size; - void *tiled_data[CELL_MAX_TEXTURE_LEVELS]; /* XXX this may be temporary */ /*ALIGN16*/ + /** Texture data in tiled layout is held here */ + struct pipe_buffer *tiled_buffer[CELL_MAX_TEXTURE_LEVELS]; + /** Mapped, tiled texture data */ + void *tiled_mapped[CELL_MAX_TEXTURE_LEVELS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index aa63435b93..65ba51b6bb 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -38,6 +38,7 @@ #include "cell_batch.h" #include "cell_context.h" +#include "cell_fence.h" #include "cell_flush.h" #include "cell_spu.h" #include "cell_vbuf.h" @@ -108,6 +109,11 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, __FUNCTION__, cvbr->vertex_buf, vertices_used); */ + /* Make sure texture buffers aren't released until we're done rendering + * with them. + */ + cell_add_fenced_textures(cell); + /* Tell SPUs they can release the vert buf */ if (cvbr->vertex_buf != ~0U) { struct cell_command_release_verts *release diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 9c853c0961..a6ed29ea63 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -76,9 +76,10 @@ static void release_buffer(uint buffer) { /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const uint status[4] ALIGN16_ATTRIB - = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; - + static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE}; const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); uint *dst = spu.init.buffer_status + index; @@ -93,6 +94,29 @@ release_buffer(uint buffer) } +/** + * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory. + * There's a qword of status per SPU. + */ +static void +cmd_fence(struct cell_command_fence *fence_cmd) +{ + static const vector unsigned int status = {CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED}; + uint *dst = (uint *) fence_cmd->fence; + dst += 4 * spu.init.id; /* main store/memory address, not local store */ + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_FENCE, /* tag */ + 0, /* tid */ + 0 /* rid */); +} + + static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { @@ -637,6 +661,14 @@ cmd_batch(uint opcode) cmd_finish(); pos += 1; break; + case CELL_CMD_FENCE: + { + struct cell_command_fence *fence_cmd = + (struct cell_command_fence *) &buffer[pos]; + cmd_fence(fence_cmd); + pos += sizeof(*fence_cmd) / 8; + } + break; case CELL_CMD_RELEASE_VERTS: { struct cell_command_release_verts *release diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 95ef4c9244..668af10be2 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -210,7 +210,7 @@ extern struct spu_global spu; #define TAG_DCACHE1 21 #define TAG_DCACHE2 22 #define TAG_DCACHE3 23 - +#define TAG_FENCE 24 static INLINE void -- cgit v1.2.3