diff options
Diffstat (limited to 'src/mesa/pipe/cell/spu/spu_main.c')
-rw-r--r-- | src/mesa/pipe/cell/spu/spu_main.c | 473 |
1 files changed, 161 insertions, 312 deletions
diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c index 0c83900a18..e375197fe6 100644 --- a/src/mesa/pipe/cell/spu/spu_main.c +++ b/src/mesa/pipe/cell/spu/spu_main.c @@ -31,11 +31,13 @@ #include <stdio.h> #include <libmisc.h> -#include <spu_mfcio.h> #include "spu_main.h" -#include "spu_tri.h" +#include "spu_render.h" +#include "spu_texture.h" #include "spu_tile.h" +//#include "spu_test.h" +#include "spu_vertex_shader.h" #include "pipe/cell/common.h" #include "pipe/p_defines.h" @@ -46,28 +48,37 @@ helpful headers: /opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h */ -static boolean Debug = FALSE; +boolean Debug = FALSE; struct spu_global spu; +struct spu_vs_context draw; -void -wait_on_mask(unsigned tagMask) +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). + */ +static void +release_buffer(uint buffer) { - mfc_write_tag_mask( tagMask ); - /* wait for completion of _any_ DMAs specified by tagMask */ - mfc_read_tag_status_any(); -} + /* Evidently, using less than a 16-byte status doesn't work reliably */ + static const uint status[4] ALIGN16_ATTRIB + = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; + const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); + uint *dst = spu.init.buffer_status + index; -static void -wait_on_mask_all(unsigned tagMask) -{ - mfc_write_tag_mask( tagMask ); - /* wait for completion of _any_ DMAs specified by tagMask */ - mfc_read_tag_status_all(); -} + ASSERT(buffer < CELL_NUM_BUFFERS); + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_MISC, /* tag is unimportant */ + 0, /* tid */ + 0 /* rid */); +} /** @@ -81,24 +92,24 @@ really_clear_tiles(uint surfaceIndex) uint i; if (surfaceIndex == 0) { - clear_c_tile(&ctile); + clear_c_tile(&spu.ctile); for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { uint tx = i % spu.fb.width_tiles; uint ty = i / spu.fb.width_tiles; - if (tile_status[ty][tx] == TILE_STATUS_CLEAR) { - put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 0); + if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); } } } else { - clear_z_tile(&ztile); + clear_z_tile(&spu.ztile); for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { uint tx = i % spu.fb.width_tiles; uint ty = i / spu.fb.width_tiles; - if (tile_status_z[ty][tx] == TILE_STATUS_CLEAR) - put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 1); + if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); } } @@ -122,11 +133,11 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) #if CLEAR_OPT /* set all tile's status to CLEAR */ if (clear->surface == 0) { - memset(tile_status, TILE_STATUS_CLEAR, sizeof(tile_status)); + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); spu.fb.color_clear_value = clear->value; } else { - memset(tile_status_z, TILE_STATUS_CLEAR, sizeof(tile_status_z)); + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); spu.fb.depth_clear_value = clear->value; } return; @@ -134,11 +145,11 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) if (clear->surface == 0) { spu.fb.color_clear_value = clear->value; - clear_c_tile(&ctile); + clear_c_tile(&spu.ctile); } else { spu.fb.depth_clear_value = clear->value; - clear_z_tile(&ztile); + clear_z_tile(&spu.ztile); } /* @@ -150,9 +161,9 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) uint tx = i % spu.fb.width_tiles; uint ty = i / spu.fb.width_tiles; if (clear->surface == 0) - put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 0); + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); else - put_tile(tx, ty, &ztile, TAG_SURFACE_CLEAR, 1); + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); /* XXX we don't want this here, but it fixes bad tile results */ } @@ -165,229 +176,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) } -/** - * Given a rendering command's bounding box (in pixels) compute the - * location of the corresponding screen tile bounding box. - */ -static INLINE void -tile_bounding_box(const struct cell_command_render *render, - uint *txmin, uint *tymin, - uint *box_num_tiles, uint *box_width_tiles) -{ -#if 1 - /* Debug: full-window bounding box */ - uint txmax = spu.fb.width_tiles - 1; - uint tymax = spu.fb.height_tiles - 1; - *txmin = 0; - *tymin = 0; - *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - *box_width_tiles = spu.fb.width_tiles; - (void) render; - (void) txmax; - (void) tymax; -#else - uint txmax, tymax, box_height_tiles; - - *txmin = (uint) render->xmin / TILE_SIZE; - *tymin = (uint) render->ymin / TILE_SIZE; - txmax = (uint) render->xmax / TILE_SIZE; - tymax = (uint) render->ymax / TILE_SIZE; - *box_width_tiles = txmax - *txmin + 1; - box_height_tiles = tymax - *tymin + 1; - *box_num_tiles = *box_width_tiles * box_height_tiles; -#endif -#if 0 - printf("Render bounds: %g, %g ... %g, %g\n", - render->xmin, render->ymin, render->xmax, render->ymax); - printf("Render tiles: %u, %u .. %u, %u\n", *txmin, *tymin, txmax, tymax); -#endif -} - - -/** - * Render primitives - * \param pos_incr returns value indicating how may words to skip after - * this command in the batch buffer - */ static void -cmd_render(const struct cell_command_render *render, uint *pos_incr) +cmd_release_verts(const struct cell_command_release_verts *release) { - /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_MAX_VBUF_SIZE] ALIGN16_ATTRIB; - ushort index_data[CELL_MAX_VBUF_INDEXES] ALIGN16_ATTRIB; - const uint vertex_size = render->vertex_size; /* in bytes */ - const uint total_vertex_bytes = render->num_verts * vertex_size; - const ubyte *vertices; - const ushort *indexes; - uint mask; - uint i, j; - - - if (Debug) { - printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " - "inline_vert=%u inline_ind=%u\n", - spu.init.id, - render->prim_type, - render->num_verts, - render->num_indexes, - render->inline_verts, - render->inline_indexes); - - /* - printf(" bound: %g, %g .. %g, %g\n", - render->xmin, render->ymin, render->xmax, render->ymax); - */ - printf("SPU %u: indices at %p vertices at %p\n", - spu.init.id, - render->index_data, render->vertex_data); - } - - ASSERT(sizeof(*render) % 4 == 0); - ASSERT_ALIGN16(render->vertex_data); - ASSERT_ALIGN16(render->index_data); - - - /** - ** Get vertex, index buffers if not inlined - **/ - if (!render->inline_verts) { - ASSERT(total_vertex_bytes % 16 == 0); - - mfc_get(vertex_data, /* dest */ - (unsigned int) render->vertex_data, /* src */ - total_vertex_bytes, /* size */ - TAG_VERTEX_BUFFER, - 0, /* tid */ - 0 /* rid */); - - vertices = vertex_data; - } - - if (!render->inline_indexes) { - uint total_index_bytes; - - *pos_incr = 0; - - total_index_bytes = render->num_indexes * sizeof(ushort); - if (total_index_bytes < 16) - total_index_bytes = 16; - else - total_index_bytes = ROUNDUP16(total_index_bytes); - - indexes = index_data; - - /* get index data from main memory */ - mfc_get(index_data, /* dest */ - (unsigned int) render->index_data, /* src */ - total_index_bytes, - TAG_INDEX_BUFFER, - 0, /* tid */ - 0 /* rid */); - } - - - /** - ** Get pointers to inlined indexes, verts, if present - **/ - if (render->inline_indexes) { - /* indexes are right after the render command in the batch buffer */ - indexes = (ushort *) (render + 1); - *pos_incr = (render->num_indexes * 2 + 3) / 4; - - if (render->inline_verts) { - /* vertices are after indexes, if inlined */ - vertices = (const ubyte *) (render + 1) + *pos_incr * 4; - *pos_incr = *pos_incr + total_vertex_bytes / 4; - } - } - - - /* wait for vertex and/or index buffers if not inlined */ - mask = 0x0; - if (!render->inline_verts) - mask |= (1 << TAG_VERTEX_BUFFER); - if (!render->inline_indexes) - mask |= (1 << TAG_INDEX_BUFFER); - wait_on_mask_all(mask); - - - /** - ** find tiles which intersect the prim bounding box - **/ - uint txmin, tymin, box_width_tiles, box_num_tiles; -#if 0 - tile_bounding_box(render, &txmin, &tymin, - &box_num_tiles, &box_width_tiles); -#else - txmin = 0; - tymin = 0; - box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - box_width_tiles = spu.fb.width_tiles; -#endif - - /* make sure any pending clears have completed */ - wait_on_mask(1 << TAG_SURFACE_CLEAR); - - - /** - ** loop over tiles, rendering tris - **/ - for (i = spu.init.id; i < box_num_tiles; i += spu.init.num_spus) { - const uint tx = txmin + i % box_width_tiles; - const uint ty = tymin + i / box_width_tiles; - - ASSERT(tx < spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - - /* Start fetching color/z tiles. We'll wait for completion when - * we need read/write to them later in triangle rasterization. - */ - if (spu.depth_stencil.depth.enabled) { - if (tile_status_z[ty][tx] != TILE_STATUS_CLEAR) { - get_tile(tx, ty, &ztile, TAG_READ_TILE_Z, 1); - } - } - - if (tile_status[ty][tx] != TILE_STATUS_CLEAR) { - get_tile(tx, ty, &ctile, TAG_READ_TILE_COLOR, 0); - } - - ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); - ASSERT(render->num_indexes % 3 == 0); - - /* loop over tris */ - for (j = 0; j < render->num_indexes; j += 3) { - const float *v0, *v1, *v2; - - v0 = (const float *) (vertices + indexes[j+0] * vertex_size); - v1 = (const float *) (vertices + indexes[j+1] * vertex_size); - v2 = (const float *) (vertices + indexes[j+2] * vertex_size); - - tri_draw(v0, v1, v2, tx, ty); - } - - /* write color/z tiles back to main framebuffer, if dirtied */ - if (tile_status[ty][tx] == TILE_STATUS_DIRTY) { - put_tile(tx, ty, &ctile, TAG_WRITE_TILE_COLOR, 0); - tile_status[ty][tx] = TILE_STATUS_DEFINED; - } - if (spu.depth_stencil.depth.enabled) { - if (tile_status_z[ty][tx] == TILE_STATUS_DIRTY) { - put_tile(tx, ty, &ztile, TAG_WRITE_TILE_Z, 1); - tile_status_z[ty][tx] = TILE_STATUS_DEFINED; - } - } - - /* XXX move these... */ - wait_on_mask(1 << TAG_WRITE_TILE_COLOR); - if (spu.depth_stencil.depth.enabled) { - wait_on_mask(1 << TAG_WRITE_TILE_Z); - } - } - if (Debug) - printf("SPU %u: RENDER done\n", - spu.init.id); + printf("SPU %u: RELEASE VERTS %u\n", + spu.init.id, release->vertex_buf); + ASSERT(release->vertex_buf != ~0U); + release_buffer(release->vertex_buf); } @@ -421,6 +217,29 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) spu.fb.zsize = 2; else spu.fb.zsize = 0; + + if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) + spu.color_shuffle = ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); + else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM) + spu.color_shuffle = ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); + else + ASSERT(0); +} + + +static void +cmd_state_blend(const struct pipe_blend_state *state) +{ + if (Debug) + printf("SPU %u: BLEND: enabled %d\n", + spu.init.id, + state->blend_enable); + + memcpy(&spu.blend, state, sizeof(*state)); } @@ -444,19 +263,53 @@ cmd_state_sampler(const struct pipe_sampler_state *state) spu.init.id); memcpy(&spu.sampler[0], state, sizeof(*state)); + if (spu.sampler[0].min_img_filter == PIPE_TEX_FILTER_LINEAR) + spu.sample_texture = sample_texture_bilinear; + else + spu.sample_texture = sample_texture_nearest; } static void -cmd_state_vertex_info(const struct vertex_info *vinfo) +cmd_state_texture(const struct cell_command_texture *texture) { if (Debug) + printf("SPU %u: TEXTURE at %p size %u x %u\n", + spu.init.id, texture->start, texture->width, texture->height); + + memcpy(&spu.texture, texture, sizeof(*texture)); + spu.tex_size = (vector float) + { spu.texture.width, spu.texture.height, 0.0, 0.0}; + spu.tex_size_mask = (vector unsigned int) + { spu.texture.width - 1, spu.texture.height - 1, 0, 0 }; +} + + +static void +cmd_state_vertex_info(const struct vertex_info *vinfo) +{ + if (Debug) { printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id, vinfo->num_attribs); + } + ASSERT(vinfo->num_attribs >= 1); + ASSERT(vinfo->num_attribs <= 8); memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); } +static void +cmd_state_vs_array_info(const struct cell_array_info *vs_info) +{ + const unsigned attr = vs_info->attr; + + ASSERT(attr < PIPE_ATTRIB_MAX); + draw.vertex_fetch.src_ptr[attr] = vs_info->base; + draw.vertex_fetch.pitch[attr] = vs_info->pitch; + draw.vertex_fetch.format[attr] = vs_info->format; + draw.vertex_fetch.dirty = 1; +} + static void cmd_finish(void) @@ -473,38 +326,6 @@ cmd_finish(void) /** - * Tell the PPU that this SPU has finished copying a batch buffer to - * local store and that it may be reused by the PPU. - * This is done by writting a 16-byte batch-buffer-status block back into - * main memory (in cell_contex->buffer_status[]). - */ -static void -release_batch_buffer(uint buffer) -{ - /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const uint status[4] ALIGN16_ATTRIB - = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; - - const uint index = 4 * (spu.init.id * CELL_NUM_BATCH_BUFFERS + buffer); - uint *dst = spu.init.buffer_status + index; - - ASSERT(buffer < CELL_NUM_BATCH_BUFFERS); - - /* - printf("SPU %u: Set batch status buf=%u, index %u, at %p to FREE\n", - spu.init.id, buffer, index, dst); - */ - - mfc_put((void *) &status, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - sizeof(status), /* size */ - TAG_MISC, /* tag is unimportant */ - 0, /* tid */ - 0 /* rid */); -} - - -/** * Execute a batch of commands * The opcode param encodes the location of the buffer and its size. */ @@ -513,24 +334,24 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - uint buffer[CELL_BATCH_BUFFER_SIZE / 4] ALIGN16_ATTRIB; - const uint usize = size / sizeof(uint); + uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; + const unsigned usize = size / sizeof(buffer[0]); uint pos; if (Debug) printf("SPU %u: BATCH buffer %u, len %u, from %p\n", - spu.init.id, buf, size, spu.init.batch_buffers[buf]); + spu.init.id, buf, size, spu.init.buffers[buf]); ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); - ASSERT_ALIGN16(spu.init.batch_buffers[buf]); + ASSERT_ALIGN16(spu.init.buffers[buf]); size = ROUNDUP16(size); - ASSERT_ALIGN16(spu.init.batch_buffers[buf]); + ASSERT_ALIGN16(spu.init.buffers[buf]); mfc_get(buffer, /* dest */ - (unsigned int) spu.init.batch_buffers[buf], /* src */ + (unsigned int) spu.init.buffers[buf], /* src */ size, TAG_BATCH_BUFFER, 0, /* tid */ @@ -538,7 +359,9 @@ cmd_batch(uint opcode) wait_on_mask(1 << TAG_BATCH_BUFFER); /* Tell PPU we're done copying the buffer to local store */ - release_batch_buffer(buf); + if (Debug) + printf("SPU %u: release batch buf %u\n", spu.init.id, buf); + release_buffer(buf); for (pos = 0; pos < usize; /* no incr */) { switch (buffer[pos]) { @@ -547,7 +370,7 @@ cmd_batch(uint opcode) struct cell_command_framebuffer *fb = (struct cell_command_framebuffer *) &buffer[pos]; cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 4; + pos += sizeof(*fb) / 8; } break; case CELL_CMD_CLEAR_SURFACE: @@ -555,7 +378,7 @@ cmd_batch(uint opcode) struct cell_command_clear_surface *clr = (struct cell_command_clear_surface *) &buffer[pos]; cmd_clear_surface(clr); - pos += sizeof(*clr) / 4; + pos += sizeof(*clr) / 8; } break; case CELL_CMD_RENDER: @@ -564,28 +387,54 @@ cmd_batch(uint opcode) = (struct cell_command_render *) &buffer[pos]; uint pos_incr; cmd_render(render, &pos_incr); - pos += sizeof(*render) / 4 + pos_incr; + pos += pos_incr; + } + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 8; } break; case CELL_CMD_FINISH: cmd_finish(); pos += 1; break; + case CELL_CMD_STATE_BLEND: + cmd_state_blend((struct pipe_blend_state *) + &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8); + break; case CELL_CMD_STATE_DEPTH_STENCIL: cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *) &buffer[pos+1]); - pos += (1 + sizeof(struct pipe_depth_stencil_alpha_state) / 4); + pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8); break; case CELL_CMD_STATE_SAMPLER: cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]); - pos += (1 + sizeof(struct pipe_sampler_state) / 4); + pos += (1 + ROUNDUP8(sizeof(struct pipe_sampler_state)) / 8); + break; + case CELL_CMD_STATE_TEXTURE: + cmd_state_texture((struct cell_command_texture *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_command_texture)) / 8); break; case CELL_CMD_STATE_VERTEX_INFO: cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); - pos += (1 + sizeof(struct vertex_info) / 4); + pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); + break; + case CELL_CMD_STATE_VIEWPORT: + (void) memcpy(& draw.viewport, &buffer[pos+1], + sizeof(struct pipe_viewport_state)); + pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); + break; + case CELL_CMD_STATE_VS_ARRAY_INFO: + cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; default: - printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, buffer[pos]); + printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); ASSERT(0); break; } @@ -633,31 +482,22 @@ main_loop(void) 0 /* rid */); wait_on_mask( 1 << tag ); + /* + * NOTE: most commands should be contained in a batch buffer + */ + switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: if (Debug) printf("SPU %u: EXIT\n", spu.init.id); exitFlag = 1; break; - case CELL_CMD_STATE_FRAMEBUFFER: - cmd_state_framebuffer(&cmd.fb); - break; - case CELL_CMD_CLEAR_SURFACE: - cmd_clear_surface(&cmd.clear); - break; - case CELL_CMD_RENDER: - { - uint pos_incr; - cmd_render(&cmd.render, &pos_incr); - assert(pos_incr == 0); - } + case CELL_CMD_VS_EXECUTE: + spu_execute_vertex_shader(&draw, &cmd.vs); break; case CELL_CMD_BATCH: cmd_batch(opcode); break; - case CELL_CMD_FINISH: - cmd_finish(); - break; default: printf("Bad opcode!\n"); } @@ -673,11 +513,13 @@ main_loop(void) static void one_time_init(void) { - memset(tile_status, TILE_STATUS_DEFINED, sizeof(tile_status)); - memset(tile_status_z, TILE_STATUS_DEFINED, sizeof(tile_status_z)); + memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); + memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); + invalidate_tex_cache(); } + /* In some versions of the SDK the SPE main takes 'unsigned long' as a * parameter. In others it takes 'unsigned long long'. Use a define to * select between the two. @@ -698,6 +540,9 @@ main(main_param_t speid, main_param_t argp) (void) speid; + ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); + ASSERT(sizeof(struct cell_command_render) % 8 == 0); + one_time_init(); if (Debug) @@ -711,6 +556,10 @@ main(main_param_t speid, main_param_t argp) 0 /* rid */); wait_on_mask( 1 << tag ); +#if 0 + if (spu.init.id==0) + spu_test_misc(); +#endif main_loop(); |