From 64935c875128d2d1254b6b39ced72b9848d477fe Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 28 Jan 2008 18:17:30 -0700 Subject: Cell: move cmd_render() into new spu_render.c file --- src/mesa/pipe/cell/spu/spu_render.c | 240 ++++++++++++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 src/mesa/pipe/cell/spu/spu_render.c (limited to 'src/mesa/pipe/cell/spu/spu_render.c') diff --git a/src/mesa/pipe/cell/spu/spu_render.c b/src/mesa/pipe/cell/spu/spu_render.c new file mode 100644 index 0000000000..21a286a23d --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_render.c @@ -0,0 +1,240 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include +#include +#include + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_tri.h" +#include "spu_tile.h" +#include "pipe/cell/common.h" + + + +/** + * Given a rendering command's bounding box (in pixels) compute the + * location of the corresponding screen tile bounding box. + */ +static INLINE void +tile_bounding_box(const struct cell_command_render *render, + uint *txmin, uint *tymin, + uint *box_num_tiles, uint *box_width_tiles) +{ +#if 0 + /* Debug: full-window bounding box */ + uint txmax = spu.fb.width_tiles - 1; + uint tymax = spu.fb.height_tiles - 1; + *txmin = 0; + *tymin = 0; + *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + *box_width_tiles = spu.fb.width_tiles; + (void) render; + (void) txmax; + (void) tymax; +#else + uint txmax, tymax, box_height_tiles; + + *txmin = (uint) render->xmin / TILE_SIZE; + *tymin = (uint) render->ymin / TILE_SIZE; + txmax = (uint) render->xmax / TILE_SIZE; + tymax = (uint) render->ymax / TILE_SIZE; + *box_width_tiles = txmax - *txmin + 1; + box_height_tiles = tymax - *tymin + 1; + *box_num_tiles = *box_width_tiles * box_height_tiles; +#endif +#if 0 + printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, + render->xmin, render->ymin, render->xmax, render->ymax); + printf("SPU %u: tiles: %u, %u .. 
%u, %u\n", + spu.init.id, *txmin, *tymin, txmax, tymax); + ASSERT(render->xmin <= render->xmax); + ASSERT(render->ymin <= render->ymax); +#endif +} + + +/** Check if the tile at (tx,ty) belongs to this SPU */ +static INLINE boolean +my_tile(uint tx, uint ty) +{ + return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; +} + + +/** + * Render primitives + * \param pos_incr returns value indicating how may words to skip after + * this command in the batch buffer + */ +void +cmd_render(const struct cell_command_render *render, uint *pos_incr) +{ + /* we'll DMA into these buffers */ + ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + const uint vertex_size = render->vertex_size; /* in bytes */ + /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; + const ubyte *vertices; + const ushort *indexes; + uint i, j; + + + if (Debug) { + printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " + "inline_vert=%u\n", + spu.init.id, + render->prim_type, + render->num_verts, + render->num_indexes, + render->inline_verts); + + /* + printf(" bound: %g, %g .. 
%g, %g\n", + render->xmin, render->ymin, render->xmax, render->ymax); + */ + } + + ASSERT(sizeof(*render) % 4 == 0); + ASSERT(total_vertex_bytes % 16 == 0); + + /* indexes are right after the render command in the batch buffer */ + indexes = (const ushort *) (render + 1); + *pos_incr = (render->num_indexes * 2 + 3) / 4; + + + if (render->inline_verts) { + /* Vertices are right after indexes in batch buffer */ + vertices = (const ubyte *) (render + 1) + *pos_incr * 4; + *pos_incr = *pos_incr + total_vertex_bytes / 4; + } + else { + /* Begin DMA fetch of vertex buffer */ + ubyte *src = spu.init.buffers[render->vertex_buf]; + ubyte *dest = vertex_data; + + /* skip vertex data we won't use */ +#if 01 + src += render->min_index * vertex_size; + dest += render->min_index * vertex_size; + total_vertex_bytes -= render->min_index * vertex_size; +#endif + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT_ALIGN16(dest); + ASSERT_ALIGN16(src); + + mfc_get(dest, /* in vertex_data[] array */ + (unsigned int) src, /* src in main memory */ + total_vertex_bytes, /* size */ + TAG_VERTEX_BUFFER, + 0, /* tid */ + 0 /* rid */); + + vertices = vertex_data; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + } + + + /** + ** find tiles which intersect the prim bounding box + **/ + uint txmin, tymin, box_width_tiles, box_num_tiles; + tile_bounding_box(render, &txmin, &tymin, + &box_num_tiles, &box_width_tiles); + + + /* make sure any pending clears have completed */ + wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + + + /** + ** loop over tiles, rendering tris + **/ + for (i = 0; i < box_num_tiles; i++) { + const uint tx = txmin + i % box_width_tiles; + const uint ty = tymin + i / box_width_tiles; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + + if (!my_tile(tx, ty)) + continue; + + /* Start fetching color/z tiles. We'll wait for completion when + * we need read/write to them later in triangle rasterization. 
+ */ + if (spu.depth_stencil.depth.enabled) { + if (tile_status_z[ty][tx] != TILE_STATUS_CLEAR) { + get_tile(tx, ty, &ztile, TAG_READ_TILE_Z, 1); + } + } + + if (tile_status[ty][tx] != TILE_STATUS_CLEAR) { + get_tile(tx, ty, &ctile, TAG_READ_TILE_COLOR, 0); + } + + ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); + ASSERT(render->num_indexes % 3 == 0); + + /* loop over tris */ + for (j = 0; j < render->num_indexes; j += 3) { + const float *v0, *v1, *v2; + + v0 = (const float *) (vertices + indexes[j+0] * vertex_size); + v1 = (const float *) (vertices + indexes[j+1] * vertex_size); + v2 = (const float *) (vertices + indexes[j+2] * vertex_size); + + tri_draw(v0, v1, v2, tx, ty); + } + + /* write color/z tiles back to main framebuffer, if dirtied */ + if (tile_status[ty][tx] == TILE_STATUS_DIRTY) { + put_tile(tx, ty, &ctile, TAG_WRITE_TILE_COLOR, 0); + tile_status[ty][tx] = TILE_STATUS_DEFINED; + } + if (spu.depth_stencil.depth.enabled) { + if (tile_status_z[ty][tx] == TILE_STATUS_DIRTY) { + put_tile(tx, ty, &ztile, TAG_WRITE_TILE_Z, 1); + tile_status_z[ty][tx] = TILE_STATUS_DEFINED; + } + } + + /* XXX move these... 
*/ + wait_on_mask(1 << TAG_WRITE_TILE_COLOR); + if (spu.depth_stencil.depth.enabled) { + wait_on_mask(1 << TAG_WRITE_TILE_Z); + } + } + + if (Debug) + printf("SPU %u: RENDER done\n", + spu.init.id); +} + + -- cgit v1.2.3 From dcf41a0eed71a67060b4efa9ab4befc86eafc177 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 30 Jan 2008 11:56:41 -0700 Subject: Cell: minor code refactoring, movement --- src/mesa/pipe/cell/spu/spu_render.c | 85 ++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 30 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_render.c') diff --git a/src/mesa/pipe/cell/spu/spu_render.c b/src/mesa/pipe/cell/spu/spu_render.c index 21a286a23d..f506095116 100644 --- a/src/mesa/pipe/cell/spu/spu_render.c +++ b/src/mesa/pipe/cell/spu/spu_render.c @@ -88,6 +88,55 @@ my_tile(uint tx, uint ty) } +/** + * Start fetching non-clear color/Z tiles from main memory + */ +static INLINE void +get_cz_tiles(uint tx, uint ty) +{ + if (spu.depth_stencil.depth.enabled) { + if (tile_status_z[ty][tx] != TILE_STATUS_CLEAR) { + get_tile(tx, ty, &ztile, TAG_READ_TILE_Z, 1); + } + } + + if (tile_status[ty][tx] != TILE_STATUS_CLEAR) { + get_tile(tx, ty, &ctile, TAG_READ_TILE_COLOR, 0); + } +} + + +/** + * Start putting dirty color/Z tiles back to main memory + */ +static INLINE void +put_cz_tiles(uint tx, uint ty) +{ + if (tile_status_z[ty][tx] == TILE_STATUS_DIRTY) { + put_tile(tx, ty, &ztile, TAG_WRITE_TILE_Z, 1); + tile_status_z[ty][tx] = TILE_STATUS_DEFINED; + } + + if (tile_status[ty][tx] == TILE_STATUS_DIRTY) { + put_tile(tx, ty, &ctile, TAG_WRITE_TILE_COLOR, 0); + tile_status[ty][tx] = TILE_STATUS_DEFINED; + } +} + + +/** + * Wait for 'put' of color/z tiles to complete. 
+ */ +static INLINE void +wait_put_cz_tiles(void) +{ + wait_on_mask(1 << TAG_WRITE_TILE_COLOR); + if (spu.depth_stencil.depth.enabled) { + wait_on_mask(1 << TAG_WRITE_TILE_Z); + } +} + + /** * Render primitives * \param pos_incr returns value indicating how may words to skip after @@ -122,6 +171,9 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) ASSERT(sizeof(*render) % 4 == 0); ASSERT(total_vertex_bytes % 16 == 0); + ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); + ASSERT(render->num_indexes % 3 == 0); + /* indexes are right after the render command in the batch buffer */ indexes = (const ushort *) (render + 1); @@ -186,21 +238,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) if (!my_tile(tx, ty)) continue; - /* Start fetching color/z tiles. We'll wait for completion when - * we need read/write to them later in triangle rasterization. - */ - if (spu.depth_stencil.depth.enabled) { - if (tile_status_z[ty][tx] != TILE_STATUS_CLEAR) { - get_tile(tx, ty, &ztile, TAG_READ_TILE_Z, 1); - } - } - - if (tile_status[ty][tx] != TILE_STATUS_CLEAR) { - get_tile(tx, ty, &ctile, TAG_READ_TILE_COLOR, 0); - } - - ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); - ASSERT(render->num_indexes % 3 == 0); + get_cz_tiles(tx, ty); /* loop over tris */ for (j = 0; j < render->num_indexes; j += 3) { @@ -214,22 +252,9 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) } /* write color/z tiles back to main framebuffer, if dirtied */ - if (tile_status[ty][tx] == TILE_STATUS_DIRTY) { - put_tile(tx, ty, &ctile, TAG_WRITE_TILE_COLOR, 0); - tile_status[ty][tx] = TILE_STATUS_DEFINED; - } - if (spu.depth_stencil.depth.enabled) { - if (tile_status_z[ty][tx] == TILE_STATUS_DIRTY) { - put_tile(tx, ty, &ztile, TAG_WRITE_TILE_Z, 1); - tile_status_z[ty][tx] = TILE_STATUS_DEFINED; - } - } + put_cz_tiles(tx, ty); - /* XXX move these... 
*/ - wait_on_mask(1 << TAG_WRITE_TILE_COLOR); - if (spu.depth_stencil.depth.enabled) { - wait_on_mask(1 << TAG_WRITE_TILE_Z); - } + wait_put_cz_tiles(); /* XXX seems unnecessary... */ } if (Debug) -- cgit v1.2.3 From b108bea6b44c1abc6d61e3e47096e5122de89cd1 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 1 Feb 2008 09:27:57 -0700 Subject: Cell: store current tile status in cur_tile_status_c/z, add TILE_STATUS_GETTING --- src/mesa/pipe/cell/spu/spu_render.c | 36 ++++++++++++++++----- src/mesa/pipe/cell/spu/spu_tile.c | 1 + src/mesa/pipe/cell/spu/spu_tile.h | 8 +++-- src/mesa/pipe/cell/spu/spu_tri.c | 62 ++++++++++++++++++++++++++++++------- src/mesa/pipe/cell/spu/spu_tri.h | 2 +- 5 files changed, 87 insertions(+), 22 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_render.c') diff --git a/src/mesa/pipe/cell/spu/spu_render.c b/src/mesa/pipe/cell/spu/spu_render.c index f506095116..ca54a103bd 100644 --- a/src/mesa/pipe/cell/spu/spu_render.c +++ b/src/mesa/pipe/cell/spu/spu_render.c @@ -95,13 +95,15 @@ static INLINE void get_cz_tiles(uint tx, uint ty) { if (spu.depth_stencil.depth.enabled) { - if (tile_status_z[ty][tx] != TILE_STATUS_CLEAR) { + if (cur_tile_status_z != TILE_STATUS_CLEAR) { get_tile(tx, ty, &ztile, TAG_READ_TILE_Z, 1); + cur_tile_status_z = TILE_STATUS_GETTING; } } - if (tile_status[ty][tx] != TILE_STATUS_CLEAR) { + if (cur_tile_status_c != TILE_STATUS_CLEAR) { get_tile(tx, ty, &ctile, TAG_READ_TILE_COLOR, 0); + cur_tile_status_c = TILE_STATUS_GETTING; } } @@ -112,14 +114,24 @@ get_cz_tiles(uint tx, uint ty) static INLINE void put_cz_tiles(uint tx, uint ty) { - if (tile_status_z[ty][tx] == TILE_STATUS_DIRTY) { + if (cur_tile_status_z == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ put_tile(tx, ty, &ztile, TAG_WRITE_TILE_Z, 1); - tile_status_z[ty][tx] = TILE_STATUS_DEFINED; + cur_tile_status_z = TILE_STATUS_DEFINED; + } + else if (cur_tile_status_z == TILE_STATUS_GETTING) { + /* tile was never used */ + cur_tile_status_z 
= TILE_STATUS_DEFINED; } - if (tile_status[ty][tx] == TILE_STATUS_DIRTY) { + if (cur_tile_status_c == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ put_tile(tx, ty, &ctile, TAG_WRITE_TILE_COLOR, 0); - tile_status[ty][tx] = TILE_STATUS_DEFINED; + cur_tile_status_c = TILE_STATUS_DEFINED; + } + else if (cur_tile_status_c == TILE_STATUS_GETTING) { + /* tile was never used */ + cur_tile_status_c = TILE_STATUS_DEFINED; } } @@ -238,8 +250,13 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) if (!my_tile(tx, ty)) continue; + cur_tile_status_c = tile_status[ty][tx]; + cur_tile_status_z = tile_status_z[ty][tx]; + get_cz_tiles(tx, ty); + uint drawn = 0; + /* loop over tris */ for (j = 0; j < render->num_indexes; j += 3) { const float *v0, *v1, *v2; @@ -248,13 +265,18 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) v1 = (const float *) (vertices + indexes[j+1] * vertex_size); v2 = (const float *) (vertices + indexes[j+2] * vertex_size); - tri_draw(v0, v1, v2, tx, ty); + drawn += tri_draw(v0, v1, v2, tx, ty); } + //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); + /* write color/z tiles back to main framebuffer, if dirtied */ put_cz_tiles(tx, ty); wait_put_cz_tiles(); /* XXX seems unnecessary... 
*/ + + tile_status[ty][tx] = cur_tile_status_c; + tile_status_z[ty][tx] = cur_tile_status_z; } if (Debug) diff --git a/src/mesa/pipe/cell/spu/spu_tile.c b/src/mesa/pipe/cell/spu/spu_tile.c index ca1352f9f8..aea4785bc2 100644 --- a/src/mesa/pipe/cell/spu/spu_tile.c +++ b/src/mesa/pipe/cell/spu/spu_tile.c @@ -37,6 +37,7 @@ tile_t ztile ALIGN16_ATTRIB; ubyte tile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; ubyte tile_status_z[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; +ubyte cur_tile_status_c, cur_tile_status_z; void diff --git a/src/mesa/pipe/cell/spu/spu_tile.h b/src/mesa/pipe/cell/spu/spu_tile.h index 18d1b3c117..1f123a2b7b 100644 --- a/src/mesa/pipe/cell/spu/spu_tile.h +++ b/src/mesa/pipe/cell/spu/spu_tile.h @@ -51,12 +51,16 @@ extern tile_t ztile ALIGN16_ATTRIB; #define TILE_STATUS_CLEAR 1 -#define TILE_STATUS_DEFINED 2 /**< defined pixel data */ -#define TILE_STATUS_DIRTY 3 /**< modified, but not put back yet */ +#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ +#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ +#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ +#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ extern ubyte tile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; extern ubyte tile_status_z[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; +extern ubyte cur_tile_status_c, cur_tile_status_z; + void get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); diff --git a/src/mesa/pipe/cell/spu/spu_tri.c b/src/mesa/pipe/cell/spu/spu_tri.c index 08b8bf0c9c..a32878d917 100644 --- a/src/mesa/pipe/cell/spu/spu_tri.c +++ b/src/mesa/pipe/cell/spu/spu_tri.c @@ -299,16 +299,23 @@ do_depth_test(int x, int y, unsigned int mask) zvals.v = eval_z((float) x, (float) y); - if (tile_status_z[setup.ty][setup.tx] == TILE_STATUS_CLEAR) { + if (cur_tile_status_c == TILE_STATUS_CLEAR) { /* now, _really_ clear the 
tile */ clear_z_tile(&ztile); + cur_tile_status_z = TILE_STATUS_DIRTY; } - else if (tile_status_z[setup.ty][setup.tx] != TILE_STATUS_DIRTY) { + +#if 0 + if (cur_tile_status_z == TILE_STATUS_CLEAR) { + /* now, _really_ clear the tile */ + clear_z_tile(&ztile); + } + else if (cur_tile_status_z != TILE_STATUS_DIRTY) { /* make sure we've got the tile from main mem */ wait_on_mask(1 << TAG_READ_TILE_Z); } - tile_status_z[setup.ty][setup.tx] = TILE_STATUS_DIRTY; - + cur_tile_status_z = TILE_STATUS_DIRTY; +#endif if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { zvals.v = spu_mul(zvals.v, zscale16.v); @@ -380,6 +387,9 @@ do_depth_test(int x, int y, unsigned int mask) } } + if (mask) + cur_tile_status_z = TILE_STATUS_DIRTY; + return mask; } @@ -397,15 +407,15 @@ do_depth_test_simd(int x, int y, vector unsigned int quadmask) zvals.v = eval_z((float) x, (float) y); - if (tile_status_z[setup.ty][setup.tx] == TILE_STATUS_CLEAR) { + if (cur_tile_status_z == TILE_STATUS_CLEAR) { /* now, _really_ clear the tile */ clear_z_tile(&ztile); } - else if (tile_status_z[setup.ty][setup.tx] != TILE_STATUS_DIRTY) { + else if (cur_tile_status_z != TILE_STATUS_DIRTY) { /* make sure we've got the tile from main mem */ wait_on_mask(1 << TAG_READ_TILE_Z); } - tile_status_z[setup.ty][setup.tx] = TILE_STATUS_DIRTY; + cur_tile_status_z = TILE_STATUS_DIRTY; /* XXX fetch Z value sooner to hide latency here */ zmask = spu_cmpgt(ztile.f4[ix][iy].v, zvals.v); @@ -462,15 +472,23 @@ emit_quad( int x, int y, mask_t mask ) if (mask) #endif { - if (tile_status[setup.ty][setup.tx] == TILE_STATUS_CLEAR) { + if (cur_tile_status_c == TILE_STATUS_CLEAR) { /* now, _really_ clear the tile */ clear_c_tile(&ctile); } - else if (tile_status[setup.ty][setup.tx] != TILE_STATUS_DIRTY) { + +#if 0 + if (cur_tile_status_c == TILE_STATUS_CLEAR) { + /* now, _really_ clear the tile */ + clear_c_tile(&ctile); + cur_tile_status_c = TILE_STATUS_DIRTY; + } + else if (cur_tile_status_c != TILE_STATUS_DIRTY) { /* make sure we've 
got the tile from main mem */ wait_on_mask(1 << TAG_READ_TILE_COLOR); } - tile_status[setup.ty][setup.tx] = TILE_STATUS_DIRTY; +#endif + cur_tile_status_c = TILE_STATUS_DIRTY; #if SIMD_Z if (spu_extract(mask, 0)) @@ -970,7 +988,7 @@ static void subtriangle( struct edge *eleft, * Draw triangle into tile at (tx, ty) (tile coords) * The tile data should have already been fetched. */ -void +boolean tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) { setup.tx = tx; @@ -985,7 +1003,7 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) if (!setup_sort_vertices((struct vertex_header *) v0, (struct vertex_header *) v1, (struct vertex_header *) v2)) { - return; /* totally clipped */ + return FALSE; /* totally clipped */ } setup_tri_coefficients(); @@ -999,6 +1017,24 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) /* init_constant_attribs( setup ); */ + if (cur_tile_status_c == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + wait_on_mask(1 << TAG_READ_TILE_COLOR); + cur_tile_status_c = TILE_STATUS_CLEAN; + } + + ASSERT(cur_tile_status_c != TILE_STATUS_DEFINED); + + if (spu.depth_stencil.depth.enabled) { + if (cur_tile_status_z == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + wait_on_mask(1 << TAG_READ_TILE_Z); + cur_tile_status_z = TILE_STATUS_CLEAN; + } + ASSERT(cur_tile_status_z != TILE_STATUS_DEFINED); + } + + if (setup.oneoverarea < 0.0) { /* emaj on left: */ @@ -1013,4 +1049,6 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) } flush_spans(); + + return TRUE; } diff --git a/src/mesa/pipe/cell/spu/spu_tri.h b/src/mesa/pipe/cell/spu/spu_tri.h index 86c42b6339..aa694dd7c9 100644 --- a/src/mesa/pipe/cell/spu/spu_tri.h +++ b/src/mesa/pipe/cell/spu/spu_tri.h @@ -30,7 +30,7 @@ #define SPU_TRI_H -extern void +extern boolean tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); -- cgit v1.2.3 From 
42201d7574ebb1582563988820c248680081c42f Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 1 Feb 2008 15:33:53 -0700 Subject: Cell: rename/move global vars Put tile-related globals into spu_global struct. Rename c/ztile fields to be more consistent. --- src/mesa/pipe/cell/spu/spu_main.c | 28 +++++++++++++------------- src/mesa/pipe/cell/spu/spu_main.h | 32 +++++++++++++++++++++++++++++ src/mesa/pipe/cell/spu/spu_render.c | 40 ++++++++++++++++++------------------- src/mesa/pipe/cell/spu/spu_tile.c | 11 +--------- src/mesa/pipe/cell/spu/spu_tile.h | 27 ------------------------- src/mesa/pipe/cell/spu/spu_tri.c | 38 +++++++++++++++++------------------ 6 files changed, 86 insertions(+), 90 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_render.c') diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c index 1760de02b7..8e3987f6ef 100644 --- a/src/mesa/pipe/cell/spu/spu_main.c +++ b/src/mesa/pipe/cell/spu/spu_main.c @@ -92,24 +92,24 @@ really_clear_tiles(uint surfaceIndex) uint i; if (surfaceIndex == 0) { - clear_c_tile(&ctile); + clear_c_tile(&spu.ctile); for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { uint tx = i % spu.fb.width_tiles; uint ty = i / spu.fb.width_tiles; - if (tile_status[ty][tx] == TILE_STATUS_CLEAR) { - put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 0); + if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); } } } else { - clear_z_tile(&ztile); + clear_z_tile(&spu.ztile); for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { uint tx = i % spu.fb.width_tiles; uint ty = i / spu.fb.width_tiles; - if (tile_status_z[ty][tx] == TILE_STATUS_CLEAR) - put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 1); + if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); } } @@ -133,11 +133,11 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) #if CLEAR_OPT /* set all tile's status to CLEAR */ if 
(clear->surface == 0) { - memset(tile_status, TILE_STATUS_CLEAR, sizeof(tile_status)); + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); spu.fb.color_clear_value = clear->value; } else { - memset(tile_status_z, TILE_STATUS_CLEAR, sizeof(tile_status_z)); + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); spu.fb.depth_clear_value = clear->value; } return; @@ -145,11 +145,11 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) if (clear->surface == 0) { spu.fb.color_clear_value = clear->value; - clear_c_tile(&ctile); + clear_c_tile(&spu.ctile); } else { spu.fb.depth_clear_value = clear->value; - clear_z_tile(&ztile); + clear_z_tile(&spu.ztile); } /* @@ -161,9 +161,9 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) uint tx = i % spu.fb.width_tiles; uint ty = i / spu.fb.width_tiles; if (clear->surface == 0) - put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 0); + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); else - put_tile(tx, ty, &ztile, TAG_SURFACE_CLEAR, 1); + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); /* XXX we don't want this here, but it fixes bad tile results */ } @@ -478,8 +478,8 @@ main_loop(void) static void one_time_init(void) { - memset(tile_status, TILE_STATUS_DEFINED, sizeof(tile_status)); - memset(tile_status_z, TILE_STATUS_DEFINED, sizeof(tile_status_z)); + memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); + memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); invalidate_tex_cache(); } diff --git a/src/mesa/pipe/cell/spu/spu_main.h b/src/mesa/pipe/cell/spu/spu_main.h index 8be5268f52..cce5e70802 100644 --- a/src/mesa/pipe/cell/spu/spu_main.h +++ b/src/mesa/pipe/cell/spu/spu_main.h @@ -36,6 +36,11 @@ #include "pipe/p_state.h" + +#define MAX_WIDTH 1024 +#define MAX_HEIGHT 1024 + + typedef union { vector float v; @@ -43,6 +48,21 @@ typedef union } float4; +typedef union { + ushort us[TILE_SIZE][TILE_SIZE]; + uint 
ui[TILE_SIZE][TILE_SIZE]; + vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; + vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; +} tile_t; + + +#define TILE_STATUS_CLEAR 1 +#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ +#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ +#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ +#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ + + struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ @@ -75,6 +95,18 @@ struct spu_global /* XXX more state to come */ + + /** current color and Z tiles */ + tile_t ctile ALIGN16_ATTRIB; + tile_t ztile ALIGN16_ATTRIB; + + /** Current tiles' status */ + ubyte cur_ctile_status, cur_ztile_status; + + /** Status of all tiles in framebuffer */ + ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + } ALIGN16_ATTRIB; diff --git a/src/mesa/pipe/cell/spu/spu_render.c b/src/mesa/pipe/cell/spu/spu_render.c index ca54a103bd..ab711d67fe 100644 --- a/src/mesa/pipe/cell/spu/spu_render.c +++ b/src/mesa/pipe/cell/spu/spu_render.c @@ -95,15 +95,15 @@ static INLINE void get_cz_tiles(uint tx, uint ty) { if (spu.depth_stencil.depth.enabled) { - if (cur_tile_status_z != TILE_STATUS_CLEAR) { - get_tile(tx, ty, &ztile, TAG_READ_TILE_Z, 1); - cur_tile_status_z = TILE_STATUS_GETTING; + if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { + get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_GETTING; } } - if (cur_tile_status_c != TILE_STATUS_CLEAR) { - get_tile(tx, ty, &ctile, TAG_READ_TILE_COLOR, 0); - cur_tile_status_c = TILE_STATUS_GETTING; + if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { + get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); + spu.cur_ctile_status = 
TILE_STATUS_GETTING; } } @@ -114,24 +114,24 @@ get_cz_tiles(uint tx, uint ty) static INLINE void put_cz_tiles(uint tx, uint ty) { - if (cur_tile_status_z == TILE_STATUS_DIRTY) { + if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { /* tile was modified and needs to be written back */ - put_tile(tx, ty, &ztile, TAG_WRITE_TILE_Z, 1); - cur_tile_status_z = TILE_STATUS_DEFINED; + put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_DEFINED; } - else if (cur_tile_status_z == TILE_STATUS_GETTING) { + else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { /* tile was never used */ - cur_tile_status_z = TILE_STATUS_DEFINED; + spu.cur_ztile_status = TILE_STATUS_DEFINED; } - if (cur_tile_status_c == TILE_STATUS_DIRTY) { + if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { /* tile was modified and needs to be written back */ - put_tile(tx, ty, &ctile, TAG_WRITE_TILE_COLOR, 0); - cur_tile_status_c = TILE_STATUS_DEFINED; + put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_DEFINED; } - else if (cur_tile_status_c == TILE_STATUS_GETTING) { + else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { /* tile was never used */ - cur_tile_status_c = TILE_STATUS_DEFINED; + spu.cur_ctile_status = TILE_STATUS_DEFINED; } } @@ -250,8 +250,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) if (!my_tile(tx, ty)) continue; - cur_tile_status_c = tile_status[ty][tx]; - cur_tile_status_z = tile_status_z[ty][tx]; + spu.cur_ctile_status = spu.ctile_status[ty][tx]; + spu.cur_ztile_status = spu.ztile_status[ty][tx]; get_cz_tiles(tx, ty); @@ -275,8 +275,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) wait_put_cz_tiles(); /* XXX seems unnecessary... 
*/ - tile_status[ty][tx] = cur_tile_status_c; - tile_status_z[ty][tx] = cur_tile_status_z; + spu.ctile_status[ty][tx] = spu.cur_ctile_status; + spu.ztile_status[ty][tx] = spu.cur_ztile_status; } if (Debug) diff --git a/src/mesa/pipe/cell/spu/spu_tile.c b/src/mesa/pipe/cell/spu/spu_tile.c index fd65c2b49c..12dc246328 100644 --- a/src/mesa/pipe/cell/spu/spu_tile.c +++ b/src/mesa/pipe/cell/spu/spu_tile.c @@ -28,16 +28,7 @@ #include "spu_tile.h" - - - -tile_t ctile ALIGN16_ATTRIB; -tile_t ztile ALIGN16_ATTRIB; - -ubyte tile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; -ubyte tile_status_z[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - -ubyte cur_tile_status_c, cur_tile_status_z; +#include "spu_main.h" void diff --git a/src/mesa/pipe/cell/spu/spu_tile.h b/src/mesa/pipe/cell/spu/spu_tile.h index 85a0d55807..e53340a55a 100644 --- a/src/mesa/pipe/cell/spu/spu_tile.h +++ b/src/mesa/pipe/cell/spu/spu_tile.h @@ -35,33 +35,6 @@ #include "pipe/cell/common.h" -#define MAX_WIDTH 1024 -#define MAX_HEIGHT 1024 - - -typedef union { - ushort us[TILE_SIZE][TILE_SIZE]; - uint ui[TILE_SIZE][TILE_SIZE]; - vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; - vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; -} tile_t; - - -extern tile_t ctile ALIGN16_ATTRIB; -extern tile_t ztile ALIGN16_ATTRIB; - - -#define TILE_STATUS_CLEAR 1 -#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ -#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ -#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ -#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ - -extern ubyte tile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; -extern ubyte tile_status_z[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - -extern ubyte cur_tile_status_c, cur_tile_status_z; - void get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); diff --git 
a/src/mesa/pipe/cell/spu/spu_tri.c b/src/mesa/pipe/cell/spu/spu_tri.c index ae8fd17cc6..6f61a3d816 100644 --- a/src/mesa/pipe/cell/spu/spu_tri.c +++ b/src/mesa/pipe/cell/spu/spu_tri.c @@ -283,21 +283,21 @@ do_depth_test(int x, int y, mask_t quadmask) zvals.v = eval_z((float) x, (float) y); - if (cur_tile_status_c == TILE_STATUS_CLEAR) { + if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { /* now, _really_ clear the tile */ - clear_z_tile(&ztile); - cur_tile_status_z = TILE_STATUS_DIRTY; + clear_z_tile(&spu.ztile); + spu.cur_ztile_status = TILE_STATUS_DIRTY; } if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { int ix = (x - setup.cliprect_minx) / 4; int iy = (y - setup.cliprect_miny) / 2; - mask = spu_z16_test_less(zvals.v, &ztile.us8[iy][ix], x>>1, quadmask); + mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask); } else { int ix = (x - setup.cliprect_minx) / 2; int iy = (y - setup.cliprect_miny) / 2; - mask = spu_z32_test_less(zvals.v, &ztile.ui4[iy][ix], quadmask); + mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask); } return mask; } @@ -341,25 +341,25 @@ emit_quad( int x, int y, mask_t mask ) pack_colors(colors, fcolors); } - if (cur_tile_status_c == TILE_STATUS_CLEAR) { + if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { /* now, _really_ clear the tile */ - clear_c_tile(&ctile); + clear_c_tile(&spu.ctile); } - cur_tile_status_c = TILE_STATUS_DIRTY; + spu.cur_ctile_status = TILE_STATUS_DIRTY; #if 1 if (spu_extract(mask, 0)) - ctile.ui[iy][ix] = colors[QUAD_TOP_LEFT]; + spu.ctile.ui[iy][ix] = colors[QUAD_TOP_LEFT]; if (spu_extract(mask, 1)) - ctile.ui[iy][ix+1] = colors[QUAD_TOP_RIGHT]; + spu.ctile.ui[iy][ix+1] = colors[QUAD_TOP_RIGHT]; if (spu_extract(mask, 2)) - ctile.ui[iy+1][ix] = colors[QUAD_BOTTOM_LEFT]; + spu.ctile.ui[iy+1][ix] = colors[QUAD_BOTTOM_LEFT]; if (spu_extract(mask, 3)) - ctile.ui[iy+1][ix+1] = colors[QUAD_BOTTOM_RIGHT]; + spu.ctile.ui[iy+1][ix+1] = colors[QUAD_BOTTOM_RIGHT]; #else /* SIMD_Z with swizzled 
color buffer (someday) */ vector unsigned int uicolors = *((vector unsigned int *) &colors); - ctile.ui4[iy/2][ix/2] = spu_sel(ctile.ui4[iy/2][ix/2], uicolors, mask); + spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask); #endif } @@ -846,21 +846,21 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) /* init_constant_attribs( setup ); */ - if (cur_tile_status_c == TILE_STATUS_GETTING) { + if (spu.cur_ctile_status == TILE_STATUS_GETTING) { /* wait for mfc_get() to complete */ wait_on_mask(1 << TAG_READ_TILE_COLOR); - cur_tile_status_c = TILE_STATUS_CLEAN; + spu.cur_ctile_status = TILE_STATUS_CLEAN; } - ASSERT(cur_tile_status_c != TILE_STATUS_DEFINED); + ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); if (spu.depth_stencil.depth.enabled) { - if (cur_tile_status_z == TILE_STATUS_GETTING) { + if (spu.cur_ztile_status == TILE_STATUS_GETTING) { /* wait for mfc_get() to complete */ wait_on_mask(1 << TAG_READ_TILE_Z); - cur_tile_status_z = TILE_STATUS_CLEAN; + spu.cur_ztile_status = TILE_STATUS_CLEAN; } - ASSERT(cur_tile_status_z != TILE_STATUS_DEFINED); + ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); } -- cgit v1.2.3 From 6023311c7ce336f727d7aa6d5266e88a55b88d36 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 4 Feb 2008 08:46:44 -0700 Subject: Cell: clamp txmax, tymax in tile_bounding_box() Also, added some debug printfs --- src/mesa/pipe/cell/spu/spu_render.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/mesa/pipe/cell/spu/spu_render.c') diff --git a/src/mesa/pipe/cell/spu/spu_render.c b/src/mesa/pipe/cell/spu/spu_render.c index ab711d67fe..e8705eeeba 100644 --- a/src/mesa/pipe/cell/spu/spu_render.c +++ b/src/mesa/pipe/cell/spu/spu_render.c @@ -65,6 +65,10 @@ tile_bounding_box(const struct cell_command_render *render, *tymin = (uint) render->ymin / TILE_SIZE; txmax = (uint) render->xmax / TILE_SIZE; tymax = (uint) render->ymax / TILE_SIZE; + if (txmax >= spu.fb.width_tiles) + txmax = 
spu.fb.width_tiles-1; + if (tymax >= spu.fb.height_tiles) + tymax = spu.fb.height_tiles-1; *box_width_tiles = txmax - *txmin + 1; box_height_tiles = tymax - *tymin + 1; *box_num_tiles = *box_width_tiles * box_height_tiles; @@ -96,12 +100,14 @@ get_cz_tiles(uint tx, uint ty) { if (spu.depth_stencil.depth.enabled) { if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); spu.cur_ztile_status = TILE_STATUS_GETTING; } } if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); spu.cur_ctile_status = TILE_STATUS_GETTING; } @@ -116,22 +122,26 @@ put_cz_tiles(uint tx, uint ty) { if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); spu.cur_ztile_status = TILE_STATUS_DEFINED; } else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { /* tile was never used */ spu.cur_ztile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); } if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); spu.cur_ctile_status = TILE_STATUS_DEFINED; } else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { /* tile was never used */ spu.cur_ctile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); } } -- cgit v1.2.3 From 4da82fd5c5e0a7535e30aa81f08dcbe1a26358b7 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 5 Feb 2008 14:23:34 -0700 Subject: Cell: re-enable inlined vertex buffers Vertex data must be on a 16-byte address/offset so SIMD operations 
will work properly in the SPU code. --- src/mesa/pipe/cell/ppu/cell_vbuf.c | 12 +++++------- src/mesa/pipe/cell/spu/spu_main.c | 3 ++- src/mesa/pipe/cell/spu/spu_render.c | 12 ++++++++---- 3 files changed, 15 insertions(+), 12 deletions(-) (limited to 'src/mesa/pipe/cell/spu/spu_render.c') diff --git a/src/mesa/pipe/cell/ppu/cell_vbuf.c b/src/mesa/pipe/cell/ppu/cell_vbuf.c index 0fee61821a..e9fafe492e 100644 --- a/src/mesa/pipe/cell/ppu/cell_vbuf.c +++ b/src/mesa/pipe/cell/ppu/cell_vbuf.c @@ -40,7 +40,7 @@ /** Allow vertex data to be inlined after RENDER command */ -#define ALLOW_INLINE_VERTS 0 +#define ALLOW_INLINE_VERTS 1 /** @@ -199,9 +199,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, { const uint index_bytes = ROUNDUP8(nr_indices * 2); const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size; - - const uint batch_size = sizeof(struct cell_command_render) - + index_bytes; + const uint batch_size = sizeof(struct cell_command_render) + index_bytes; struct cell_command_render *render = (struct cell_command_render *) @@ -223,9 +221,9 @@ cell_vbuf_draw(struct vbuf_render *vbr, render->num_verts = nr_vertices; if (ALLOW_INLINE_VERTS && min_index == 0 && - vertex_bytes <= cell_batch_free_space(cell)) { - /* vertex data inlined, after indices */ - void *dst = cell_batch_alloc(cell, vertex_bytes); + vertex_bytes + 16 <= cell_batch_free_space(cell)) { + /* vertex data inlined, after indices, at 16-byte boundary */ + void *dst = cell_batch_alloc_aligned(cell, vertex_bytes, 16); memcpy(dst, vertices, vertex_bytes); render->inline_verts = TRUE; render->vertex_buf = ~0; diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c index 4f126d5e5b..e375197fe6 100644 --- a/src/mesa/pipe/cell/spu/spu_main.c +++ b/src/mesa/pipe/cell/spu/spu_main.c @@ -387,7 +387,7 @@ cmd_batch(uint opcode) = (struct cell_command_render *) &buffer[pos]; uint pos_incr; cmd_render(render, &pos_incr); - pos += sizeof(*render) / 8 + ((pos_incr + 1) / 2); + pos += 
pos_incr; } break; case CELL_CMD_RELEASE_VERTS: @@ -541,6 +541,7 @@ main(main_param_t speid, main_param_t argp) (void) speid; ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); + ASSERT(sizeof(struct cell_command_render) % 8 == 0); one_time_init(); diff --git a/src/mesa/pipe/cell/spu/spu_render.c b/src/mesa/pipe/cell/spu/spu_render.c index e8705eeeba..932fb500b3 100644 --- a/src/mesa/pipe/cell/spu/spu_render.c +++ b/src/mesa/pipe/cell/spu/spu_render.c @@ -171,6 +171,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; const uint vertex_size = render->vertex_size; /* in bytes */ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; + uint index_bytes; const ubyte *vertices; const ushort *indexes; uint i, j; @@ -199,13 +200,16 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) /* indexes are right after the render command in the batch buffer */ indexes = (const ushort *) (render + 1); - *pos_incr = (render->num_indexes * 2 + 3) / 4; + index_bytes = ROUNDUP8(render->num_indexes * 2); + *pos_incr = index_bytes / 8 + sizeof(*render) / 8; if (render->inline_verts) { - /* Vertices are right after indexes in batch buffer */ - vertices = (const ubyte *) (render + 1) + *pos_incr * 4; - *pos_incr = *pos_incr + total_vertex_bytes / 4; + /* Vertices are after indexes in batch buffer at next 16-byte addr */ + vertices = (const ubyte *) render + (*pos_incr * 8); + vertices = (const ubyte *) align_pointer((void *) vertices, 16); + ASSERT_ALIGN16(vertices); + *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; } else { /* Begin DMA fetch of vertex buffer */ -- cgit v1.2.3