summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/cell/spu
diff options
context:
space:
mode:
authorRobert Ellison <papillo@tungstengraphics.com>2008-11-21 11:42:14 -0700
committerRobert Ellison <papillo@tungstengraphics.com>2008-11-21 11:42:35 -0700
commit11fc390f6478526d4f0bdb4b7e628284da31b3b9 (patch)
tree5fd526db7370668cf4cf1c36844fa5eac705cb2d /src/gallium/drivers/cell/spu
parent81aa678ce8f4a1f7c75b928ba2b107908959d50d (diff)
CELL: use variant-length fragment ops programs
This is a set of changes that optimizes the memory use of fragment operation programs (by using and transmitting only as much memory as is needed for the fragment ops programs, instead of maximal sizes) and eliminates the dependency on hard-coded maximal program sizes. State that is not dependent on fragment facing (i.e. that isn't using two-sided stenciling) will only save and transmit a single fragment operation program, instead of two identical programs.
- Added the ability to emit an LNOP (No Operation (Load)) instruction. This is used to pad the generated fragment operations programs to a multiple of 8 bytes, which is necessary for proper operation of the dual instruction pipeline, and is also required for proper SPU-side decoding.
- Added the ability to allocate and manage a variant-length struct cell_command_fragment_ops. This structure now puts the generated function field at the end, where it can be as large as necessary.
- On the PPU side, we now combine the generated front-facing and back-facing code into a single variant-length buffer (and store only one copy if the two sets of code are identical) for transmission to the SPU.
- On the SPU side, we pull the correct sizes out of the buffer, allocate a new code buffer if the one we have isn't large enough, and save the code to that buffer. The buffer is deallocated when the SPU exits.
- Commented out the emit_fetch() static function, which was not being used.
Diffstat (limited to 'src/gallium/drivers/cell/spu')
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.c111
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.h32
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c15
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h4
4 files changed, 120 insertions, 42 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
index d5faf4e3aa..8500d19754 100644
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -210,45 +210,72 @@ cmd_release_verts(const struct cell_command_release_verts *release)
static void
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
- static int warned = 0;
-
D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");
- /* Copy SPU code from batch buffer to spu buffer */
- memcpy(spu.fragment_ops_code_front, fops->code_front, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
- memcpy(spu.fragment_ops_code_back, fops->code_back, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
- /* Copy state info (for fallback case only) */
+
+ /* Copy state info (for fallback case only - this will eventually
+ * go away when the fallback case goes away)
+ */
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color));
- /* Parity twist! For now, always use the fallback code by default,
- * only switching to codegen when specifically requested. This
- * allows us to develop freely without risking taking down the
- * branch.
- *
- * Later, the parity of this check will be reversed, so that
- * codegen is *always* used, unless we specifically indicate that
- * we don't want it.
- *
- * Eventually, the option will be removed completely, because in
- * final code we'll always use codegen and won't even provide the
- * raw state records that the fallback code requires.
+ /* Make sure the SPU knows which buffers it's expected to read when
+ * it's told to pull tiles.
*/
- if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) {
- spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) spu.fragment_ops_code_front;
- spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) spu.fragment_ops_code_back;
- }
- else {
- /* otherwise, the default fallback code remains in place */
+ spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);
+
+ /* If we're forcing the fallback code to be used (for debug purposes),
+ * install that. Otherwise install the incoming SPU code.
+ */
+ if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) {
+ static unsigned int warned = 0;
if (!warned) {
fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
warned = 1;
}
+ /* The following two lines aren't really necessary if you
+ * know the debug flags won't change during a run, and if you
+ * know that the function pointers are initialized correctly.
+ * We set them here to allow a person to change the debug
+ * flags during a run (from inside a debugger).
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+ return;
}
- spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);
-}
+ /* Make sure the SPU code buffer is large enough to hold the incoming code.
+ * Note that we *don't* use align_malloc() and align_free(), because
+ * those utility functions are *not* available in SPU code.
+ * */
+ if (spu.fragment_ops_code_size < fops->total_code_size) {
+ if (spu.fragment_ops_code != NULL) {
+ free(spu.fragment_ops_code);
+ }
+ spu.fragment_ops_code_size = fops->total_code_size;
+ spu.fragment_ops_code = malloc(fops->total_code_size);
+ if (spu.fragment_ops_code == NULL) {
+ /* Whoops. */
+ fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size);
+ spu.fragment_ops_code = NULL;
+ spu.fragment_ops_code_size = 0;
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+ return;
+ }
+ }
+ /* Copy the SPU code from the command buffer to the spu buffer */
+ memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size);
+
+ /* Set the pointers for the front-facing and back-facing fragments
+ * to the specified offsets within the code. Note that if the
+ * front-facing and back-facing code are the same, they'll have
+ * the same offset.
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index];
+ spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index];
+}
static void
cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
@@ -588,7 +615,8 @@ cmd_batch(uint opcode)
struct cell_command_fragment_ops *fops
= (struct cell_command_fragment_ops *) &buffer[pos];
cmd_state_fragment_ops(fops);
- pos += sizeof(*fops) / 8;
+ /* This is a variant-sized command */
+ pos += (sizeof(*fops) + fops->total_code_size)/ 8;
}
break;
case CELL_CMD_STATE_FRAGMENT_PROGRAM:
@@ -756,3 +784,32 @@ command_loop(void)
if (spu.init.debug_flags & CELL_DEBUG_CACHE)
spu_dcache_report();
}
+
+/* Initialize this module; we manage the fragment ops buffer here. */
+void
+spu_command_init(void)
+{
+ /* Install default/fallback fragment processing function.
+ * This will normally be overriden by a code-gen'd function
+ * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+
+ /* Set up the basic empty buffer for code-gen'ed fragment ops */
+ spu.fragment_ops_code = NULL;
+ spu.fragment_ops_code_size = 0;
+}
+
+void
+spu_command_close(void)
+{
+ /* Deallocate the code-gen buffer for fragment ops, and reset the
+ * fragment ops functions to their initial setting (just to leave
+ * things in a good state).
+ */
+ if (spu.fragment_ops_code != NULL) {
+ free(spu.fragment_ops_code);
+ }
+ spu_command_init();
+}
diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h
index 853e9aa549..83dcdade28 100644
--- a/src/gallium/drivers/cell/spu/spu_command.h
+++ b/src/gallium/drivers/cell/spu/spu_command.h
@@ -1,7 +1,35 @@
-
-
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
extern void
command_loop(void);
+extern void
+spu_command_init(void);
+extern void
+spu_command_close(void);
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 7033f6037d..97c86d194d 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -58,17 +58,8 @@ one_time_init(void)
memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
invalidate_tex_cache();
-
- /* Install default/fallback fragment processing function.
- * This will normally be overriden by a code-gen'd function
- * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
- */
- spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
- spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
}
-
-
/* In some versions of the SDK the SPE main takes 'unsigned long' as a
* parameter. In others it takes 'unsigned long long'. Use a define to
* select between the two.
@@ -91,11 +82,11 @@ main(main_param_t speid, main_param_t argp)
ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4);
ASSERT(sizeof(struct cell_command_render) % 8 == 0);
- ASSERT(((unsigned long) &spu.fragment_ops_code_front) % 8 == 0);
- ASSERT(((unsigned long) &spu.fragment_ops_code_back) % 8 == 0);
+ ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0);
ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0);
one_time_init();
+ spu_command_init();
D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid);
D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n");
@@ -120,5 +111,7 @@ main(main_param_t speid, main_param_t argp)
command_loop();
+ spu_command_close();
+
return 0;
}
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 24cf7d77ce..33767e7c51 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -169,8 +169,8 @@ struct spu_global
ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
/** Current fragment ops machine code, at 8-byte boundary */
- uint fragment_ops_code_front[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB;
- uint fragment_ops_code_back[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB;
+ uint *fragment_ops_code;
+ uint fragment_ops_code_size;
/** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */
spu_fragment_ops_func fragment_ops[2];