summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/cell/ppu
diff options
context:
space:
mode:
authorRobert Ellison <papillo@tungstengraphics.com>2008-11-21 11:42:14 -0700
committerRobert Ellison <papillo@tungstengraphics.com>2008-11-21 11:42:35 -0700
commit11fc390f6478526d4f0bdb4b7e628284da31b3b9 (patch)
tree5fd526db7370668cf4cf1c36844fa5eac705cb2d /src/gallium/drivers/cell/ppu
parent81aa678ce8f4a1f7c75b928ba2b107908959d50d (diff)
CELL: use variant-length fragment ops programs
This is a set of changes that optimizes the memory use of fragment operation programs (by using and transmitting only as much memory as is needed for the fragment ops programs, instead of maximal sizes), as well as eliminate the dependency on hard-coded maximal program sizes. State that is not dependent on fragment facing (i.e. that isn't using two-sided stenciling) will only save and transmit a single fragment operation program, instead of two identical programs. - Added the ability to emit a LNOP (No Operation (Load)) instruction. This is used to pad the generated fragment operations programs to a multiple of 8 bytes, which is necessary for proper operation of the dual instruction pipeline, and also required for proper SPU-side decoding. - Added the ability to allocate and manage a variant-length struct cell_command_fragment_ops. This structure now puts the generated function field at the end, where it can be as large as necessary. - On the PPU side, we now combine the generated front-facing and back-facing code into a single variant-length buffer (and only use one if the two sets of code are identical) for transmission to the SPU. - On the SPU side, we pull the correct sizes out of the buffer, allocate a new code buffer if the one we have isn't large enough, and save the code to that buffer. The buffer is deallocated when the SPU exits. - Commented out the emit_fetch() static function, which was not being used.
Diffstat (limited to 'src/gallium/drivers/cell/ppu')
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.c7
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c77
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_fetch.c3
3 files changed, 74 insertions, 13 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
index 82336d6635..2c64eb1bcc 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -1776,7 +1776,10 @@ gen_stencil_depth_test(struct spe_function *f,
* \param cell the rendering context (in)
* \param facing whether the generated code is for front-facing or
* back-facing fragments
- * \param f the generated function (out)
+ * \param f the generated function (in/out); on input, the function
+ * must already have been initialized. On exit, whatever
+ * instructions within the generated function have had
+ * the fragment ops appended.
*/
void
cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f)
@@ -1808,8 +1811,6 @@ cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct
int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */
int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */
- spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
-
if (cell->debug_flags & CELL_DEBUG_ASM) {
spe_print_code(f, true);
spe_indent(f, 8);
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 031b27f11f..0a0af81f53 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -76,30 +76,86 @@ lookup_fragment_ops(struct cell_context *cell)
*/
if (!ops) {
struct spe_function spe_code_front, spe_code_back;
+ unsigned int facing_dependent, total_code_size;
if (0)
debug_printf("**** Create New Fragment Ops\n");
- /* Prepare the buffer that will hold the generated code. */
- spe_init_func(&spe_code_front, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
- spe_init_func(&spe_code_back, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
+ /* Prepare the buffer that will hold the generated code. The
+ * "0" passed in for the size means that the SPE code will
+ * use a default size.
+ */
+ spe_init_func(&spe_code_front, 0);
+ spe_init_func(&spe_code_back, 0);
- /* generate new code. Always generate new code for both front-facing
+ /* Generate new code. Always generate new code for both front-facing
* and back-facing fragments, even if it's the same code in both
* cases.
*/
cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front);
cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back);
- /* alloc new fragment ops command */
- ops = CALLOC_STRUCT(cell_command_fragment_ops);
+ /* Make sure the code is a multiple of 8 bytes long; this is
+ * required to ensure that the dual pipe instruction alignment
+ * is correct. It's also important for the SPU unpacking,
+ * which assumes 8-byte boundaries.
+ */
+ unsigned int front_code_size = spe_code_size(&spe_code_front);
+ while (front_code_size % 8 != 0) {
+ spe_lnop(&spe_code_front);
+ front_code_size = spe_code_size(&spe_code_front);
+ }
+ unsigned int back_code_size = spe_code_size(&spe_code_back);
+ while (back_code_size % 8 != 0) {
+ spe_lnop(&spe_code_back);
+ back_code_size = spe_code_size(&spe_code_back);
+ }
+ /* Determine whether the code we generated is facing-dependent, by
+ * determining whether the generated code is different for the front-
+ * and back-facing fragments.
+ */
+ if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) {
+ /* Code is identical; only need one copy. */
+ facing_dependent = 0;
+ total_code_size = front_code_size;
+ }
+ else {
+ /* Code is different for front-facing and back-facing fragments.
+ * Need to send both copies.
+ */
+ facing_dependent = 1;
+ total_code_size = front_code_size + back_code_size;
+ }
+
+ /* alloc new fragment ops command. Note that this structure
+ * has variant length based on the total code size required.
+ */
+ ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size);
/* populate the new cell_command_fragment_ops object */
ops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
- memcpy(ops->code_front, spe_code_front.store, spe_code_size(&spe_code_front));
- memcpy(ops->code_back, spe_code_back.store, spe_code_size(&spe_code_back));
+ ops->total_code_size = total_code_size;
+ ops->front_code_index = 0;
+ memcpy(ops->code, spe_code_front.store, front_code_size);
+ if (facing_dependent) {
+ /* We have separate front- and back-facing code. Append the
+ * back-facing code to the buffer. Be careful because the code
+ * size is in bytes, but the buffer is of unsigned elements.
+ */
+ ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]);
+ memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size);
+ }
+ else {
+ /* Use the same code for front- and back-facing fragments */
+ ops->back_code_index = ops->front_code_index;
+ }
+
+ /* Set the fields for the fallback case. Note that these fields
+ * (and the whole fallback case) will eventually go away.
+ */
ops->dsa = *cell->depth_stencil;
ops->blend = *cell->blend;
+ ops->blend_color = cell->blend_color;
/* insert cell_command_fragment_ops object into keymap/cache */
util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL);
@@ -200,9 +256,10 @@ cell_emit_state(struct cell_context *cell)
CELL_NEW_DEPTH_STENCIL |
CELL_NEW_BLEND)) {
struct cell_command_fragment_ops *fops, *fops_cmd;
- fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd));
+ /* Note that cell_command_fragment_ops is a variant-sized record */
fops = lookup_fragment_ops(cell);
- memcpy(fops_cmd, fops, sizeof(*fops));
+ fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd) + fops->total_code_size);
+ memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size);
}
if (cell->dirty & CELL_NEW_SAMPLER) {
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
index 18969005b0..9cba537d9e 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
@@ -145,6 +145,8 @@ emit_matrix_transpose(struct spe_function *p,
}
+#if 0
+/* This appears to not be used currently */
static void
emit_fetch(struct spe_function *p,
unsigned in_ptr, unsigned *offset,
@@ -256,6 +258,7 @@ emit_fetch(struct spe_function *p,
spe_release_register(p, float_one);
}
}
+#endif
void cell_update_vertex_fetch(struct draw_context *draw)