diff options
author | Robert Ellison <papillo@tungstengraphics.com> | 2008-11-21 11:42:14 -0700 |
---|---|---|
committer | Robert Ellison <papillo@tungstengraphics.com> | 2008-11-21 11:42:35 -0700 |
commit | 11fc390f6478526d4f0bdb4b7e628284da31b3b9 (patch) | |
tree | 5fd526db7370668cf4cf1c36844fa5eac705cb2d /src/gallium/drivers/cell/ppu | |
parent | 81aa678ce8f4a1f7c75b928ba2b107908959d50d (diff) |
CELL: use variant-length fragment ops programs
This is a set of changes that optimizes the memory use of fragment
operation programs (by using and transmitting only as much memory as is
needed for the fragment ops programs, instead of maximal sizes), as well
as eliminates the dependency on hard-coded maximal program sizes. State
that is not dependent on fragment facing (i.e. that isn't using
two-sided stenciling) will only save and transmit a single
fragment operation program, instead of two identical programs.
- Added the ability to emit a LNOP (No Operation (Load)) instruction.
This is used to pad the generated fragment operations programs to
a multiple of 8 bytes, which is necessary for proper operation of
the dual instruction pipeline, and also required for proper SPU-side
decoding.
- Added the ability to allocate and manage a variant-length
struct cell_command_fragment_ops. This structure now puts the
generated function field at the end, where it can be as large
as necessary.
- On the PPU side, we now combine the generated front-facing and
back-facing code into a single variant-length buffer (and only use one
if the two sets of code are identical) for transmission to the SPU.
- On the SPU side, we pull the correct sizes out of the buffer,
allocate a new code buffer if the one we have isn't large enough,
and save the code to that buffer. The buffer is deallocated when
the SPU exits.
- Commented out the emit_fetch() static function, which was not being used.
Diffstat (limited to 'src/gallium/drivers/cell/ppu')
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_state_emit.c | 77 | ||||
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 3 |
3 files changed, 74 insertions, 13 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 82336d6635..2c64eb1bcc 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -1776,7 +1776,10 @@ gen_stencil_depth_test(struct spe_function *f, * \param cell the rendering context (in) * \param facing whether the generated code is for front-facing or * back-facing fragments - * \param f the generated function (out) + * \param f the generated function (in/out); on input, the function + * must already have been initialized. On exit, whatever + * instructions within the generated function have had + * the fragment ops appended. */ void cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f) @@ -1808,8 +1811,6 @@ cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ - spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); - if (cell->debug_flags & CELL_DEBUG_ASM) { spe_print_code(f, true); spe_indent(f, 8); diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 031b27f11f..0a0af81f53 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -76,30 +76,86 @@ lookup_fragment_ops(struct cell_context *cell) */ if (!ops) { struct spe_function spe_code_front, spe_code_back; + unsigned int facing_dependent, total_code_size; if (0) debug_printf("**** Create New Fragment Ops\n"); - /* Prepare the buffer that will hold the generated code. */ - spe_init_func(&spe_code_front, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); - spe_init_func(&spe_code_back, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + /* Prepare the buffer that will hold the generated code. 
The + * "0" passed in for the size means that the SPE code will + * use a default size. + */ + spe_init_func(&spe_code_front, 0); + spe_init_func(&spe_code_back, 0); - /* generate new code. Always generate new code for both front-facing + /* Generate new code. Always generate new code for both front-facing * and back-facing fragments, even if it's the same code in both * cases. */ cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front); cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back); - /* alloc new fragment ops command */ - ops = CALLOC_STRUCT(cell_command_fragment_ops); + /* Make sure the code is a multiple of 8 bytes long; this is + * required to ensure that the dual pipe instruction alignment + * is correct. It's also important for the SPU unpacking, + * which assumes 8-byte boundaries. + */ + unsigned int front_code_size = spe_code_size(&spe_code_front); + while (front_code_size % 8 != 0) { + spe_lnop(&spe_code_front); + front_code_size = spe_code_size(&spe_code_front); + } + unsigned int back_code_size = spe_code_size(&spe_code_back); + while (back_code_size % 8 != 0) { + spe_lnop(&spe_code_back); + back_code_size = spe_code_size(&spe_code_back); + } + /* Determine whether the code we generated is facing-dependent, by + * determining whether the generated code is different for the front- + * and back-facing fragments. + */ + if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) { + /* Code is identical; only need one copy. */ + facing_dependent = 0; + total_code_size = front_code_size; + } + else { + /* Code is different for front-facing and back-facing fragments. + * Need to send both copies. + */ + facing_dependent = 1; + total_code_size = front_code_size + back_code_size; + } + + /* alloc new fragment ops command. Note that this structure + * has variant length based on the total code size required. 
+ */ + ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size); /* populate the new cell_command_fragment_ops object */ ops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; - memcpy(ops->code_front, spe_code_front.store, spe_code_size(&spe_code_front)); - memcpy(ops->code_back, spe_code_back.store, spe_code_size(&spe_code_back)); + ops->total_code_size = total_code_size; + ops->front_code_index = 0; + memcpy(ops->code, spe_code_front.store, front_code_size); + if (facing_dependent) { + /* We have separate front- and back-facing code. Append the + * back-facing code to the buffer. Be careful because the code + * size is in bytes, but the buffer is of unsigned elements. + */ + ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]); + memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size); + } + else { + /* Use the same code for front- and back-facing fragments */ + ops->back_code_index = ops->front_code_index; + } + + /* Set the fields for the fallback case. Note that these fields + * (and the whole fallback case) will eventually go away. 
+ */ ops->dsa = *cell->depth_stencil; ops->blend = *cell->blend; + ops->blend_color = cell->blend_color; /* insert cell_command_fragment_ops object into keymap/cache */ util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); @@ -200,9 +256,10 @@ cell_emit_state(struct cell_context *cell) CELL_NEW_DEPTH_STENCIL | CELL_NEW_BLEND)) { struct cell_command_fragment_ops *fops, *fops_cmd; - fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd)); + /* Note that cell_command_fragment_ops is a variant-sized record */ fops = lookup_fragment_ops(cell); - memcpy(fops_cmd, fops, sizeof(*fops)); + fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd) + fops->total_code_size); + memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size); } if (cell->dirty & CELL_NEW_SAMPLER) { diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 18969005b0..9cba537d9e 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -145,6 +145,8 @@ emit_matrix_transpose(struct spe_function *p, } +#if 0 +/* This appears to not be used currently */ static void emit_fetch(struct spe_function *p, unsigned in_ptr, unsigned *offset, @@ -256,6 +258,7 @@ emit_fetch(struct spe_function *p, spe_release_register(p, float_one); } } +#endif void cell_update_vertex_fetch(struct draw_context *draw) |