summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Brian Paul <brian.paul@tungstengraphics.com> 2008-10-28 18:21:03 -0600
committer Brian Paul <brian.paul@tungstengraphics.com> 2008-10-28 18:21:03 -0600
commit f4e9526addc617dc78af9b1af781ffe09ce62504 (patch)
tree 1d7fcfabf8b5188f8b7b6ad580535cc3c179ee68 /src
parent 0a8590e3cf9e9f671405343bcd1dc756a7296fc3 (diff)
gallium: ppc: don't replicate/smear immediate values, use vspltw instruction as with constants
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_ppc.c | 8
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 22
2 files changed, 17 insertions, 13 deletions
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index 8eff6d4fda..ff40263400 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -54,7 +54,7 @@
typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4],
float (*outputs)[4][4],
float (*temps)[4][4],
- float (*immeds)[4][4],
+ float (*immeds)[4],
float (*consts)[4],
const float *builtins);
@@ -151,7 +151,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
output_stride );
#else
shader->func(inputs_soa, outputs_soa, temps_soa,
- (float (*)[4][4]) shader->base.immediates,
+ (float (*)[4]) shader->base.immediates,
(float (*)[4]) constants,
ppc_builtin_constants);
@@ -227,7 +227,7 @@ draw_create_vs_ppc(struct draw_context *draw,
vs->base.run_linear = vs_ppc_run_linear;
vs->base.delete = vs_ppc_delete;
- vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 *
+ vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
sizeof(float), 16);
vs->machine = &draw->vs.machine;
@@ -236,7 +236,7 @@ draw_create_vs_ppc(struct draw_context *draw,
if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens,
&vs->ppc_program,
- (float (*)[4])vs->base.immediates,
+ (float (*)[4]) vs->base.immediates,
TRUE ))
goto fail;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 0de9b972b4..dd574ac02a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -299,10 +299,18 @@ emit_fetch(struct gen_context *gen,
break;
case TGSI_FILE_IMMEDIATE:
{
- int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+ int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
int offset_reg = emit_li_offset(gen, offset);
dst_vec = ppc_allocate_vec_register(gen->f);
- ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg);
+ /* Load 4-byte word into vector register.
+ * The vector slot depends on the effective address we load from.
+ * We know that our immediates start at a 16-byte boundary so we
+ * know that 'swizzle' tells us which vector slot will have the
+ * loaded word. The other vector slots will be undefined.
+ */
+ ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg);
+ /* splat word[swizzle] across the vector reg */
+ ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
}
break;
case TGSI_FILE_CONSTANT:
@@ -1095,14 +1103,10 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
assert(size <= 4);
assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
for (i = 0; i < size; i++) {
- const float value =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
- imm[num_immediates * 4 + 0] =
- imm[num_immediates * 4 + 1] =
- imm[num_immediates * 4 + 2] =
- imm[num_immediates * 4 + 3] = value;
- num_immediates++;
+ immediates[num_immediates][i] =
+ parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
}
+ num_immediates++;
}
break;