From b7da4c3dc199ee382bb9924ac86a3485deccc62d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 11:08:45 -0600 Subject: gallium: PPC vertex shader support Works, but dead code lingering, debug code present, etc. --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 270 +++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 src/gallium/auxiliary/draw/draw_vs_ppc.c (limited to 'src/gallium/auxiliary/draw/draw_vs_ppc.c') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c new file mode 100644 index 0000000000..a096ad49b8 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -0,0 +1,270 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_config.h" + +#include "draw_vs.h" + +#if defined(PIPE_ARCH_PPC) + +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" + +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_ppc.h" +#include "tgsi/tgsi_ppc.h" +#include "tgsi/tgsi_parse.h" + + + +typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], + float (*outputs)[4][4], + float (*temps)[4][4], + float (*immeds)[4][4], + float (*consts)[4]); + +#if 0 + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + float (*constant)[4], /* 3 */ + struct tgsi_exec_vector *temporary, /* 4 */ + float (*immediates)[4], /* 5 */ + const float (*aos_input)[4], /* 6 */ + uint num_inputs, /* 7 */ + uint input_stride, /* 8 */ + float (*aos_output)[4], /* 9 */ + uint num_outputs, /* 10 */ + uint output_stride ); /* 11 */ +#endif + +struct draw_ppc_vertex_shader { + struct draw_vertex_shader base; + struct ppc_function ppc_program; + + codegen_function func; + + struct tgsi_exec_machine *machine; +}; + + +static void +vs_ppc_prepare( struct draw_vertex_shader *base, + struct draw_context *draw ) +{ +} + + + +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. + */ +static void +vs_ppc_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i; + +#define MAX_VERTICES 4 + + /* loop over verts */ + for (i = 0; i < count; i += MAX_VERTICES) { + const uint max_vertices = MIN2(MAX_VERTICES, count - i); + float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; + float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; + float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + uint attr; + + /* convert (up to) four input verts to SoA format */ + for (attr = 0; attr < base->info.num_inputs; attr++) { + const float *vIn = (const float *) input; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { +#if 0 + if (attr==0) + printf("Input v%d a%d: %f %f %f %f\n", + vert, attr, vIn[0], vIn[1], vIn[2], vIn[3]); +#endif + inputs_soa[attr][0][vert] = vIn[attr * 4 + 0]; + inputs_soa[attr][1][vert] = vIn[attr * 4 + 1]; + inputs_soa[attr][2][vert] = vIn[attr * 4 + 2]; + inputs_soa[attr][3][vert] = vIn[attr * 4 + 3]; + vIn += input_stride / 4; + } + } + + /* run compiled shader + */ +#if 0 + shader->func(machine->Inputs, + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + (float (*)[4])shader->base.immediates, + input, + base->info.num_inputs, + input_stride, + output, + base->info.num_outputs, + output_stride ); +#else + shader->func(inputs_soa, outputs_soa, temps_soa, + (float (*)[4][4]) shader->base.immediates, + (float (*)[4]) constants); + + /*output[0][0] = input[0][0] * 0.5;*/ +#endif + + /* convert (up to) four output verts from SoA back to AoS format */ + for (attr = 0; attr < base->info.num_outputs; attr++) { + float *vOut = (float *) output; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { + vOut[attr * 4 + 0] = outputs_soa[attr][0][vert]; + vOut[attr * 4 + 1] = outputs_soa[attr][1][vert]; + vOut[attr * 4 + 2] = outputs_soa[attr][2][vert]; + vOut[attr * 4 + 3] = outputs_soa[attr][3][vert]; +#if 0 + if (attr==0) + printf("Output v%d a%d: %f %f %f %f\n", + vert, attr, vOut[0], vOut[1], vOut[2], vOut[3]); +#endif + vOut += output_stride / 4; + } + } + + /* advance to next group of four input/output verts */ + input = (const float (*)[4])((const char *)input + input_stride * max_vertices); + output = (float (*)[4])((char *)output + output_stride * max_vertices); + } +} + + + + +static void +vs_ppc_delete( struct draw_vertex_shader *base ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + + ppc_release_func( &shader->ppc_program ); + + align_free( (void *) shader->base.immediates ); + + FREE( (void*) shader->base.state.tokens ); + FREE( shader ); +} + + +struct draw_vertex_shader * +draw_create_vs_ppc(struct draw_context *draw, + const struct pipe_shader_state *templ) +{ + struct draw_ppc_vertex_shader *vs; + + vs = CALLOC_STRUCT( draw_ppc_vertex_shader ); + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) + goto fail; + + tgsi_scan_shader(templ->tokens, &vs->base.info); + + vs->base.draw = draw; +#if 0 + if (1) + vs->base.create_varient = draw_vs_varient_aos_ppc; + else +#endif + vs->base.create_varient = draw_vs_varient_generic; + vs->base.prepare = vs_ppc_prepare; + vs->base.run_linear = vs_ppc_run_linear; + vs->base.delete = vs_ppc_delete; + + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 * + sizeof(float), 16); + + vs->machine = &draw->vs.machine; + + ppc_init_func( &vs->ppc_program, 1000 ); /* XXX fix limit */ + + if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, + &vs->ppc_program, + (float (*)[4])vs->base.immediates, + TRUE )) + goto fail; + + vs->func = (codegen_function) ppc_get_func( &vs->ppc_program ); + if (!vs->func) { + goto fail; + } + + return &vs->base; + +fail: + debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n"); + + ppc_release_func( &vs->ppc_program ); + + FREE(vs); + return NULL; +} + + + +#else /* PIPE_ARCH_PPC */ + + +struct draw_vertex_shader * +draw_create_vs_ppc( struct draw_context *draw, + const struct pipe_shader_state *templ ) +{ + return (void *) 0; +} + + +#endif /* PIPE_ARCH_PPC */ -- cgit v1.2.3 From ba4faef7c07c47ad4f71f3e6ba94cb54217c56ed Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 11:13:31 -0600 Subject: gallium: temporarily disable PPC vertex shader until more things run --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/auxiliary/draw/draw_vs_ppc.c') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index a096ad49b8..990a659f27 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -203,6 +203,9 @@ draw_create_vs_ppc(struct draw_context *draw, { struct draw_ppc_vertex_shader *vs; + /* XXX temporary short-circuit */ + return NULL; + vs = CALLOC_STRUCT( draw_ppc_vertex_shader ); if (vs == NULL) return NULL; -- cgit v1.2.3 From 7b1d08738f30d0fec2f07568b16e08c4fdddeeac Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 15:25:04 -0600 Subject: cell: turn on PPC assembly vertex transform gears runs with it now (3x faster FPS than before). --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_ppc.c') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 990a659f27..fcc9cbfec5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -203,9 +203,6 @@ draw_create_vs_ppc(struct draw_context *draw, { struct draw_ppc_vertex_shader *vs; - /* XXX temporary short-circuit */ - return NULL; - vs = CALLOC_STRUCT( draw_ppc_vertex_shader ); if (vs == NULL) return NULL; @@ -233,7 +230,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->machine = &draw->vs.machine; - ppc_init_func( &vs->ppc_program, 1000 ); /* XXX fix limit */ + ppc_init_func( &vs->ppc_program, 2000 ); /* XXX fix limit */ if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, -- cgit v1.2.3 From abbbe876ac98596b143da295abf6887e0a4e50d2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 17:19:12 -0600 Subject: gallium: new PPC built-in constants array It's hard to form PPC vector immediates so load them from an array. --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 8 +++-- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 61 ++++++++++++++++++++++++++++---- src/gallium/auxiliary/tgsi/tgsi_ppc.h | 3 ++ 3 files changed, 63 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_ppc.c') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index fcc9cbfec5..8eff6d4fda 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -55,7 +55,8 @@ typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], float (*outputs)[4][4], float (*temps)[4][4], float (*immeds)[4][4], - float (*consts)[4]); + float (*consts)[4], + const float *builtins); #if 0 const struct tgsi_exec_vector *input, @@ -151,7 +152,8 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, #else shader->func(inputs_soa, outputs_soa, temps_soa, (float (*)[4][4]) shader->base.immediates, - (float (*)[4]) constants); + (float (*)[4]) constants, + ppc_builtin_constants); /*output[0][0] = input[0][0] * 0.5;*/ #endif @@ -246,7 +248,9 @@ draw_create_vs_ppc(struct draw_context *draw, return &vs->base; fail: + /* debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n"); + */ ppc_release_func( &vs->ppc_program ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 9d7de41fe7..6b05fd16cf 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -36,6 +36,7 @@ #include "pipe/p_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_sse.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" @@ -44,6 +45,14 @@ #include "rtasm/rtasm_ppc.h" +/** + * Since it's pretty much impossible to form PPC vector immediates, load + * them from memory here: + */ +const float ppc_builtin_constants[] ALIGN16_ATTRIB = { + 1.0f, -128.0f, 128.0, 0.0 +}; + #define FOR_EACH_CHANNEL( CHAN )\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) @@ -81,12 +90,46 @@ struct gen_context int temps_reg; /**< GP register pointing to temporary "registers" */ int immed_reg; /**< GP register pointing to immediates buffer */ int const_reg; /**< GP register pointing to constants buffer */ + int builtins_reg; /**< GP register pointint to built-in constants */ int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ }; +/** + * Load the given vector register with {value, value, value, value}. + * The value must be in the ppu_builtin_constants[] array. + * We wouldn't need this if there was a simple way to load PPC vector + * registers with immediate values! + */ +static void +load_constant_vec(struct gen_context *gen, int dst_vec, float value) +{ + uint pos; + for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { + if (ppc_builtin_constants[pos] == value) { + int offset_reg = ppc_allocate_register(gen->f); + int offset = pos * 4; + + ppc_li(gen->f, offset_reg, offset); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our builtins start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); + /* splat word[pos % 4] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); + ppc_release_register(gen->f, offset_reg); + return; + } + } + assert(0 && "Need to add new constant to ppc_builtin_constants array"); +} + + /** * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. */ @@ -95,7 +138,7 @@ gen_one_vec(struct gen_context *gen) { if (gen->one_vec < 0) { gen->one_vec = ppc_allocate_vec_register(gen->f); - ppc_vload_float(gen->f, gen->one_vec, 1.0f); + load_constant_vec(gen, gen->one_vec, 1.0f); } return gen->one_vec; } @@ -115,7 +158,6 @@ gen_get_bit31_vec(struct gen_context *gen) } - /** * Register fetch, put result in 'dst_vec'. */ @@ -182,7 +224,7 @@ emit_fetch(struct gen_context *gen, } break; case TGSI_EXTSWIZZLE_ZERO: - ppc_vload_float(gen->f, dst_vec, 0.0f); + ppc_vzero(gen->f, dst_vec); break; case TGSI_EXTSWIZZLE_ONE: { @@ -544,7 +586,7 @@ ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) int t_vec = ppc_allocate_vec_register(f); int zero_vec = ppc_allocate_vec_register(f); - ppc_vload_float(f, zero_vec, 0.0f); + ppc_vzero(f, zero_vec); ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb */ @@ -573,7 +615,7 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) FETCH(gen, *inst, x_vec, 0, CHAN_X); /* x_vec = src[0].x */ - ppc_vload_float(gen->f, zero_vec, 0.0f); /* zero = {0,0,0,0} */ + ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { @@ -586,7 +628,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) int w_vec = ppc_allocate_vec_register(gen->f); int pow_vec = ppc_allocate_vec_register(gen->f); int pos_vec = ppc_allocate_vec_register(gen->f); - int c128_vec = ppc_allocate_vec_register(gen->f); + int p128_vec = ppc_allocate_vec_register(gen->f); + int n128_vec = ppc_allocate_vec_register(gen->f); FETCH(gen, *inst, y_vec, 0, CHAN_Y); /* y_vec = src[0].y */ ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ @@ -594,7 +637,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) FETCH(gen, *inst, w_vec, 0, CHAN_W); /* w_vec = src[0].w */ /* XXX clamp Y to [-128, 128] */ - ppc_vload_float(gen->f, c128_vec, 128.0f); + load_constant_vec(gen, p128_vec, 128.0f); + load_constant_vec(gen, n128_vec, -128.0f); /* if temp.x > 0 * pow(tmp.y, tmp.w) @@ -613,6 +657,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_release_vec_register(gen->f, w_vec); ppc_release_vec_register(gen->f, pow_vec); ppc_release_vec_register(gen->f, pos_vec); + ppc_release_vec_register(gen->f, p128_vec); + ppc_release_vec_register(gen->f, n128_vec); } ppc_release_vec_register(gen->f, x_vec); @@ -798,6 +844,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, gen.temps_reg = ppc_reserve_register(func, 5); /* ... */ gen.immed_reg = ppc_reserve_register(func, 6); gen.const_reg = ppc_reserve_register(func, 7); + gen.builtins_reg = ppc_reserve_register(func, 8); gen.one_vec = -1; gen.bit31_vec = -1; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h index 7cd2bf9aff..829ec075e7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h @@ -35,6 +35,9 @@ extern "C" { struct tgsi_token; struct ppc_function; +extern const float ppc_builtin_constants[]; + + boolean tgsi_emit_ppc(const struct tgsi_token *tokens, struct ppc_function *function, -- cgit v1.2.3 From f4e9526addc617dc78af9b1af781ffe09ce62504 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 28 Oct 2008 18:21:03 -0600 Subject: gallium: ppc: don't replicate/smear immediate values, use vspltw instruction as with constants --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 8 ++++---- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_ppc.c') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 8eff6d4fda..ff40263400 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -54,7 +54,7 @@ typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], float (*outputs)[4][4], float (*temps)[4][4], - float (*immeds)[4][4], + float (*immeds)[4], float (*consts)[4], const float *builtins); @@ -151,7 +151,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, output_stride ); #else shader->func(inputs_soa, outputs_soa, temps_soa, - (float (*)[4][4]) shader->base.immediates, + (float (*)[4]) shader->base.immediates, (float (*)[4]) constants, ppc_builtin_constants); @@ -227,7 +227,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.run_linear = vs_ppc_run_linear; vs->base.delete = vs_ppc_delete; - vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 * + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * sizeof(float), 16); vs->machine = &draw->vs.machine; @@ -236,7 +236,7 @@ draw_create_vs_ppc(struct draw_context *draw, if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, - (float (*)[4])vs->base.immediates, + (float (*)[4]) vs->base.immediates, TRUE )) goto fail; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 0de9b972b4..dd574ac02a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -299,10 +299,18 @@ emit_fetch(struct gen_context *gen, break; case TGSI_FILE_IMMEDIATE: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); - ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our immediates start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg); + /* splat word[swizzle] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); } break; case TGSI_FILE_CONSTANT: @@ -1095,14 +1103,10 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, assert(size <= 4); assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); for (i = 0; i < size; i++) { - const float value = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; - imm[num_immediates * 4 + 0] = - imm[num_immediates * 4 + 1] = - imm[num_immediates * 4 + 2] = - imm[num_immediates * 4 + 3] = value; - num_immediates++; + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; } + num_immediates++; } break; -- cgit v1.2.3 From a045b92511eb43ff89e9c0536464af7866956168 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 28 Oct 2008 18:22:14 -0600 Subject: gallium: remove old code --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_ppc.c') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index ff40263400..19f6c4ee5b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -58,19 +58,6 @@ typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], float (*consts)[4], const float *builtins); -#if 0 - const struct tgsi_exec_vector *input, - struct tgsi_exec_vector *output, - float (*constant)[4], /* 3 */ - struct tgsi_exec_vector *temporary, /* 4 */ - float (*immediates)[4], /* 5 */ - const float (*aos_input)[4], /* 6 */ - uint num_inputs, /* 7 */ - uint input_stride, /* 8 */ - float (*aos_output)[4], /* 9 */ - uint num_outputs, /* 10 */ - uint output_stride ); /* 11 */ -#endif struct draw_ppc_vertex_shader { struct draw_vertex_shader base; @@ -137,27 +124,11 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, /* run compiled shader */ -#if 0 - shader->func(machine->Inputs, - machine->Outputs, - (float (*)[4])constants, - machine->Temps, - (float (*)[4])shader->base.immediates, - input, - base->info.num_inputs, - input_stride, - output, - base->info.num_outputs, - output_stride ); -#else shader->func(inputs_soa, outputs_soa, temps_soa, (float (*)[4]) shader->base.immediates, (float (*)[4]) constants, ppc_builtin_constants); - /*output[0][0] = input[0][0] * 0.5;*/ -#endif - /* convert (up to) four output verts from SoA back to AoS format */ for (attr = 0; attr < base->info.num_outputs; attr++) { float *vOut = (float *) output; -- cgit v1.2.3 From 75b92764a7820558fb2b6cd27a2ab0487ef2f9ba Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 11:04:05 -0600 Subject: gallium: clean-ups --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_ppc.c') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 19f6c4ee5b..d720c7bbd5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -64,8 +64,6 @@ struct draw_ppc_vertex_shader { struct ppc_function ppc_program; codegen_function func; - - struct tgsi_exec_machine *machine; }; @@ -73,11 +71,12 @@ static void vs_ppc_prepare( struct draw_vertex_shader *base, struct draw_context *draw ) { + /* nothing */ } - -/* Simplified vertex shader interface for the pt paths. Given the +/** + * Simplified vertex shader interface for the pt paths. Given the * complexity of code-generating all the above operations together, * it's time to try doing all the other stuff separately. */ @@ -91,7 +90,6 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, unsigned output_stride ) { struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; - struct tgsi_exec_machine *machine = shader->machine; unsigned int i; #define MAX_VERTICES 4 @@ -154,8 +152,6 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, } - - static void vs_ppc_delete( struct draw_vertex_shader *base ) { @@ -172,7 +168,7 @@ vs_ppc_delete( struct draw_vertex_shader *base ) struct draw_vertex_shader * draw_create_vs_ppc(struct draw_context *draw, - const struct pipe_shader_state *templ) + const struct pipe_shader_state *templ) { struct draw_ppc_vertex_shader *vs; @@ -201,8 +197,6 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * sizeof(float), 16); - vs->machine = &draw->vs.machine; - ppc_init_func( &vs->ppc_program, 2000 ); /* XXX fix limit */ if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, -- cgit v1.2.3 From 725ba94ce5701aa8690c7ab2ea792dda86cbbe7a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 16:35:59 -0600 Subject: gallium: no longer pass max_inst to ppc_init_func() --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_ppc.c') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index d720c7bbd5..8b75136144 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -197,7 +197,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * sizeof(float), 16); - ppc_init_func( &vs->ppc_program, 2000 ); /* XXX fix limit */ + ppc_init_func( &vs->ppc_program ); if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index e73ed71a0b..6d11263be8 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -43,7 +43,7 @@ void -ppc_init_func(struct ppc_function *p, unsigned max_inst) +ppc_init_func(struct ppc_function *p) { uint i; diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index d0477dec94..afb4704c39 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -62,7 +62,7 @@ struct ppc_function -extern void ppc_init_func(struct ppc_function *p, unsigned max_inst); +extern void ppc_init_func(struct ppc_function *p); extern void ppc_release_func(struct ppc_function *p); extern uint ppc_num_instructions(const struct ppc_function *p); extern void (*ppc_get_func( struct ppc_function *p ))( void ); -- cgit v1.2.3 From 2ebd969f0f0d0e45e6ac462059cf322f037775f1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 9 Jan 2009 21:42:58 -0700 Subject: gallium: code to dump/debug PPC code (disabled) --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary/draw/draw_vs_ppc.c') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 8b75136144..d35db57d57 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -199,6 +199,11 @@ draw_create_vs_ppc(struct draw_context *draw, ppc_init_func( &vs->ppc_program ); +#if 0 + ppc_print_code(&vs->ppc_program, TRUE); + ppc_indent(&vs->ppc_program, 8); +#endif + if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, (float (*)[4]) vs->base.immediates, -- cgit v1.2.3