diff options
author | Ben Skeggs <skeggsb@gmail.com> | 2008-10-27 15:40:33 +1100 |
---|---|---|
committer | Ben Skeggs <skeggsb@gmail.com> | 2008-10-27 15:40:33 +1100 |
commit | 70a06e03d45f3d2e1cd1d430ba83c4b22471373c (patch) | |
tree | 399cc450107d78041a53b61773635560472ae08a /src/gallium/auxiliary | |
parent | 295d6f8e8f03192320aa8d4ed767427dd06071a5 (diff) | |
parent | 02c9009bb842cd8a47bc36ea274ef54ff47e1528 (diff) |
Merge remote branch 'origin/gallium-0.2' into gallium-0.2
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/draw/Makefile | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/SConscript | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs.c | 5 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs.h | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_exec.c | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_ppc.c | 274 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/Makefile | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/SConscript | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_ppc.c | 641 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_ppc.h | 151 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/Makefile | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/SConscript | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ppc.c | 910 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ppc.h | 51 |
15 files changed, 1998 insertions, 50 deletions
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index f2e36a89e9..bdbf5a08ed 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -40,6 +40,7 @@ C_SOURCES = \ draw_vs_aos_machine.c \ draw_vs_exec.c \ draw_vs_llvm.c \ + draw_vs_ppc.c \ draw_vs_sse.c diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 544a04918b..5f05aa324a 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -38,6 +38,7 @@ draw = env.ConvenienceLibrary( 'draw_vs_aos_machine.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', + 'draw_vs_ppc.c', 'draw_vs_sse.c', 'draw_vs_varient.c' ]) diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 34adbd49b0..7f305304ff 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -85,7 +85,10 @@ draw_create_vertex_shader(struct draw_context *draw, if (!vs) { vs = draw_create_vs_sse( draw, shader ); if (!vs) { - vs = draw_create_vs_exec( draw, shader ); + vs = draw_create_vs_ppc( draw, shader ); + if (!vs) { + vs = draw_create_vs_exec( draw, shader ); + } } } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 68c24abad3..89ae158751 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -158,6 +158,10 @@ draw_create_vs_sse(struct draw_context *draw, const struct pipe_shader_state *templ); struct draw_vertex_shader * +draw_create_vs_ppc(struct draw_context *draw, + const struct pipe_shader_state *templ); + +struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index dd79bc799a..39f75b50b7 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -338,7 +338,7 @@ static void emit_store_R32G32B32A32( struct aos_compilation *cp, struct x86_reg dst_ptr, struct x86_reg dataXMM ) { - sse_movaps(cp->func, dst_ptr, dataXMM); + sse_movups(cp->func, dst_ptr, dataXMM); } static void emit_store_R32G32B32( struct aos_compilation *cp, diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 79a19d6be2..13d4fcfdbf 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -46,7 +46,6 @@ struct exec_vertex_shader { struct draw_vertex_shader base; struct tgsi_exec_machine *machine; - const struct tgsi_token *machine_tokens; }; static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs ) @@ -66,12 +65,11 @@ vs_exec_prepare( struct draw_vertex_shader *shader, /* Specify the vertex program to interpret/execute. * Avoid rebinding when possible. */ - if (evs->machine_tokens != shader->state.tokens) { + if (evs->machine->Tokens != shader->state.tokens) { tgsi_exec_machine_bind_shader(evs->machine, shader->state.tokens, PIPE_MAX_SAMPLERS, NULL /*samplers*/ ); - evs->machine_tokens = shader->state.tokens; } } diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c new file mode 100644 index 0000000000..8eff6d4fda --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -0,0 +1,274 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_config.h" + +#include "draw_vs.h" + +#if defined(PIPE_ARCH_PPC) + +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" + +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_ppc.h" +#include "tgsi/tgsi_ppc.h" +#include "tgsi/tgsi_parse.h" + + + +typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], + float (*outputs)[4][4], + float (*temps)[4][4], + float (*immeds)[4][4], + float (*consts)[4], + const float *builtins); + +#if 0 + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + float (*constant)[4], /* 3 */ + struct tgsi_exec_vector *temporary, /* 4 */ + float (*immediates)[4], /* 5 */ + const float (*aos_input)[4], /* 6 */ + uint num_inputs, /* 7 */ + uint input_stride, /* 8 */ + float (*aos_output)[4], /* 9 */ + uint num_outputs, /* 10 */ + uint output_stride ); /* 11 */ +#endif + +struct draw_ppc_vertex_shader { + struct draw_vertex_shader base; + struct ppc_function ppc_program; + + codegen_function func; + + struct tgsi_exec_machine *machine; +}; + + +static void +vs_ppc_prepare( struct draw_vertex_shader *base, + struct draw_context *draw ) +{ +} + + + +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. + */ +static void +vs_ppc_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i; + +#define MAX_VERTICES 4 + + /* loop over verts */ + for (i = 0; i < count; i += MAX_VERTICES) { + const uint max_vertices = MIN2(MAX_VERTICES, count - i); + float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; + float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; + float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + uint attr; + + /* convert (up to) four input verts to SoA format */ + for (attr = 0; attr < base->info.num_inputs; attr++) { + const float *vIn = (const float *) input; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { +#if 0 + if (attr==0) + printf("Input v%d a%d: %f %f %f %f\n", + vert, attr, vIn[0], vIn[1], vIn[2], vIn[3]); +#endif + inputs_soa[attr][0][vert] = vIn[attr * 4 + 0]; + inputs_soa[attr][1][vert] = vIn[attr * 4 + 1]; + inputs_soa[attr][2][vert] = vIn[attr * 4 + 2]; + inputs_soa[attr][3][vert] = vIn[attr * 4 + 3]; + vIn += input_stride / 4; + } + } + + /* run compiled shader + */ +#if 0 + shader->func(machine->Inputs, + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + (float (*)[4])shader->base.immediates, + input, + base->info.num_inputs, + input_stride, + output, + base->info.num_outputs, + output_stride ); +#else + shader->func(inputs_soa, outputs_soa, temps_soa, + (float (*)[4][4]) shader->base.immediates, + (float (*)[4]) constants, + ppc_builtin_constants); + + /*output[0][0] = input[0][0] * 0.5;*/ +#endif + + /* convert (up to) four output verts from SoA back to AoS format */ + for (attr = 0; attr < base->info.num_outputs; attr++) { + float *vOut = (float *) output; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { + vOut[attr * 4 + 0] = outputs_soa[attr][0][vert]; + vOut[attr * 4 + 1] = outputs_soa[attr][1][vert]; + vOut[attr * 4 + 2] = outputs_soa[attr][2][vert]; + vOut[attr * 4 + 3] = outputs_soa[attr][3][vert]; +#if 0 + if (attr==0) + printf("Output v%d a%d: %f %f %f %f\n", + vert, attr, vOut[0], vOut[1], vOut[2], vOut[3]); +#endif + vOut += output_stride / 4; + } + } + + /* advance to next group of four input/output verts */ + input = (const float (*)[4])((const char *)input + input_stride * max_vertices); + output = (float (*)[4])((char *)output + output_stride * max_vertices); + } +} + + + + +static void +vs_ppc_delete( struct draw_vertex_shader *base ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + + ppc_release_func( &shader->ppc_program ); + + align_free( (void *) shader->base.immediates ); + + FREE( (void*) shader->base.state.tokens ); + FREE( shader ); +} + + +struct draw_vertex_shader * +draw_create_vs_ppc(struct draw_context *draw, + const struct pipe_shader_state *templ) +{ + struct draw_ppc_vertex_shader *vs; + + vs = CALLOC_STRUCT( draw_ppc_vertex_shader ); + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) + goto fail; + + tgsi_scan_shader(templ->tokens, &vs->base.info); + + vs->base.draw = draw; +#if 0 + if (1) + vs->base.create_varient = draw_vs_varient_aos_ppc; + else +#endif + vs->base.create_varient = draw_vs_varient_generic; + vs->base.prepare = vs_ppc_prepare; + vs->base.run_linear = vs_ppc_run_linear; + vs->base.delete = vs_ppc_delete; + + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 * + sizeof(float), 16); + + vs->machine = &draw->vs.machine; + + ppc_init_func( &vs->ppc_program, 2000 ); /* XXX fix limit */ + + if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, + &vs->ppc_program, + (float (*)[4])vs->base.immediates, + TRUE )) + goto fail; + + vs->func = (codegen_function) ppc_get_func( &vs->ppc_program ); + if (!vs->func) { + goto fail; + } + + return &vs->base; + +fail: + /* + debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n"); + */ + + ppc_release_func( &vs->ppc_program ); + + FREE(vs); + return NULL; +} + + + +#else /* PIPE_ARCH_PPC */ + + +struct draw_vertex_shader * +draw_create_vs_ppc( struct draw_context *draw, + const struct pipe_shader_state *templ ) +{ + return (void *) 0; +} + + +#endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index 39b8a4dbd7..252dc5274a 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -7,6 +7,7 @@ C_SOURCES = \ rtasm_cpu.c \ rtasm_execmem.c \ rtasm_x86sse.c \ + rtasm_ppc.c \ rtasm_ppc_spe.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript index 8ea25922aa..eb48368acc 100644 --- a/src/gallium/auxiliary/rtasm/SConscript +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -6,6 +6,7 @@ rtasm = env.ConvenienceLibrary( 'rtasm_cpu.c', 'rtasm_execmem.c', 'rtasm_x86sse.c', + 'rtasm_ppc.c', 'rtasm_ppc_spe.c', ]) diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 534a23568d..7dd8263749 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -23,10 +23,19 @@ /** * PPC code generation. + * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf + * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf + * + * Other PPC refs: + * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2 + * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html + * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf + * * \author Brian Paul */ +#include <stdio.h> #include "util/u_memory.h" #include "pipe/p_debug.h" #include "rtasm_ppc.h" @@ -35,40 +44,148 @@ void ppc_init_func(struct ppc_function *p, unsigned max_inst) { - p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); - p->num_inst = 0; - p->max_inst = max_inst; - p->vec_used = ~0; + uint i; + + p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); + p->num_inst = 0; + p->max_inst = max_inst; + p->reg_used = 0x0; + p->fp_used = 0x0; + p->vec_used = 0x0; + + /* only allow using gp registers 3..12 for now */ + for (i = 0; i < 3; i++) + ppc_reserve_register(p, i); + for (i = 12; i < PPC_NUM_REGS; i++) + ppc_reserve_register(p, i); } void ppc_release_func(struct ppc_function *p) { - assert(p->num_inst <= p->max_inst); - if (p->store != NULL) { - align_free(p->store); - } - p->store = NULL; + assert(p->num_inst <= p->max_inst); + if (p->store != NULL) { + align_free(p->store); + } + p->store = NULL; +} + + +void (*ppc_get_func(struct ppc_function *p))(void) +{ +#if 0 + DUMP_END(); + if (DISASSEM && p->store) + debug_printf("disassemble %p %p\n", p->store, p->csr); + + if (p->store == p->error_overflow) + return (void (*)(void)) NULL; + else +#endif + return (void (*)(void)) p->store; +} + + +void +ppc_dump_func(const struct ppc_function *p) +{ + uint i; + for (i = 0; i < p->num_inst; i++) { + debug_printf("%3u: 0x%08x\n", i, p->store[i]); + } +} + + +/** + * Mark a register as being unavailable. + */ +int +ppc_reserve_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_REGS); + p->reg_used |= (1 << reg); + return reg; } /** - * Alloate a vector register. + * Allocate a general purpose register. * \return register index or -1 if none left. */ int -ppc_allocate_vec_register(struct ppc_function *p, int reg) +ppc_allocate_register(struct ppc_function *p) { unsigned i; - for (i = 0; i < PPC_NUM_VEC_REGS; i++) { + for (i = 0; i < PPC_NUM_REGS; i++) { const uint64_t mask = 1 << i; - if ((p->vec_used & mask) != 0) { - p->vec_used &= ~mask; + if ((p->reg_used & mask) == 0) { + p->reg_used |= mask; return i; } } + return -1; +} + + +/** + * Mark the given general purpose register as "unallocated". + */ +void +ppc_release_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_REGS); + assert(p->reg_used & (1 << reg)); + p->reg_used &= ~(1 << reg); +} + +/** + * Allocate a floating point register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_fp_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_FP_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->fp_used & mask) == 0) { + p->fp_used |= mask; + return i; + } + } + return -1; +} + + +/** + * Mark the given floating point register as "unallocated". + */ +void +ppc_release_fp_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_FP_REGS); + assert(p->fp_used & (1 << reg)); + p->fp_used &= ~(1 << reg); +} + + +/** + * Allocate a vector register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_vec_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_VEC_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->vec_used & mask) == 0) { + p->vec_used |= mask; + return i; + } + } return -1; } @@ -80,9 +197,8 @@ void ppc_release_vec_register(struct ppc_function *p, int reg) { assert(reg < PPC_NUM_VEC_REGS); - assert((p->vec_used & (1 << reg)) == 0); - - p->vec_used |= (1 << reg); + assert(p->vec_used & (1 << reg)); + p->vec_used &= ~(1 << reg); } @@ -98,6 +214,20 @@ union vx_inst { } inst; }; +static inline void +emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vx_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + union vxr_inst { uint32_t bits; struct { @@ -110,6 +240,21 @@ union vxr_inst { } inst; }; +static inline void +emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vxr_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.rC = 0; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + union va_inst { uint32_t bits; struct { @@ -122,49 +267,204 @@ union va_inst { } inst; }; - static inline void -emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) { - union vx_inst inst; + union va_inst inst; inst.inst.op = 4; inst.inst.vD = vD; inst.inst.vA = vA; inst.inst.vB = vB; + inst.inst.vC = vC; inst.inst.op2 = op2; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); }; -static inline void -emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) + +union i_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned li:24; + unsigned aa:1; + unsigned lk:1; + } inst; +}; + +static INLINE void +emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk) { - union vxr_inst inst; - inst.inst.op = 4; - inst.inst.vD = vD; - inst.inst.vA = vA; - inst.inst.vB = vB; - inst.inst.rC = 0; + union i_inst inst; + inst.inst.op = op; + inst.inst.li = li; + inst.inst.aa = aa; + inst.inst.lk = lk; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + + +union xl_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned bo:5; + unsigned bi:5; + unsigned unused:3; + unsigned bh:2; + unsigned op2:10; + unsigned lk:1; + } inst; +}; + +static INLINE void +emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh, + uint op2, uint lk) +{ + union xl_inst inst; + inst.inst.op = op; + inst.inst.bo = bo; + inst.inst.bi = bi; + inst.inst.unused = 0x0; + inst.inst.bh = bh; inst.inst.op2 = op2; + inst.inst.lk = lk; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); +} + +static INLINE void +dump_xl(const char *name, uint inst) +{ + union xl_inst i; + + i.bits = inst; + debug_printf("%s = 0x%08x\n", name, inst); + debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op); + debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo); + debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi); + debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused); + debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh); + debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2); + debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk); +} + + +union x_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vrs:5; + unsigned ra:5; + unsigned rb:5; + unsigned op2:10; + unsigned unused:1; + } inst; }; -static inline void -emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) +static INLINE void +emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) { - union va_inst inst; - inst.inst.op = 4; - inst.inst.vD = vD; - inst.inst.vA = vA; - inst.inst.vB = vB; - inst.inst.vC = vC; + union x_inst inst; + inst.inst.op = op; + inst.inst.vrs = vrs; + inst.inst.ra = ra; + inst.inst.rb = rb; + inst.inst.op2 = op2; + inst.inst.unused = 0x0; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + + +union d_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned rt:5; + unsigned ra:5; + unsigned si:16; + } inst; +}; + +static inline void +emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) +{ + union d_inst inst; + assert(si >= -32768); + assert(si <= 32767); + inst.inst.op = op; + inst.inst.rt = rt; + inst.inst.ra = ra; + inst.inst.si = (unsigned) (si & 0xffff); + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + +union a_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned frt:5; + unsigned fra:5; + unsigned frb:5; + unsigned unused:5; + unsigned op2:5; + unsigned rc:1; + } inst; +}; + +static inline void +emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, + uint rc) +{ + union a_inst inst; + inst.inst.op = op; + inst.inst.frt = frt; + inst.inst.fra = fra; + inst.inst.frb = frb; + inst.inst.unused = 0x0; inst.inst.op2 = op2; + inst.inst.rc = rc; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); }; +union xo_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned rt:5; + unsigned ra:5; + unsigned rb:5; + unsigned oe:1; + unsigned op2:9; + unsigned rc:1; + } inst; +}; + +static INLINE void +emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, + uint op2, uint rc) +{ + union xo_inst inst; + inst.inst.op = op; + inst.inst.rt = rt; + inst.inst.ra = ra; + inst.inst.rb = rb; + inst.inst.oe = oe; + inst.inst.op2 = op2; + inst.inst.rc = rc; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + + + + /** ** float vector arithmetic @@ -172,7 +472,7 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) /** vector float add */ void -ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB) +ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB) { emit_vx(p, 10, vD, vA, vB); } @@ -198,11 +498,11 @@ ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB) emit_vx(p, 1034, vD, vA, vB); } -/** vector float mult add */ +/** vector float mult add: vD = vA * vB + vC */ void ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 46, vD, vA, vB, vC); + emit_va(p, 46, vD, vA, vC, vB); /* note arg order */ } /** vector float compare greater than */ @@ -282,13 +582,34 @@ ppc_vrfiz(struct ppc_function *p, uint vD, uint vB) emit_vx(p, 586, vD, 0, vB); } +/** vector store: store vR at mem[vA+vB] */ +void +ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB) +{ + emit_x(p, 31, vR, vA, vB, 231); +} + +/** vector load: vR = mem[vA+vB] */ +void +ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB) +{ + emit_x(p, 31, vR, vA, vB, 103); +} + +/** load vector element word: vR = mem_word[ra+rb] */ +void +ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) +{ + emit_x(p, 31, vr, ra, rb, 71); +} + + /** - ** bitwise operations + ** vector bitwise operations **/ - /** vector and */ void ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB) @@ -324,6 +645,22 @@ ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) emit_vx(p, 1220, vD, vA, vB); } +/** Pseudo-instruction: vector move */ +void +ppc_vmove(struct ppc_function *p, uint vD, uint vA) +{ + ppc_vor(p, vD, vA, vA); +} + +/** Set vector register to {0,0,0,0} */ +void +ppc_vzero(struct ppc_function *p, uint vr) +{ + ppc_vxor(p, vr, vr, vr); +} + + + /** ** Vector shuffle / select / splat / etc @@ -363,3 +700,225 @@ ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm) { emit_vx(p, 652, vD, imm, vB); } + +/** vector splat signed immediate word */ +void +ppc_vspltisw(struct ppc_function *p, uint vD, int imm) +{ + assert(imm >= -16); + assert(imm < 15); + emit_vx(p, 908, vD, imm, 0); +} + +/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ +void +ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 388, vD, vA, vB); +} + + + + +/** + ** integer arithmetic + **/ + +/** rt = ra + imm */ +void +ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 14, rt, ra, imm); +} + +/** rt = ra + (imm << 16) */ +void +ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 15, rt, ra, imm); +} + +/** rt = ra + rb */ +void +ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_xo(p, 31, rt, ra, rb, 0, 266, 0); +} + +/** rt = ra AND ra */ +void +ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 28); /* note argument order */ +} + +/** rt = ra AND imm */ +void +ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 28, ra, rt, imm); /* note argument order */ +} + +/** rt = ra OR ra */ +void +ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 444); /* note argument order */ +} + +/** rt = ra OR imm */ +void +ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 24, ra, rt, imm); /* note argument order */ +} + +/** rt = ra XOR ra */ +void +ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 316); /* note argument order */ +} + +/** rt = ra XOR imm */ +void +ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 26, ra, rt, imm); /* note argument order */ +} + +/** pseudo instruction: move: rt = ra */ +void +ppc_mr(struct ppc_function *p, uint rt, uint ra) +{ + ppc_or(p, rt, ra, ra); +} + +/** pseudo instruction: load immediate: rt = imm */ +void +ppc_li(struct ppc_function *p, uint rt, int imm) +{ + ppc_addi(p, rt, 0, imm); +} + +/** rt = imm << 16 */ +void +ppc_lis(struct ppc_function *p, uint rt, int imm) +{ + ppc_addis(p, rt, 0, imm); +} + +/** rt = imm */ +void +ppc_load_int(struct ppc_function *p, uint rt, int imm) +{ + ppc_lis(p, rt, (imm >> 16)); /* rt = imm >> 16 */ + ppc_ori(p, rt, rt, (imm & 0xffff)); /* rt = rt | (imm & 0xffff) */ +} + + + + +/** + ** integer load/store + **/ + +/** store rs at memory[(ra)+d], + * then update ra = (ra)+d + */ +void +ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d) +{ + emit_d(p, 37, rs, ra, d); +} + +/** store rs at memory[(ra)+d] */ +void +ppc_stw(struct ppc_function *p, uint rs, uint ra, int d) +{ + emit_d(p, 36, rs, ra, d); +} + +/** Load rt = mem[(ra)+d]; then zero set high 32 bits to zero. */ +void +ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d) +{ + emit_d(p, 32, rt, ra, d); +} + + + +/** + ** Float (non-vector) arithmetic + **/ + +/** add: frt = fra + frb */ +void +ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb) +{ + emit_a(p, 63, frt, fra, frb, 21, 0); +} + +/** sub: frt = fra - frb */ +void +ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb) +{ + emit_a(p, 63, frt, fra, frb, 20, 0); +} + +/** convert to int: rt = (int) ra */ +void +ppc_fctiwz(struct ppc_function *p, uint rt, uint fra) +{ + emit_x(p, 63, rt, 0, fra, 15); +} + +/** store frs at mem[(ra)+offset] */ +void +ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset) +{ + emit_d(p, 52, frs, ra, offset); +} + +/** store frs at mem[(ra)+(rb)] */ +void +ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb) +{ + emit_x(p, 31, frs, ra, rb, 983); +} + +/** load frt = mem[(ra)+offset] */ +void +ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset) +{ + emit_d(p, 48, frt, ra, offset); +} + + + + + +/** + ** branch instructions + **/ + +/** BLR: Branch to link register (p. 35) */ +void +ppc_blr(struct ppc_function *p) +{ + emit_i(p, 18, 0, 0, 1); +} + +/** Branch Conditional to Link Register (p. 36) */ +void +ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg) +{ + emit_xl(p, 19, condOp, condReg, branchHint, 16, 0); +} + +/** Pseudo instruction: return from subroutine */ +void +ppc_return(struct ppc_function *p) +{ + ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0); +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index ed14e943df..f938d8d759 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -36,27 +36,47 @@ #define PPC_INST_SIZE 4 /**< 4 bytes / instruction */ +#define PPC_NUM_REGS 32 +#define PPC_NUM_FP_REGS 32 #define PPC_NUM_VEC_REGS 32 +/** Stack pointer register */ +#define PPC_REG_SP 1 + +/** Branch conditions */ +#define BRANCH_COND_ALWAYS 0x14 /* binary 1z1zz (z=ignored) */ + +/** Branch hints */ +#define BRANCH_HINT_SUB_RETURN 0x0 /* binary 00 */ + struct ppc_function { uint32_t *store; /**< instruction buffer */ uint num_inst; uint max_inst; - uint32_t vec_used; /** used/free vector registers bitmask */ uint32_t reg_used; /** used/free general-purpose registers bitmask */ + uint32_t fp_used; /** used/free floating point registers bitmask */ + uint32_t vec_used; /** used/free vector registers bitmask */ }; extern void ppc_init_func(struct ppc_function *p, unsigned max_inst); extern void ppc_release_func(struct ppc_function *p); - -extern int ppc_allocate_vec_register(struct ppc_function *p, int reg); +extern void (*ppc_get_func( struct ppc_function *p ))( void ); +extern void ppc_dump_func(const struct ppc_function *p); + +extern int ppc_reserve_register(struct ppc_function *p, int reg); +extern int ppc_allocate_register(struct ppc_function *p); +extern void ppc_release_register(struct ppc_function *p, int reg); +extern int ppc_allocate_fp_register(struct ppc_function *p); +extern void ppc_release_fp_register(struct ppc_function *p, int reg); +extern int ppc_allocate_vec_register(struct ppc_function *p); extern void ppc_release_vec_register(struct ppc_function *p, int reg); + /** ** float vector arithmetic **/ @@ -126,9 +146,22 @@ extern void ppc_vrfiz(struct ppc_function *p, uint vD, uint vB); +/** vector store: store vR at mem[vA+vB] */ +extern void +ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB); + +/** vector load: vR = mem[vA+vB] */ +extern void +ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB); + +/** load vector element word: vR = mem_word[vA+vB] */ +extern void +ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB); + + /** - ** bitwise operations + ** vector bitwise operations **/ @@ -152,6 +185,15 @@ ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB); extern void ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); +/** Pseudo-instruction: vector move */ +extern void +ppc_vmove(struct ppc_function *p, uint vD, uint vA); + +/** Set vector register to {0,0,0,0} */ +extern void +ppc_vzero(struct ppc_function *p, uint vr); + + /** ** Vector shuffle / select / splat / etc @@ -177,5 +219,106 @@ ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm); extern void ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm); +/** vector splat signed immediate word */ +extern void +ppc_vspltisw(struct ppc_function *p, uint vD, int imm); + +/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ +extern void +ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB); + + + +/** + ** scalar arithmetic + **/ + +extern void +ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_mr(struct ppc_function *p, uint rt, uint ra); + +extern void +ppc_li(struct ppc_function *p, uint rt, int imm); + +extern void +ppc_lis(struct ppc_function *p, uint rt, int imm); + +extern void +ppc_load_int(struct ppc_function *p, uint rt, int imm); + + + +/** + ** scalar load/store + **/ + +extern void +ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d); + +extern void +ppc_stw(struct ppc_function *p, uint rs, uint ra, int d); + +extern void +ppc_lwz(struct ppc_function *p, uint rs, uint ra, int d); + + + +/** + ** Float (non-vector) arithmetic + **/ + +extern void +ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb); + +extern void +ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb); + +extern void +ppc_fctiwz(struct ppc_function *p, uint rt, uint ra); + +extern void +ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset); + +extern void +ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb); + + + +/** + ** branch instructions + **/ + +extern void +ppc_blr(struct ppc_function *p); + +void +ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg); + +extern void +ppc_return(struct ppc_function *p); + #endif /* RTASM_PPC_H */ diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index c7155a9316..d7df9490cf 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ tgsi_info.c \ tgsi_iterate.c \ tgsi_parse.c \ + tgsi_ppc.c \ tgsi_scan.c \ tgsi_sse2.c \ tgsi_text.c \ diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 45bf3f6d57..8200cce42f 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -12,6 +12,7 @@ tgsi = env.ConvenienceLibrary( 'tgsi_parse.c', 'tgsi_sanity.c', 'tgsi_scan.c', + 'tgsi_ppc.c', 'tgsi_sse2.c', 'tgsi_text.c', 'tgsi_transform.c', diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c new file mode 100644 index 0000000000..9ad7ecd7cf --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -0,0 +1,910 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI to PowerPC code generation. + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_PPC) + +#include "pipe/p_debug.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_sse.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi_exec.h" +#include "tgsi_ppc.h" +#include "rtasm/rtasm_ppc.h" + + +/** + * Since it's pretty much impossible to form PPC vector immediates, load + * them from memory here: + */ +const float ppc_builtin_constants[] ALIGN16_ATTRIB = { + 1.0f, -128.0f, 128.0, 0.0 +}; + + +#define FOR_EACH_CHANNEL( CHAN )\ + for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) + +#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ + FOR_EACH_CHANNEL( CHAN )\ + IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + +#define TEMP_ONE_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_ONE_C TGSI_EXEC_TEMP_ONE_C + +#define TEMP_R0 TGSI_EXEC_TEMP_R0 +#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR + + +/** + * Context/state used during code gen. + */ +struct gen_context +{ + struct ppc_function *f; + int inputs_reg; /**< GP register pointing to input params */ + int outputs_reg; /**< GP register pointing to output params */ + int temps_reg; /**< GP register pointing to temporary "registers" */ + int immed_reg; /**< GP register pointing to immediates buffer */ + int const_reg; /**< GP register pointing to constants buffer */ + int builtins_reg; /**< GP register pointint to built-in constants */ + + int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ + int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ +}; + + +/** + * Load the given vector register with {value, value, value, value}. + * The value must be in the ppu_builtin_constants[] array. + * We wouldn't need this if there was a simple way to load PPC vector + * registers with immediate values! + */ +static void +load_constant_vec(struct gen_context *gen, int dst_vec, float value) +{ + uint pos; + for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { + if (ppc_builtin_constants[pos] == value) { + int offset_reg = ppc_allocate_register(gen->f); + int offset = pos * 4; + + ppc_li(gen->f, offset_reg, offset); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our builtins start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); + /* splat word[pos % 4] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); + ppc_release_register(gen->f, offset_reg); + return; + } + } + assert(0 && "Need to add new constant to ppc_builtin_constants array"); +} + + +/** + * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. + */ +static int +gen_one_vec(struct gen_context *gen) +{ + if (gen->one_vec < 0) { + gen->one_vec = ppc_allocate_vec_register(gen->f); + load_constant_vec(gen, gen->one_vec, 1.0f); + } + return gen->one_vec; +} + +/** + * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. + */ +static int +gen_get_bit31_vec(struct gen_context *gen) +{ + if (gen->bit31_vec < 0) { + gen->bit31_vec = ppc_allocate_vec_register(gen->f); + ppc_vspltisw(gen->f, gen->bit31_vec, -1); + ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); + } + return gen->bit31_vec; +} + + +/** + * Register fetch, put result in 'dst_vec'. + */ +static void +emit_fetch(struct gen_context *gen, + unsigned dst_vec, + const struct tgsi_full_src_register *reg, + const unsigned chan_index) +{ + uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index); + + switch (swizzle) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (reg->SrcRegister.File) { + case TGSI_FILE_INPUT: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_TEMPORARY: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_IMMEDIATE: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_CONSTANT: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + ppc_li(gen->f, offset_reg, offset); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our constants start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg); + /* splat word[swizzle] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); + ppc_release_register(gen->f, offset_reg); + } + break; + default: + assert( 0 ); + } + break; + case TGSI_EXTSWIZZLE_ZERO: + ppc_vzero(gen->f, dst_vec); + break; + case TGSI_EXTSWIZZLE_ONE: + { + int one_vec = gen_one_vec(gen); + ppc_vmove(gen->f, dst_vec, one_vec); + } + break; + default: + assert( 0 ); + } + + { + uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); + if (sign_op != TGSI_UTIL_SIGN_KEEP) { + int bit31_vec = gen_get_bit31_vec(gen); + + switch (sign_op) { + case TGSI_UTIL_SIGN_CLEAR: + /* vec = vec & ~bit31 */ + ppc_vandc(gen->f, dst_vec, dst_vec, bit31_vec); + break; + case TGSI_UTIL_SIGN_SET: + /* vec = vec | bit31 */ + ppc_vor(gen->f, dst_vec, dst_vec, bit31_vec); + break; + case TGSI_UTIL_SIGN_TOGGLE: + /* vec = vec ^ bit31 */ + ppc_vxor(gen->f, dst_vec, dst_vec, bit31_vec); + break; + default: + assert(0); + } + } + } +} + +#define FETCH( GEN, INST, DST_VEC, SRC_REG, CHAN ) \ + emit_fetch( GEN, DST_VEC, &(INST).FullSrcRegisters[SRC_REG], CHAN ) + + + +/** + * Register store. Store 'src_vec' at location indicated by 'reg'. + */ +static void +emit_store(struct gen_context *gen, + unsigned src_vec, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + unsigned chan_index) +{ + switch (reg->DstRegister.File) { + case TGSI_FILE_OUTPUT: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_TEMPORARY: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; +#if 0 + case TGSI_FILE_ADDRESS: + emit_addrs( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; +#endif + default: + assert( 0 ); + } + +#if 0 + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + /* assert( 0 ); */ + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } +#endif +} + + +#define STORE( GEN, INST, XMM, INDEX, CHAN )\ + emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + + + +static void +emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + uint chan_index; + + FETCH(gen, *inst, v0, 0, CHAN_X); + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_RSQ: + /* v1 = 1.0 / sqrt(v0) */ + ppc_vrsqrtefp(gen->f, v1, v0); + break; + case TGSI_OPCODE_RCP: + /* v1 = 1.0 / v0 */ + ppc_vrefp(gen->f, v1, v0); + break; + default: + assert(0); + } + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE(gen, *inst, v1, 0, chan_index); + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); +} + + +static void +emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, 0, 0, chan_index); /* v0 = srcreg[0] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + /* turn off the most significant bit of each vector float word */ + { + int v1 = ppc_allocate_vec_register(gen->f); + ppc_vspltisw(gen->f, v1, -1); /* v1 = {-1, -1, -1, -1} */ + ppc_vslw(gen->f, v1, v1, v1); /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */ + ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */ + ppc_release_vec_register(gen->f, v1); + } + break; + case TGSI_OPCODE_FLOOR: + ppc_vrfim(gen->f, v0, v0); /* v0 = floor(v0) */ + break; + case TGSI_OPCODE_FRAC: + { + int v1 = ppc_allocate_vec_register(gen->f); + ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ + ppc_vsubfp(gen->f, v0, v0, v1); /* v0 = v0 - v1 */ + ppc_release_vec_register(gen->f, v1); + } + break; + case TGSI_OPCODE_EXPBASE2: + ppc_vexptefp(gen->f, v0, v0); /* v0 = 2^v0 */ + break; + case TGSI_OPCODE_LOGBASE2: + /* XXX this may be broken! */ + ppc_vlogefp(gen->f, v0, v0); /* v0 = log2(v0) */ + break; + case TGSI_OPCODE_MOV: + /* nothing */ + break; + default: + assert(0); + } + STORE(gen, *inst, v0, 0, chan_index); /* store v0 */ + } + ppc_release_vec_register(gen->f, v0); +} + + +static void +emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + ppc_vaddfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_SUB: + ppc_vsubfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MUL: + ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */ + break; + case TGSI_OPCODE_MIN: + ppc_vminfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MAX: + ppc_vmaxfp(gen->f, v2, v0, v1); + break; + default: + assert(0); + } + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); +} + + +/** + * Vector comparisons, resulting in 1.0 or 0.0 values. + */ +static void +emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + uint chan_index; + boolean complement = FALSE; + int one_vec = gen_one_vec(gen); + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_SNE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SEQ: + ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ + break; + + case TGSI_OPCODE_SGE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SLT: + ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ + break; + + case TGSI_OPCODE_SLE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SGT: + ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */ + break; + default: + assert(0); + } + + /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ + + if (complement) + ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ + else + ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ + + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } + + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); +} + + +static void +emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + uint chan_index; + + ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ + + FETCH(gen, *inst, v0, 0, CHAN_X); /* v0 = src0.XXXX */ + FETCH(gen, *inst, v1, 1, CHAN_X); /* v1 = src1.XXXX */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + FETCH(gen, *inst, v0, 0, CHAN_Y); /* v0 = src0.YYYY */ + FETCH(gen, *inst, v1, 1, CHAN_Y); /* v1 = src1.YYYY */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + FETCH(gen, *inst, v0, 0, CHAN_Z); /* v0 = src0.ZZZZ */ + FETCH(gen, *inst, v1, 1, CHAN_Z); /* v1 = src1.ZZZZ */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { + FETCH(gen, *inst, v0, 0, CHAN_W); /* v0 = src0.WWWW */ + FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + } + else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { + FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ + } + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); +} + + +static void +emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + int v3 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + FETCH(gen, *inst, v2, 2, chan_index); /* v2 = srcreg[2] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MAD: + ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ + break; + case TGSI_OPCODE_LRP: + ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ + ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ + break; + default: + assert(0); + } + STORE(gen, *inst, v3, 0, chan_index); /* store v3 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); + ppc_release_vec_register(gen->f, v3); +} + + + +/** Approximation for vr = pow(va, vb) */ +static void +ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) +{ + /* pow(a,b) ~= exp2(log2(a) * b) */ + int t_vec = ppc_allocate_vec_register(f); + int zero_vec = ppc_allocate_vec_register(f); + + ppc_vzero(f, zero_vec); + + ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ + ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb */ + ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ + + ppc_release_vec_register(f, t_vec); + ppc_release_vec_register(f, zero_vec); +} + + +static void +emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int one_vec = gen_one_vec(gen); + + /* Compute X */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(gen, *inst, one_vec, 0, CHAN_X); + } + + /* Compute Y, Z */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int x_vec = ppc_allocate_vec_register(gen->f); + int zero_vec = ppc_allocate_vec_register(gen->f); + + FETCH(gen, *inst, x_vec, 0, CHAN_X); /* x_vec = src[0].x */ + + ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ + ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(gen, *inst, x_vec, 0, CHAN_Y); /* store Y */ + } + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int y_vec = ppc_allocate_vec_register(gen->f); + int z_vec = ppc_allocate_vec_register(gen->f); + int w_vec = ppc_allocate_vec_register(gen->f); + int pow_vec = ppc_allocate_vec_register(gen->f); + int pos_vec = ppc_allocate_vec_register(gen->f); + int p128_vec = ppc_allocate_vec_register(gen->f); + int n128_vec = ppc_allocate_vec_register(gen->f); + + FETCH(gen, *inst, y_vec, 0, CHAN_Y); /* y_vec = src[0].y */ + ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ + + FETCH(gen, *inst, w_vec, 0, CHAN_W); /* w_vec = src[0].w */ + + /* clamp Y to [-128, 128] */ + load_constant_vec(gen, p128_vec, 128.0f); + load_constant_vec(gen, n128_vec, -128.0f); + ppc_vmaxfp(gen->f, y_vec, y_vec, n128_vec); /* y = max(y, -128) */ + ppc_vminfp(gen->f, y_vec, y_vec, p128_vec); /* y = min(y, 128) */ + + /* if temp.x > 0 + * z = pow(tmp.y, tmp.w) + * else + * z = 0.0 + */ + ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ + ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ + ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ + + STORE(gen, *inst, z_vec, 0, CHAN_Z); /* store Z */ + + ppc_release_vec_register(gen->f, y_vec); + ppc_release_vec_register(gen->f, z_vec); + ppc_release_vec_register(gen->f, w_vec); + ppc_release_vec_register(gen->f, pow_vec); + ppc_release_vec_register(gen->f, pos_vec); + ppc_release_vec_register(gen->f, p128_vec); + ppc_release_vec_register(gen->f, n128_vec); + } + + ppc_release_vec_register(gen->f, x_vec); + ppc_release_vec_register(gen->f, zero_vec); + } + + /* Compute W */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(gen, *inst, one_vec, 0, CHAN_W); + } +} + + +static int +emit_instruction(struct gen_context *gen, + struct tgsi_full_instruction *inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_FLOOR: + case TGSI_OPCODE_FRAC: + case TGSI_OPCODE_EXPBASE2: + case TGSI_OPCODE_LOGBASE2: + emit_unaryop(gen, inst); + break; + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_RCP: + emit_scalar_unaryop(gen, inst); + break; + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MAX: + emit_binop(gen, inst); + break; + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SGE: + emit_inequality(gen, inst); + break; + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_LRP: + emit_triop(gen, inst); + break; + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + emit_dotprod(gen, inst); + break; + case TGSI_OPCODE_LIT: + emit_lit(gen, inst); + break; + case TGSI_OPCODE_END: + /* normal end */ + return 1; + default: + return 0; + } + + + return 1; +} + +static void +emit_declaration( + struct ppc_function *func, + struct tgsi_full_declaration *decl ) +{ + if( decl->Declaration.File == TGSI_FILE_INPUT ) { +#if 0 + unsigned first, last, mask; + unsigned i, j; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + emit_coef_a0( func, 0, i, j ); + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_coef_a0( func, 4, i, j ); + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 4 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); + emit_coef_a0( func, 5, i, j ); + emit_rcp( func, 4, 4 ); /* 1.0 / w */ + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 5 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ + emit_inputs( func, 0, i, j ); + break; + + default: + assert( 0 ); + break; + } + } + } + } +#endif + } +} + + + +static void +emit_prologue(struct ppc_function *func) +{ + /* XXX set up stack frame */ +} + + +static void +emit_epilogue(struct ppc_function *func) +{ + ppc_return(func); + /* XXX restore prev stack frame */ +} + + + +/** + * Translate a TGSI vertex/fragment shader to PPC code. + * + * \param tokens the TGSI input shader + * \param func the output PPC code/function + * \param immediates buffer to place immediates, later passed to PPC func + * \return TRUE for success, FALSE if translation failed + */ +boolean +tgsi_emit_ppc(const struct tgsi_token *tokens, + struct ppc_function *func, + float (*immediates)[4], + boolean do_swizzles ) +{ + static int use_ppc_asm = -1; + struct tgsi_parse_context parse; + /*boolean instruction_phase = FALSE;*/ + unsigned ok = 1; + uint num_immediates = 0; + struct gen_context gen; + + if (use_ppc_asm < 0) { + /* If GALLIUM_NOPPC is set, don't use PPC codegen */ + use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE); + } + if (!use_ppc_asm) + return FALSE; + + util_init_math(); + + gen.f = func; + gen.inputs_reg = ppc_reserve_register(func, 3); /* first function param */ + gen.outputs_reg = ppc_reserve_register(func, 4); /* second function param */ + gen.temps_reg = ppc_reserve_register(func, 5); /* ... */ + gen.immed_reg = ppc_reserve_register(func, 6); + gen.const_reg = ppc_reserve_register(func, 7); + gen.builtins_reg = ppc_reserve_register(func, 8); + gen.one_vec = -1; + gen.bit31_vec = -1; + + emit_prologue(func); + + tgsi_parse_init( &parse, tokens ); + + while (!tgsi_parse_end_of_tokens(&parse) && ok) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration(func, &parse.FullToken.FullDeclaration ); + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n", + parse.FullToken.FullInstruction.Instruction.Opcode, + parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? + "vertex shader" : "fragment shader"); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* splat each immediate component into a float[4] vector for SoA */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + float *imm = (float *) immediates; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for (i = 0; i < size; i++) { + const float value = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + imm[num_immediates * 4 + 0] = + imm[num_immediates * 4 + 1] = + imm[num_immediates * 4 + 2] = + imm[num_immediates * 4 + 3] = value; + num_immediates++; + } + } + break; + + default: + ok = 0; + assert( 0 ); + } + } + + emit_epilogue(func); + + tgsi_parse_free( &parse ); + + return ok; +} + +#endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h new file mode 100644 index 0000000000..829ec075e7 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h @@ -0,0 +1,51 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_PPC_H +#define TGSI_PPC_H + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_token; +struct ppc_function; + +extern const float ppc_builtin_constants[]; + + +boolean +tgsi_emit_ppc(const struct tgsi_token *tokens, + struct ppc_function *function, + float (*immediates)[4], + boolean do_swizzles); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_PPC_H */ |