From be5d8bd07886157fe524b8715509cd03ade2fda9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 12 Sep 2008 08:21:43 -0600 Subject: gallium: initial PPC/Altivec codegen --- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 181 ++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 src/gallium/auxiliary/rtasm/rtasm_ppc.h (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h new file mode 100644 index 0000000000..ed14e943df --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -0,0 +1,181 @@ +/************************************************************************** + * + * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * PPC code generation. 
+ * \author Brian Paul + */ + + +#ifndef RTASM_PPC_H +#define RTASM_PPC_H + + +#include "pipe/p_compiler.h" + + +#define PPC_INST_SIZE 4 /**< 4 bytes / instruction */ + +#define PPC_NUM_VEC_REGS 32 + + +struct ppc_function +{ + uint32_t *store; /**< instruction buffer */ + uint num_inst; + uint max_inst; + uint32_t vec_used; /** used/free vector registers bitmask */ + uint32_t reg_used; /** used/free general-purpose registers bitmask */ +}; + + + +extern void ppc_init_func(struct ppc_function *p, unsigned max_inst); +extern void ppc_release_func(struct ppc_function *p); + +extern int ppc_allocate_vec_register(struct ppc_function *p, int reg); +extern void ppc_release_vec_register(struct ppc_function *p, int reg); + + +/** + ** float vector arithmetic + **/ + +/** vector float add */ +extern void +ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB); + +/** vector float subtract */ +extern void +ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float min */ +extern void +ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float max */ +extern void +ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float mult add */ +extern void +ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector float compare greater than */ +extern void +ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float compare greater than or equal to */ +extern void +ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float compare equal */ +extern void +ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float 2^x */ +extern void +ppc_vexptefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float log2(x) */ +extern void +ppc_vlogefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float reciprocal */ +extern void +ppc_vrefp(struct ppc_function *p, uint vD, uint vB); + +/** vector 
float reciprocal sqrt estimate */ +extern void +ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to negative infinity */ +extern void +ppc_vrfim(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to positive infinity */ +extern void +ppc_vrfip(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to nearest int */ +extern void +ppc_vrfin(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to int toward zero */ +extern void +ppc_vrfiz(struct ppc_function *p, uint vD, uint vB); + + + +/** + ** bitwise operations + **/ + + +/** vector and */ +extern void +ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector and complement */ +extern void +ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector or */ +extern void +ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector nor */ +extern void +ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector xor */ +extern void +ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); + + +/** + ** Vector shuffle / select / splat / etc + **/ + +/** vector permute */ +extern void +ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector select */ +extern void +ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector splat byte */ +extern void +ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat half word */ +extern void +ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat word */ +extern void +ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm); + + +#endif /* RTASM_PPC_H */ -- cgit v1.2.3 From e0c6653a5fda956119239ef921daf1e3b950dfc8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 10:35:38 -0600 Subject: cell: implement many more PPC instructions for code gen --- src/gallium/auxiliary/rtasm/Makefile | 1 + 
src/gallium/auxiliary/rtasm/rtasm_ppc.c | 603 ++++++++++++++++++++++++++++++-- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 141 +++++++- 3 files changed, 704 insertions(+), 41 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index 39b8a4dbd7..252dc5274a 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -7,6 +7,7 @@ C_SOURCES = \ rtasm_cpu.c \ rtasm_execmem.c \ rtasm_x86sse.c \ + rtasm_ppc.c \ rtasm_ppc_spe.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 534a23568d..4a94ed0460 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -23,10 +23,19 @@ /** * PPC code generation. + * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf + * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf + * + * Other PPC refs: + * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2 + * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html + * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf + * * \author Brian Paul */ +#include <stdio.h> #include "util/u_memory.h" #include "pipe/p_debug.h" #include "rtasm_ppc.h" @@ -35,30 +44,125 @@ void ppc_init_func(struct ppc_function *p, unsigned max_inst) { - p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); - p->num_inst = 0; - p->max_inst = max_inst; - p->vec_used = ~0; + uint i; + + p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); + p->num_inst = 0; + p->max_inst = max_inst; + p->fp_used = ~0x0; + p->vec_used = ~0x0; + + /* only allow using gp registers 7..12 for now */ + p->reg_used = 0x0; + for (i = 7; i < 13; i++) + p->reg_used |= (1 << i); } void ppc_release_func(struct ppc_function *p) { - assert(p->num_inst <= p->max_inst); - if (p->store != NULL) { 
- align_free(p->store); - } - p->store = NULL; + assert(p->num_inst <= p->max_inst); + if (p->store != NULL) { + align_free(p->store); + } + p->store = NULL; +} + + +void (*ppc_get_func(struct ppc_function *p))(void) +{ +#if 0 + DUMP_END(); + if (DISASSEM && p->store) + debug_printf("disassemble %p %p\n", p->store, p->csr); + + if (p->store == p->error_overflow) + return (void (*)(void)) NULL; + else +#endif + return (void (*)(void)) p->store; +} + + +void +ppc_dump_func(const struct ppc_function *p) +{ + uint i; + for (i = 0; i < p->num_inst; i++) { + debug_printf("%3u: 0x%08x\n", i, p->store[i]); + } +} + + +/** + * Allocate a general purpose register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->reg_used & mask) != 0) { + p->reg_used &= ~mask; + return i; + } + } + return -1; } /** - * Alloate a vector register. + * Mark the given general purpose register as "unallocated". + */ +void +ppc_release_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_REGS); + assert((p->reg_used & (1 << reg)) == 0); + p->reg_used |= (1 << reg); +} + + +/** + * Allocate a floating point register. * \return register index or -1 if none left. */ int -ppc_allocate_vec_register(struct ppc_function *p, int reg) +ppc_allocate_fp_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_FP_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->fp_used & mask) != 0) { + p->fp_used &= ~mask; + return i; + } + } + return -1; +} + + +/** + * Mark the given floating point register as "unallocated". + */ +void +ppc_release_fp_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_FP_REGS); + assert((p->fp_used & (1 << reg)) == 0); + p->fp_used |= (1 << reg); +} + + +/** + * Allocate a vector register. + * \return register index or -1 if none left. 
+ */ +int +ppc_allocate_vec_register(struct ppc_function *p) { unsigned i; for (i = 0; i < PPC_NUM_VEC_REGS; i++) { @@ -68,7 +172,6 @@ ppc_allocate_vec_register(struct ppc_function *p, int reg) return i; } } - return -1; } @@ -81,7 +184,6 @@ ppc_release_vec_register(struct ppc_function *p, int reg) { assert(reg < PPC_NUM_VEC_REGS); assert((p->vec_used & (1 << reg)) == 0); - p->vec_used |= (1 << reg); } @@ -98,6 +200,20 @@ union vx_inst { } inst; }; +static inline void +emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vx_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + union vxr_inst { uint32_t bits; struct { @@ -110,6 +226,21 @@ union vxr_inst { } inst; }; +static inline void +emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vxr_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.rC = 0; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + union va_inst { uint32_t bits; struct { @@ -122,49 +253,204 @@ union va_inst { } inst; }; - static inline void -emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) { - union vx_inst inst; + union va_inst inst; inst.inst.op = 4; inst.inst.vD = vD; inst.inst.vA = vA; inst.inst.vB = vB; + inst.inst.vC = vC; inst.inst.op2 = op2; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); }; -static inline void -emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) + +union i_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned li:24; + unsigned aa:1; + unsigned lk:1; + } inst; +}; + +static INLINE void +emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk) { - union 
vxr_inst inst; - inst.inst.op = 4; - inst.inst.vD = vD; - inst.inst.vA = vA; - inst.inst.vB = vB; - inst.inst.rC = 0; + union i_inst inst; + inst.inst.op = op; + inst.inst.li = li; + inst.inst.aa = aa; + inst.inst.lk = lk; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + + +union xl_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned bo:5; + unsigned bi:5; + unsigned unused:3; + unsigned bh:2; + unsigned op2:10; + unsigned lk:1; + } inst; +}; + +static INLINE void +emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh, + uint op2, uint lk) +{ + union xl_inst inst; + inst.inst.op = op; + inst.inst.bo = bo; + inst.inst.bi = bi; + inst.inst.unused = 0x0; + inst.inst.bh = bh; inst.inst.op2 = op2; + inst.inst.lk = lk; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); +} + +static INLINE void +dump_xl(const char *name, uint inst) +{ + union xl_inst i; + + i.bits = inst; + debug_printf("%s = 0x%08x\n", name, inst); + debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op); + debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo); + debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi); + debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused); + debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh); + debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2); + debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk); +} + + +union x_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vrs:5; + unsigned ra:5; + unsigned rb:5; + unsigned op2:10; + unsigned unused:1; + } inst; }; -static inline void -emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) +static INLINE void +emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) { - union va_inst inst; - inst.inst.op = 4; - inst.inst.vD = vD; - inst.inst.vA = vA; - inst.inst.vB = vB; - inst.inst.vC = vC; + union x_inst inst; + inst.inst.op = op; + inst.inst.vrs = vrs; + inst.inst.ra = ra; + 
inst.inst.rb = rb; inst.inst.op2 = op2; + inst.inst.unused = 0x0; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + + +union d_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned rt:5; + unsigned ra:5; + unsigned si:16; + } inst; +}; + +static inline void +emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) +{ + union d_inst inst; + assert(si >= -32768); + assert(si <= 32767); + inst.inst.op = op; + inst.inst.rt = rt; + inst.inst.ra = ra; + inst.inst.si = (unsigned) (si & 0xffff); p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); }; +union a_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned frt:5; + unsigned fra:5; + unsigned frb:5; + unsigned unused:5; + unsigned op2:5; + unsigned rc:1; + } inst; +}; + +static inline void +emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, + uint rc) +{ + union a_inst inst; + inst.inst.op = op; + inst.inst.frt = frt; + inst.inst.fra = fra; + inst.inst.frb = frb; + inst.inst.unused = 0x0; + inst.inst.op2 = op2; + inst.inst.rc = rc; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + +union xo_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned rt:5; + unsigned ra:5; + unsigned rb:5; + unsigned oe:1; + unsigned op2:9; + unsigned rc:1; + } inst; +}; + +static INLINE void +emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, + uint op2, uint rc) +{ + union xo_inst inst; + inst.inst.op = op; + inst.inst.rt = rt; + inst.inst.ra = ra; + inst.inst.rb = rb; + inst.inst.oe = oe; + inst.inst.op2 = op2; + inst.inst.rc = rc; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + + + + /** ** float vector arithmetic @@ -172,7 +458,7 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) /** vector float add */ void -ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB) +ppc_vaddfp(struct 
ppc_function *p, uint vD, uint vA, uint vB) { emit_vx(p, 10, vD, vA, vB); } @@ -198,11 +484,11 @@ ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB) emit_vx(p, 1034, vD, vA, vB); } -/** vector float mult add */ +/** vector float mult add: vD = vA * vB + vC */ void ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 46, vD, vA, vB, vC); + emit_va(p, 46, vD, vA, vC, vB); /* note arg order */ } /** vector float compare greater than */ @@ -282,13 +568,26 @@ ppc_vrfiz(struct ppc_function *p, uint vD, uint vB) emit_vx(p, 586, vD, 0, vB); } +/** vector store: store vR at mem[vA+vB] */ +void +ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB) +{ + emit_x(p, 31, vR, vA, vB, 231); +} + +/** vector load: vR = mem[vA+vB] */ +void +ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB) +{ + emit_x(p, 31, vR, vA, vB, 103); +} + /** - ** bitwise operations + ** vector bitwise operations **/ - /** vector and */ void ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB) @@ -324,6 +623,14 @@ ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) emit_vx(p, 1220, vD, vA, vB); } +/** Pseudo-instruction: vector move */ +void +ppc_vecmove(struct ppc_function *p, uint vD, uint vA) +{ + ppc_vor(p, vD, vA, vA); +} + + /** ** Vector shuffle / select / splat / etc @@ -363,3 +670,225 @@ ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm) { emit_vx(p, 652, vD, imm, vB); } + +/** vector splat signed immediate word */ +void +ppc_vspltisw(struct ppc_function *p, uint vD, int imm) +{ + assert(imm >= -16); + assert(imm < 15); + emit_vx(p, 908, vD, imm, 0); +} + +/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ +void +ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 388, vD, vA, vB); +} + + + + +/** + ** integer arithmetic + **/ + +/** rt = ra + imm */ +void +ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 14, rt, ra, imm); +} + +/** rt = ra + 
(imm << 16) */ +void +ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 15, rt, ra, imm); +} + +/** rt = ra + rb */ +void +ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_xo(p, 31, rt, ra, rb, 0, 266, 0); +} + +/** rt = ra AND rb */ +void +ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 28); /* note argument order */ +} + +/** rt = ra AND imm */ +void +ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 28, ra, rt, imm); /* note argument order */ +} + +/** rt = ra OR rb */ +void +ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 444); /* note argument order */ +} + +/** rt = ra OR imm */ +void +ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 24, ra, rt, imm); /* note argument order */ +} + +/** rt = ra XOR rb */ +void +ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 316); /* note argument order */ +} + +/** rt = ra XOR imm */ +void +ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 26, ra, rt, imm); /* note argument order */ +} + +/** pseudo instruction: move: rt = ra */ +void +ppc_mr(struct ppc_function *p, uint rt, uint ra) +{ + ppc_or(p, rt, ra, ra); +} + +/** pseudo instruction: load immediate: rt = imm */ +void +ppc_li(struct ppc_function *p, uint rt, int imm) +{ + ppc_addi(p, rt, 0, imm); +} + +/** rt = imm << 16 */ +void +ppc_lis(struct ppc_function *p, uint rt, int imm) +{ + ppc_addis(p, rt, 0, imm); +} + +/** rt = imm */ +void +ppc_load_int(struct ppc_function *p, uint rt, int imm) +{ + ppc_lis(p, rt, (imm >> 16)); /* rt = (imm >> 16) << 16 */ + ppc_ori(p, rt, rt, (imm & 0xffff)); /* rt = rt | (imm & 0xffff) */ +} + + + + +/** + ** integer load/store + **/ + +/** store rs at memory[(ra)+d], + * then update ra = (ra)+d + */ +void +ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d) +{ + emit_d(p, 37, rs, ra, d); +} + 
+/** store rs at memory[(ra)+d] */ +void +ppc_stw(struct ppc_function *p, uint rs, uint ra, int d) +{ + emit_d(p, 36, rs, ra, d); +} + +/** Load rt = mem[(ra)+d]; then set the high 32 bits to zero. */ +void +ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d) +{ + emit_d(p, 32, rt, ra, d); +} + + + +/** + ** Float (non-vector) arithmetic + **/ + +/** add: frt = fra + frb */ +void +ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb) +{ + emit_a(p, 63, frt, fra, frb, 21, 0); +} + +/** sub: frt = fra - frb */ +void +ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb) +{ + emit_a(p, 63, frt, fra, frb, 20, 0); +} + +/** convert to int: rt = (int) fra */ +void +ppc_fctiwz(struct ppc_function *p, uint rt, uint fra) +{ + emit_x(p, 63, rt, 0, fra, 15); +} + +/** store frs at mem[(ra)+offset] */ +void +ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset) +{ + emit_d(p, 52, frs, ra, offset); +} + +/** store frs at mem[(ra)+(rb)] */ +void +ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb) +{ + emit_x(p, 31, frs, ra, rb, 983); +} + +/** load frt = mem[(ra)+offset] */ +void +ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset) +{ + emit_d(p, 48, frt, ra, offset); +} + + + + + +/** + ** branch instructions + **/ + +/** BLR: Branch to link register (p. 35) */ +void +ppc_blr(struct ppc_function *p) +{ + emit_i(p, 18, 0, 0, 1); +} + +/** Branch Conditional to Link Register (p. 
36) */ +void +ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg) +{ + emit_xl(p, 19, condOp, condReg, branchHint, 16, 0); +} + +/** Pseudo instruction: return from subroutine */ +void +ppc_return(struct ppc_function *p) +{ + ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0); +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index ed14e943df..6370b60494 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -36,27 +36,46 @@ #define PPC_INST_SIZE 4 /**< 4 bytes / instruction */ +#define PPC_NUM_REGS 32 +#define PPC_NUM_FP_REGS 32 #define PPC_NUM_VEC_REGS 32 +/** Stack pointer register */ +#define PPC_REG_SP 1 + +/** Branch conditions */ +#define BRANCH_COND_ALWAYS 0x14 /* binary 1z1zz (z=ignored) */ + +/** Branch hints */ +#define BRANCH_HINT_SUB_RETURN 0x0 /* binary 00 */ + struct ppc_function { uint32_t *store; /**< instruction buffer */ uint num_inst; uint max_inst; - uint32_t vec_used; /** used/free vector registers bitmask */ uint32_t reg_used; /** used/free general-purpose registers bitmask */ + uint32_t fp_used; /** used/free floating point registers bitmask */ + uint32_t vec_used; /** used/free vector registers bitmask */ }; extern void ppc_init_func(struct ppc_function *p, unsigned max_inst); extern void ppc_release_func(struct ppc_function *p); - -extern int ppc_allocate_vec_register(struct ppc_function *p, int reg); +extern void (*ppc_get_func( struct ppc_function *p ))( void ); +extern void ppc_dump_func(const struct ppc_function *p); + +extern int ppc_allocate_register(struct ppc_function *p); +extern void ppc_release_register(struct ppc_function *p, int reg); +extern int ppc_allocate_fp_register(struct ppc_function *p); +extern void ppc_release_fp_register(struct ppc_function *p, int reg); +extern int ppc_allocate_vec_register(struct ppc_function *p); extern void ppc_release_vec_register(struct ppc_function *p, int reg); + 
/** ** float vector arithmetic **/ @@ -126,9 +145,18 @@ extern void ppc_vrfiz(struct ppc_function *p, uint vD, uint vB); +/** vector store: store vR at mem[vA+vB] */ +extern void +ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB); + +/** vector load: vR = mem[vA+vB] */ +extern void +ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB); + + /** - ** bitwise operations + ** vector bitwise operations **/ @@ -152,6 +180,10 @@ ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB); extern void ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); +/** Pseudo-instruction: vector move */ +extern void +ppc_vecmove(struct ppc_function *p, uint vD, uint vA); + /** ** Vector shuffle / select / splat / etc @@ -177,5 +209,106 @@ ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm); extern void ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm); +/** vector splat signed immediate word */ +extern void +ppc_vspltisw(struct ppc_function *p, uint vD, int imm); + +/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ +extern void +ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB); + + + +/** + ** scalar arithmetic + **/ + +extern void +ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_mr(struct ppc_function *p, uint rt, uint ra); + +extern void +ppc_li(struct ppc_function *p, uint rt, int imm); + +extern void +ppc_lis(struct ppc_function *p, uint rt, 
int imm); + +extern void +ppc_load_int(struct ppc_function *p, uint rt, int imm); + + + +/** + ** scalar load/store + **/ + +extern void +ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d); + +extern void +ppc_stw(struct ppc_function *p, uint rs, uint ra, int d); + +extern void +ppc_lwz(struct ppc_function *p, uint rs, uint ra, int d); + + + +/** + ** Float (non-vector) arithmetic + **/ + +extern void +ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb); + +extern void +ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb); + +extern void +ppc_fctiwz(struct ppc_function *p, uint rt, uint ra); + +extern void +ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset); + +extern void +ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb); + + + +/** + ** branch instructions + **/ + +extern void +ppc_blr(struct ppc_function *p); + +void +ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg); + +extern void +ppc_return(struct ppc_function *p); + #endif /* RTASM_PPC_H */ -- cgit v1.2.3 From 049f57f86a2cb8ff08fba819c581a034ca7ea52c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 11:06:39 -0600 Subject: gallium: added ppc_lvewx() --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 7 +++++++ src/gallium/auxiliary/rtasm/rtasm_ppc.h | 4 ++++ 2 files changed, 11 insertions(+) (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 4a94ed0460..aaec2d2191 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -582,6 +582,13 @@ ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB) emit_x(p, 31, vR, vA, vB, 103); } +/** load vector element word: vR = mem_word[vA+vB] */ +void +ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB) +{ + emit_x(p, 31, vR, vA, vB, 71); +} + /** diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h 
b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 6370b60494..53d5746dc8 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -153,6 +153,10 @@ ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB); extern void ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB); +/** load vector element word: vR = mem_word[vA+vB] */ +extern void +ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB); + /** -- cgit v1.2.3 From ebdc399d83d6bd2f4e3594874483dbca5f9f5c0e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 13:57:56 -0600 Subject: gallium: fix-up confusing register allocation masks in rtasm_ppc.c Plus, add ppc_reserve_register() func. --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 56 ++++++++++++++++++++------------- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 1 + 2 files changed, 36 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index aaec2d2191..2d9f4e079e 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -49,13 +49,15 @@ ppc_init_func(struct ppc_function *p, unsigned max_inst) p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); p->num_inst = 0; p->max_inst = max_inst; - p->fp_used = ~0x0; - p->vec_used = ~0x0; - - /* only allow using gp registers 7..12 for now */ p->reg_used = 0x0; - for (i = 7; i < 13; i++) - p->reg_used |= (1 << i); + p->fp_used = 0x0; + p->vec_used = 0x0; + + /* only allow using gp registers 3..12 for now */ + for (i = 0; i < 3; i++) + ppc_reserve_register(p, i); + for (i = 12; i < PPC_NUM_REGS; i++) + ppc_reserve_register(p, i); } @@ -95,6 +97,18 @@ ppc_dump_func(const struct ppc_function *p) } +/** + * Mark a register as being unavailable. 
+ */ +int +ppc_reserve_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_REGS); + p->reg_used |= (1 << reg); + return reg; +} + + /** * Allocate a general purpose register. * \return register index or -1 if none left. @@ -105,8 +119,8 @@ ppc_allocate_register(struct ppc_function *p) unsigned i; for (i = 0; i < PPC_NUM_REGS; i++) { const uint64_t mask = 1 << i; - if ((p->reg_used & mask) != 0) { - p->reg_used &= ~mask; + if ((p->reg_used & mask) == 0) { + p->reg_used |= mask; return i; } } @@ -121,8 +135,8 @@ void ppc_release_register(struct ppc_function *p, int reg) { assert(reg < PPC_NUM_REGS); - assert((p->reg_used & (1 << reg)) == 0); - p->reg_used |= (1 << reg); + assert(p->reg_used & (1 << reg)); + p->reg_used &= ~(1 << reg); } @@ -136,8 +150,8 @@ ppc_allocate_fp_register(struct ppc_function *p) unsigned i; for (i = 0; i < PPC_NUM_FP_REGS; i++) { const uint64_t mask = 1 << i; - if ((p->fp_used & mask) != 0) { - p->fp_used &= ~mask; + if ((p->fp_used & mask) == 0) { + p->fp_used |= mask; return i; } } @@ -152,8 +166,8 @@ void ppc_release_fp_register(struct ppc_function *p, int reg) { assert(reg < PPC_NUM_FP_REGS); - assert((p->fp_used & (1 << reg)) == 0); - p->fp_used |= (1 << reg); + assert(p->fp_used & (1 << reg)); + p->fp_used &= ~(1 << reg); } @@ -167,8 +181,8 @@ ppc_allocate_vec_register(struct ppc_function *p) unsigned i; for (i = 0; i < PPC_NUM_VEC_REGS; i++) { const uint64_t mask = 1 << i; - if ((p->vec_used & mask) != 0) { - p->vec_used &= ~mask; + if ((p->vec_used & mask) == 0) { + p->vec_used |= mask; return i; } } @@ -183,8 +197,8 @@ void ppc_release_vec_register(struct ppc_function *p, int reg) { assert(reg < PPC_NUM_VEC_REGS); - assert((p->vec_used & (1 << reg)) == 0); - p->vec_used |= (1 << reg); + assert(p->vec_used & (1 << reg)); + p->vec_used &= ~(1 << reg); } @@ -582,11 +596,11 @@ ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB) emit_x(p, 31, vR, vA, vB, 103); } -/** load vector element word: vR = mem_word[vA+vB] 
*/ +/** load vector element word: vR = mem_word[ra+rb] */ void -ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB) +ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) { - emit_x(p, 31, vR, vA, vB, 71); + emit_x(p, 31, vr, ra, rb, 71); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 53d5746dc8..85679b4886 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -67,6 +67,7 @@ extern void ppc_release_func(struct ppc_function *p); extern void (*ppc_get_func( struct ppc_function *p ))( void ); extern void ppc_dump_func(const struct ppc_function *p); +extern int ppc_reserve_register(struct ppc_function *p, int reg); extern int ppc_allocate_register(struct ppc_function *p); extern void ppc_release_register(struct ppc_function *p, int reg); extern int ppc_allocate_fp_register(struct ppc_function *p); -- cgit v1.2.3 From b06d0720194dfecaf45dc97cbd178411aed5205f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 14:48:33 -0600 Subject: gallium: added ppc_vload_float(), for limited cases --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 18 ++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_ppc.h | 4 ++++ 2 files changed, 22 insertions(+) (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 2d9f4e079e..65df676eae 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -603,6 +603,24 @@ ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) emit_x(p, 31, vr, ra, rb, 71); } +/** vector load float: vr = splats(imm) */ +void +ppc_vload_float(struct ppc_function *p, uint vr, float imm) +{ + if (imm == 0.0f) { + ppc_vxor(p, vr, vr, vr); + } + else if (imm == 1.0f) { + /* use 2^0=1 to get 1.0 */ + ppc_vxor(p, vr, vr, vr); /* vr = {0,0,0,0} */ + ppc_vexptefp(p, vr, vr); /* vr = 2^0 */ + } + else { + 
assert(0); + } +} + + /** diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 85679b4886..9f1e3fcd84 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -158,6 +158,10 @@ ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB); extern void ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB); +/** vector load float: vr = splats(imm) */ +extern void +ppc_vload_float(struct ppc_function *p, uint vr, float imm); + /** -- cgit v1.2.3 From 3026616c48487a7561d8545c08950539f0ad51d1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 17:17:11 -0600 Subject: gallium: added ppc_vzero() --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 8 ++++++++ src/gallium/auxiliary/rtasm/rtasm_ppc.h | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 65df676eae..51d9b53657 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -669,6 +669,14 @@ ppc_vecmove(struct ppc_function *p, uint vD, uint vA) ppc_vor(p, vD, vA, vA); } +/** Set vector register to {0,0,0,0} */ +void +ppc_vzero(struct ppc_function *p, uint vr) +{ + ppc_vxor(p, vr, vr, vr); +} + + /** diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 9f1e3fcd84..f194d3be13 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -193,6 +193,11 @@ ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); extern void ppc_vecmove(struct ppc_function *p, uint vD, uint vA); +/** Set vector register to {0,0,0,0} */ +extern void +ppc_vzero(struct ppc_function *p, uint vr); + + /** ** Vector shuffle / select / splat / etc -- cgit v1.2.3 From f8ab4feb75f4a592e23859813c093dcdbd4b8988 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 
2008 17:21:43 -0600 Subject: gallium: remove ppc_vload_float(), rename ppc_vecmove() -> ppc_vmove(). --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 19 +------------------ src/gallium/auxiliary/rtasm/rtasm_ppc.h | 6 +----- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 2 +- 3 files changed, 3 insertions(+), 24 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 51d9b53657..7dd8263749 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -603,23 +603,6 @@ ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) emit_x(p, 31, vr, ra, rb, 71); } -/** vector load float: vr = splats(imm) */ -void -ppc_vload_float(struct ppc_function *p, uint vr, float imm) -{ - if (imm == 0.0f) { - ppc_vxor(p, vr, vr, vr); - } - else if (imm == 1.0f) { - /* use 2^0=1 to get 1.0 */ - ppc_vxor(p, vr, vr, vr); /* vr = {0,0,0,0} */ - ppc_vexptefp(p, vr, vr); /* vr = 0^0 */ - } - else { - assert(0); - } -} - @@ -664,7 +647,7 @@ ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) /** Pseudo-instruction: vector move */ void -ppc_vecmove(struct ppc_function *p, uint vD, uint vA) +ppc_vmove(struct ppc_function *p, uint vD, uint vA) { ppc_vor(p, vD, vA, vA); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index f194d3be13..f938d8d759 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -158,10 +158,6 @@ ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB); extern void ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB); -/** vector load float: vr = splats(imm) */ -extern void -ppc_vload_float(struct ppc_function *p, uint vr, float imm); - /** @@ -191,7 +187,7 @@ ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); /** Pseudo-instruction: vector move */ extern void -ppc_vecmove(struct ppc_function *p, uint vD, 
uint vA); +ppc_vmove(struct ppc_function *p, uint vD, uint vA); /** Set vector register to {0,0,0,0} */ extern void diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 6b05fd16cf..96beec0cc6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -229,7 +229,7 @@ emit_fetch(struct gen_context *gen, case TGSI_EXTSWIZZLE_ONE: { int one_vec = gen_one_vec(gen); - ppc_vecmove(gen->f, dst_vec, one_vec); + ppc_vmove(gen->f, dst_vec, one_vec); } break; default: -- cgit v1.2.3 From 7640264064c2cbc9922f7f3df51f7caa7b449e8e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 11:03:51 -0600 Subject: gallium: added ppc_vnmsubfp() --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 7 +++++++ src/gallium/auxiliary/rtasm/rtasm_ppc.h | 6 +++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 7dd8263749..a90b5587b0 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -505,6 +505,13 @@ ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) emit_va(p, 46, vD, vA, vC, vB); /* note arg order */ } +/** vector float negative mult subtract: vD = vA - vB * vC */ +void +ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 47, vD, vB, vA, vC); /* note arg order */ +} + /** vector float compare greater than */ void ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB) diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index f938d8d759..561e139bce 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -97,10 +97,14 @@ ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB); extern void ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint 
vB); -/** vector float mult add */ +/** vector float mult add: vD = vA * vB + vC */ extern void ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); +/** vector float negative mult subtract: vD = vA - vB * vC */ +extern void +ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + /** vector float compare greater than */ extern void ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB); -- cgit v1.2.3 From a5d920297a2affe34c535d30a2c49588f92f69ad Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 16:26:10 -0600 Subject: gallium: use execmem for PPC code, grow instruction buffer as needed --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 70 +++++++++++++++++++++++---------- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 1 + src/gallium/auxiliary/tgsi/tgsi_ppc.c | 8 ++++ 3 files changed, 58 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index a90b5587b0..e73ed71a0b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -38,6 +38,7 @@ #include #include "util/u_memory.h" #include "pipe/p_debug.h" +#include "rtasm_execmem.h" #include "rtasm_ppc.h" @@ -46,9 +47,9 @@ ppc_init_func(struct ppc_function *p, unsigned max_inst) { uint i; - p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); p->num_inst = 0; - p->max_inst = max_inst; + p->max_inst = 100; /* first guess at buffer size */ + p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE); p->reg_used = 0x0; p->fp_used = 0x0; p->vec_used = 0x0; @@ -66,12 +67,19 @@ ppc_release_func(struct ppc_function *p) { assert(p->num_inst <= p->max_inst); if (p->store != NULL) { - align_free(p->store); + rtasm_exec_free(p->store); } p->store = NULL; } +uint +ppc_num_instructions(const struct ppc_function *p) +{ + return p->num_inst; +} + + void (*ppc_get_func(struct ppc_function *p))(void) { #if 0 
@@ -202,6 +210,35 @@ ppc_release_vec_register(struct ppc_function *p, int reg) } +/** + * Append instruction to instruction buffer. Grow buffer if out of room. + */ +static void +emit_instruction(struct ppc_function *p, uint32_t inst_bits) +{ + if (!p->store) + return; /* out of memory, drop the instruction */ + + if (p->num_inst == p->max_inst) { + /* allocate larger buffer */ + uint32_t *newbuf; + p->max_inst *= 2; /* 2x larger */ + newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE); + if (newbuf) { + memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE); + } + rtasm_exec_free(p->store); + p->store = newbuf; + if (!p->store) { + /* out of memory */ + p->num_inst = 0; + return; + } + } + + p->store[p->num_inst++] = inst_bits; +} + union vx_inst { uint32_t bits; @@ -223,8 +260,7 @@ emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.vA = vA; inst.inst.vB = vB; inst.inst.op2 = op2; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); }; @@ -250,8 +286,7 @@ emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.vB = vB; inst.inst.rC = 0; inst.inst.op2 = op2; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); }; @@ -277,8 +312,7 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) inst.inst.vB = vB; inst.inst.vC = vC; inst.inst.op2 = op2; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); }; @@ -300,8 +334,7 @@ emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk) inst.inst.li = li; inst.inst.aa = aa; inst.inst.lk = lk; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); } @@ -330,8 +363,7 @@ emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh, inst.inst.bh = bh; inst.inst.op2 = op2; inst.inst.lk = lk; - 
p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); } static INLINE void @@ -373,8 +405,7 @@ emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) inst.inst.rb = rb; inst.inst.op2 = op2; inst.inst.unused = 0x0; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); } @@ -398,8 +429,7 @@ emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) inst.inst.rt = rt; inst.inst.ra = ra; inst.inst.si = (unsigned) (si & 0xffff); - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); }; @@ -428,8 +458,7 @@ emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, inst.inst.unused = 0x0; inst.inst.op2 = op2; inst.inst.rc = rc; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); }; @@ -458,8 +487,7 @@ emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, inst.inst.oe = oe; inst.inst.op2 = op2; inst.inst.rc = rc; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 561e139bce..d0477dec94 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -64,6 +64,7 @@ struct ppc_function extern void ppc_init_func(struct ppc_function *p, unsigned max_inst); extern void ppc_release_func(struct ppc_function *p); +extern uint ppc_num_instructions(const struct ppc_function *p); extern void (*ppc_get_func( struct ppc_function *p ))( void ); extern void ppc_dump_func(const struct ppc_function *p); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 5d13070922..a92b1902e3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ 
b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -1315,6 +1315,14 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, tgsi_parse_free( &parse ); + if (ppc_num_instructions(func) == 0) { + /* ran out of memory for instructions */ + ok = FALSE; + } + + if (!ok) + debug_printf("TGSI->PPC translation failed\n"); + return ok; } -- cgit v1.2.3 From 725ba94ce5701aa8690c7ab2ea792dda86cbbe7a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 16:35:59 -0600 Subject: gallium: no longer pass max_inst to ppc_init_func() --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index d720c7bbd5..8b75136144 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -197,7 +197,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * sizeof(float), 16); - ppc_init_func( &vs->ppc_program, 2000 ); /* XXX fix limit */ + ppc_init_func( &vs->ppc_program ); if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index e73ed71a0b..6d11263be8 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -43,7 +43,7 @@ void -ppc_init_func(struct ppc_function *p, unsigned max_inst) +ppc_init_func(struct ppc_function *p) { uint i; diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index d0477dec94..afb4704c39 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -62,7 +62,7 @@ struct ppc_function -extern void ppc_init_func(struct ppc_function *p, 
unsigned max_inst); +extern void ppc_init_func(struct ppc_function *p); extern void ppc_release_func(struct ppc_function *p); extern uint ppc_num_instructions(const struct ppc_function *p); extern void (*ppc_get_func( struct ppc_function *p ))( void ); -- cgit v1.2.3 From b44ec717c831bb2e3363ee79ae1faca7e0665bea Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 12 Nov 2008 11:09:12 -0700 Subject: gallium: add missing prototypes --- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index afb4704c39..08212a2a25 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -244,6 +244,9 @@ ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb); extern void ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm); +extern void +ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm); + extern void ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb); @@ -310,6 +313,9 @@ ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset); extern void ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb); +extern void +ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset); + /** -- cgit v1.2.3 From 7acaeb87750226e7407908bc2dfa9989049202fa Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 9 Jan 2009 21:42:17 -0700 Subject: gallium: added comment/annotation support to PPC rtasm --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 242 ++++++++++++++++++++++++-------- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 7 + 2 files changed, 187 insertions(+), 62 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_ppc.h') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index e9015ec2eb..1bb9026205 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ 
b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -47,6 +48,8 @@ ppc_init_func(struct ppc_function *p) { uint i; + memset(p, 0, sizeof(*p)); + p->num_inst = 0; p->max_inst = 100; /* first guess at buffer size */ p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE); @@ -54,6 +57,9 @@ ppc_init_func(struct ppc_function *p) p->fp_used = 0x0; p->vec_used = 0x0; + p->print = FALSE; + p->indent = 0; + /* only allow using gp registers 3..12 for now */ for (i = 0; i < 3; i++) ppc_reserve_register(p, i); @@ -105,6 +111,42 @@ ppc_dump_func(const struct ppc_function *p) } +void +ppc_print_code(struct ppc_function *p, boolean enable) +{ + p->print = enable; +} + + +void +ppc_indent(struct ppc_function *p, int spaces) +{ + p->indent += spaces; +} + + +static void +indent(const struct ppc_function *p) +{ + int i; + for (i = 0; i < p->indent; i++) { + putchar(' '); + } +} + + +void +ppc_comment(struct ppc_function *p, int rel_indent, const char *s) +{ + if (p->print) { + p->indent += rel_indent; + indent(p); + p->indent -= rel_indent; + printf("# %s\n", s); + } +} + + /** * Mark a register as being unavailable. 
*/ @@ -132,6 +174,7 @@ ppc_allocate_register(struct ppc_function *p) return i; } } + printf("OUT OF PPC registers!\n"); return -1; } @@ -163,6 +206,7 @@ ppc_allocate_fp_register(struct ppc_function *p) return i; } } + printf("OUT OF PPC FP registers!\n"); return -1; } @@ -194,6 +238,7 @@ ppc_allocate_vec_register(struct ppc_function *p) return i; } } + printf("OUT OF PPC VEC registers!\n"); return -1; } @@ -252,7 +297,8 @@ union vx_inst { }; static INLINE void -emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, + const char *format, boolean transpose) { union vx_inst inst; inst.inst.op = 4; @@ -261,6 +307,13 @@ emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.vB = vB; inst.inst.op2 = op2; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + if (transpose) + printf(format, vD, vB, vA); + else + printf(format, vD, vA, vB); + } } @@ -277,7 +330,8 @@ union vxr_inst { }; static INLINE void -emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, + const char *format) { union vxr_inst inst; inst.inst.op = 4; @@ -287,6 +341,10 @@ emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.rC = 0; inst.inst.op2 = op2; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vD, vA, vB); + } } @@ -303,7 +361,8 @@ union va_inst { }; static INLINE void -emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) +emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC, + const char *format) { union va_inst inst; inst.inst.op = 4; @@ -313,6 +372,10 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) inst.inst.vC = vC; inst.inst.op2 = op2; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vD, vA, vB, vC); + } } @@ -396,7 +459,8 
@@ union x_inst { }; static INLINE void -emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) +emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2, + const char *format) { union x_inst inst; inst.inst.op = op; @@ -406,6 +470,10 @@ emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) inst.inst.op2 = op2; inst.inst.unused = 0x0; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vrs, ra, rb); + } } @@ -420,7 +488,8 @@ union d_inst { }; static INLINE void -emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) +emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si, + const char *format, boolean transpose) { union d_inst inst; assert(si >= -32768); @@ -430,6 +499,13 @@ emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) inst.inst.ra = ra; inst.inst.si = (unsigned) (si & 0xffff); emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + if (transpose) + printf(format, rt, si, ra); + else + printf(format, rt, ra, si); + } } @@ -448,7 +524,7 @@ union a_inst { static INLINE void emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, - uint rc) + uint rc, const char *format) { union a_inst inst; inst.inst.op = op; @@ -459,6 +535,10 @@ emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, inst.inst.op2 = op2; inst.inst.rc = rc; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, frt, fra, frb); + } } @@ -477,7 +557,7 @@ union xo_inst { static INLINE void emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, - uint op2, uint rc) + uint op2, uint rc, const char *format) { union xo_inst inst; inst.inst.op = op; @@ -488,6 +568,10 @@ emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, inst.inst.op2 = op2; inst.inst.rc = rc; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, rt, ra, 
rb); + } } @@ -502,140 +586,142 @@ emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, void ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 10, vD, vA, vB); + emit_vx(p, 10, vD, vA, vB, "vaddfp\t%u, v%u, v%u\n", FALSE); } /** vector float substract */ void ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 74, vD, vA, vB); + emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE); } /** vector float min */ void ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1098, vD, vA, vB); + emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE); } /** vector float max */ void ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1034, vD, vA, vB); + emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE); } /** vector float mult add: vD = vA * vB + vC */ void ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 46, vD, vA, vC, vB); /* note arg order */ + /* note arg order */ + emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n"); } /** vector float negative mult subtract: vD = vA - vB * vC */ void ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 47, vD, vB, vA, vC); /* note arg order */ + /* note arg order */ + emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n"); } /** vector float compare greater than */ void ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vxr(p, 710, vD, vA, vB); + emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u"); } /** vector float compare greater than or equal to */ void ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vxr(p, 454, vD, vA, vB); + emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u"); } /** vector float compare equal */ void ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vxr(p, 198, vD, vA, vB); + emit_vxr(p, 
198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u"); } /** vector float 2^x */ void ppc_vexptefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 394, vD, 0, vB); + emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float log2(x) */ void ppc_vlogefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 458, vD, 0, vB); + emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float reciprocol */ void ppc_vrefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 266, vD, 0, vB); + emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float reciprocol sqrt estimate */ void ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 330, vD, 0, vB); + emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to negative infinity */ void ppc_vrfim(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 714, vD, 0, vB); + emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to positive infinity */ void ppc_vrfip(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 650, vD, 0, vB); + emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to nearest int */ void ppc_vrfin(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 522, vD, 0, vB); + emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to int toward zero */ void ppc_vrfiz(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 586, vD, 0, vB); + emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE); } -/** vector store: store vR at mem[vA+vB] */ +/** vector store: store vR at mem[rA+rB] */ void -ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB) +ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB) { - emit_x(p, 31, vR, vA, vB, 231); + emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n"); } -/** vector load: vR = mem[vA+vB] */ +/** vector load: vR = mem[rA+rB] */ void 
-ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB) +ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB) { - emit_x(p, 31, vR, vA, vB, 103); + emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n"); } /** load vector element word: vR = mem_word[ra+rb] */ void -ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) +ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB) { - emit_x(p, 31, vr, ra, rb, 71); + emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n"); } @@ -649,49 +735,63 @@ ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) void ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1028, vD, vA, vB); + emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE); } /** vector and complement */ void ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1092, vD, vA, vB); + emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE); } /** vector or */ void ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1156, vD, vA, vB); + emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE); } /** vector nor */ void ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1284, vD, vA, vB); + emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE); } /** vector xor */ void ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1220, vD, vA, vB); + emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE); } /** Pseudo-instruction: vector move */ void ppc_vmove(struct ppc_function *p, uint vD, uint vA) { + boolean print = p->print; + p->print = FALSE; ppc_vor(p, vD, vA, vA); + if (print) { + indent(p); + printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA); + } + p->print = print; } /** Set vector register to {0,0,0,0} */ void ppc_vzero(struct ppc_function *p, uint vr) { + boolean print = p->print; + p->print = FALSE; ppc_vxor(p, vr, vr, vr); + if (print) { + indent(p); + printf("vxor\tv%u, v%u, v%u 
\t# v%u = {0,0,0,0}\n", vr, vr, vr, vr); + } + p->print = print; } @@ -705,35 +805,35 @@ ppc_vzero(struct ppc_function *p, uint vr) void ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 43, vD, vA, vB, vC); + emit_va(p, 43, vD, vA, vB, vC, "vperm\tr%u, r%u, r%u, r%u"); } /** vector select */ void ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 42, vD, vA, vB, vC); + emit_va(p, 42, vD, vA, vB, vC, "vsel\tr%u, r%u, r%u, r%u"); } /** vector splat byte */ void ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm) { - emit_vx(p, 42, vD, imm, vB); + emit_vx(p, 42, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE); } /** vector splat half word */ void ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm) { - emit_vx(p, 588, vD, imm, vB); + emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE); } /** vector splat word */ void ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm) { - emit_vx(p, 652, vD, imm, vB); + emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE); } /** vector splat signed immediate word */ @@ -742,14 +842,14 @@ ppc_vspltisw(struct ppc_function *p, uint vD, int imm) { assert(imm >= -16); assert(imm < 15); - emit_vx(p, 908, vD, imm, 0); + emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE); } /** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ void ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 388, vD, vA, vB); + emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE); } @@ -763,63 +863,66 @@ ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB) void ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 14, rt, ra, imm); + emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE); } /** rt = ra + (imm << 16) */ void ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 15, rt, ra, imm); + emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, 
%d\n", FALSE); } /** rt = ra + rb */ void ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_xo(p, 31, rt, ra, rb, 0, 266, 0); + emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n"); } /** rt = ra AND ra */ void ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_x(p, 31, ra, rt, rb, 28); /* note argument order */ + emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n"); /* note argument order */ } /** rt = ra AND imm */ void ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 28, ra, rt, imm); /* note argument order */ + /* note argument order */ + emit_d(p, 28, ra, rt, imm, "andi\tr%u, r%u, %d\n", FALSE); } /** rt = ra OR ra */ void ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_x(p, 31, ra, rt, rb, 444); /* note argument order */ + emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n"); /* note argument order */ } /** rt = ra OR imm */ void ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 24, ra, rt, imm); /* note argument order */ + /* note argument order */ + emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE); } /** rt = ra XOR ra */ void ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_x(p, 31, ra, rt, rb, 316); /* note argument order */ + emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n"); /* note argument order */ } /** rt = ra XOR imm */ void ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 26, ra, rt, imm); /* note argument order */ + /* note argument order */ + emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE); } /** pseudo instruction: move: rt = ra */ @@ -833,7 +936,14 @@ ppc_mr(struct ppc_function *p, uint rt, uint ra) void ppc_li(struct ppc_function *p, uint rt, int imm) { + boolean print = p->print; + p->print = FALSE; ppc_addi(p, rt, 0, imm); + if (print) { + indent(p); + printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm); + } + p->print = print; } /** rt = imm << 16 */ @@ 
-864,21 +974,21 @@ ppc_load_int(struct ppc_function *p, uint rt, int imm) void ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d) { - emit_d(p, 37, rs, ra, d); + emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE); } /** store rs at memory[(ra)+d] */ void ppc_stw(struct ppc_function *p, uint rs, uint ra, int d) { - emit_d(p, 36, rs, ra, d); + emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE); } /** Load rt = mem[(ra)+d]; then zero set high 32 bits to zero. */ void ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d) { - emit_d(p, 32, rt, ra, d); + emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE); } @@ -891,42 +1001,42 @@ ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d) void ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb) { - emit_a(p, 63, frt, fra, frb, 21, 0); + emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n"); } /** sub: frt = fra - frb */ void ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb) { - emit_a(p, 63, frt, fra, frb, 20, 0); + emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n"); } /** convert to int: rt = (int) ra */ void ppc_fctiwz(struct ppc_function *p, uint rt, uint fra) { - emit_x(p, 63, rt, 0, fra, 15); + emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n"); } /** store frs at mem[(ra)+offset] */ void ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset) { - emit_d(p, 52, frs, ra, offset); + emit_d(p, 52, frs, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE); } /** store frs at mem[(ra)+(rb)] */ void ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb) { - emit_x(p, 31, frs, ra, rb, 983); + emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tr%u, r%u, r%u\n"); } /** load frt = mem[(ra)+offset] */ void ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset) { - emit_d(p, 48, frt, ra, offset); + emit_d(p, 48, frt, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE); } @@ -942,6 +1052,10 @@ void ppc_blr(struct ppc_function *p) { emit_i(p, 18, 0, 0, 1); + if (p->print) { 
+ indent(p); + printf("blr\n"); + } } /** Branch Conditional to Link Register (p. 36) */ @@ -949,6 +1063,10 @@ void ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg) { emit_xl(p, 19, condOp, condReg, branchHint, 16, 0); + if (p->print) { + indent(p); + printf("bclr\t%u %u %u\n", condOp, branchHint, condReg); + } } /** Pseudo instruction: return from subroutine */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 08212a2a25..93e5f5187d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -58,6 +59,8 @@ struct ppc_function uint32_t reg_used; /** used/free general-purpose registers bitmask */ uint32_t fp_used; /** used/free floating point registers bitmask */ uint32_t vec_used; /** used/free vector registers bitmask */ + int indent; + boolean print; }; @@ -68,6 +71,10 @@ extern uint ppc_num_instructions(const struct ppc_function *p); extern void (*ppc_get_func( struct ppc_function *p ))( void ); extern void ppc_dump_func(const struct ppc_function *p); +extern void ppc_print_code(struct ppc_function *p, boolean enable); +extern void ppc_indent(struct ppc_function *p, int spaces); +extern void ppc_comment(struct ppc_function *p, int rel_indent, const char *s); + extern int ppc_reserve_register(struct ppc_function *p, int reg); extern int ppc_allocate_register(struct ppc_function *p); extern void ppc_release_register(struct ppc_function *p, int reg); -- cgit v1.2.3