From bb5becf1e289b2c9240d98299e9447a9673da9fc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 5 Sep 2008 13:54:14 -0600 Subject: gallium: comments, assertions, etc --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 36 +++++++++++++++++++++++++---- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 20 +++++++++------- 2 files changed, 43 insertions(+), 13 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 285ddc0e3f..fe5beba456 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -300,7 +300,9 @@ void _name (struct spe_function *p, int imm) \ #include "rtasm_ppc_spe.h" -/* +/** + * Initialize an spe_function. + * \param code_size size of instruction buffer to allocate, in bytes. */ void spe_init_func(struct spe_function *p, unsigned code_size) { @@ -324,10 +326,14 @@ void spe_release_func(struct spe_function *p) } +/** + * Alloate a SPE register. + * \return register index or -1 if none left. + */ int spe_allocate_available_register(struct spe_function *p) { unsigned i; - for (i = 0; i < 128; i++) { + for (i = 0; i < SPE_NUM_REGS; i++) { const uint64_t mask = (1ULL << (i % 64)); const unsigned idx = i / 64; @@ -341,11 +347,15 @@ int spe_allocate_available_register(struct spe_function *p) } +/** + * Mark the given SPE register as "allocated". + */ int spe_allocate_register(struct spe_function *p, int reg) { const unsigned idx = reg / 64; const unsigned bit = reg % 64; + assert(reg < SPE_NUM_REGS); assert((p->regs[idx] & (1ULL << bit)) != 0); p->regs[idx] &= ~(1ULL << bit); @@ -353,57 +363,73 @@ int spe_allocate_register(struct spe_function *p, int reg) } +/** + * Mark the given SPE register as "unallocated". + */ void spe_release_register(struct spe_function *p, int reg) { const unsigned idx = reg / 64; const unsigned bit = reg % 64; + assert(reg < SPE_NUM_REGS); assert((p->regs[idx] & (1ULL << bit)) == 0); p->regs[idx] |= (1ULL << bit); } +/** + * For branch instructions: + * \param d if 1, disable interupts if branch is taken + * \param e if 1, enable interupts if branch is taken + * If d and e are both zero, don't change interupt status (right?) + */ - +/** Branch Indirect to address in rA */ void spe_bi(struct spe_function *p, unsigned rA, int d, int e) { emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); } +/** Interupt Return */ void spe_iret(struct spe_function *p, unsigned rA, int d, int e) { emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); } +/** Branch indirect and set link on external data */ void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); } +/** Branch indirect and set link. Save PC in rT, jump to rA. */ void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); } -void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, - int e) +/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */ +void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); } +/** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */ void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); } +/** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */ void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); } +/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */ void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 1cacc717b1..7dd754ba77 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -32,13 +32,17 @@ #ifndef RTASM_PPC_SPE_H #define RTASM_PPC_SPE_H -struct spe_function { - /** - * - */ - uint32_t *store; - uint32_t *csr; - const char *fn; +/** 4 bytes per instruction */ +#define SPE_INST_SIZE 4 + +/** number of general-purpose SIMD registers */ +#define SPE_NUM_REGS 128 + +struct spe_function +{ + uint32_t *store; /**< instruction buffer */ + uint32_t *csr; /**< next free pos in instruction buffer */ + const char *fn; /**< unused */ /** * Mask of used / unused registers @@ -50,7 +54,7 @@ struct spe_function { * spe_allocate_register, spe_allocate_available_register, * spe_release_register */ - uint64_t regs[2]; + uint64_t regs[SPE_NUM_REGS / 64]; }; extern void spe_init_func(struct spe_function *p, unsigned code_size); -- cgit v1.2.3 From ee582fd3a7a9ddbcb5595249201cf213a6c6f014 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 10 Sep 2008 17:11:48 -0600 Subject: gallium: assorted additions and fixes to Cell SPE rtasm code Fix incorrect opcode for fsmbi. Added "macro" functions for loading floats/ints, register complement, zero, move. Added #defines for return address and stack pointer registers. Added assertions to check that the instruction buffer doesn't overflow. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 88 +++++++++++++++++++++++------ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 38 +++++++++++-- 2 files changed, 105 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index fe5beba456..61010e4333 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -151,8 +151,8 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rB = rB; inst.inst.rA = rA; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -165,8 +165,8 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rB = rB; inst.inst.rA = rA; inst.inst.rC = rC; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -178,8 +178,8 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, inst.inst.i7 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -192,8 +192,8 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, inst.inst.i8 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -206,8 +206,8 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, inst.inst.i10 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -218,8 +218,8 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, inst.inst.op = op; inst.inst.i16 = imm; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -230,8 +230,8 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, inst.inst.op = op; inst.inst.i18 = imm; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -307,8 +307,9 @@ void _name (struct spe_function *p, int imm) \ void spe_init_func(struct spe_function *p, unsigned code_size) { p->store = align_malloc(code_size, 16); - p->csr = p->store; - + p->num_inst = 0; + p->max_inst = code_size / SPE_INST_SIZE; + /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. */ p->regs[0] = ~7; @@ -318,11 +319,11 @@ void spe_init_func(struct spe_function *p, unsigned code_size) void spe_release_func(struct spe_function *p) { + assert(p->num_inst <= p->max_inst); if (p->store != NULL) { align_free(p->store); } p->store = NULL; - p->csr = NULL; } @@ -337,6 +338,7 @@ int spe_allocate_available_register(struct spe_function *p) const uint64_t mask = (1ULL << (i % 64)); const unsigned idx = i / 64; + assert(idx < 2); if ((p->regs[idx] & mask) != 0) { p->regs[idx] &= ~mask; return i; @@ -371,6 +373,8 @@ void spe_release_register(struct spe_function *p, int reg) const unsigned idx = reg / 64; const unsigned bit = reg % 64; + assert(idx < 2); + assert(reg < SPE_NUM_REGS); assert((p->regs[idx] & (1ULL << bit)) == 0); @@ -458,4 +462,54 @@ EMIT_R (spe_mfspr, 0x00c); EMIT_R (spe_mtspr, 0x10c); #endif + +/** + ** Helper / "macro" instructions. + ** Use somewhat verbose names as a reminder that these aren't native + ** SPE instructions. + **/ + + +void +spe_load_float(struct spe_function *p, unsigned rT, float x) +{ + union { + float f; + unsigned u; + } bits; + bits.f = x; + spe_ilhu(p, rT, bits.u >> 16); + spe_iohl(p, rT, bits.u & 0xffff); +} + + +void +spe_load_int(struct spe_function *p, unsigned rT, int i) +{ + spe_ilhu(p, rT, i >> 16); + spe_iohl(p, rT, i & 0xffff); +} + + +void +spe_complement(struct spe_function *p, unsigned rT) +{ + spe_nor(p, rT, rT, rT); +} + + +void +spe_move(struct spe_function *p, unsigned rT, unsigned rA) +{ + spe_ori(p, rT, rA, 0); +} + + +void +spe_zero(struct spe_function *p, unsigned rT) +{ + spe_xor(p, rT, rT, rT); +} + + #endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 7dd754ba77..dee8c55c4a 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -25,6 +25,7 @@ /** * \file * Real-time assembly generation interface for Cell B.E. SPEs. + * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf * * \author Ian Romanick */ @@ -38,11 +39,18 @@ /** number of general-purpose SIMD registers */ #define SPE_NUM_REGS 128 +/** Return Address register */ +#define SPE_REG_RA 0 + +/** Stack Pointer register */ +#define SPE_REG_SP 1 + + struct spe_function { - uint32_t *store; /**< instruction buffer */ - uint32_t *csr; /**< next free pos in instruction buffer */ - const char *fn; /**< unused */ + uint32_t *store; /**< instruction buffer */ + uint num_inst; + uint max_inst; /** * Mask of used / unused registers @@ -123,7 +131,8 @@ EMIT_RI16(spe_ilhu, 0x082); EMIT_RI16(spe_il, 0x081); EMIT_RI18(spe_ila, 0x021); EMIT_RI16(spe_iohl, 0x0c1); -EMIT_RI16(spe_fsmbi, 0x0c5); +EMIT_RI16(spe_fsmbi, 0x065); + /* Integer and logical instructions @@ -275,6 +284,27 @@ extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e); +/** Load/splat immediate float into rT. */ +extern void +spe_load_float(struct spe_function *p, unsigned rT, float x); + +/** Load/splat immediate int into rT. */ +extern void +spe_load_int(struct spe_function *p, unsigned rT, int i); + +/** Complement/invert all bits in rT. */ +extern void +spe_complement(struct spe_function *p, unsigned rT); + +/** rT = rA. */ +extern void +spe_move(struct spe_function *p, unsigned rT, unsigned rA); + +/** rT = {0,0,0,0}. */ +extern void +spe_zero(struct spe_function *p, unsigned rT); + + /* Floating-point instructions */ EMIT_RR (spe_fa, 0x2c4); -- cgit v1.2.3 From 178bbaff80d079606a1135bd65f1a85bac9774c4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 11 Sep 2008 17:07:30 -0600 Subject: gallium: add special cases in spe_load_float(), spe_load_int(), added spe_splat() --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 45 +++++++++++++++++++++++------ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 4 +++ 2 files changed, 40 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 61010e4333..a04cc6c4ff 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -473,21 +473,48 @@ EMIT_R (spe_mtspr, 0x10c); void spe_load_float(struct spe_function *p, unsigned rT, float x) { - union { - float f; - unsigned u; - } bits; - bits.f = x; - spe_ilhu(p, rT, bits.u >> 16); - spe_iohl(p, rT, bits.u & 0xffff); + if (x == 0.0f) { + spe_il(p, rT, 0x0); + } + else if (x == 0.5f) { + spe_ilhu(p, rT, 0x3f00); + } + else if (x == 1.0f) { + spe_ilhu(p, rT, 0x3f80); + } + else if (x == -1.0f) { + spe_ilhu(p, rT, 0xbf80); + } + else { + union { + float f; + unsigned u; + } bits; + bits.f = x; + spe_ilhu(p, rT, bits.u >> 16); + spe_iohl(p, rT, bits.u & 0xffff); + } } void spe_load_int(struct spe_function *p, unsigned rT, int i) { - spe_ilhu(p, rT, i >> 16); - spe_iohl(p, rT, i & 0xffff); + if (-32768 <= i && i <= 32767) { + spe_il(p, rT, i); + } + else { + spe_ilhu(p, rT, i >> 16); + spe_iohl(p, rT, i & 0xffff); + } +} + + +void +spe_splat(struct spe_function *p, unsigned rT, unsigned rA) +{ + spe_ila(p, rT, 66051); + spe_shufb(p, rT, rA, rA, rT); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index dee8c55c4a..d95e5aace3 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -292,6 +292,10 @@ spe_load_float(struct spe_function *p, unsigned rT, float x); extern void spe_load_int(struct spe_function *p, unsigned rT, int i); +/** Replicate word 0 of rA across rT. */ +extern void +spe_splat(struct spe_function *p, unsigned rT, unsigned rA); + /** Complement/invert all bits in rT. */ extern void spe_complement(struct spe_function *p, unsigned rT); -- cgit v1.2.3