summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/rtasm
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/rtasm')
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_execmem.c11
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c124
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h52
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c2
4 files changed, 152 insertions, 37 deletions
diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
index dfa5c35ab6..19087589a8 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
@@ -47,11 +47,12 @@
#include <unistd.h>
#include <sys/mman.h>
+#include "pipe/p_thread.h"
#include "util/u_mm.h"
#define EXEC_HEAP_SIZE (10*1024*1024)
-_glthread_DECLARE_STATIC_MUTEX(exec_mutex);
+pipe_static_mutex(exec_mutex);
static struct mem_block *exec_heap = NULL;
static unsigned char *exec_mem = NULL;
@@ -76,7 +77,7 @@ rtasm_exec_malloc(size_t size)
struct mem_block *block = NULL;
void *addr = NULL;
- _glthread_LOCK_MUTEX(exec_mutex);
+ pipe_mutex_lock(exec_mutex);
init_heap();
@@ -90,7 +91,7 @@ rtasm_exec_malloc(size_t size)
else
debug_printf("rtasm_exec_malloc failed\n");
- _glthread_UNLOCK_MUTEX(exec_mutex);
+ pipe_mutex_unlock(exec_mutex);
return addr;
}
@@ -99,7 +100,7 @@ rtasm_exec_malloc(size_t size)
void
rtasm_exec_free(void *addr)
{
- _glthread_LOCK_MUTEX(exec_mutex);
+ pipe_mutex_lock(exec_mutex);
if (exec_heap) {
struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem);
@@ -108,7 +109,7 @@ rtasm_exec_free(void *addr)
mmFreeMem(block);
}
- _glthread_UNLOCK_MUTEX(exec_mutex);
+ pipe_mutex_unlock(exec_mutex);
}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index 285ddc0e3f..61010e4333 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -151,8 +151,8 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.rB = rB;
inst.inst.rA = rA;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -165,8 +165,8 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.rB = rB;
inst.inst.rA = rA;
inst.inst.rC = rC;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -178,8 +178,8 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.i7 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -192,8 +192,8 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.i8 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -206,8 +206,8 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.i10 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -218,8 +218,8 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.op = op;
inst.inst.i16 = imm;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -230,8 +230,8 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.op = op;
inst.inst.i18 = imm;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -300,13 +300,16 @@ void _name (struct spe_function *p, int imm) \
#include "rtasm_ppc_spe.h"
-/*
+/**
+ * Initialize an spe_function.
+ * \param code_size size of instruction buffer to allocate, in bytes.
*/
void spe_init_func(struct spe_function *p, unsigned code_size)
{
p->store = align_malloc(code_size, 16);
- p->csr = p->store;
-
+ p->num_inst = 0;
+ p->max_inst = code_size / SPE_INST_SIZE;
+
/* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
*/
p->regs[0] = ~7;
@@ -316,21 +319,26 @@ void spe_init_func(struct spe_function *p, unsigned code_size)
void spe_release_func(struct spe_function *p)
{
+ assert(p->num_inst <= p->max_inst);
if (p->store != NULL) {
align_free(p->store);
}
p->store = NULL;
- p->csr = NULL;
}
+/**
+ * Alloate a SPE register.
+ * \return register index or -1 if none left.
+ */
int spe_allocate_available_register(struct spe_function *p)
{
unsigned i;
- for (i = 0; i < 128; i++) {
+ for (i = 0; i < SPE_NUM_REGS; i++) {
const uint64_t mask = (1ULL << (i % 64));
const unsigned idx = i / 64;
+ assert(idx < 2);
if ((p->regs[idx] & mask) != 0) {
p->regs[idx] &= ~mask;
return i;
@@ -341,11 +349,15 @@ int spe_allocate_available_register(struct spe_function *p)
}
+/**
+ * Mark the given SPE register as "allocated".
+ */
int spe_allocate_register(struct spe_function *p, int reg)
{
const unsigned idx = reg / 64;
const unsigned bit = reg % 64;
+ assert(reg < SPE_NUM_REGS);
assert((p->regs[idx] & (1ULL << bit)) != 0);
p->regs[idx] &= ~(1ULL << bit);
@@ -353,57 +365,75 @@ int spe_allocate_register(struct spe_function *p, int reg)
}
+/**
+ * Mark the given SPE register as "unallocated".
+ */
void spe_release_register(struct spe_function *p, int reg)
{
const unsigned idx = reg / 64;
const unsigned bit = reg % 64;
+ assert(idx < 2);
+
+ assert(reg < SPE_NUM_REGS);
assert((p->regs[idx] & (1ULL << bit)) == 0);
p->regs[idx] |= (1ULL << bit);
}
+/**
+ * For branch instructions:
+ * \param d if 1, disable interupts if branch is taken
+ * \param e if 1, enable interupts if branch is taken
+ * If d and e are both zero, don't change interupt status (right?)
+ */
-
+/** Branch Indirect to address in rA */
void spe_bi(struct spe_function *p, unsigned rA, int d, int e)
{
emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4));
}
+/** Interupt Return */
void spe_iret(struct spe_function *p, unsigned rA, int d, int e)
{
emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4));
}
+/** Branch indirect and set link on external data */
void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d,
int e)
{
emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4));
}
+/** Branch indirect and set link. Save PC in rT, jump to rA. */
void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d,
int e)
{
emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4));
}
-void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d,
- int e)
+/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */
+void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4));
}
+/** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */
void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4));
}
+/** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */
void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4));
}
+/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */
void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4));
@@ -432,4 +462,54 @@ EMIT_R (spe_mfspr, 0x00c);
EMIT_R (spe_mtspr, 0x10c);
#endif
+
+/**
+ ** Helper / "macro" instructions.
+ ** Use somewhat verbose names as a reminder that these aren't native
+ ** SPE instructions.
+ **/
+
+
+void
+spe_load_float(struct spe_function *p, unsigned rT, float x)
+{
+ union {
+ float f;
+ unsigned u;
+ } bits;
+ bits.f = x;
+ spe_ilhu(p, rT, bits.u >> 16);
+ spe_iohl(p, rT, bits.u & 0xffff);
+}
+
+
+void
+spe_load_int(struct spe_function *p, unsigned rT, int i)
+{
+ spe_ilhu(p, rT, i >> 16);
+ spe_iohl(p, rT, i & 0xffff);
+}
+
+
+void
+spe_complement(struct spe_function *p, unsigned rT)
+{
+ spe_nor(p, rT, rT, rT);
+}
+
+
+void
+spe_move(struct spe_function *p, unsigned rT, unsigned rA)
+{
+ spe_ori(p, rT, rA, 0);
+}
+
+
+void
+spe_zero(struct spe_function *p, unsigned rT)
+{
+ spe_xor(p, rT, rT, rT);
+}
+
+
#endif /* GALLIUM_CELL */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
index 1cacc717b1..dee8c55c4a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
@@ -25,6 +25,7 @@
/**
* \file
* Real-time assembly generation interface for Cell B.E. SPEs.
+ * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
*
* \author Ian Romanick <idr@us.ibm.com>
*/
@@ -32,13 +33,24 @@
#ifndef RTASM_PPC_SPE_H
#define RTASM_PPC_SPE_H
-struct spe_function {
- /**
- *
- */
- uint32_t *store;
- uint32_t *csr;
- const char *fn;
+/** 4 bytes per instruction */
+#define SPE_INST_SIZE 4
+
+/** number of general-purpose SIMD registers */
+#define SPE_NUM_REGS 128
+
+/** Return Address register */
+#define SPE_REG_RA 0
+
+/** Stack Pointer register */
+#define SPE_REG_SP 1
+
+
+struct spe_function
+{
+ uint32_t *store; /**< instruction buffer */
+ uint num_inst;
+ uint max_inst;
/**
* Mask of used / unused registers
@@ -50,7 +62,7 @@ struct spe_function {
* spe_allocate_register, spe_allocate_available_register,
* spe_release_register
*/
- uint64_t regs[2];
+ uint64_t regs[SPE_NUM_REGS / 64];
};
extern void spe_init_func(struct spe_function *p, unsigned code_size);
@@ -119,7 +131,8 @@ EMIT_RI16(spe_ilhu, 0x082);
EMIT_RI16(spe_il, 0x081);
EMIT_RI18(spe_ila, 0x021);
EMIT_RI16(spe_iohl, 0x0c1);
-EMIT_RI16(spe_fsmbi, 0x0c5);
+EMIT_RI16(spe_fsmbi, 0x065);
+
/* Integer and logical instructions
@@ -271,6 +284,27 @@ extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA,
int d, int e);
+/** Load/splat immediate float into rT. */
+extern void
+spe_load_float(struct spe_function *p, unsigned rT, float x);
+
+/** Load/splat immediate int into rT. */
+extern void
+spe_load_int(struct spe_function *p, unsigned rT, int i);
+
+/** Complement/invert all bits in rT. */
+extern void
+spe_complement(struct spe_function *p, unsigned rT);
+
+/** rT = rA. */
+extern void
+spe_move(struct spe_function *p, unsigned rT, unsigned rA);
+
+/** rT = {0,0,0,0}. */
+extern void
+spe_zero(struct spe_function *p, unsigned rT);
+
+
/* Floating-point instructions
*/
EMIT_RR (spe_fa, 0x2c4);
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index f4ca282dd9..6d4c081e04 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -27,7 +27,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_debug.h"
-#include "pipe/p_pointer.h"
+#include "util/u_pointer.h"
#include "rtasm_execmem.h"
#include "rtasm_x86sse.h"