summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorBrian Paul <brian.paul@tungstengraphics.com>2008-10-08 20:44:32 -0600
committerBrian Paul <brian.paul@tungstengraphics.com>2008-10-08 20:44:32 -0600
commitd48a92e88040470f93e2186f8eb23e4797a09860 (patch)
tree7fbfd01dc04b42e42b4457eae507693ef8dc5f3b /src/gallium/drivers
parenta4e477433f485a39b5de448d0a9cb6f4bf9bb90f (diff)
cell: implement function calls from shader code. fslight demo runs now.
Used for SIN, COS, EXP2, LOG2, POW instructions. TEX next. Fixed some bugs in MIN, MAX, DP3, DP4, DPH instructions. In rtasm code: Special-case spe_lqd(), spe_stqd() functions so they take byte offsets but low-order 4 bits are shifted out. This makes things consistant with SPU assembly language conventions. Added spe_get_registers_used() function.
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c141
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_fetch.c30
2 files changed, 109 insertions, 62 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 3065869d04..640ebcadbb 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -84,6 +84,8 @@ struct codegen
/** Index of execution mask register */
int exec_mask_reg;
+ int frame_size; /**< Stack frame size, in words */
+
struct spe_function *f;
boolean error;
};
@@ -208,7 +210,7 @@ get_src_reg(struct codegen *gen,
reg = get_itemp(gen);
reg_is_itemp = TRUE;
/* Load: reg = memory[(machine_reg) + offset] */
- spe_lqd(gen->f, reg, gen->inputs_reg, offset);
+ spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
}
break;
case TGSI_FILE_IMMEDIATE:
@@ -221,7 +223,7 @@ get_src_reg(struct codegen *gen,
reg = get_itemp(gen);
reg_is_itemp = TRUE;
/* Load: reg = memory[(machine_reg) + offset] */
- spe_lqd(gen->f, reg, gen->constants_reg, offset);
+ spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
}
break;
default:
@@ -325,6 +327,7 @@ store_dest_reg(struct codegen *gen,
}
else {
/* we're not inside a condition or loop: do nothing special */
+
}
break;
case TGSI_FILE_OUTPUT:
@@ -337,17 +340,17 @@ store_dest_reg(struct codegen *gen,
/* First read the current value from memory:
* Load: curval = memory[(machine_reg) + offset]
*/
- spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset);
+ spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
/* Mix curval with newvalue according to exec mask:
* d[i] = mask_reg[i] ? value_reg : d_reg
*/
spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
/* Store: memory[(machine_reg) + offset] = curval */
- spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset);
+ spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
}
else {
/* Store: memory[(machine_reg) + offset] = reg */
- spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
+ spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
}
}
break;
@@ -357,6 +360,41 @@ store_dest_reg(struct codegen *gen,
}
+
+static void
+emit_prologue(struct codegen *gen)
+{
+ gen->frame_size = 256+128; /* XXX temporary */
+
+ spe_comment(gen->f, -4, "Function prologue:");
+
+ /* save $lr on stack # stqd $lr,16($sp) */
+ spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
+
+ /* save stack pointer # stqd $sp,-frameSize($sp) */
+ spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
+
+ /* adjust stack pointer # ai $sp,$sp,-frameSize */
+ spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
+}
+
+
+static void
+emit_epilogue(struct codegen *gen)
+{
+ spe_comment(gen->f, -4, "Function epilogue:");
+
+ /* restore stack pointer # ai $sp,$sp,frameSize */
+ spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
+
+ /* restore $lr # lqd $lr,16($sp) */
+ spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
+
+ /* return from function call */
+ spe_bi(gen->f, SPE_REG_RA, 0, 0);
+}
+
+
static boolean
emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
@@ -588,6 +626,7 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
int tmp_reg = get_itemp(gen);
+
/* t = x0 * x1 */
spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
@@ -603,7 +642,9 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, tmp_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
}
}
@@ -623,6 +664,7 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
int tmp_reg = get_itemp(gen);
+
/* t = x0 * x1 */
spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
@@ -643,6 +685,8 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, tmp_reg);
store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
}
}
@@ -683,6 +727,8 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, tmp_reg);
store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
}
}
@@ -1112,9 +1158,6 @@ emit_function_call(struct codegen *gen,
uint addr;
int ch;
- /* XXX temporary value */
- const int frameSize = 64; /* stack frame (activation record) size */
-
assert(num_args <= 3);
/* lookup function address */
@@ -1136,48 +1179,45 @@ emit_function_call(struct codegen *gen,
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- int s_regs[3];
- uint a;
+ int s_regs[3], d_reg;
+ ubyte usedRegs[SPE_NUM_REGS];
+ uint a, i, numUsed;
+
for (a = 0; a < num_args; a++) {
s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
}
+ d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- /* Basically:
- * save registers on stack
- * move parameters to registers 3, 4, 5...
- * call function
- * save return value (reg 3)
- * restore registers from stack
- */
+ numUsed = spe_get_registers_used(gen->f, usedRegs);
+ assert(numUsed < gen->frame_size / 16 - 32);
- /* XXX hack: load first function param */
- spe_move(gen->f, 3, s_regs[0]);
-
- /* save $lr on stack # stqd $lr,16($sp) */
- spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
- /* save stack pointer # stqd $sp,-frameSize($sp) */
- spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize);
-
- /* XXX save registers to stack here */
+ /* save registers to stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ int offset = 2 + i;
+ spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
- /* adjust stack pointer # ai $sp,$sp,-frameSize */
- spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize);
+ /* setup function arguments */
+ for (a = 0; a < num_args; a++) {
+ spe_move(gen->f, 3 + a, s_regs[a]);
+ }
/* branch to function, save return addr */
spe_brasl(gen->f, SPE_REG_RA, addr);
- /* restore stack pointer # ai $sp,$sp,frameSize */
- spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, frameSize);
-
- /* XXX restore registers from stack here */
-
- /* restore $lr # lqd $lr,16($sp) */
- spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
-
- /* XXX hack: save function's return value */
+ /* save function's return value */
spe_move(gen->f, d_reg, 3);
+ /* restore registers from stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ if (reg != d_reg) {
+ int offset = 2 + i;
+ spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+ }
+
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
free_itemps(gen);
}
@@ -1202,10 +1242,11 @@ emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int tmp_reg = get_itemp(gen);
/* d = (s1 > s2) ? s1 : s2 */
- spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
- spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg);
+ spe_fcgt(gen->f, tmp_reg, s1_reg, s2_reg);
+ spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg);
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
free_itemps(gen);
@@ -1230,10 +1271,11 @@ emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst)
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int tmp_reg = get_itemp(gen);
/* d = (s2 > s1) ? s1 : s2 */
- spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
- spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg);
+ spe_fcgt(gen->f, tmp_reg, s2_reg, s1_reg);
+ spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg);
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
free_itemps(gen);
@@ -1346,8 +1388,7 @@ static boolean
emit_END(struct codegen *gen)
{
spe_comment(gen->f, -4, "END:");
- /* return from function call */
- spe_bi(gen->f, SPE_REG_RA, 0, 0);
+ emit_epilogue(gen);
return true;
}
@@ -1420,6 +1461,10 @@ emit_instruction(struct codegen *gen,
return emit_function_call(gen, inst, "spu_sin", 1);
case TGSI_OPCODE_POW:
return emit_function_call(gen, inst, "spu_pow", 2);
+ case TGSI_OPCODE_EXPBASE2:
+ return emit_function_call(gen, inst, "spu_exp2", 1);
+ case TGSI_OPCODE_LOGBASE2:
+ return emit_function_call(gen, inst, "spu_log2", 1);
case TGSI_OPCODE_IF:
return emit_IF(gen, inst);
@@ -1532,6 +1577,7 @@ emit_declaration(struct cell_context *cell,
}
+
/**
* Translate TGSI shader code to SPE instructions. This is done when
* the state tracker gives us a new shader (via pipe->create_fs_state()).
@@ -1571,12 +1617,14 @@ cell_gen_fragment_program(struct cell_context *cell,
tgsi_parse_init(&parse, tokens);
+ emit_prologue(&gen);
+
while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
tgsi_parse_token(&parse);
switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
- if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
+ if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
gen.error = true;
break;
@@ -1595,7 +1643,6 @@ cell_gen_fragment_program(struct cell_context *cell,
}
}
-
if (gen.error) {
/* terminate the SPE code */
return emit_END(&gen);
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
index 566df7f59e..18969005b0 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
@@ -73,8 +73,8 @@ emit_matrix_transpose(struct spe_function *p,
int col3;
- spe_lqd(p, shuf_hi, shuf_ptr, 3);
- spe_lqd(p, shuf_lo, shuf_ptr, 4);
+ spe_lqd(p, shuf_hi, shuf_ptr, 3*16);
+ spe_lqd(p, shuf_lo, shuf_ptr, 4*16);
spe_shufb(p, t1, row0, row2, shuf_hi);
spe_shufb(p, t2, row0, row2, shuf_lo);
@@ -122,13 +122,13 @@ emit_matrix_transpose(struct spe_function *p,
*/
switch (count) {
case 4:
- spe_stqd(p, col3, dest_ptr, 3);
+ spe_stqd(p, col3, dest_ptr, 3 * 16);
case 3:
- spe_stqd(p, col2, dest_ptr, 2);
+ spe_stqd(p, col2, dest_ptr, 2 * 16);
case 2:
- spe_stqd(p, col1, dest_ptr, 1);
+ spe_stqd(p, col1, dest_ptr, 1 * 16);
case 1:
- spe_stqd(p, col0, dest_ptr, 0);
+ spe_stqd(p, col0, dest_ptr, 0 * 16);
}
@@ -166,17 +166,17 @@ emit_fetch(struct spe_function *p,
float scale_signed = 0.0;
float scale_unsigned = 0.0;
- spe_lqd(p, v0, in_ptr, 0 + offset[0]);
- spe_lqd(p, v1, in_ptr, 1 + offset[0]);
- spe_lqd(p, v2, in_ptr, 2 + offset[0]);
- spe_lqd(p, v3, in_ptr, 3 + offset[0]);
+ spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16);
+ spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16);
+ spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16);
+ spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16);
offset[0] += 4;
switch (bytes) {
case 1:
scale_signed = 1.0f / 127.0f;
scale_unsigned = 1.0f / 255.0f;
- spe_lqd(p, tmp, shuf_ptr, 1);
+ spe_lqd(p, tmp, shuf_ptr, 1 * 16);
spe_shufb(p, v0, v0, v0, tmp);
spe_shufb(p, v1, v1, v1, tmp);
spe_shufb(p, v2, v2, v2, tmp);
@@ -185,7 +185,7 @@ emit_fetch(struct spe_function *p,
case 2:
scale_signed = 1.0f / 32767.0f;
scale_unsigned = 1.0f / 65535.0f;
- spe_lqd(p, tmp, shuf_ptr, 2);
+ spe_lqd(p, tmp, shuf_ptr, 2 * 16);
spe_shufb(p, v0, v0, v0, tmp);
spe_shufb(p, v1, v1, v1, tmp);
spe_shufb(p, v2, v2, v2, tmp);
@@ -241,11 +241,11 @@ emit_fetch(struct spe_function *p,
switch (count) {
case 1:
- spe_stqd(p, float_zero, out_ptr, 1);
+ spe_stqd(p, float_zero, out_ptr, 1 * 16);
case 2:
- spe_stqd(p, float_zero, out_ptr, 2);
+ spe_stqd(p, float_zero, out_ptr, 2 * 16);
case 3:
- spe_stqd(p, float_one, out_ptr, 3);
+ spe_stqd(p, float_one, out_ptr, 3 * 16);
}
if (float_zero != -1) {