summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/r600/r600_asm.c21
-rw-r--r--src/gallium/drivers/r600/r600_asm.h17
-rw-r--r--src/gallium/drivers/r600/r600_shader.c232
3 files changed, 247 insertions, 23 deletions
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index d83bb34648..03fe950186 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -200,6 +200,10 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
}
if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
return 0;
}
@@ -414,6 +418,10 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
S_SQ_CF_WORD1_BARRIER(1) |
@@ -437,6 +445,9 @@ int r600_bc_build(struct r600_bc *bc)
unsigned addr;
int r;
+ if (bc->callstack[0].max > 0)
+ bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
+
/* first path compute addr of each CF block */
/* addr start after all the CF instructions */
addr = bc->cf_last->id + 2;
@@ -458,8 +469,10 @@ int r600_bc_build(struct r600_bc *bc)
case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case V_SQ_CF_WORD1_SQ_CF_INST_POP:
- /* hack */
- bc->nstack = 3;
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
break;
default:
R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -520,6 +533,10 @@ int r600_bc_build(struct r600_bc *bc)
break;
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case V_SQ_CF_WORD1_SQ_CF_INST_POP:
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index dbd885caf9..bb4f4b77b3 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -127,11 +127,23 @@ struct r600_bc_cf {
#define FC_NONE 0
#define FC_IF 1
#define FC_LOOP 2
+#define FC_REP 3
+#define FC_PUSH_VPM 4
+#define FC_PUSH_WQM 5
struct r600_cf_stack_entry {
int type;
struct r600_bc_cf *start;
- struct r600_bc_cf *mid; /* used to store the else point */
+ struct r600_bc_cf **mid; /* used to store the else point */
+ int num_mid;
+};
+
+#define SQ_MAX_CALL_DEPTH 0x00000020
+struct r600_cf_callstack {
+ unsigned fc_sp_before_entry;
+ int sub_desc_index;
+ int current;
+ int max;
};
struct r600_bc {
@@ -149,6 +161,9 @@ struct r600_bc {
u32 fc_sp;
struct r600_cf_stack_entry fc_stack[32];
+
+ unsigned call_sp;
+ struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
};
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index dabc7bec30..82f4d73e43 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1650,8 +1650,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
- uint32_t use_temp = 0;
- int i, r;
+ int r;
/* result.x = 2^floor(src); */
if (inst->Dst[0].Register.WriteMask & 1) {
@@ -1753,8 +1752,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu, *lalu;
- struct r600_bc_cf *last;
+ struct r600_bc_alu alu;
int r;
memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -1777,7 +1775,6 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
if (r)
return r;
-
return 0;
}
@@ -1788,29 +1785,158 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
return 0;
}
-static int tgsi_if(struct r600_shader_ctx *ctx)
+static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ switch(reason) {
+ case FC_PUSH_VPM:
+ ctx->bc->callstack[ctx->bc->call_sp].current--;
+ break;
+ case FC_PUSH_WQM:
+ case FC_LOOP:
+ ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
+ break;
+ case FC_REP:
+ /* TOODO : for 16 vp asic should -= 2; */
+ ctx->bc->callstack[ctx->bc->call_sp].current --;
+ break;
+ }
+}
- emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
+static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
+{
+ if (check_max_only) {
+ int diff;
+ switch (reason) {
+ case FC_PUSH_VPM:
+ diff = 1;
+ break;
+ case FC_PUSH_WQM:
+ diff = 4;
+ break;
+ }
+ if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
+ ctx->bc->callstack[ctx->bc->call_sp].max) {
+ ctx->bc->callstack[ctx->bc->call_sp].max =
+ ctx->bc->callstack[ctx->bc->call_sp].current + diff;
+ }
+ return;
+ }
+ switch (reason) {
+ case FC_PUSH_VPM:
+ ctx->bc->callstack[ctx->bc->call_sp].current++;
+ break;
+ case FC_PUSH_WQM:
+ case FC_LOOP:
+ ctx->bc->callstack[ctx->bc->call_sp].current += 4;
+ break;
+ case FC_REP:
+ ctx->bc->callstack[ctx->bc->call_sp].current++;
+ break;
+ }
+
+ if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
+ ctx->bc->callstack[ctx->bc->call_sp].max) {
+ ctx->bc->callstack[ctx->bc->call_sp].max =
+ ctx->bc->callstack[ctx->bc->call_sp].current;
+ }
+}
+
+static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
+{
+ struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
+
+ sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
+ sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
+ sp->mid[sp->num_mid] = ctx->bc->cf_last;
+ sp->num_mid++;
+}
+static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
+{
ctx->bc->fc_sp++;
- ctx->bc->fc_stack[ctx->bc->fc_sp].type = FC_IF;
- ctx->bc->fc_stack[ctx->bc->fc_sp].mid = NULL;
+ ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+}
+
+static void fc_poplevel(struct r600_shader_ctx *ctx)
+{
+ struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
+ if (sp->mid) {
+ free(sp->mid);
+ sp->mid = NULL;
+ }
+ sp->num_mid = 0;
+ sp->start = NULL;
+ sp->type = 0;
+ ctx->bc->fc_sp--;
+}
+
+#if 0
+static int emit_return(struct r600_shader_ctx *ctx)
+{
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
+ return 0;
+}
+
+static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
+{
+
r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+ ctx->bc->cf_last->pop_count = pops;
+ /* TODO work out offset */
+ return 0;
+}
- ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
+{
+ return 0;
+}
+
+static void emit_testflag(struct r600_shader_ctx *ctx)
+{
+
+}
+
+static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
+{
+ emit_testflag(ctx);
+ emit_jump_to_offset(ctx, 1, 4);
+ emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
+ pops(ctx, ifidx + 1);
+ emit_return(ctx);
+}
+
+static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
+{
+ emit_testflag(ctx);
+
+ r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+ ctx->bc->cf_last->pop_count = 1;
+
+ fc_set_mid(ctx, fc_sp);
+
+ pops(ctx, 1);
+}
+#endif
+
+static int tgsi_if(struct r600_shader_ctx *ctx)
+{
+ emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
+
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+
+ fc_pushlevel(ctx, FC_IF);
+
+ callstack_check_depth(ctx, FC_PUSH_VPM, 0);
return 0;
}
static int tgsi_else(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
ctx->bc->cf_last->pop_count = 1;
- /* fixup mid */
- ctx->bc->fc_stack[ctx->bc->fc_sp].mid = ctx->bc->cf_last;
+ fc_set_mid(ctx, ctx->bc->fc_sp);
ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
return 0;
}
@@ -1827,10 +1953,76 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
} else {
- ctx->bc->fc_stack[ctx->bc->fc_sp].mid->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
}
- ctx->bc->fc_sp--;
+ fc_poplevel(ctx);
+
+ callstack_decrease_current(ctx, FC_PUSH_VPM);
+ return 0;
+}
+
+static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
+{
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL);
+
+ fc_pushlevel(ctx, FC_LOOP);
+ /* check stack depth */
+ callstack_check_depth(ctx, FC_LOOP, 0);
+ return 0;
+}
+
+static int tgsi_endloop(struct r600_shader_ctx *ctx)
+{
+ int i;
+
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END);
+
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
+ R600_ERR("loop/endloop in shader code are not paired.\n");
+ return -EINVAL;
+ }
+
+ /* fixup loop pointers - from r600isa
+ LOOP END points to CF after LOOP START,
+ LOOP START point to CF after LOOP END
+ BRK/CONT point to LOOP END CF
+ */
+ ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
+
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
+
+ for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
+ ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
+ }
+ /* TODO add LOOPRET support */
+ fc_poplevel(ctx);
+ callstack_decrease_current(ctx, FC_LOOP);
+ return 0;
+}
+
+static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
+{
+ unsigned int fscp;
+
+ for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
+ {
+ if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+ break;
+ }
+
+ if (fscp == 0) {
+ R600_ERR("Break not inside loop/endloop pair\n");
+ return -EINVAL;
+ }
+
+ r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+ ctx->bc->cf_last->pop_count = 1;
+
+ fc_set_mid(ctx, fscp);
+
+ pops(ctx, 1);
+ callstack_check_depth(ctx, FC_PUSH_VPM, 1);
return 0;
}
@@ -1911,7 +2103,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
{TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
{TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
/* gap */
{75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -1937,12 +2129,12 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
{TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
{TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
{TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},