summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2010-08-25 16:02:38 +1000
committerDave Airlie <airlied@redhat.com>2010-08-27 15:29:04 +1000
commitd8fb13bae30b6ff214c5d5158b9bcaf430f56b43 (patch)
treeb70c22de18d2c9b0f51713b1e0bd63351d358fea /src
parent36d91be75ea9b79878fdf4b789ea022d781e87f4 (diff)
r600g: optimise op2 and swapped op2 emission.
this makes op2 emission smaller, since it skips instructions that don't write to the dst. not sure if this could have unwanted side effects but try it and see.
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/r600/r600_shader.c85
1 files changed, 37 insertions, 48 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 5b43a1303b..1470bb5072 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -642,30 +642,44 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
return 0;
}
-static int tgsi_op2(struct r600_shader_ctx *ctx)
+static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
+ int lasti = 0;
+
+ for (i = 0; i < 4; i++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ lasti = i;
+ }
+ }
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
- if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu.dst.chan = i;
- } else {
- alu.inst = ctx->inst_info->r600_opcode;
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
+ alu.inst = ctx->inst_info->r600_opcode;
+ if (!swap) {
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
alu.src[j] = r600_src[j];
alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
}
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ } else {
+ alu.src[0] = r600_src[1];
+ alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
+
+ alu.src[1] = r600_src[0];
+ alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
}
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
@@ -678,7 +692,7 @@ static int tgsi_op2(struct r600_shader_ctx *ctx)
default:
break;
}
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -688,6 +702,16 @@ static int tgsi_op2(struct r600_shader_ctx *ctx)
return 0;
}
+static int tgsi_op2(struct r600_shader_ctx *ctx)
+{
+ return tgsi_op2_s(ctx, 0);
+}
+
+static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
+{
+ return tgsi_op2_s(ctx, 1);
+}
+
/*
* r600 - trunc to -PI..PI range
* r700 - normalize by dividing by 2PI
@@ -833,41 +857,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
return 0;
}
-static int tgsi_slt(struct r600_shader_ctx *ctx)
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
- struct r600_bc_alu alu;
- int i, r;
-
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu.dst.chan = i;
- } else {
- alu.inst = ctx->inst_info->r600_opcode;
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
- alu.src[0] = r600_src[1];
- alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- }
- if (i == 3) {
- alu.last = 1;
- }
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- return 0;
-}
-
static int tgsi_lit(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -1747,7 +1736,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
{TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
- {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
+ {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
{TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
{TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
{TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
@@ -1785,7 +1774,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
{TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
- {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_slt},
+ {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
{TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
{TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},