From c60cb25bfb15fc83e78d9f2c74646dcc5ad07792 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 16 Dec 2010 16:42:14 +0100 Subject: r600g: implement output modifiers and use them to further optimize LRP --- src/gallium/drivers/r600/r600_asm.c | 2 ++ src/gallium/drivers/r600/r600_asm.h | 1 + src/gallium/drivers/r600/r600_shader.c | 29 +++++++++++++++++++++++++++++ src/gallium/drivers/r600/r700_asm.c | 1 + 4 files changed, 33 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 3ee54a2af1..5be5e1823d 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -831,6 +831,7 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | @@ -1205,6 +1206,7 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs); fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs); fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write); + fprintf(stderr, "OMOD:%d ", alu->omod); fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate); fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate); } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index a5504ad39f..4763ce03ec 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -62,6 +62,7 @@ struct r600_bc_alu { unsigned bank_swizzle_force; u32 value[4]; int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + unsigned omod; }; struct r600_bc_tex { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index b853fd9dc8..78739bf89d 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -2004,6 +2004,35 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) r = tgsi_split_literal_constant(ctx, r600_src); if (r) return r; + + /* optimize if it's just an equal balance */ + if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); + alu.src[0] = r600_src[1]; + alu.src[0].chan = tgsi_chan(&inst->Src[1], i); + alu.src[1] = r600_src[2]; + alu.src[1].chan = tgsi_chan(&inst->Src[2], i); + alu.omod = 3; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + + alu.dst.chan = i; + if (i == lasti) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; + } + /* 1 - src0 */ for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 892dee86ba..3eb6fb50ca 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -61,6 +61,7 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | -- cgit v1.2.3