summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christoph Bumiller <e0425955@student.tuwien.ac.at> 2009-05-28 00:17:14 +0200
committer Ben Skeggs <bskeggs@redhat.com> 2009-05-28 16:06:25 +1000
commit c11c107f7d8f289f0cb83968c265fb78a274024a (patch)
tree 29dbcd1e40f614b83bfa0424328c1eed3a2f8c7f
parent 87eea6e2b2df57bddb7155df794f02143e1ece40 (diff)
nv50: negate sources directly where supported
-rw-r--r-- src/gallium/drivers/nv50/nv50_program.c 110
1 files changed, 68 insertions, 42 deletions
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 89051069bc..aada285f2c 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -409,7 +409,8 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
static INLINE void
set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
{
- unsigned val = fui(pc->immd_buf[imm->hw]);
+ float f = pc->immd_buf[imm->hw];
+ unsigned val = fui(imm->neg ? -f : f);
set_long(pc, e);
/*XXX: can't be predicated - bits overlap.. catch cases where both
@@ -627,10 +628,19 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
check_swap_src_0_1(pc, &src0, &src1);
set_dst(pc, dst, e);
set_src_0(pc, src0, e);
- if (src1->type == P_IMMD && !is_long(e))
+ if (src1->type == P_IMMD && !is_long(e)) {
+ if (src0->neg)
+ e->inst[0] |= 0x00008000;
set_immd(pc, src1, e);
- else
+ } else {
set_src_1(pc, src1, e);
+ if (src0->neg ^ src1->neg) {
+ if (is_long(e))
+ e->inst[1] |= 0x08000000;
+ else
+ e->inst[0] |= 0x00008000;
+ }
+ }
emit(pc, e);
}
@@ -643,13 +653,16 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
e->inst[0] |= 0xb0000000;
- if (!pc->allow32)
+ check_swap_src_0_1(pc, &src0, &src1);
+
+ if (!pc->allow32 || src0->neg || src1->neg) {
set_long(pc, e);
+ e->inst[1] |= (src0->neg << 26) | (src1->neg << 27);
+ }
- check_swap_src_0_1(pc, &src0, &src1);
set_dst(pc, dst, e);
set_src_0(pc, src0, e);
- if (is_long(e) || src1->type == P_CONST || src1->type == P_ATTR)
+ if (src1->type == P_CONST || src1->type == P_ATTR || is_long(e))
set_src_2(pc, src1, e);
else
if (src1->type == P_IMMD)
@@ -678,25 +691,13 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
emit(pc, e);
}
-static void
+static INLINE void
emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] |= 0xb0000000;
-
- set_long(pc, e);
- if (check_swap_src_0_1(pc, &src0, &src1))
- e->inst[1] |= 0x04000000;
- else
- e->inst[1] |= 0x08000000;
-
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
- set_src_2(pc, src1, e);
-
- emit(pc, e);
+ src1->neg ^= 1;
+ emit_add(pc, dst, src0, src1);
+ src1->neg ^= 1;
}
static void
@@ -713,26 +714,21 @@ emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
set_src_1(pc, src1, e);
set_src_2(pc, src2, e);
+ if (src0->neg ^ src1->neg)
+ e->inst[1] |= 0x04000000;
+ if (src2->neg)
+ e->inst[1] |= 0x08000000;
+
emit(pc, e);
}
-static void
+static INLINE void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1, struct nv50_reg *src2)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] |= 0xe0000000;
- set_long(pc, e);
- e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
-
- check_swap_src_0_1(pc, &src0, &src1);
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
- set_src_1(pc, src1, e);
- set_src_2(pc, src2, e);
-
- emit(pc, e);
+ src2->neg ^= 1;
+ emit_mad(pc, dst, src0, src1, src2);
+ src2->neg ^= 1;
}
static void
@@ -976,6 +972,8 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
e->inst[1] = 0xc4014788;
set_src_0(pc, src, e);
set_pred_wr(pc, 1, r_pred, e);
+ if (src->neg)
+ e->inst[1] |= 0x20000000;
emit(pc, e);
/* This is probably KILP */
@@ -1141,6 +1139,25 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
e->inst[1] |= q;
}
+static boolean
+negate_supported(const struct tgsi_full_instruction *insn, int i)
+{
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_KIL:
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_MAD:
+ return TRUE;
+ case TGSI_OPCODE_POW:
+ return (i == 1) ? TRUE : FALSE;
+ default:
+ return FALSE;
+ }
+}
+
static struct nv50_reg *
tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
{
@@ -1159,7 +1176,8 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
}
static struct nv50_reg *
-tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
+tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
+ boolean neg)
{
struct nv50_reg *r = NULL;
struct nv50_reg *temp;
@@ -1214,14 +1232,21 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
r = temp;
break;
case TGSI_UTIL_SIGN_TOGGLE:
- temp = temp_temp(pc);
- emit_neg(pc, temp, r);
- r = temp;
+ if (neg)
+ r->neg = 1;
+ else {
+ temp = temp_temp(pc);
+ emit_neg(pc, temp, r);
+ r = temp;
+ }
break;
case TGSI_UTIL_SIGN_SET:
temp = temp_temp(pc);
emit_abs(pc, temp, r);
- emit_neg(pc, temp, temp);
+ if (neg)
+ temp->neg = 1;
+ else
+ emit_neg(pc, temp, temp);
r = temp;
break;
default:
@@ -1289,7 +1314,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
unit = fs->SrcRegister.Index;
for (c = 0; c < 4; c++)
- src[i][c] = tgsi_src(pc, c, fs);
+ src[i][c] = tgsi_src(pc, c, fs,
+ negate_supported(inst, i));
}
if (sat) {