summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/nv50/nv50_program.c
diff options
context:
space:
mode:
author: Ben Skeggs <skeggsb@gmail.com> 2008-06-02 23:52:58 +1000
committer: Ben Skeggs <skeggsb@gmail.com> 2008-06-29 15:46:14 +1000
commit: 55b2fe1047b37d0d86641a252e1c745111030393 (patch)
tree: a6f706a62e8121e783902ea2ae74e1d9cc01b18e /src/gallium/drivers/nv50/nv50_program.c
parent: 22e0acc466947b203574c88f4964f61ef46ae3fd (diff)
nv50: drop the majority of the old shader code, reimplement, only MOV so far.
Diffstat (limited to 'src/gallium/drivers/nv50/nv50_program.c')
-rw-r--r-- src/gallium/drivers/nv50/nv50_program.c 411
1 files changed, 146 insertions, 265 deletions
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 97e3eab906..aa848faa49 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -10,13 +10,8 @@
#include "nv50_context.h"
#include "nv50_state.h"
-#define OP_MOV 0x001
-#define OP_INTERP 0x008
-#define OP_RCP 0x009
-#define OP_ADD 0x00b
-#define OP_MUL 0x00c
-#define OP_MAD 0x00e
#define NV50_SU_MAX_TEMP 64
+#define TX_FRAGPROG 0
struct nv50_reg {
enum {
@@ -106,181 +101,6 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r)
}
}
-#if 0
-static struct nv50_reg *
-constant(struct nv50_pc *pc, int pipe, int c, float v)
-{
- struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
- struct nv50_program *p = pc->p;
- struct nv50_program_data *pd;
- int idx;
-
- if (pipe >= 0) {
- for (idx = 0; idx < p->nr_consts; idx++) {
- if (p->consts[idx].index == pipe)
- return nv40_sr(NV40SR_CONST, idx);
- }
- }
-
- idx = p->nr_consts++;
- p->consts = realloc(p->consts, sizeof(*pd) * p->nr_consts);
- pd = &p->consts[idx];
-
- pd->index = pipe;
- pd->component = c;
- pd->value = v;
- return nv40_sr(NV40SR_CONST, idx);
-}
-#endif
-
-static void
-emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst,
- struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2)
-{
- struct nv50_program *p = pc->p;
- struct nv50_reg *tmp0 = NULL, *tmp = NULL, *tmp2 = NULL;
- unsigned inst[2] = { 0, 0 };
-
- /* Grr.. Fun restrictions on where attribs can be sourced from.. */
- if (src0 && (src0->type == P_CONST || src0->type == P_IMMD) &&
- (op == OP_MUL || op == OP_MAD)) {
- tmp = src1;
- src1 = src0;
- src0 = tmp;
- tmp = NULL;
- }
-
- if (src1 && src1->type == P_ATTR) {
- tmp = alloc_temp(pc, dst);
- tmp->neg = src1->neg; src1->neg = 0;
- emit(pc, 1, tmp, src1, NULL, NULL);
- src1 = tmp;
- }
-
- if (src2 && src2->type == P_ATTR) {
- tmp2 = alloc_temp(pc, dst);
- tmp2->neg = src2->neg; src2->neg = 0;
- emit(pc, 1, tmp2, src2, NULL, NULL);
- src2 = tmp2;
- }
-
- /* Get this out of the way first. What type of opcode do we
- * want/need to build?
- */
- if ((op & 0x3f0) || dst->type == P_RESULT ||
- (src0 && src0->type == P_ATTR) || src1 || src2)
- inst[0] |= 0x00000001;
-
- if (inst[0] & 0x00000001) {
- inst[0] |= ((op & 0xf) << 28);
- inst[1] |= ((op >> 4) << 26);
-
- alloc_reg(pc, dst);
- if (dst->type == P_RESULT)
- inst[1] |= 0x00000008;
- inst[0] |= (dst->hw << 2);
-
- if (src0) {
- if (src0->type == P_ATTR)
- inst[1] |= 0x00200000;
- else
- if (src0->type == P_CONST || src0->type == P_IMMD)
- assert(0);
- alloc_reg(pc, src0);
- inst[0] |= (src0->hw << 9);
- }
-
- if (src1) {
- if (src1->type == P_CONST || src1->type == P_IMMD) {
- if (src1->type == P_IMMD)
- inst[1] |= (NV50_CB_PMISC << 22);
- else
- inst[1] |= (NV50_CB_PVP << 22);
- inst[0] |= 0x00800000; /* src1 is const */
- /*XXX: does src1 come from "src2" now? */
- alloc_reg(pc, src1);
- inst[0] |= (src1->hw << 16);
- } else {
- alloc_reg(pc, src1);
- if (op == OP_MUL || op == OP_MAD)
- inst[0] |= (src1->hw << 16);
- else
- inst[1] |= (src1->hw << 14);
- }
- } else {
- inst[1] |= 0x0003c000; /*XXX FIXME */
- }
-
- if (src2) {
- if (src2->type == P_CONST || src2->type == P_IMMD) {
- if (src2->type == P_IMMD)
- inst[1] |= (NV50_CB_PMISC << 22);
- else
- inst[1] |= (NV50_CB_PVP << 22);
- inst[0] |= 0x01000000; /* src2 is const */
- inst[1] |= (src2->hw << 14);
- } else {
- alloc_reg(pc, src2);
- if (inst[0] & 0x00800000 || op == OP_MAD)
- inst[1] |= (src2->hw << 14);
- else
- inst[0] |= (src2->hw << 16);
- }
- }
-
- /*XXX: FIXME */
- switch (op) {
- case OP_ADD:
- case OP_MUL:
- case OP_RCP:
- case OP_MAD:
- /* 0x04000000 negates arg0 */
- /* 0x08000000 negates arg1 */
- /*XXX: true for !0xb also ? */
- if (src0 && src0->neg)
- inst[1] |= 0x04000000;
- if (src1 && src1->neg)
- inst[1] |= 0x08000000;
- inst[1] |= 0x00000780;
- break;
- default:
- /* 0x04000000 == arg0 32 bit, otherwise 16 bit */
- inst[1] |= 0x04000780;
- break;
- }
- } else {
- inst[0] |= ((op & 0xf) << 28);
-
- alloc_reg(pc, dst);
- inst[0] |= (dst->hw << 2);
-
- if (src0) {
- alloc_reg(pc, src0);
- inst[0] |= (src0->hw << 9);
- }
-
- /*XXX: NFI if this even works - probably not.. */
- if (src1) {
- alloc_reg(pc, src1);
- inst[0] |= (src1->hw << 16);
- }
- }
-
- if (tmp0) free_temp(pc, tmp0);
- if (tmp) free_temp(pc, tmp);
- if (tmp2) free_temp(pc, tmp2);
-
- if (inst[0] & 1) {
- p->insns_nr += 2;
- p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
- memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2);
- } else {
- p->insns_nr += 1;
- p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
- memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned));
- }
-}
-
static struct nv50_reg *
tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
{
@@ -327,11 +147,147 @@ tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src)
return NULL;
}
+/*
+ * Append one encoded instruction to the program's instruction stream.
+ *
+ * Bit 0 of inst[0] selects how much is appended: when it is set both
+ * words of inst are copied, otherwise only inst[0] is.
+ *
+ * The stream is grown through a temporary pointer so that a failed
+ * realloc() neither leaks the existing buffer nor leaves p->insns NULL
+ * while p->insns_nr is non-zero.  On allocation failure the instruction
+ * is dropped, an error is logged and the stream is left untouched.
+ */
+static void
+emit(struct nv50_pc *pc, unsigned *inst)
+{
+	struct nv50_program *p = pc->p;
+	const unsigned nr_words = (inst[0] & 1) ? 2 : 1;
+	void *grown;
+
+	grown = realloc(p->insns, sizeof(unsigned) * (p->insns_nr + nr_words));
+	if (!grown) {
+		NOUVEAU_ERR("out of memory, instruction dropped\n");
+		return;
+	}
+	p->insns = grown;
+
+	/* Only account for the new words once they have a home. */
+	memcpy(p->insns + p->insns_nr, inst, sizeof(unsigned) * nr_words);
+	p->insns_nr += nr_words;
+}
+
+/* Forward declaration: set_pred() and set_pred_wr() call set_long(), which
+ * is defined after them because it calls both of them in turn.
+ */
+static INLINE void set_long(struct nv50_pc *, unsigned *);
+
+/*
+ * Tell whether inst is flagged as a long instruction, i.e. bit 0 of
+ * inst[0] is set (emit() appends both words of such an instruction).
+ */
+static boolean
+is_long(unsigned *inst)
+{
+	return (inst[0] & 1) ? TRUE : FALSE;
+}
+
+/*
+ * Tell whether inst is a long instruction whose two lowest bits of
+ * inst[1] are both set - the marker set_immd() writes.
+ */
+static boolean
+is_immd(unsigned *inst)
+{
+	if (!is_long(inst))
+		return FALSE;
+
+	return ((inst[1] & 3) == 3) ? TRUE : FALSE;
+}
+
+/*
+ * Force inst to the long form, then overwrite two fields of inst[1]:
+ * the 5-bit field at bit 7 with pred and the 2-bit field at bit 12 with
+ * idx.  The arguments are not masked, so callers must pass values that
+ * fit those widths.
+ */
+static INLINE void
+set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst)
+{
+	const unsigned fields = (0x1f << 7) | (0x3 << 12);
+
+	set_long(pc, inst);
+	inst[1] = (inst[1] & ~fields) | (pred << 7) | (idx << 12);
+}
+
+/*
+ * Force inst to the long form, then overwrite two fields of inst[1]:
+ * the 2-bit field at bit 4 with idx and the single bit 6 with on.  The
+ * arguments are not masked, so callers must pass values that fit.
+ */
+static INLINE void
+set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst)
+{
+	const unsigned fields = (0x3 << 4) | (1 << 6);
+
+	set_long(pc, inst);
+	inst[1] = (inst[1] & ~fields) | (idx << 4) | (on << 6);
+}
+
+/*
+ * Promote inst to the long form if it is not long already, and give the
+ * second word its initial predicate fields: set_pred(0xf, 0) and
+ * set_pred_wr(0, 0).
+ *
+ * The long flag is set before the helpers are called; they call
+ * set_long() themselves and rely on it returning immediately.
+ */
+static INLINE void
+set_long(struct nv50_pc *pc, unsigned *inst)
+{
+	if (!is_long(inst)) {
+		inst[0] |= 1;
+		set_pred(pc, 0xf, 0, inst);
+		set_pred_wr(pc, 0, 0, inst);
+	}
+}
+
+/*
+ * Encode the destination register of inst.
+ *
+ * A P_RESULT destination forces the long form and sets bit 3 of inst[1].
+ * In every case the register is passed to alloc_reg() and its hw index is
+ * then OR'd into inst[0] starting at bit 2.
+ */
+static INLINE void
+set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst)
+{
+	switch (dst->type) {
+	case P_RESULT:
+		set_long(pc, inst);
+		inst[1] |= 0x00000008;
+		break;
+	default:
+		break;
+	}
+
+	/* alloc_reg() must run before dst->hw is read. */
+	alloc_reg(pc, dst);
+	inst[0] |= dst->hw << 2;
+}
+
+/*
+ * Encode an immediate operand directly into inst.
+ *
+ * The value is read from pc->immd_buf[imm->hw] and converted with fui().
+ * Its low 6 bits are OR'd into inst[0] at bit 16, the remaining bits into
+ * inst[1] starting at bit 2, and the two lowest bits of inst[1] are set
+ * (the pattern is_immd() tests for).
+ */
+static INLINE void
+set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
+{
+	const unsigned bits = fui(pc->immd_buf[imm->hw]); /* XXX */
+
+	set_long(pc, inst);
+	/*XXX: can't be predicated - bits overlap.. catch cases where both
+	 * are required and avoid them. */
+	/* Zero the predicate fields before the value bits are OR'd over them. */
+	set_pred(pc, 0, 0, inst);
+	set_pred_wr(pc, 0, 0, inst);
+
+	inst[0] |= (bits & 0x3f) << 16;
+	inst[1] |= ((bits >> 6) << 2) | 0x00000002 | 0x00000001;
+}
+
+/*
+ * Build and emit a single MOV of src into dst.
+ *
+ * The source is encoded in one of three ways:
+ *  - an immediate moved to a non-result register is embedded in the
+ *    instruction via set_immd();
+ *  - any other immediate or constant is addressed through a constant
+ *    buffer selector in inst[1] (NV50_CB_PMISC for immediates,
+ *    NV50_CB_PVP for constants) with src->hw as the index;
+ *  - everything else goes through alloc_reg(), with an extra flag in
+ *    inst[1] when the source is an attribute.
+ *
+ * The instruction is always emitted in the long form for now.
+ */
+static void
+emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+	unsigned inst[2] = { 0, 0 };
+
+	/* NOTE(review): presumably the MOV opcode in the top nibble - confirm. */
+	inst[0] |= 0x10000000;
+
+	set_dst(pc, dst, inst);
+
+	if (dst->type != P_RESULT && src->type == P_IMMD) {
+		set_immd(pc, src, inst);
+		/*XXX: 32-bit, but steals part of "half" reg space - need to
+		 * catch and handle this case if/when we do half-regs
+		 */
+		inst[0] |= 0x00008000;
+	} else if (src->type == P_IMMD || src->type == P_CONST) {
+		set_long(pc, inst);
+		if (src->type == P_IMMD)
+			inst[1] |= (NV50_CB_PMISC << 22);
+		else
+			inst[1] |= (NV50_CB_PVP << 22);
+		/* src->hw is used as-is here; alloc_reg() is not called. */
+		inst[0] |= (src->hw << 9);
+		inst[1] |= 0x20000000; /* src0 const? */
+	} else {
+		if (src->type == P_ATTR) {
+			set_long(pc, inst);
+			inst[1] |= 0x00200000;
+		}
+
+		alloc_reg(pc, src);
+		inst[0] |= (src->hw << 9);
+	}
+
+	/* We really should support "half" instructions here at some point,
+	 * but I don't feel confident enough about them yet.
+	 */
+	set_long(pc, inst);
+	/* is_long() is always true after the set_long() above; the test only
+	 * starts to matter once short encodings are allowed through.
+	 */
+	if (is_long(inst) && !is_immd(inst)) {
+		inst[1] |= 0x04000000; /* 32-bit */
+		inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
+	}
+
+	emit(pc, inst);
+}
+
static boolean
nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
{
const struct tgsi_full_instruction *inst = &tok->FullInstruction;
- struct nv50_reg *dst[4], *src[3][4], *none = NULL, *tmp;
+ struct nv50_reg *dst[4], *src[3][4];
unsigned mask;
int i, c;
@@ -352,85 +308,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ADD:
- for (c = 0; c < 4; c++) {
- if (mask & (1 << c)) {
- emit(pc, OP_ADD, dst[c],
- src[0][c], src[1][c], none);
- }
- }
- break;
- case TGSI_OPCODE_DP3:
- tmp = alloc_temp(pc, NULL);
- emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL);
- emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp);
- emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp);
- for (c = 0; c < 4; c++) {
- if (mask & (1 << c))
- emit(pc, OP_MOV, dst[c], tmp, none, none);
- }
- free_temp(pc, tmp);
- break;
- case TGSI_OPCODE_DP4:
- tmp = alloc_temp(pc, NULL);
- emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL);
- emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp);
- emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp);
- emit(pc, OP_MAD, tmp, src[0][3], src[1][3], tmp);
- for (c = 0; c < 4; c++) {
- if (mask & (1 << c))
- emit(pc, OP_MOV, dst[c], tmp, none, none);
- }
- free_temp(pc, tmp);
- break;
- case TGSI_OPCODE_DPH:
- tmp = alloc_temp(pc, NULL);
- emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL);
- emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp);
- emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp);
- emit(pc, OP_ADD, tmp, src[1][3], tmp, NULL);
- for (c = 0; c < 4; c++) {
- if (mask & (1 << c))
- emit(pc, OP_MOV, dst[c], tmp, none, none);
- }
- free_temp(pc, tmp);
- break;
- case TGSI_OPCODE_MAD:
- for (c = 0; c < 4; c++) {
- if (mask & (1 << c))
- emit(pc, OP_MAD, dst[c],
- src[0][c], src[1][c], src[2][c]);
- }
- break;
case TGSI_OPCODE_MOV:
- for (c = 0; c < 4; c++) {
- if (mask & (1 << c))
- emit(pc, OP_MOV, dst[c], src[0][c], none, none);
- }
- break;
- case TGSI_OPCODE_MUL:
- for (c = 0; c < 4; c++) {
- if (mask & (1 << c))
- emit(pc, OP_MUL, dst[c],
- src[0][c], src[1][c], none);
- }
- break;
- case TGSI_OPCODE_RCP:
- for (c = 0; c < 4; c++) {
- if (mask & (1 << c))
- emit(pc, OP_RCP, dst[c],
- src[0][c], none, none);
- }
- break;
- case TGSI_OPCODE_SUB:
- for (c = 0; c < 4; c++) {
- if (mask & (1 << c)) {
- src[1][c]->neg = 1;
- emit(pc, OP_ADD, dst[c],
- src[0][c], src[1][c], none);
- src[1][c]->neg = 0;
- }
- }
+		for (c = 0; c < 4; c++) {
+			/* Skip components whose bit is clear in mask
+			 * (presumably the destination write mask), so MOV
+			 * only touches the components the instruction
+			 * selects.
+			 */
+			if (!(mask & (1 << c)))
+				continue;
+			emit_mov(pc, dst[c], src[0][c]);
+		}
break;
case TGSI_OPCODE_END:
break;
@@ -648,6 +528,7 @@ out_cleanup:
static void
nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
{
+#if TX_FRAGPROG == 0
struct tgsi_parse_context pc;
tgsi_parse_init(&pc, p->pipe.tokens);
@@ -665,6 +546,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
p->insns[7] = 0x00020781;
} else
if (pc.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
+#endif
int i;
if (nv50_program_tx(p) == FALSE)
@@ -673,6 +555,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
for (i = 0; i < p->insns_nr; i++)
NOUVEAU_ERR("%d 0x%08x\n", i, p->insns[i]);
+#if TX_FRAGPROG == 0
} else {
NOUVEAU_ERR("invalid TGSI processor\n");
tgsi_parse_free(&pc);
@@ -680,6 +563,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
}
tgsi_parse_free(&pc);
+#endif
p->translated = TRUE;
}
@@ -758,11 +642,8 @@ nv50_fragprog_validate(struct nv50_context *nv50)
assert(0);
}
- if (!p->buffer)
- p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4);
- map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
- memcpy(map, p->insns, p->insns_nr * 4);
- ws->buffer_unmap(ws, p->buffer);
+ nv50_program_validate_data(nv50, p);
+ nv50_program_validate_code(nv50, p);
so = so_new(3, 2);
so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);